import 'dart:math';

import 'package:cloud_firestore/cloud_firestore.dart';

import '../models/content_chunk.dart';
import '../utils/logger.dart';

/// Service for vector embeddings and similarity search.
///
/// Embeddings are produced by a mock, hash-based generator and stored next to
/// the chunk text in the `contentChunks` Firestore collection. Similarity
/// search fetches a bounded set of candidates from Firestore and ranks them
/// on the client using cosine similarity.
class VectorService {
  static final FirebaseFirestore _firestore = FirebaseFirestore.instance;

  /// Number of components in every embedding produced by this service.
  static const int _embeddingDimension = 384;

  /// Firestore collection that stores the content chunks.
  static const String _chunksCollection = 'contentChunks';

  /// Maximum number of candidate documents fetched per similarity search.
  static const int _candidateLimit = 100;

  /// Maximum number of query characters echoed into the log.
  static const int _logPreviewLength = 50;

  /// Generates an embedding for [text].
  ///
  /// This is a mock implementation; a production version would call a real
  /// embedding model. The vector is derived from `text.hashCode` and the
  /// component index, with a small boost added when [text] contains certain
  /// educational terms, and is then normalized to unit length.
  ///
  /// Returns a zero vector of the same dimension if generation fails.
  static List<double> generateEmbedding(String text) {
    try {
      Logger.info('Generating embedding for text of length: ${text.length}');

      // NOTE(review): the vector depends on String.hashCode, which may not be
      // identical across platforms or SDK versions - confirm before relying
      // on embeddings persisted from a different runtime.
      final hash = text.hashCode;

      // The keyword checks do not depend on the component index, so they are
      // evaluated once instead of once per component.
      final textLower = text.toLowerCase();
      final hasPlantTerms =
          textLower.contains('fotossíntese') || textLower.contains('plantas');
      final hasEnergyTerms =
          textLower.contains('energia') || textLower.contains('luz');
      final hasBiologyTerms =
          textLower.contains('biologia') || textLower.contains('processo');

      final embedding = List<double>.generate(_embeddingDimension, (i) {
        // Deterministic base value in [-1, 1) from the hash and the position.
        final seed = (hash * (i + 1)) % 1000;
        final value = (seed / 1000.0 - 0.5) * 2.0;

        var semanticBoost = 0.0;
        if (hasPlantTerms) {
          semanticBoost += 0.3 * (i % 10) / 10.0;
        }
        if (hasEnergyTerms) {
          semanticBoost += 0.2 * (i % 8) / 8.0;
        }
        if (hasBiologyTerms) {
          semanticBoost += 0.1 * (i % 12) / 12.0;
        }

        return value + semanticBoost;
      });

      // Normalize the vector to unit length.
      final norm = sqrt(embedding.map((x) => x * x).reduce((a, b) => a + b));
      if (norm == 0) {
        // Dividing by zero would fill the vector with NaN.
        return List<double>.filled(_embeddingDimension, 0.0);
      }
      return embedding.map((x) => x / norm).toList();
    } catch (e) {
      Logger.error('Error generating embedding: $e');
      // Return zero vector as fallback.
      return List<double>.filled(_embeddingDimension, 0.0);
    }
  }

  /// Calculates the cosine similarity between [vec1] and [vec2].
  ///
  /// Returns `0.0` when either vector has zero magnitude.
  ///
  /// Throws an [ArgumentError] if the vectors differ in length.
  static double cosineSimilarity(List<double> vec1, List<double> vec2) {
    if (vec1.length != vec2.length) {
      throw ArgumentError('Vectors must be of same length');
    }

    double dotProduct = 0.0;
    double norm1 = 0.0;
    double norm2 = 0.0;

    for (int i = 0; i < vec1.length; i++) {
      dotProduct += vec1[i] * vec2[i];
      norm1 += vec1[i] * vec1[i];
      norm2 += vec2[i] * vec2[i];
    }

    if (norm1 == 0 || norm2 == 0) return 0.0;

    return dotProduct / (sqrt(norm1) * sqrt(norm2));
  }

  /// Searches for the content chunks most similar to [queryEmbedding].
  ///
  /// Only active chunks are considered. The optional [subject], [concept],
  /// [grade], [minDifficulty] and [maxDifficulty] arguments are applied as
  /// Firestore filters. A bounded number of candidates is fetched, scored
  /// with [cosineSimilarity], filtered by [threshold], and the best [k] are
  /// returned in descending order of similarity.
  ///
  /// Stored chunks whose embedding length differs from [queryEmbedding] are
  /// skipped. Returns an empty list if the search fails.
  static Future<List<ContentChunk>> searchSimilar({
    required List<double> queryEmbedding,
    String? subject,
    String? concept,
    int? grade,
    double? minDifficulty,
    double? maxDifficulty,
    int k = 5,
    double threshold = 0.3,
  }) async {
    try {
      Logger.info(
        'Searching for similar content with k=$k, threshold=$threshold',
      );

      Query query = _firestore
          .collection(_chunksCollection)
          .where('isActive', isEqualTo: true);

      // Apply filters.
      if (subject != null) {
        query = query.where('subject', isEqualTo: subject);
      }
      if (concept != null) {
        query = query.where('concept', isEqualTo: concept);
      }
      if (grade != null) {
        query = query.where('grade', isEqualTo: grade);
      }
      if (minDifficulty != null) {
        query = query.where(
          'difficulty',
          isGreaterThanOrEqualTo: minDifficulty,
        );
      }
      if (maxDifficulty != null) {
        query = query.where('difficulty', isLessThanOrEqualTo: maxDifficulty);
      }

      // Fetch more candidates than k so the threshold filter has material.
      final querySnapshot = await query.limit(_candidateLimit).get();

      // A list of entries is used rather than a map keyed by chunk so the
      // result does not depend on how ContentChunk defines equality.
      final scoredChunks = <MapEntry<ContentChunk, double>>[];

      for (final doc in querySnapshot.docs) {
        final chunk = ContentChunk.fromFirestore(
          doc.data() as Map<String, dynamic>,
          doc.id,
        );

        // One malformed stored embedding must not abort the whole search.
        if (chunk.embedding.length != queryEmbedding.length) {
          Logger.error(
            'Skipping chunk ${doc.id}: embedding length '
            '${chunk.embedding.length} does not match query length '
            '${queryEmbedding.length}',
          );
          continue;
        }

        final similarity = cosineSimilarity(queryEmbedding, chunk.embedding);
        if (similarity >= threshold) {
          scoredChunks.add(MapEntry(chunk, similarity));
        }
      }

      // Sort by similarity, best first, and take the top k.
      scoredChunks.sort((a, b) => b.value.compareTo(a.value));

      return scoredChunks.take(k).map((entry) => entry.key).toList();
    } catch (e) {
      Logger.error('Error searching similar content: $e');
      return [];
    }
  }

  /// Searches by free-text [query].
  ///
  /// Generates an embedding for [query] and delegates to [searchSimilar]
  /// with the given filters and [k]. Returns an empty list on failure.
  static Future<List<ContentChunk>> searchByText({
    required String query,
    String? subject,
    String? concept,
    int? grade,
    double? minDifficulty,
    double? maxDifficulty,
    int k = 5,
  }) async {
    try {
      // Only truncate when the query is actually longer than the preview;
      // substring(0, 50) on a shorter string throws a RangeError.
      final preview = query.length > _logPreviewLength
          ? '${query.substring(0, _logPreviewLength)}...'
          : query;
      Logger.info('Searching by text: "$preview"');

      // Generate embedding for query.
      final queryEmbedding = generateEmbedding(query);

      // Search for similar content.
      return await searchSimilar(
        queryEmbedding: queryEmbedding,
        subject: subject,
        concept: concept,
        grade: grade,
        minDifficulty: minDifficulty,
        maxDifficulty: maxDifficulty,
        k: k,
      );
    } catch (e) {
      Logger.error('Error searching by text: $e');
      return [];
    }
  }

  /// Generates one embedding per entry of [texts], in the same order.
  ///
  /// On failure, returns one independent zero vector per input text.
  static Future<List<List<double>>> batchGenerateEmbeddings(
    List<String> texts,
  ) async {
    try {
      Logger.info('Generating embeddings for ${texts.length} texts');

      return [for (final text in texts) generateEmbedding(text)];
    } catch (e) {
      Logger.error('Error generating batch embeddings: $e');
      // List.generate gives each slot its own list; List.filled would place
      // the same inner list instance in every slot.
      return List.generate(
        texts.length,
        (_) => List<double>.filled(_embeddingDimension, 0.0),
      );
    }
  }

  /// Regenerates the embedding of chunk [chunkId] from [text] and stores it.
  ///
  /// Also sets the chunk's `lastUpdated` field to the server timestamp.
  ///
  /// Throws an [Exception] if the update fails.
  static Future<void> updateChunkEmbedding(String chunkId, String text) async {
    try {
      Logger.info('Updating embedding for chunk: $chunkId');

      final embedding = generateEmbedding(text);

      await _firestore.collection(_chunksCollection).doc(chunkId).update({
        'embedding': embedding,
        'lastUpdated': FieldValue.serverTimestamp(),
      });

      Logger.info('Embedding updated for chunk: $chunkId');
    } catch (e) {
      Logger.error('Error updating chunk embedding: $e');
      throw Exception('Failed to update chunk embedding: $e');
    }
  }

  /// Returns the active chunks of content [contentId], ordered by creation
  /// time.
  ///
  /// Returns an empty list if the query fails.
  static Future<List<ContentChunk>> getContentChunks(String contentId) async {
    try {
      Logger.info('Getting chunks for content: $contentId');

      final querySnapshot = await _firestore
          .collection(_chunksCollection)
          .where('contentId', isEqualTo: contentId)
          .where('isActive', isEqualTo: true)
          .orderBy('createdAt')
          .get();

      return querySnapshot.docs
          .map((doc) => ContentChunk.fromFirestore(doc.data(), doc.id))
          .toList();
    } catch (e) {
      Logger.error('Error getting content chunks: $e');
      return [];
    }
  }

  /// Creates a content chunk document, including its generated embedding.
  ///
  /// The optional [subConcept], [pageNumber] and [section] fields are only
  /// written when provided; [metadata] defaults to an empty map. The chunk is
  /// stored as active with a server-side `createdAt` timestamp.
  ///
  /// Returns the id of the new document. Throws an [Exception] if the write
  /// fails.
  static Future<String> createContentChunk({
    required String contentId,
    required String text,
    required String subject,
    required String concept,
    String? subConcept,
    required String unit,
    required double difficulty,
    required int grade,
    required String sourceDocument,
    Map<String, dynamic>? metadata,
    int? pageNumber,
    String? section,
  }) async {
    try {
      Logger.info('Creating content chunk for: $concept');

      // Generate embedding.
      final embedding = generateEmbedding(text);

      // Create chunk document.
      final chunkData = <String, dynamic>{
        'contentId': contentId,
        'text': text,
        'subject': subject,
        'concept': concept,
        if (subConcept != null) 'subConcept': subConcept,
        'unit': unit,
        'difficulty': difficulty,
        'grade': grade,
        'embedding': embedding,
        'sourceDocument': sourceDocument,
        'metadata': metadata ?? <String, dynamic>{},
        'createdAt': FieldValue.serverTimestamp(),
        'isActive': true,
        if (pageNumber != null) 'pageNumber': pageNumber,
        if (section != null) 'section': section,
      };

      final docRef =
          await _firestore.collection(_chunksCollection).add(chunkData);

      final chunkId = docRef.id;
      Logger.info('Content chunk created: $chunkId');

      return chunkId;
    } catch (e) {
      Logger.error('Error creating content chunk: $e');
      throw Exception('Failed to create content chunk: $e');
    }
  }

  /// Deletes every chunk (active or not) that belongs to content [contentId].
  ///
  /// All deletions are committed in a single batched write.
  ///
  /// Throws an [Exception] if the query or the commit fails.
  static Future<void> deleteContentChunks(String contentId) async {
    try {
      Logger.info('Deleting chunks for content: $contentId');

      final querySnapshot = await _firestore
          .collection(_chunksCollection)
          .where('contentId', isEqualTo: contentId)
          .get();

      final batch = _firestore.batch();
      for (final doc in querySnapshot.docs) {
        batch.delete(doc.reference);
      }

      await batch.commit();

      Logger.info('Content chunks deleted: ${querySnapshot.docs.length}');
    } catch (e) {
      Logger.error('Error deleting content chunks: $e');
      throw Exception('Failed to delete content chunks: $e');
    }
  }

  /// Returns aggregate statistics over all active chunks.
  ///
  /// The result contains `totalChunks`, per-value counts under `subjects`,
  /// `concepts` and `grades`, and the `embeddingDimension`. Chunks without a
  /// subject or concept are counted under `'Unknown'`; chunks without a grade
  /// are counted under `0`.
  ///
  /// Returns zeroed statistics if the query fails.
  static Future<Map<String, dynamic>> getVectorStats() async {
    try {
      Logger.info('Getting vector statistics');

      final querySnapshot = await _firestore
          .collection(_chunksCollection)
          .where('isActive', isEqualTo: true)
          .get();

      final totalChunks = querySnapshot.docs.length;
      final subjects = <String, int>{};
      final concepts = <String, int>{};
      final grades = <int, int>{};

      for (final doc in querySnapshot.docs) {
        final data = doc.data();
        final subject = data['subject'] as String? ?? 'Unknown';
        final concept = data['concept'] as String? ?? 'Unknown';
        // Read as num so a grade stored as a double does not fail the cast
        // and zero out the whole statistics result.
        final grade = (data['grade'] as num?)?.toInt() ?? 0;

        subjects[subject] = (subjects[subject] ?? 0) + 1;
        concepts[concept] = (concepts[concept] ?? 0) + 1;
        grades[grade] = (grades[grade] ?? 0) + 1;
      }

      return {
        'totalChunks': totalChunks,
        'subjects': subjects,
        'concepts': concepts,
        'grades': grades,
        'embeddingDimension': _embeddingDimension,
      };
    } catch (e) {
      Logger.error('Error getting vector stats: $e');
      return {
        'totalChunks': 0,
        'subjects': <String, int>{},
        'concepts': <String, int>{},
        'grades': <int, int>{},
        'embeddingDimension': _embeddingDimension,
      };
    }
  }
}