IA e pequenas coisas a funcionar
This commit is contained in:
358
lib/core/services/vector_service.dart
Normal file
358
lib/core/services/vector_service.dart
Normal file
@@ -0,0 +1,358 @@
|
||||
import 'dart:math';
|
||||
import 'package:cloud_firestore/cloud_firestore.dart';
|
||||
import '../models/content_chunk.dart';
|
||||
import '../utils/logger.dart';
|
||||
|
||||
/// Service for vector embeddings and similarity search
|
||||
class VectorService {
|
||||
static final FirebaseFirestore _firestore = FirebaseFirestore.instance;
|
||||
|
||||
/// Generates a mock, deterministic 384-dimensional embedding for [text].
///
/// This is a placeholder rather than a real embedding model: each component
/// is derived from `text.hashCode` and the component index, then nudged by a
/// small boost when the lower-cased text contains certain educational
/// keywords. The resulting vector is scaled to unit length.
///
/// Returns a 384-element zero vector if anything throws during generation.
///
/// NOTE(review): the values depend on `String.hashCode`; confirm that it is
/// stable across every platform that reads or writes stored embeddings.
static List<double> generateEmbedding(String text) {
  try {
    Logger.info('Generating embedding for text of length: ${text.length}');

    // Mock embedding generation - in production would use OpenAI, Cohere, or
    // a local model. The output is a pure function of the text content.
    final hash = text.hashCode;

    // The keyword checks do not depend on the component index, so they are
    // evaluated once here instead of once per dimension.
    final textLower = text.toLowerCase();
    final mentionsPlants =
        textLower.contains('fotossíntese') || textLower.contains('plantas');
    final mentionsEnergy =
        textLower.contains('energia') || textLower.contains('luz');
    final mentionsBiology =
        textLower.contains('biologia') || textLower.contains('processo');

    // Generate the 384 components.
    final embedding = List.generate(384, (i) {
      // `%` with a positive divisor is never negative in Dart, so seed is in
      // 0..999 and value falls in [-1, 1).
      final seed = (hash * (i + 1)) % 1000;
      final value = (seed / 1000.0 - 0.5) * 2.0;

      // Texts sharing a keyword group get the same index-dependent boost,
      // which pulls their vectors closer together.
      double semanticBoost = 0.0;
      if (mentionsPlants) {
        semanticBoost += 0.3 * (i % 10) / 10.0;
      }
      if (mentionsEnergy) {
        semanticBoost += 0.2 * (i % 8) / 8.0;
      }
      if (mentionsBiology) {
        semanticBoost += 0.1 * (i % 12) / 12.0;
      }

      return value + semanticBoost;
    });

    // Normalize the vector to unit length.
    final norm = sqrt(embedding.map((x) => x * x).reduce((a, b) => a + b));
    return embedding.map((x) => x / norm).toList();
  } catch (e) {
    Logger.error('Error generating embedding: $e');
    // Return zero vector as fallback
    return List.filled(384, 0.0);
  }
}
|
||||
|
||||
/// Returns the cosine similarity of [vec1] and [vec2].
///
/// The result is the dot product divided by the product of the two Euclidean
/// norms. If either vector has zero norm (including two empty vectors), the
/// result is `0.0`.
///
/// Throws an [ArgumentError] when the vectors differ in length.
static double cosineSimilarity(List<double> vec1, List<double> vec2) {
  final length = vec1.length;
  if (length != vec2.length) {
    throw ArgumentError('Vectors must be of same length');
  }

  var dot = 0.0;
  var sumSquares1 = 0.0;
  var sumSquares2 = 0.0;

  // Single pass: accumulate the dot product and both squared norms together.
  var index = 0;
  while (index < length) {
    final a = vec1[index];
    final b = vec2[index];
    dot += a * b;
    sumSquares1 += a * a;
    sumSquares2 += b * b;
    index++;
  }

  // A zero-norm vector has no direction; report no similarity rather than
  // dividing by zero.
  final hasZeroNorm = sumSquares1 == 0 || sumSquares2 == 0;
  return hasZeroNorm ? 0.0 : dot / (sqrt(sumSquares1) * sqrt(sumSquares2));
}
|
||||
|
||||
/// Searches the `contentChunks` collection for chunks similar to
/// [queryEmbedding].
///
/// Fetches up to 100 active chunks that match the optional [subject],
/// [concept], [grade], [minDifficulty] and [maxDifficulty] filters, scores
/// each one against [queryEmbedding] with [cosineSimilarity], discards those
/// scoring below [threshold], and returns at most [k] chunks ordered from
/// most to least similar.
///
/// A chunk whose stored embedding has a different length than
/// [queryEmbedding] is skipped and logged, so a single malformed document
/// does not empty the whole result.
///
/// Returns an empty list if the query or the processing throws; the error is
/// logged.
static Future<List<ContentChunk>> searchSimilar({
  required List<double> queryEmbedding,
  String? subject,
  String? concept,
  int? grade,
  double? minDifficulty,
  double? maxDifficulty,
  int k = 5,
  double threshold = 0.3,
}) async {
  try {
    Logger.info(
      'Searching for similar content with k=$k, threshold=$threshold',
    );

    Query query = _firestore
        .collection('contentChunks')
        .where('isActive', isEqualTo: true)
        .limit(100); // Get more candidates for better filtering

    // Apply the optional filters.
    if (subject != null) {
      query = query.where('subject', isEqualTo: subject);
    }
    if (concept != null) {
      query = query.where('concept', isEqualTo: concept);
    }
    if (grade != null) {
      query = query.where('grade', isEqualTo: grade);
    }
    if (minDifficulty != null) {
      query = query.where(
        'difficulty',
        isGreaterThanOrEqualTo: minDifficulty,
      );
    }
    if (maxDifficulty != null) {
      query = query.where('difficulty', isLessThanOrEqualTo: maxDifficulty);
    }

    final querySnapshot = await query.get();

    // Score every candidate. A list of entries is used (not a map keyed by
    // chunk) so the result never depends on ContentChunk's ==/hashCode.
    final scoredChunks = <MapEntry<ContentChunk, double>>[];

    for (final doc in querySnapshot.docs) {
      final chunk = ContentChunk.fromFirestore(
        doc.data() as Map<String, dynamic>,
        doc.id,
      );

      // cosineSimilarity throws on a length mismatch; skip the offending
      // chunk instead of letting it abort the entire search.
      if (chunk.embedding.length != queryEmbedding.length) {
        Logger.error(
          'Skipping chunk ${doc.id}: embedding length '
          '${chunk.embedding.length} does not match query length '
          '${queryEmbedding.length}',
        );
        continue;
      }

      final similarity = cosineSimilarity(queryEmbedding, chunk.embedding);
      if (similarity >= threshold) {
        scoredChunks.add(MapEntry(chunk, similarity));
      }
    }

    // Sort by similarity (highest first) and take the top k.
    scoredChunks.sort((a, b) => b.value.compareTo(a.value));
    return scoredChunks.take(k).map((entry) => entry.key).toList();
  } catch (e) {
    Logger.error('Error searching similar content: $e');
    return [];
  }
}
|
||||
|
||||
/// Searches for content chunks similar to the free-text [query].
///
/// Generates an embedding for [query] with [generateEmbedding] and forwards
/// it, together with the optional [subject], [concept], [grade],
/// [minDifficulty], [maxDifficulty] filters and the result count [k], to
/// [searchSimilar]. The similarity threshold is left at [searchSimilar]'s
/// default.
///
/// Returns an empty list if anything throws; the error is logged.
static Future<List<ContentChunk>> searchByText({
  required String query,
  String? subject,
  String? concept,
  int? grade,
  double? minDifficulty,
  double? maxDifficulty,
  int k = 5,
}) async {
  try {
    // Only truncate when the query is actually longer than the preview;
    // substring(0, 50) on a shorter string throws a RangeError, which would
    // otherwise make every short query return no results.
    const previewLength = 50;
    final preview = query.length > previewLength
        ? '${query.substring(0, previewLength)}...'
        : query;
    Logger.info('Searching by text: "$preview"');

    // Generate embedding for query
    final queryEmbedding = generateEmbedding(query);

    // Search for similar content. Awaited here so that failures are handled
    // by this method's catch block.
    return await searchSimilar(
      queryEmbedding: queryEmbedding,
      subject: subject,
      concept: concept,
      grade: grade,
      minDifficulty: minDifficulty,
      maxDifficulty: maxDifficulty,
      k: k,
    );
  } catch (e) {
    Logger.error('Error searching by text: $e');
    return [];
  }
}
|
||||
|
||||
/// Generates one embedding per entry of [texts], in the same order.
///
/// Each embedding is produced by [generateEmbedding]. If generation throws,
/// the error is logged and a list of 384-element zero vectors (one
/// independent list per input text) is returned instead.
static Future<List<List<double>>> batchGenerateEmbeddings(
  List<String> texts,
) async {
  try {
    Logger.info('Generating embeddings for ${texts.length} texts');

    return [for (final text in texts) generateEmbedding(text)];
  } catch (e) {
    Logger.error('Error generating batch embeddings: $e');
    // Build a separate zero vector for every text. List.filled with a list
    // as the fill value would place the same instance in every slot, so
    // mutating one fallback embedding would silently change all of them.
    return List.generate(texts.length, (_) => List.filled(384, 0.0));
  }
}
|
||||
|
||||
/// Recomputes the embedding of chunk [chunkId] from [text] and stores it.
///
/// Writes the new `embedding` and a server-side `lastUpdated` timestamp to
/// the chunk's document in the `contentChunks` collection.
///
/// Throws an [Exception] wrapping the original error if the update fails.
static Future<void> updateChunkEmbedding(String chunkId, String text) async {
  try {
    Logger.info('Updating embedding for chunk: $chunkId');

    final changes = {
      'embedding': generateEmbedding(text),
      'lastUpdated': FieldValue.serverTimestamp(),
    };
    final chunkRef = _firestore.collection('contentChunks').doc(chunkId);
    await chunkRef.update(changes);

    Logger.info('Embedding updated for chunk: $chunkId');
  } catch (e) {
    Logger.error('Error updating chunk embedding: $e');
    throw Exception('Failed to update chunk embedding: $e');
  }
}
|
||||
|
||||
/// Fetches the active chunks that belong to [contentId].
///
/// Queries `contentChunks` for documents whose `contentId` matches and whose
/// `isActive` flag is true, ordered by the `createdAt` field, and converts
/// each document with [ContentChunk.fromFirestore].
///
/// Returns an empty list if the query or the conversion throws; the error is
/// logged.
static Future<List<ContentChunk>> getContentChunks(String contentId) async {
  try {
    Logger.info('Getting chunks for content: $contentId');

    final activeChunksQuery = _firestore
        .collection('contentChunks')
        .where('contentId', isEqualTo: contentId)
        .where('isActive', isEqualTo: true)
        .orderBy('createdAt');
    final snapshot = await activeChunksQuery.get();

    return [
      for (final doc in snapshot.docs)
        ContentChunk.fromFirestore(doc.data(), doc.id),
    ];
  } catch (e) {
    Logger.error('Error getting content chunks: $e');
    return [];
  }
}
|
||||
|
||||
/// Stores a new content chunk, including its embedding, and returns its id.
///
/// The embedding is computed from [text] with [generateEmbedding]. The
/// document is added to the `contentChunks` collection with `isActive` set to
/// true and `createdAt` set to the server timestamp. [subConcept],
/// [pageNumber] and [section] are written only when non-null; a null
/// [metadata] is stored as an empty map.
///
/// Returns the id of the newly created document.
///
/// Throws an [Exception] wrapping the original error if creation fails.
static Future<String> createContentChunk({
  required String contentId,
  required String text,
  required String subject,
  required String concept,
  String? subConcept,
  required String unit,
  required double difficulty,
  required int grade,
  required String sourceDocument,
  Map<String, dynamic>? metadata,
  int? pageNumber,
  String? section,
}) async {
  try {
    Logger.info('Creating content chunk for: $concept');

    // Compute the vector first so it can be stored alongside the text.
    final vector = generateEmbedding(text);

    // Assemble the document; optional fields are omitted when absent.
    final document = {
      'contentId': contentId,
      'text': text,
      'subject': subject,
      'concept': concept,
      if (subConcept != null) 'subConcept': subConcept,
      'unit': unit,
      'difficulty': difficulty,
      'grade': grade,
      'embedding': vector,
      'sourceDocument': sourceDocument,
      'metadata': metadata ?? {},
      'createdAt': FieldValue.serverTimestamp(),
      'isActive': true,
      if (pageNumber != null) 'pageNumber': pageNumber,
      if (section != null) 'section': section,
    };

    final created = await _firestore.collection('contentChunks').add(document);

    Logger.info('Content chunk created: ${created.id}');
    return created.id;
  } catch (e) {
    Logger.error('Error creating content chunk: $e');
    throw Exception('Failed to create content chunk: $e');
  }
}
|
||||
|
||||
/// Deletes every chunk document that belongs to [contentId].
///
/// Matches on `contentId` only, so inactive chunks are deleted as well. All
/// deletions are queued in a single write batch and committed together.
///
/// Throws an [Exception] wrapping the original error if the query or the
/// commit fails.
///
/// NOTE(review): every delete goes into one batch; confirm this stays within
/// Firestore's per-batch limits for contents with many chunks.
static Future<void> deleteContentChunks(String contentId) async {
  try {
    Logger.info('Deleting chunks for content: $contentId');

    final snapshot = await _firestore
        .collection('contentChunks')
        .where('contentId', isEqualTo: contentId)
        .get();
    final docs = snapshot.docs;

    final writeBatch = _firestore.batch();
    for (var i = 0; i < docs.length; i++) {
      writeBatch.delete(docs[i].reference);
    }
    await writeBatch.commit();

    Logger.info('Content chunks deleted: ${docs.length}');
  } catch (e) {
    Logger.error('Error deleting content chunks: $e');
    throw Exception('Failed to delete content chunks: $e');
  }
}
|
||||
|
||||
/// Collects summary statistics about the active content chunks.
///
/// Reads every document in `contentChunks` whose `isActive` flag is true and
/// returns a map with:
/// - `totalChunks`: number of documents read,
/// - `subjects`, `concepts`, `grades`: document counts per value (a missing
///   subject or concept is counted as `'Unknown'`, a missing grade as `0`),
/// - `embeddingDimension`: always `384`.
///
/// If anything throws, the error is logged and the same structure is returned
/// with zero chunks and empty count maps.
static Future<Map<String, dynamic>> getVectorStats() async {
  try {
    Logger.info('Getting vector statistics');

    final snapshot = await _firestore
        .collection('contentChunks')
        .where('isActive', isEqualTo: true)
        .get();
    final activeDocs = snapshot.docs;

    final subjectCounts = <String, int>{};
    final conceptCounts = <String, int>{};
    final gradeCounts = <int, int>{};

    for (final doc in activeDocs) {
      final fields = doc.data();
      final subject = fields['subject'] as String? ?? 'Unknown';
      final concept = fields['concept'] as String? ?? 'Unknown';
      final grade = fields['grade'] as int? ?? 0;

      // Increment the tally, starting at 1 the first time a value is seen.
      subjectCounts.update(subject, (n) => n + 1, ifAbsent: () => 1);
      conceptCounts.update(concept, (n) => n + 1, ifAbsent: () => 1);
      gradeCounts.update(grade, (n) => n + 1, ifAbsent: () => 1);
    }

    return {
      'totalChunks': activeDocs.length,
      'subjects': subjectCounts,
      'concepts': conceptCounts,
      'grades': gradeCounts,
      'embeddingDimension': 384,
    };
  } catch (e) {
    Logger.error('Error getting vector stats: $e');
    return {
      'totalChunks': 0,
      'subjects': <String, int>{},
      'concepts': <String, int>{},
      'grades': <int, int>{},
      'embeddingDimension': 384,
    };
  }
}
|
||||
}
|
||||
Reference in New Issue
Block a user