IA e pequenas coisas a funcionar

This commit is contained in:
2026-05-10 18:45:00 +01:00
parent 0f382e970b
commit 3475b57036
21 changed files with 4484 additions and 72 deletions

View File

@@ -0,0 +1,358 @@
import 'dart:math';
import 'package:cloud_firestore/cloud_firestore.dart';
import '../models/content_chunk.dart';
import '../utils/logger.dart';
/// Service for vector embeddings and similarity search
class VectorService {
static final FirebaseFirestore _firestore = FirebaseFirestore.instance;
/// Generates a mock 384-dimensional embedding for [text].
///
/// This is a placeholder, not a real embedding model: each component is
/// derived from `text.hashCode` and the component index, then nudged upward
/// when [text] contains a few hard-coded Portuguese educational keywords.
/// The result is scaled to unit length.
///
/// Returns a zero vector of length 384 if generation fails or if the raw
/// vector has zero magnitude (which cannot be normalized).
///
/// NOTE(review): the values depend on [String.hashCode]. Dart does not
/// promise that hash codes are stable between program executions or
/// platforms, so embeddings persisted by one client may not be comparable
/// with embeddings generated by another. Confirm before relying on stored
/// vectors.
static List<double> generateEmbedding(String text) {
  const dimension = 384;
  try {
    Logger.info('Generating embedding for text of length: ${text.length}');

    final hash = text.hashCode;

    // The keyword checks do not depend on the component index, so evaluate
    // them once instead of lower-casing and scanning the text 384 times.
    final textLower = text.toLowerCase();
    final mentionsPlants =
        textLower.contains('fotossíntese') || textLower.contains('plantas');
    final mentionsEnergy =
        textLower.contains('energia') || textLower.contains('luz');
    final mentionsBiology =
        textLower.contains('biologia') || textLower.contains('processo');

    final embedding = List<double>.generate(dimension, (i) {
      // Base value in [-1, 1), fixed by the text hash and the position.
      final seed = (hash * (i + 1)) % 1000;
      final value = (seed / 1000.0 - 0.5) * 2.0;

      // Shared keyword boosts make texts on the same topic point in a
      // similar direction.
      var semanticBoost = 0.0;
      if (mentionsPlants) {
        semanticBoost += 0.3 * (i % 10) / 10.0;
      }
      if (mentionsEnergy) {
        semanticBoost += 0.2 * (i % 8) / 8.0;
      }
      if (mentionsBiology) {
        semanticBoost += 0.1 * (i % 12) / 12.0;
      }
      return value + semanticBoost;
    });

    // Scale to unit length; a zero-magnitude vector would otherwise turn
    // into NaN components.
    final norm = sqrt(embedding.fold<double>(0.0, (sum, x) => sum + x * x));
    if (norm == 0) {
      return List.filled(dimension, 0.0);
    }
    return embedding.map((x) => x / norm).toList();
  } catch (e) {
    Logger.error('Error generating embedding: $e');
    // Return zero vector as fallback
    return List.filled(dimension, 0.0);
  }
}
/// Returns the cosine similarity of [vec1] and [vec2].
///
/// The result is the dot product divided by the product of the two
/// Euclidean norms. If either vector has zero magnitude (including two
/// empty vectors) the result is `0.0`.
///
/// Throws an [ArgumentError] when the vectors differ in length.
static double cosineSimilarity(List<double> vec1, List<double> vec2) {
  final length = vec1.length;
  if (length != vec2.length) {
    throw ArgumentError('Vectors must be of same length');
  }

  // Accumulate the dot product and both squared norms in a single pass.
  var dot = 0.0;
  var squaredNorm1 = 0.0;
  var squaredNorm2 = 0.0;
  var index = 0;
  while (index < length) {
    final a = vec1[index];
    final b = vec2[index];
    dot += a * b;
    squaredNorm1 += a * a;
    squaredNorm2 += b * b;
    index++;
  }

  final hasZeroVector = squaredNorm1 == 0 || squaredNorm2 == 0;
  return hasZeroVector ? 0.0 : dot / (sqrt(squaredNorm1) * sqrt(squaredNorm2));
}
/// Finds the stored content chunks most similar to [queryEmbedding].
///
/// Loads at most [candidateLimit] active documents from the
/// `contentChunks` collection, narrowed by whichever of [subject],
/// [concept], [grade], [minDifficulty] and [maxDifficulty] are non-null.
/// Each candidate is scored with [cosineSimilarity]; scores below
/// [threshold] are dropped and the [k] best matches are returned, highest
/// similarity first.
///
/// The query has no explicit ordering, so when more than [candidateLimit]
/// documents match the filters, only the ones Firestore returns first are
/// considered. [candidateLimit] must be positive.
///
/// A chunk whose stored embedding has a different length than
/// [queryEmbedding] is skipped and logged, so one malformed document does
/// not abort the whole search.
///
/// Returns an empty list if the query or scoring fails; the error is
/// logged.
static Future<List<ContentChunk>> searchSimilar({
  required List<double> queryEmbedding,
  String? subject,
  String? concept,
  int? grade,
  double? minDifficulty,
  double? maxDifficulty,
  int k = 5,
  double threshold = 0.3,
  int candidateLimit = 100,
}) async {
  try {
    Logger.info(
      'Searching for similar content with k=$k, threshold=$threshold',
    );

    // Typed query so document data needs no cast further down.
    Query<Map<String, dynamic>> query = _firestore
        .collection('contentChunks')
        .where('isActive', isEqualTo: true);

    // Apply the optional filters.
    if (subject != null) {
      query = query.where('subject', isEqualTo: subject);
    }
    if (concept != null) {
      query = query.where('concept', isEqualTo: concept);
    }
    if (grade != null) {
      query = query.where('grade', isEqualTo: grade);
    }
    if (minDifficulty != null) {
      query = query.where(
        'difficulty',
        isGreaterThanOrEqualTo: minDifficulty,
      );
    }
    if (maxDifficulty != null) {
      query = query.where('difficulty', isLessThanOrEqualTo: maxDifficulty);
    }

    // Fetch more candidates than k so the similarity filter has room to
    // work; the cap is applied after all filters are in place.
    final querySnapshot = await query.limit(candidateLimit).get();

    // Keep scores in a list rather than a map keyed by chunk, so the result
    // does not depend on how ContentChunk defines equality.
    final scoredChunks = <MapEntry<ContentChunk, double>>[];
    for (final doc in querySnapshot.docs) {
      final chunk = ContentChunk.fromFirestore(doc.data(), doc.id);

      if (chunk.embedding.length != queryEmbedding.length) {
        Logger.error(
          'Skipping chunk ${doc.id}: embedding length '
          '${chunk.embedding.length} does not match query length '
          '${queryEmbedding.length}',
        );
        continue;
      }

      final similarity = cosineSimilarity(queryEmbedding, chunk.embedding);
      if (similarity >= threshold) {
        scoredChunks.add(MapEntry(chunk, similarity));
      }
    }

    // Sort by similarity (descending) and take the top k.
    scoredChunks.sort((a, b) => b.value.compareTo(a.value));
    return scoredChunks.take(k).map((entry) => entry.key).toList();
  } catch (e) {
    Logger.error('Error searching similar content: $e');
    return [];
  }
}
/// Searches content chunks by free-text [query].
///
/// Generates an embedding for [query] with [generateEmbedding] and passes
/// it, together with the optional [subject], [concept], [grade],
/// [minDifficulty], [maxDifficulty] filters and the [k] / [threshold]
/// limits, to [searchSimilar].
///
/// Returns an empty list if anything fails; the error is logged.
static Future<List<ContentChunk>> searchByText({
  required String query,
  String? subject,
  String? concept,
  int? grade,
  double? minDifficulty,
  double? maxDifficulty,
  int k = 5,
  double threshold = 0.3,
}) async {
  try {
    // Log only a short preview. The cut must be guarded: substring(0, 50)
    // throws a RangeError for any query shorter than 50 characters, which
    // previously made every short query fall into the catch and return [].
    const previewLength = 50;
    final preview = query.length > previewLength
        ? '${query.substring(0, previewLength)}...'
        : query;
    Logger.info('Searching by text: "$preview"');

    // Generate embedding for query
    final queryEmbedding = generateEmbedding(query);

    // Search for similar content
    return await searchSimilar(
      queryEmbedding: queryEmbedding,
      subject: subject,
      concept: concept,
      grade: grade,
      minDifficulty: minDifficulty,
      maxDifficulty: maxDifficulty,
      k: k,
      threshold: threshold,
    );
  } catch (e) {
    Logger.error('Error searching by text: $e');
    return [];
  }
}
/// Generates one embedding per entry of [texts], in the same order.
///
/// Each embedding comes from [generateEmbedding]. If generation fails, the
/// error is logged and a list of `texts.length` zero vectors (384 values
/// each) is returned instead.
static Future<List<List<double>>> batchGenerateEmbeddings(
  List<String> texts,
) async {
  try {
    Logger.info('Generating embeddings for ${texts.length} texts');
    return [for (final text in texts) generateEmbedding(text)];
  } catch (e) {
    Logger.error('Error generating batch embeddings: $e');
    // Build a separate zero vector per text. List.filled would put the SAME
    // inner list in every slot, so mutating one would change them all.
    return List.generate(texts.length, (_) => List.filled(384, 0.0));
  }
}
/// Recomputes and stores the embedding of the chunk identified by
/// [chunkId].
///
/// The new embedding is generated from [text] with [generateEmbedding] and
/// written to the `embedding` field of the `contentChunks` document,
/// together with a server-side `lastUpdated` timestamp.
///
/// Throws an [Exception] wrapping the original error if the update fails.
static Future<void> updateChunkEmbedding(String chunkId, String text) async {
  try {
    Logger.info('Updating embedding for chunk: $chunkId');

    final payload = <String, Object>{
      'embedding': generateEmbedding(text),
      'lastUpdated': FieldValue.serverTimestamp(),
    };
    final chunkRef = _firestore.collection('contentChunks').doc(chunkId);
    await chunkRef.update(payload);

    Logger.info('Embedding updated for chunk: $chunkId');
  } catch (e) {
    Logger.error('Error updating chunk embedding: $e');
    throw Exception('Failed to update chunk embedding: $e');
  }
}
/// Loads the active chunks that belong to [contentId].
///
/// Queries the `contentChunks` collection for documents whose `contentId`
/// matches and whose `isActive` flag is true, ordered by `createdAt`, and
/// converts each one with [ContentChunk.fromFirestore].
///
/// Returns an empty list if the query fails; the error is logged.
static Future<List<ContentChunk>> getContentChunks(String contentId) async {
  try {
    Logger.info('Getting chunks for content: $contentId');

    final activeChunksQuery = _firestore
        .collection('contentChunks')
        .where('contentId', isEqualTo: contentId)
        .where('isActive', isEqualTo: true)
        .orderBy('createdAt');
    final snapshot = await activeChunksQuery.get();

    return [
      for (final doc in snapshot.docs)
        ContentChunk.fromFirestore(doc.data(), doc.id),
    ];
  } catch (e) {
    Logger.error('Error getting content chunks: $e');
    return [];
  }
}
/// Stores a new content chunk, including its embedding, and returns the
/// ID of the created document.
///
/// The embedding is generated from [text] with [generateEmbedding]. The
/// document is added to the `contentChunks` collection with `isActive` set
/// to true and a server-side `createdAt` timestamp. [subConcept],
/// [pageNumber] and [section] are written only when non-null; a null
/// [metadata] is stored as an empty map.
///
/// Throws an [Exception] wrapping the original error if the write fails.
static Future<String> createContentChunk({
  required String contentId,
  required String text,
  required String subject,
  required String concept,
  String? subConcept,
  required String unit,
  required double difficulty,
  required int grade,
  required String sourceDocument,
  Map<String, dynamic>? metadata,
  int? pageNumber,
  String? section,
}) async {
  try {
    Logger.info('Creating content chunk for: $concept');

    // Assemble the document; optional fields are left out when null.
    final chunkData = {
      'contentId': contentId,
      'text': text,
      'subject': subject,
      'concept': concept,
      if (subConcept != null) 'subConcept': subConcept,
      'unit': unit,
      'difficulty': difficulty,
      'grade': grade,
      'embedding': generateEmbedding(text),
      'sourceDocument': sourceDocument,
      'metadata': metadata ?? <String, dynamic>{},
      'createdAt': FieldValue.serverTimestamp(),
      'isActive': true,
      if (pageNumber != null) 'pageNumber': pageNumber,
      if (section != null) 'section': section,
    };

    final chunksCollection = _firestore.collection('contentChunks');
    final chunkId = (await chunksCollection.add(chunkData)).id;

    Logger.info('Content chunk created: $chunkId');
    return chunkId;
  } catch (e) {
    Logger.error('Error creating content chunk: $e');
    throw Exception('Failed to create content chunk: $e');
  }
}
/// Deletes every chunk document that belongs to [contentId].
///
/// All documents in `contentChunks` whose `contentId` matches are fetched
/// (regardless of `isActive`) and removed in one write batch.
///
/// NOTE(review): all deletes are committed in a single batch; confirm that
/// very large chunk sets stay within Firestore's per-commit limits.
///
/// Throws an [Exception] wrapping the original error if the query or the
/// commit fails.
static Future<void> deleteContentChunks(String contentId) async {
  try {
    Logger.info('Deleting chunks for content: $contentId');

    final chunksOfContent = _firestore
        .collection('contentChunks')
        .where('contentId', isEqualTo: contentId);
    final querySnapshot = await chunksOfContent.get();

    // Queue one delete per matching document, then commit them together.
    final batch = _firestore.batch();
    querySnapshot.docs.map((doc) => doc.reference).forEach(batch.delete);
    await batch.commit();

    Logger.info('Content chunks deleted: ${querySnapshot.docs.length}');
  } catch (e) {
    Logger.error('Error deleting content chunks: $e');
    throw Exception('Failed to delete content chunks: $e');
  }
}
/// Summarizes the active chunks stored in `contentChunks`.
///
/// The returned map contains:
/// * `totalChunks` - number of active chunk documents,
/// * `subjects` / `concepts` - chunk counts per `subject` / `concept`
///   value (`'Unknown'` when the field is missing),
/// * `grades` - chunk counts per `grade` value (`0` when missing),
/// * `embeddingDimension` - always 384.
///
/// If the query or the tallying fails, the error is logged and the same
/// structure is returned with zero chunks and empty counters.
static Future<Map<String, dynamic>> getVectorStats() async {
  // Adds one to the counter stored under [key], starting at 1.
  void tally<K>(Map<K, int> counts, K key) {
    counts.update(key, (seen) => seen + 1, ifAbsent: () => 1);
  }

  try {
    Logger.info('Getting vector statistics');

    final activeChunks = await _firestore
        .collection('contentChunks')
        .where('isActive', isEqualTo: true)
        .get();

    final subjects = <String, int>{};
    final concepts = <String, int>{};
    final grades = <int, int>{};

    for (final doc in activeChunks.docs) {
      final data = doc.data();
      tally(subjects, data['subject'] as String? ?? 'Unknown');
      tally(concepts, data['concept'] as String? ?? 'Unknown');
      tally(grades, data['grade'] as int? ?? 0);
    }

    return {
      'totalChunks': activeChunks.docs.length,
      'subjects': subjects,
      'concepts': concepts,
      'grades': grades,
      'embeddingDimension': 384,
    };
  } catch (e) {
    Logger.error('Error getting vector stats: $e');
    return {
      'totalChunks': 0,
      'subjects': <String, int>{},
      'concepts': <String, int>{},
      'grades': <int, int>{},
      'embeddingDimension': 384,
    };
  }
}
}