import 'package:cloud_firestore/cloud_firestore.dart';
import 'package:firebase_auth/firebase_auth.dart';
import 'package:firebase_storage/firebase_storage.dart';

import '../utils/logger.dart';

/// Retrieves RAG (retrieval-augmented generation) context from teacher PDFs.
///
/// Each PDF's text is split into overlapping chunks, and the chunks that best
/// match the user's query (by simple keyword counting) are returned as a
/// single formatted context string.
class MaterialsRAGService {
  static final FirebaseFirestore _firestore = FirebaseFirestore.instance;
  static final FirebaseStorage _storage = FirebaseStorage.instanceFor(
    bucket: 'teachit-app.firebasestorage.app',
  );
  static final FirebaseAuth _auth = FirebaseAuth.instance;

  /// Cache of chunks already extracted from PDFs.
  ///
  /// Keyed by `teacherId/fileName` so that two teachers who upload files with
  /// the same name do not share (and overwrite) each other's chunks.
  static final Map<String, List<String>> _chunksCache = {};

  /// Size of each chunk, in characters (UTF-16 code units).
  static const int _chunkSize = 1000;

  /// Number of characters shared between consecutive chunks to keep context.
  static const int _chunkOverlap = 100;

  /// Maximum number of teacher IDs sent in a single `whereIn` filter.
  static const int _maxTeachersPerQuery = 10;

  /// Splits a query into words on anything that is not a letter, digit or
  /// underscore.
  ///
  /// `\w` only covers ASCII, which would cut accented words such as "secção"
  /// into useless fragments, so Unicode property escapes are used instead.
  static final RegExp _wordSeparator =
      RegExp(r'[^\p{L}\p{N}_]+', unicode: true);

  /// Returns the PDF chunks most relevant to [userQuery], formatted as a
  /// context block for the model.
  ///
  /// Looks up the teachers of the signed-in student, loads up to
  /// [maxMaterials] of their most recent materials, chunks every PDF among
  /// them and keeps the [maxChunks] best-scoring chunks.
  ///
  /// Returns an empty string when there is no signed-in user, no teacher, no
  /// material, no chunk, when [maxMaterials] or [maxChunks] is not positive,
  /// or when any step fails (the failure is logged, never rethrown).
  static Future<String> getRelevantChunks({
    required String userQuery,
    int maxMaterials = 5,
    int maxChunks = 5,
  }) async {
    // Nothing can be selected with a non-positive limit; skip the network.
    if (maxMaterials <= 0 || maxChunks <= 0) {
      return '';
    }

    try {
      final user = _auth.currentUser;
      if (user == null) {
        Logger.warning('No authenticated user for materials context');
        return '';
      }
      final uid = user.uid;

      // 1. Find the teacher IDs of the classes the student is enrolled in.
      final teacherIds = await _getTeacherIdsForStudent(uid);
      Logger.info('Teacher IDs for this student: $teacherIds');

      if (teacherIds.isEmpty) {
        Logger.info('No teachers found for student $uid');
        return '';
      }

      // 2. Load the most recent materials of those teachers.
      // The query is ordered and limited, so it is kept as a single query and
      // the teacher list is capped instead of batched.
      if (teacherIds.length > _maxTeachersPerQuery) {
        Logger.warning(
          'Student has ${teacherIds.length} teachers; only the first '
          '$_maxTeachersPerQuery are used for materials lookup',
        );
      }
      final teacherIdList = teacherIds.take(_maxTeachersPerQuery).toList();
      final snapshot = await _firestore
          .collection('materials')
          .where('teacherId', whereIn: teacherIdList)
          .orderBy('createdAt', descending: true)
          .limit(maxMaterials)
          .get();

      Logger.info('Materials found: ${snapshot.docs.length}');

      if (snapshot.docs.isEmpty) {
        Logger.info('No materials found for teachers: $teacherIdList');
        return '';
      }

      // 3. Extract the chunks of every PDF.
      final allChunks = <String>[];

      for (final doc in snapshot.docs) {
        final data = doc.data();
        final fileName = data['fileName'] as String?;
        if (fileName == null) continue;
        if (!fileName.toLowerCase().endsWith('.pdf')) continue;

        final teacherId = data['teacherId'] as String?;
        if (teacherId == null) continue;

        // Reuse chunks extracted earlier for this teacher's file.
        final cacheKey = '$teacherId/$fileName';
        final cached = _chunksCache[cacheKey];
        if (cached != null) {
          allChunks.addAll(cached);
          continue;
        }

        // A failure on one PDF must not prevent the others from being used.
        try {
          final fullText = await _extractFullText(fileName, teacherId);
          if (fullText.isNotEmpty) {
            final chunks = _chunkText(fullText, _chunkSize, _chunkOverlap);
            _chunksCache[cacheKey] = chunks;
            allChunks.addAll(chunks);
            Logger.info('PDF "$fileName" -> ${chunks.length} chunks (${fullText.length} chars total)');
          }
        } catch (e) {
          Logger.error('Error extracting text from $fileName: $e');
          continue;
        }
      }

      if (allChunks.isEmpty) {
        return '';
      }

      // 4. Score the chunks against the query and keep the best ones.
      final relevantChunks =
          _selectRelevantChunks(allChunks, userQuery, maxChunks);
      Logger.info('Total chunks: ${allChunks.length}, Selected: ${relevantChunks.length}');

      // 5. Format the context for the model.
      final contextBuffer = StringBuffer()
        ..writeln('Contexto dos materiais do professor:');
      for (var i = 0; i < relevantChunks.length; i++) {
        contextBuffer
          ..writeln('\n[CHUNK ${i + 1}]')
          ..writeln(relevantChunks[i]);
      }

      final result = contextBuffer.toString();
      Logger.info('RAG context size: ${result.length} chars (${relevantChunks.length} chunks)');
      return result;
    } catch (e) {
      Logger.error('Error in RAG chunk retrieval: $e');
      return '';
    }
  }

  /// Legacy entry point, kept for compatibility.
  ///
  /// Delegates to [getRelevantChunks] with an empty query, which returns the
  /// first three chunks found.
  @Deprecated('Use getRelevantChunks with userQuery instead')
  static Future<String> getMaterialsContext({int maxMaterials = 5}) async {
    return getRelevantChunks(
      userQuery: '',
      maxMaterials: maxMaterials,
      maxChunks: 3,
    );
  }

  /// Returns the distinct teacher IDs of the classes [studentId] is enrolled
  /// in.
  ///
  /// Reads the student's `enrollments`, then the matching `classes` documents.
  /// Returns an empty list when nothing is found or when a query fails (the
  /// failure is logged).
  static Future<List<String>> _getTeacherIdsForStudent(
    String studentId,
  ) async {
    try {
      // 1. Load the student's enrollments.
      final enrollmentSnapshot = await _firestore
          .collection('enrollments')
          .where('studentId', isEqualTo: studentId)
          .get();

      if (enrollmentSnapshot.docs.isEmpty) {
        Logger.info('No enrollments found for student $studentId');
        return [];
      }

      // 2. Collect the class IDs. Empty IDs are dropped because they are not
      // valid document IDs, and duplicates are dropped to avoid extra batches.
      final classIds = enrollmentSnapshot.docs
          .map((doc) => doc.data()['classId'] as String?)
          .whereType<String>()
          .where((id) => id.isNotEmpty)
          .toSet()
          .toList();

      if (classIds.isEmpty) {
        Logger.info('No class IDs found in enrollments');
        return [];
      }

      Logger.info('Found ${classIds.length} classes for student');

      // 3. Load the classes in batches of 10 (the `whereIn` batch size used
      // throughout this file) and collect their teacher IDs.
      final teacherIds = <String>{};
      for (var i = 0; i < classIds.length; i += 10) {
        final batch = classIds.skip(i).take(10).toList();
        final classSnapshot = await _firestore
            .collection('classes')
            .where(FieldPath.documentId, whereIn: batch)
            .get();

        for (final doc in classSnapshot.docs) {
          final teacherId = doc.data()['teacherId'] as String?;
          if (teacherId != null && teacherId.isNotEmpty) {
            teacherIds.add(teacherId);
          }
        }
      }

      Logger.info('Found ${teacherIds.length} unique teachers');
      return teacherIds.toList();
    } catch (e) {
      Logger.error('Error getting teacher IDs for student: $e');
      return [];
    }
  }

  /// Returns the full text of the PDF [fileName] uploaded by [teacherId].
  ///
  /// Real extraction is not implemented yet: once the file is confirmed to be
  /// reachable in Storage, simulated content is returned. Returns an empty
  /// string when the Storage lookup fails (the failure is logged).
  static Future<String> _extractFullText(
    String fileName,
    String teacherId,
  ) async {
    try {
      final ref = _storage
          .ref()
          .child('teachers')
          .child(teacherId)
          .child('materials')
          .child(fileName);

      // Resolving the URL fails when the object cannot be read, which sends
      // us to the catch block below. The URL itself is deliberately not
      // logged: it carries an access token for the file.
      await ref.getDownloadURL();

      // TODO: Implement real PDF text extraction (download the file and
      // extract its text with a PDF text extraction package).
      Logger.info('PDF available for extraction: $fileName');

      return _simulatePdfContent(fileName);
    } catch (e) {
      Logger.error('Error extracting full text from PDF $fileName: $e');
      return '';
    }
  }

  /// Builds long placeholder content for [fileName] so chunking can be tested.
  ///
  /// REMOVE in production and replace with real extraction.
  static String _simulatePdfContent(String fileName) {
    final buffer = StringBuffer()
      ..writeln('CONTEÚDO DO PDF: $fileName')
      ..writeln()
      ..writeln('INTRODUÇÃO')
      ..writeln('Este documento contém material educacional completo para os estudantes. '
          'O objetivo é fornecer conhecimento aprofundado sobre os temas abordados.')
      ..writeln();

    // Twenty sections are enough to produce several chunks.
    for (var i = 1; i <= 20; i++) {
      buffer
        ..writeln('SECÇÃO $i - CONCEITO FUNDAMENTAL $i')
        ..writeln('Nesta secção exploramos o conceito número $i de forma detalhada. '
            'Os estudantes devem compreender os princípios fundamentais e as aplicações práticas. '
            'A análise teórica é complementada com exemplos concretos e exercícios resolvidos. '
            'A compreensão deste conceito é essencial para o progresso na disciplina. '
            'Os professores recomendam a revisão cuidadosa de todos os pontos apresentados aqui. '
            'Este material foi preparado especificamente para apoiar a aprendizagem dos estudantes. '
            'Qualquer dúvida deve ser esclarecida com o professor durante as aulas. ')
        ..writeln()
        ..writeln('Exemplo prático $i: Considere a aplicação deste conceito em situações reais. '
            'Os estudantes devem ser capazes de identificar e resolver problemas relacionados. '
            'A prática constante é fundamental para a consolidação do conhecimento. ')
        ..writeln();
    }

    buffer
      ..writeln('CONCLUSÃO')
      ..writeln('Este documento cobre todos os aspetos essenciais do tema. '
          'Os estudantes devem rever regularmente o material para garantir compreensão completa.');

    return buffer.toString();
  }

  /// Splits [text] into trimmed chunks of at most [chunkSize] characters, with
  /// consecutive chunks sharing about [overlap] characters.
  ///
  /// Chunks end on a space or newline when one is available, so words are not
  /// cut. Text that already fits in one chunk is returned as is; empty text
  /// yields no chunk.
  static List<String> _chunkText(String text, int chunkSize, int overlap) {
    if (text.isEmpty) {
      return [];
    }
    final textLength = text.length;
    if (chunkSize <= 0 || textLength <= chunkSize) {
      return [text];
    }

    // The overlap must be smaller than the chunk, otherwise the window could
    // never move forward.
    var safeOverlap = overlap;
    if (safeOverlap < 0) {
      safeOverlap = 0;
    } else if (safeOverlap >= chunkSize) {
      safeOverlap = chunkSize - 1;
    }

    final chunks = <String>[];
    var start = 0;

    while (start < textLength) {
      var end = start + chunkSize;

      if (end >= textLength) {
        end = textLength;
      } else {
        // Walk back to a space or newline, but never into the overlap region:
        // ending there would place the next start at or before this one.
        final minEnd = start + safeOverlap + 1;
        var breakAt = end;
        while (breakAt >= minEnd &&
            text[breakAt] != ' ' &&
            text[breakAt] != '\n') {
          breakAt--;
        }
        // Without a usable break point, keep the hard cut at chunkSize.
        if (breakAt >= minEnd) {
          end = breakAt;
        }
      }

      final chunk = text.substring(start, end).trim();
      if (chunk.isNotEmpty) {
        chunks.add(chunk);
      }

      // The tail has been emitted. Stepping back by the overlap here would
      // re-read the same tail forever.
      if (end >= textLength) break;

      start = end - safeOverlap;
    }

    return chunks;
  }

  /// Returns up to [maxChunks] of [chunks], best match for [userQuery] first.
  ///
  /// Each occurrence of a query word (longer than 3 characters) in a chunk is
  /// worth 10 points, a chunk starting with a query word gets 5 more, and
  /// longer chunks get 1 point per 100 characters. Ties keep the original
  /// chunk order. With no usable query word, the first chunks are returned.
  static List<String> _selectRelevantChunks(
    List<String> chunks,
    String userQuery,
    int maxChunks,
  ) {
    if (maxChunks <= 0 || chunks.isEmpty) {
      return [];
    }

    final queryWords = userQuery
        .toLowerCase()
        .split(_wordSeparator)
        .where((w) => w.length > 3)
        .toSet();

    // No query, or only short words: fall back to the first chunks.
    if (queryWords.isEmpty) {
      return chunks.take(maxChunks).toList();
    }

    final scores = <int>[];
    for (final chunk in chunks) {
      final chunkLower = chunk.toLowerCase();
      var score = 0;

      for (final word in queryWords) {
        // Plain substring count: the word also matches inside longer words.
        score += word.allMatches(chunkLower).length * 10;
        if (chunkLower.startsWith(word)) {
          score += 5;
        }
      }

      // Prefer more complete chunks.
      score += chunk.length ~/ 100;
      scores.add(score);
    }

    // Rank chunk indexes rather than the chunks themselves, so that equal
    // scores can be ordered by position; `List.sort` alone is not guaranteed
    // to be stable.
    final ranking = List<int>.generate(chunks.length, (i) => i)
      ..sort((a, b) {
        final byScore = scores[b].compareTo(scores[a]);
        return byScore != 0 ? byScore : a.compareTo(b);
      });

    Logger.info('Top chunk scores: ${ranking.take(3).map((i) => scores[i]).toList()}');

    return ranking.take(maxChunks).map((i) => chunks[i]).toList();
  }

  /// Clears the chunks cache.
  static void clearCache() {
    _chunksCache.clear();
    Logger.info('Materials chunks cache cleared');
  }
}