// LearnIT/lib/core/services/materials_rag_service.dart

import 'package:cloud_firestore/cloud_firestore.dart';
import 'package:firebase_auth/firebase_auth.dart';
import 'package:firebase_storage/firebase_storage.dart';
import '../utils/logger.dart';

/// Retrieves RAG (retrieval-augmented generation) context from the PDF
/// materials uploaded by a student's teachers.
///
/// Each PDF's text is split into overlapping chunks, and the chunks that best
/// match the user's query (simple keyword matching) are returned as a single
/// context string.
class MaterialsRAGService {
  static final FirebaseFirestore _firestore = FirebaseFirestore.instance;
  static final FirebaseStorage _storage = FirebaseStorage.instanceFor(
    bucket: 'teachit-app.firebasestorage.app',
  );
  static final FirebaseAuth _auth = FirebaseAuth.instance;

  /// Cache of chunks already extracted from PDFs.
  ///
  /// Keyed by `'<teacherId>/<fileName>'` so that two teachers who upload a
  /// file with the same name never share (or leak) each other's content.
  static final Map<String, List<String>> _chunksCache = {};

  /// Size of each chunk, in characters.
  static const int _chunkSize = 1000;

  /// Number of characters shared between consecutive chunks to keep context.
  static const int _chunkOverlap = 100;

  /// Maximum number of values sent in a single `whereIn` filter.
  static const int _whereInBatchSize = 10;

  /// Returns the chunks of the teachers' PDFs most relevant to [userQuery],
  /// formatted as one context string for the model.
  ///
  /// At most [maxMaterials] of the most recently created materials are
  /// considered and at most [maxChunks] chunks are returned.
  ///
  /// Returns an empty string when there is no signed-in user, when either
  /// limit is not positive, when no teachers, materials or chunks are found,
  /// or when any error occurs (errors are logged, never thrown).
  static Future<String> getRelevantChunks({
    required String userQuery,
    int maxMaterials = 5,
    int maxChunks = 5,
  }) async {
    // Non-positive limits can never produce context; skip all I/O.
    if (maxMaterials <= 0 || maxChunks <= 0) return '';

    try {
      final user = _auth.currentUser;
      if (user == null) {
        Logger.warning('No authenticated user for materials context');
        return '';
      }

      final uid = user.uid;

      // 1. Resolve the teachers of the classes the student is enrolled in.
      final teacherIds = await _getTeacherIdsForStudent(uid);

      Logger.info('Teacher IDs for this student: $teacherIds');

      if (teacherIds.isEmpty) {
        Logger.info('No teachers found for student $uid');
        return '';
      }

      // 2. Fetch the newest materials of those teachers. A single `whereIn`
      // filter only takes a limited number of values, so extra teachers are
      // left out; say so explicitly instead of dropping them silently.
      if (teacherIds.length > _whereInBatchSize) {
        Logger.warning(
          'Student has ${teacherIds.length} teachers; only the first '
          '$_whereInBatchSize are queried for materials',
        );
      }
      final teacherIdList = teacherIds.take(_whereInBatchSize).toList();

      final snapshot = await _firestore
          .collection('materials')
          .where('teacherId', whereIn: teacherIdList)
          .orderBy('createdAt', descending: true)
          .limit(maxMaterials)
          .get();

      Logger.info('Materials found: ${snapshot.docs.length}');

      if (snapshot.docs.isEmpty) {
        Logger.info('No materials found for teachers: $teacherIdList');
        return '';
      }

      // 3. Collect the chunks of every PDF material.
      final allChunks = <String>[];

      for (final doc in snapshot.docs) {
        final data = doc.data();
        final fileName = data['fileName'];
        final teacherId = data['teacherId'];

        // Skip malformed documents instead of aborting the whole retrieval.
        if (fileName is! String || teacherId is! String) continue;
        if (!fileName.toLowerCase().endsWith('.pdf')) continue;

        final cacheKey = '$teacherId/$fileName';
        final cachedChunks = _chunksCache[cacheKey];
        if (cachedChunks != null) {
          allChunks.addAll(cachedChunks);
          continue;
        }

        // Best effort per file: one failing PDF must not hide the others.
        try {
          final fullText = await _extractFullText(fileName, teacherId);
          if (fullText.isEmpty) continue;

          final chunks = _chunkText(fullText, _chunkSize, _chunkOverlap);
          _chunksCache[cacheKey] = chunks;
          allChunks.addAll(chunks);

          Logger.info(
            'PDF "$fileName" -> ${chunks.length} chunks '
            '(${fullText.length} chars total)',
          );
        } catch (e) {
          Logger.error('Error extracting text from $fileName: $e');
        }
      }

      if (allChunks.isEmpty) {
        return '';
      }

      // 4. Score the chunks against the query and keep the best ones.
      final relevantChunks =
          _selectRelevantChunks(allChunks, userQuery, maxChunks);

      Logger.info(
        'Total chunks: ${allChunks.length}, '
        'Selected: ${relevantChunks.length}',
      );

      // 5. Format the context for the model.
      final contextBuffer = StringBuffer()
        ..writeln('Contexto dos materiais do professor:');

      for (var i = 0; i < relevantChunks.length; i++) {
        contextBuffer
          ..writeln('\n[CHUNK ${i + 1}]')
          ..writeln(relevantChunks[i]);
      }

      final result = contextBuffer.toString();
      Logger.info(
        'RAG context size: ${result.length} chars '
        '(${relevantChunks.length} chunks)',
      );

      return result;
    } catch (e) {
      Logger.error('Error in RAG chunk retrieval: $e');
      return '';
    }
  }

  /// Legacy entry point kept for compatibility.
  ///
  /// Delegates to [getRelevantChunks] with an empty query, which yields the
  /// first three chunks of the newest materials.
  @Deprecated('Use getRelevantChunks with userQuery instead')
  static Future<String> getMaterialsContext({int maxMaterials = 5}) async {
    return getRelevantChunks(
      userQuery: '',
      maxMaterials: maxMaterials,
      maxChunks: 3,
    );
  }

  /// Returns the distinct teacher IDs of the classes [studentId] is enrolled
  /// in.
  ///
  /// Reads the student's `enrollments`, collects their `classId`s, then reads
  /// the matching `classes` documents and collects each `teacherId`.
  /// Returns an empty list when nothing is found or when an error occurs
  /// (the error is logged).
  static Future<List<String>> _getTeacherIdsForStudent(String studentId) async {
    try {
      // 1. Fetch the student's enrollments.
      final enrollmentSnapshot = await _firestore
          .collection('enrollments')
          .where('studentId', isEqualTo: studentId)
          .get();

      if (enrollmentSnapshot.docs.isEmpty) {
        Logger.info('No enrollments found for student $studentId');
        return [];
      }

      // 2. Extract the class IDs. Missing, non-string and empty values are
      // skipped (an empty document ID is not a valid filter value), and
      // duplicates are removed so each class is queried only once.
      final classIds = enrollmentSnapshot.docs
          .map((doc) => doc.data()['classId'])
          .whereType<String>()
          .where((id) => id.isNotEmpty)
          .toSet()
          .toList();

      if (classIds.isEmpty) {
        Logger.info('No class IDs found in enrollments');
        return [];
      }

      Logger.info('Found ${classIds.length} classes for student');

      // 3. Load the classes in batches and collect their teacher IDs.
      final teacherIds = <String>{};

      for (var i = 0; i < classIds.length; i += _whereInBatchSize) {
        final batchEnd = i + _whereInBatchSize > classIds.length
            ? classIds.length
            : i + _whereInBatchSize;
        final batch = classIds.sublist(i, batchEnd);

        final classSnapshot = await _firestore
            .collection('classes')
            .where(FieldPath.documentId, whereIn: batch)
            .get();

        for (final doc in classSnapshot.docs) {
          final teacherId = doc.data()['teacherId'];
          if (teacherId is String && teacherId.isNotEmpty) {
            teacherIds.add(teacherId);
          }
        }
      }

      Logger.info('Found ${teacherIds.length} unique teachers');
      return teacherIds.toList();
    } catch (e) {
      Logger.error('Error getting teacher IDs for student: $e');
      return [];
    }
  }

  /// Returns the full text of the PDF [fileName] uploaded by [teacherId].
  ///
  /// The file is looked up at `teachers/<teacherId>/materials/<fileName>`.
  /// Real text extraction is not implemented yet: after resolving the
  /// download URL, simulated content from [_simulatePdfContent] is returned.
  /// Returns an empty string if the lookup fails (the error is logged).
  static Future<String> _extractFullText(
    String fileName,
    String teacherId,
  ) async {
    try {
      final ref = _storage
          .ref()
          .child('teachers')
          .child(teacherId)
          .child('materials')
          .child(fileName);

      // Resolving the URL fails when the object cannot be accessed, so this
      // still acts as an availability check. The URL itself is deliberately
      // not logged: download URLs typically embed an access token.
      await ref.getDownloadURL();

      // TODO: Implement real PDF text extraction (download the file and
      // extract all of its text, e.g. with pdf_text_extract or similar).
      // Until then, long simulated content is returned to exercise chunking.

      Logger.info('PDF available for extraction: $fileName');

      return _simulatePdfContent(fileName);
    } catch (e) {
      Logger.error('Error extracting full text from PDF $fileName: $e');
      return '';
    }
  }

  /// Builds long placeholder content for [fileName] to exercise chunking.
  ///
  /// REMOVE in production and replace with real extraction.
  static String _simulatePdfContent(String fileName) {
    final buffer = StringBuffer()
      ..writeln('CONTEÚDO DO PDF: $fileName')
      ..writeln()
      ..writeln('INTRODUÇÃO')
      ..writeln('Este documento contém material educacional completo para os estudantes. '
          'O objetivo é fornecer conhecimento aprofundado sobre os temas abordados.')
      ..writeln();

    // Twenty sections make the text long enough to span several chunks.
    for (var i = 1; i <= 20; i++) {
      buffer
        ..writeln('SECÇÃO $i - CONCEITO FUNDAMENTAL $i')
        ..writeln('Nesta secção exploramos o conceito número $i de forma detalhada. '
            'Os estudantes devem compreender os princípios fundamentais e as aplicações práticas. '
            'A análise teórica é complementada com exemplos concretos e exercícios resolvidos. '
            'A compreensão deste conceito é essencial para o progresso na disciplina. '
            'Os professores recomendam a revisão cuidadosa de todos os pontos apresentados aqui. '
            'Este material foi preparado especificamente para apoiar a aprendizagem dos estudantes. '
            'Qualquer dúvida deve ser esclarecida com o professor durante as aulas. ')
        ..writeln()
        ..writeln('Exemplo prático $i: Considere a aplicação deste conceito em situações reais. '
            'Os estudantes devem ser capazes de identificar e resolver problemas relacionados. '
            'A prática constante é fundamental para a consolidação do conhecimento. ')
        ..writeln();
    }

    buffer
      ..writeln('CONCLUSÃO')
      ..writeln('Este documento cobre todos os aspetos essenciais do tema. '
          'Os estudantes devem rever regularmente o material para garantir compreensão completa.');

    return buffer.toString();
  }

  /// Splits [text] into chunks of at most [chunkSize] characters, where each
  /// chunk starts [overlap] characters before the end of the previous one.
  ///
  /// Chunks are cut at a space or newline when one exists inside the window,
  /// so words are not split; otherwise the cut is forced at [chunkSize].
  /// Chunks are trimmed and empty ones are dropped. A text no longer than
  /// [chunkSize] is returned unchanged as the only chunk.
  ///
  /// Throws an [ArgumentError] if [chunkSize] is not positive. [overlap] is
  /// clamped to `0 .. chunkSize - 1`.
  static List<String> _chunkText(String text, int chunkSize, int overlap) {
    if (chunkSize <= 0) {
      throw ArgumentError.value(chunkSize, 'chunkSize', 'must be positive');
    }

    final textLength = text.length;
    if (textLength <= chunkSize) {
      return [text];
    }

    final safeOverlap = overlap < 0
        ? 0
        : (overlap >= chunkSize ? chunkSize - 1 : overlap);

    final chunks = <String>[];
    var start = 0;

    while (start < textLength) {
      var end = start + chunkSize;

      if (end >= textLength) {
        end = textLength;
      } else {
        // Walk back to the nearest whitespace so no word is cut in half.
        var breakAt = end;
        while (breakAt > start &&
            text[breakAt] != ' ' &&
            text[breakAt] != '\n') {
          breakAt--;
        }
        // No whitespace in the window: keep the forced cut at chunkSize.
        if (breakAt > start) {
          end = breakAt;
        }
      }

      final chunk = text.substring(start, end).trim();
      if (chunk.isNotEmpty) {
        chunks.add(chunk);
      }

      // The last chunk has been emitted. Stepping back by the overlap here
      // would re-emit the tail of the text forever.
      if (end >= textLength) break;

      // Step back by the overlap, but always move strictly forward: a short
      // chunk (early whitespace) must not stall or rewind the cursor.
      final nextStart = end - safeOverlap;
      start = nextStart > start ? nextStart : end;
    }

    return chunks;
  }

  /// Returns up to [maxChunks] of [chunks], ranked by simple keyword matching
  /// against [userQuery].
  ///
  /// Keywords are the query's words longer than three characters. Each
  /// occurrence of a keyword in a chunk scores 10, a chunk starting with a
  /// keyword scores 5 more, and every 100 characters of chunk length add 1.
  /// Ties keep the original chunk order. When the query yields no keywords,
  /// the first [maxChunks] chunks are returned.
  static List<String> _selectRelevantChunks(
    List<String> chunks,
    String userQuery,
    int maxChunks,
  ) {
    if (maxChunks <= 0) {
      return [];
    }

    if (userQuery.isEmpty || chunks.isEmpty) {
      // Without a query, fall back to the first chunks.
      return chunks.take(maxChunks).toList();
    }

    // Split on anything that is not a letter, digit or underscore. `\w` is
    // ASCII-only, so it would break accented words ("educação") apart.
    final queryWords = userQuery
        .toLowerCase()
        .split(RegExp(r'[^\p{L}\p{N}_]+', unicode: true))
        .where((w) => w.length > 3)
        .toSet();

    if (queryWords.isEmpty) {
      return chunks.take(maxChunks).toList();
    }

    // Score every chunk; scores[i] belongs to chunks[i].
    final scores = <int>[];

    for (final chunk in chunks) {
      final chunkLower = chunk.toLowerCase();
      var score = 0;

      for (final word in queryWords) {
        // Weight per occurrence of the keyword in the chunk.
        score += word.allMatches(chunkLower).length * 10;

        // Bonus when the chunk starts with the keyword.
        if (chunkLower.startsWith(word)) {
          score += 5;
        }
      }

      // Small bonus for longer (more complete) chunks.
      score += chunk.length ~/ 100;

      scores.add(score);
    }

    // Sort indices by descending score. `List.sort` is not guaranteed to be
    // stable, so the original position is used as an explicit tie-breaker.
    final ranking = List<int>.generate(chunks.length, (i) => i)
      ..sort((a, b) {
        final byScore = scores[b].compareTo(scores[a]);
        return byScore != 0 ? byScore : a.compareTo(b);
      });

    Logger.info(
      'Top chunk scores: ${ranking.take(3).map((i) => scores[i]).toList()}',
    );

    return ranking.take(maxChunks).map((i) => chunks[i]).toList();
  }

  /// Clears the cache of extracted chunks.
  static void clearCache() {
    _chunksCache.clear();
    Logger.info('Materials chunks cache cleared');
  }
}