# llm_ticket3/agents/llama_vision/agent_report_generator.py

from ..base_agent import BaseAgent
from typing import Dict, Any, Optional
import logging
import os
import json
import traceback
from datetime import datetime
from ..utils.pipeline_logger import sauvegarder_donnees
from utils.translate_utils import fr_to_en, en_to_fr
# Configuration for detailed logging
logger = logging.getLogger("AgentReportGenerator")
logger.setLevel(logging.DEBUG) # Increase logging level for debugging
class AgentReportGenerator(BaseAgent):
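    """Generates the final support report by cross-referencing the textual
    ticket analysis with the per-screenshot analyses, translating the prompt
    and response when an English-only LlamaVision model is used."""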
def __init__(self, llm):
super().__init__("AgentReportGenerator", llm)
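        # Conservative sampling (low temperature / top_p) to keep the report
        # factual and reproducible; the large max_tokens budget leaves room for
        # the full cross-referenced report and the exchange table.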
self.params = {
"temperature": 0.2,
"top_p": 0.8,
"max_tokens": 8000
}
# System prompt in French (preserved for French-speaking models)
self.system_prompt_fr = """Tu es un expert en support technique chargé de générer un rapport final à partir des analyses d'un ticket de support.
Ton rôle est de croiser les informations provenant :
- de l'analyse textuelle du ticket client
- des analyses détaillées de plusieurs captures d'écran
Tu dois structurer ta réponse en format question/réponse de manière claire, en gardant l'intégralité des points importants.
Ne propose jamais de solution. Ne reformule pas le contexte.
Ta seule mission est de croiser les données textuelles et visuelles et d'en tirer des observations claires, en listant les éléments factuels visibles dans les captures qui appuient ou complètent le texte du ticket.
Structure du rapport attendu :
1. Contexte général (résumé du ticket textuel en une phrase)
2. Problèmes ou questions identifiés (sous forme de questions claires)
3. Résumé croisé image/texte pour chaque question
4. Liste d'observations supplémentaires pertinentes (si applicable)
5. Tableau chronologique d'échanges
- Inclure un tableau structuré des échanges entre client et support
- Format : Émetteur | Type | Date | Contenu | Éléments visuels pertinents
- Ne pas mentionner les noms réels des personnes, utiliser "CLIENT" et "SUPPORT"
- Synthétiser le contenu tout en conservant les informations importantes
- Conserver les compléments d'informations apportés par les images (exemple: client: essai bleu, analyse image: Essai au bleu de méthylène (MB) - NF EN 933-9 (02-2022))
- Conserver les liens utiles (documentation, FAQ, manuel, liens vers des pages web, etc.)
- Associer les éléments visuels des captures d'écran aux échanges correspondants
Règles pour le tableau d'échanges :
- TYPE peut être : question, réponse, information, complément visuel
- Pour chaque échange du client mentionnant un problème, ajoute les éléments visuels des captures qui contextualisent ce problème
- Pour chaque réponse du support, ajoute les éléments visuels qui confirment ou infirment la réponse
- N'invente aucun contenu ni aucune date
- Utilise les données factuelles des images pour enrichir la compréhension des échanges
Reste strictement factuel. Ne fais aucune hypothèse. Ne suggère pas d'étapes ni d'interprétation."""
# System prompt in English for LlamaVision
self.system_prompt_en = """You are a technical support expert responsible for generating a final report from the analyses of a support ticket.
Your role is to cross-reference information from:
- textual analysis of the customer ticket
- detailed analyses of multiple screenshots
You must structure your response in a clear question/answer format, keeping all important points.
Never propose a solution. Do not reformulate the context.
Your only mission is to cross-reference textual and visual data to draw clear observations, listing factual elements visible in the screenshots that support or complement the ticket text.
Expected report structure:
1. General context (textual ticket summary in one sentence)
2. Identified problems or questions (in the form of clear questions)
3. Cross-referenced image/text summary for each question
4. List of additional relevant observations (if applicable)
5. Chronological exchange table
- Include a structured table of exchanges between client and support
- Format: Sender | Type | Date | Content | Relevant visual elements
- Do not mention real names, use "CLIENT" and "SUPPORT"
- Synthesize content while preserving important information
- Preserve additional information provided by images (example: client: blue test, image analysis: Methylene blue test (MB) - NF EN 933-9 (02-2022))
- Preserve useful links (documentation, FAQ, manual, links to web pages, etc.)
- Associate visual elements from screenshots with corresponding exchanges
Rules for the exchange table:
- TYPE can be: question, answer, information, visual complement
- For each client exchange mentioning a problem, add visual elements from screenshots that contextualize the problem
- For each support response, add visual elements that confirm or contradict the response
- Do not invent any content or date
- Use factual data from images to enrich understanding of exchanges
Stay strictly factual. Make no assumptions. Do not suggest steps or interpretation."""
self._appliquer_config_locale()
logger.info("AgentReportGenerator initialized")
def _appliquer_config_locale(self) -> None:
"""
Applies local configuration based on the model used (LlamaVision or other)
"""
if hasattr(self.llm, "prompt_system"):
model_name = getattr(self.llm, "modele", "").lower()
# Use English prompt for LlamaVision
if "llama" in model_name or "vision" in model_name:
self.llm.prompt_system = self.system_prompt_en
logger.info("LlamaVision mode detected: using English system prompt")
else:
self.llm.prompt_system = self.system_prompt_fr
logger.info("Using French system prompt")
if hasattr(self.llm, "configurer"):
self.llm.configurer(**self.params)
def _verifier_donnees_entree(self, rapport_data: Dict[str, Any]) -> bool:
"""
Verifies that the input data contains the necessary elements.
Args:
rapport_data: Data for generating the report
Returns:
bool: True if data is valid, False otherwise
"""
ticket_id = rapport_data.get("ticket_id")
if not ticket_id:
logger.error("Validation error: missing ticket_id")
return False
ticket_analyse = rapport_data.get("ticket_analyse")
if not ticket_analyse:
logger.error(f"Validation error for {ticket_id}: missing ticket analysis")
return False
analyses_images = rapport_data.get("analyse_images", {})
if not analyses_images:
logger.warning(f"Warning for {ticket_id}: no image analysis available")
# Continue anyway because we can generate a report without images
# Check if at least one image has been analyzed
images_analysees = 0
for img_path, img_data in analyses_images.items():
if img_data.get("analysis") and img_data["analysis"].get("analyse"):
images_analysees += 1
if images_analysees == 0 and analyses_images:
logger.warning(f"Warning for {ticket_id}: {len(analyses_images)} images found but none analyzed")
logger.info(f"Validation for {ticket_id}: OK, {images_analysees} images analyzed out of {len(analyses_images)} images")
return True
def executer(self, rapport_data: Dict[str, Any]) -> str:
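        """
        Generates the final cross-referenced report for a ticket and saves it.
        Args:
            rapport_data: Must contain "ticket_id" and "ticket_analyse";
                "analyse_images" is optional.
        Returns:
            str: The report in French, or an "ERROR: ..." message on failure.
        """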
ticket_id = rapport_data.get("ticket_id", "Unknown")
print(f"AgentReportGenerator: generating report for ticket {ticket_id}")
try:
# Check and log input data for debugging
logger.debug(f"Data received for {ticket_id}: {json.dumps(rapport_data, default=str)[:500]}...")
# Verify that input data is valid
if not self._verifier_donnees_entree(rapport_data):
error_msg = f"Unable to generate report: invalid input data for {ticket_id}"
print(f"ERROR: {error_msg}")
return f"ERROR: {error_msg}"
print(f"Preparing prompt for ticket {ticket_id}...")
prompt = self._generer_prompt(rapport_data)
logger.debug(f"Generated prompt ({len(prompt)} characters): {prompt[:500]}...")
# Determine if the model is LlamaVision and translate if necessary
model_name = getattr(self.llm, "modele", "").lower()
need_translation = "llama" in model_name or "vision" in model_name
if need_translation:
# Add explicit marker and translate prompt
prompt_en = f"[ENGLISH RESPONSE REQUESTED]\n\n{fr_to_en(prompt)}"
logger.info(f"Translating prompt to English for LlamaVision ({len(prompt_en)} characters)")
else:
prompt_en = prompt
print(f"Analysis in progress for ticket {ticket_id}...")
response = self.llm.interroger(prompt_en)
# Translate response to French if necessary
if need_translation:
response_fr = en_to_fr(response)
logger.info(f"Translating response to French ({len(response_fr)} characters)")
else:
response_fr = response
print(f"Analysis completed: {len(response_fr)} characters")
logger.debug(f"Response received ({len(response_fr)} characters): {response_fr[:500]}...")
# Create complete result with metadata
result = {
"prompt": prompt,
"prompt_en": prompt_en if need_translation else None,
"response": response_fr, # French version for compatibility
"response_en": response if need_translation else None, # Original English version
"metadata": {
"ticket_id": ticket_id,
"timestamp": self._get_timestamp(),
"source_agent": self.nom,
"model_info": {
"model": getattr(self.llm, "modele", str(type(self.llm))),
**getattr(self.llm, "params", {})
},
"language": "en-fr" if need_translation else "fr" # Indicates the language used
}
}
            # Use sauvegarder_donnees to centralize the saving logic
            # and avoid duplicates while respecting the directory structure
            try:
                # Save the report via the centralized function
sauvegarder_donnees(ticket_id, "rapport_final", result, base_dir=None, is_resultat=True)
print(f"Final report generated and saved for ticket {ticket_id}")
                # Locate the main pipeline directory
extraction_dir = self._trouver_repertoire_extraction(ticket_id)
if extraction_dir:
rapports_dir = os.path.join(extraction_dir, f"{ticket_id}_rapports")
pipeline_dir = os.path.join(rapports_dir, "pipeline")
                    # Also save a plain-text version directly in pipeline_dir
if os.path.exists(pipeline_dir):
model_name = getattr(self.llm, "modele", "unknown")
                        # Normalize the model name to avoid dot/underscore variations
def normaliser_nom_modele(nom):
if not nom:
return "unknown-model"
                            # Convert dots, underscores and colons to hyphens
normalized = nom.lower().replace(".", "-").replace("_", "-").replace(":", "-")
                            # Strip non-alphanumeric characters (except hyphens)
result = ""
for c in normalized:
if c.isalnum() or c == '-':
result += c
return result
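                        # e.g. "llama3.2-vision:latest" -> "llama3-2-vision-latest"
                        # (hypothetical model name, shown only to illustrate the normalization)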
safe_model_name = normaliser_nom_modele(model_name)
rapport_txt_path = os.path.join(pipeline_dir, f"rapport_final_{safe_model_name}_results.txt")
with open(rapport_txt_path, "w", encoding="utf-8") as f:
f.write(f"RAPPORT D'ANALYSE DU TICKET {ticket_id}\n")
f.write("="*50 + "\n\n")
                            f.write(response_fr)  # French version
print(f"Text version saved in: {rapport_txt_path}")
                        # If needed, also save the English version
if need_translation:
rapport_en_path = os.path.join(pipeline_dir, f"rapport_final_{safe_model_name}_results_en.txt")
with open(rapport_en_path, "w", encoding="utf-8") as f:
f.write(f"ANALYSIS REPORT FOR TICKET {ticket_id}\n")
f.write("="*50 + "\n\n")
                                f.write(response)  # English version
print(f"English text version saved in: {rapport_en_path}")
except Exception as e:
logger.error(f"Error when saving results: {str(e)}")
                # Fallback: create a debug file
try:
debug_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../debug"))
os.makedirs(debug_dir, exist_ok=True)
debug_path = os.path.join(debug_dir, f"rapport_debug_{ticket_id}.json")
with open(debug_path, "w", encoding="utf-8") as f:
json.dump(result, f, ensure_ascii=False, indent=2)
print(f"Debug file created: {debug_path}")
except Exception as debug_err:
print(f"Even debug save failed: {str(debug_err)}")
# Add to history
self.ajouter_historique("rapport_final", {
"ticket_id": ticket_id,
"prompt": prompt,
"timestamp": self._get_timestamp()
}, response_fr) # French version for history
print(f"Report processing completed for ticket {ticket_id}")
return response_fr # Return French version for pipeline consistency
except Exception as e:
logger.error(f"Error generating report: {str(e)}")
logger.error(traceback.format_exc())
print(f"CRITICAL ERROR during report generation: {str(e)}")
return f"ERROR: {str(e)}"
def _trouver_repertoire_extraction(self, ticket_id: str) -> Optional[str]:
"""
Trouve le répertoire d'extraction le plus récent pour un ticket.
Args:
ticket_id: ID du ticket
Returns:
Chemin du répertoire d'extraction ou None si non trouvé
"""
base_dir = "output"
ticket_dir = os.path.join(base_dir, f"ticket_{ticket_id}")
if not os.path.exists(ticket_dir):
return None
        # Find the most recent extraction
extractions = []
for extraction in os.listdir(ticket_dir):
extraction_path = os.path.join(ticket_dir, extraction)
if os.path.isdir(extraction_path) and extraction.startswith(ticket_id):
extractions.append(extraction_path)
if not extractions:
return None
        # Sort by modification time (most recent first)
extractions.sort(key=lambda x: os.path.getmtime(x), reverse=True)
return extractions[0]
def _generer_prompt(self, rapport_data: Dict[str, Any]) -> str:
"""
Generates the prompt for the report generator
Args:
rapport_data: Report data containing ticket analysis and image analyses
Returns:
str: Generated prompt
"""
ticket_text = rapport_data.get("ticket_analyse", "")
image_blocs = []
analyses_images = rapport_data.get("analyse_images", {})
# Add logs to check image data
logger.info(f"Number of images to analyze: {len(analyses_images)}")
for chemin_image, analyse_obj in analyses_images.items():
            # Relevance flag from the sorting step (currently informational only:
            # every analysed image is included in the prompt regardless)
            is_relevant = analyse_obj.get("sorting", {}).get("is_relevant", False)
# Get analysis if it exists
analyse = ""
if "analysis" in analyse_obj and analyse_obj["analysis"]:
# Check if there is an English or French version of the analysis
if "analyse_en" in analyse_obj["analysis"]:
                    # Use the English version directly if the model is LlamaVision
model_name = getattr(self.llm, "modele", "").lower()
if "llama" in model_name or "vision" in model_name:
analyse = analyse_obj["analysis"].get("analyse_en", "")
else:
analyse = analyse_obj["analysis"].get("analyse", "")
else:
# Use standard analysis if no language versions
analyse = analyse_obj["analysis"].get("analyse", "")
if analyse:
image_blocs.append(f"--- IMAGE : {os.path.basename(chemin_image)} ---\n{analyse}\n")
logger.info(f"Adding analysis of image {os.path.basename(chemin_image)} ({len(analyse)} characters)")
else:
logger.warning(f"Image {os.path.basename(chemin_image)} without analysis")
bloc_images = "\n".join(image_blocs)
# Log to check data size
logger.info(f"Size of ticket analysis: {len(ticket_text)} characters")
logger.info(f"Size of image block: {len(bloc_images)} characters")
        # Keep the prompt in French: the final output is in French, either directly or translated back from English
prompt = (
f"Voici les données d'analyse pour un ticket de support :\n\n"
f"=== ANALYSE DU TICKET ===\n{ticket_text}\n\n"
f"=== ANALYSES D'IMAGES ===\n{bloc_images}\n\n"
f"Génère un rapport croisé en suivant les instructions précédentes, incluant un tableau chronologique des échanges entre CLIENT et SUPPORT. "
f"Utilise le format suivant pour le tableau :\n"
f"| ÉMETTEUR | TYPE | DATE | CONTENU | ÉLÉMENTS VISUELS |\n"
f"| --- | --- | --- | --- | --- |\n"
f"| CLIENT | question | date | texte de la question | éléments pertinents des images |\n"
f"| SUPPORT | réponse | date | texte de la réponse | éléments pertinents des images |\n\n"
f"Ce tableau doit synthétiser les échanges tout en intégrant les données pertinentes des images avec le maximum de contexte technique."
)
return prompt
    def _get_timestamp(self) -> str:
        """Returns a timestamp in YYYYMMDD_HHMMSS format"""
        return datetime.now().strftime("%Y%m%d_%H%M%S")
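

# Minimal usage sketch (illustrative only). The LLM client below is an
# assumption: this agent only requires an object exposing `interroger(prompt)`;
# `modele`, `prompt_system` and `configurer(**params)` are used when present,
# as seen in the code above. The actual client class shipped with the pipeline
# may differ.
# if __name__ == "__main__":
#     from llm_clients import LlamaVisionClient  # hypothetical import path
#     agent = AgentReportGenerator(LlamaVisionClient(modele="llama-3.2-vision"))
#     rapport = agent.executer({
#         "ticket_id": "12345",
#         "ticket_analyse": "Résumé textuel du ticket...",
#         "analyse_images": {},
#     })
#     print(rapport)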