# llm_ticket3/agents/llama_vision/agent_image_analyser.py

from ..base_agent import BaseAgent
import logging
import os
from typing import Dict, Any, List, Optional
from PIL import Image
from ..utils.pipeline_logger import sauvegarder_donnees
from utils.translate_utils import fr_to_en, en_to_fr
from datetime import datetime
import re

logger = logging.getLogger("AgentImageAnalyser")

class AgentImageAnalyser(BaseAgent):
    """
    Agent for analyzing images and extracting relevant information.
    Works in English and translates to French for compatibility.
    """
    def __init__(self, llm, params: Optional[Dict[str, Any]] = None):
        """Set up the image-analysis agent.

        Registers the agent under the name "AgentImageAnalyser", stores the
        generation parameters, builds the English analysis instructions and
        the system prompt, creates the empty result collector and finally
        applies the configuration to the LLM.

        Args:
            llm: LLM instance used to analyse images.
            params: Generation parameters. When omitted (or empty), the
                defaults temperature=0.2, top_p=0.8, max_tokens=5000 are used.
        """
        super().__init__("AgentImageAnalyser", llm)

        # Fall back to the built-in generation settings when none are supplied.
        if params:
            self.params = params
        else:
            self.params = {
                "temperature": 0.2,
                "top_p": 0.8,
                "max_tokens": 5000
            }

        # Section-by-section checklist injected into the system prompt below.
        self.instructions_analyse = """
            1. Objective Description
               Describe precisely what the image shows:
               - Software interface, menus, windows, tabs
               - Error messages, system messages, code or script
               - Software or module name/title if visible
               - Clearly distinguish the complete name of tests/modules (for example, "Methylene blue test" instead of simply "blue test")

            2. Key Technical Elements
               Identify:
               - Software versions or displayed modules
               - Visible error codes
               - Configurable parameters (text fields, sliders, dropdowns, checkboxes)
               - Values displayed or pre-filled in fields
               - Disabled, grayed out or hidden elements (often non-modifiable)
               - Active/inactive buttons
               - Reset or initialization buttons (often marked "RAZ" and not "PAZ")
               - Specify if colored elements are part of the standard interface (e.g., always red button) or if they seem to be related to the problem

            3. URLs and Links
               - Identify and explicitly copy ALL URLs visible in the image
               - Hyperlinks in blue or underlined text
               - API endpoints, server addresses
               - Format each URL on its own line for clarity: [URL] https://example.com
               - For masked/shortened URLs, clearly indicate what text is displayed

            4. Highlighted Elements
               - Look for circled, framed, highlighted or arrowed areas
               - These elements are often important for the client or support
               - Explicitly mention their content and highlighting style
               - Specifically check if error messages are visible at the bottom or top of the screen

            5. Relationship with the Problem
               - Establish the link between visible elements and the problem described in the ticket
               - Indicate if components seem related to a misconfiguration or error
               - Specify the complete name of the module/test concerned by the problem (for example "Methylene blue test (MB)" and not just "blue test")
               - Identify if the user has access to the test screen but with errors, or if there is no access at all

            6. Potential Answers
               - Determine if the image provides elements of answer to a question asked in:
                 - The ticket title
                 - The problem description
               - Try to extrapolate the precise technical context by observing the interface (e.g., the "blue test" mentioned by the client clearly corresponds to "methylene blue test (MB) - NF EN 933-9")

            7. Link with the Discussion
               - Check if the image echoes a step described in the discussion thread
               - Note correspondences (e.g., same module, same error message as previously mentioned)
               - Establish explicit connections between the vocabulary used by the client and what's visible in the interface

            8. Broader Technical Context
               - Identify the wider context of the application (laboratory, technical tests, standardized tests)
               - Note any references to standards or norms (e.g., NF EN 933-9)
               - Mention any visible codes or identifiers that might be useful (e.g., sample numbers)

            Important Rules:
            - Do NOT make ANY interpretation or diagnosis about possible causes
            - Do NOT propose solutions or recommendations
            - Remain strictly factual and objective, but make explicit links with terms used by the client
            - Focus only on what is visible in the image
            - Reproduce exact texts (e.g., error messages, parameter labels)
            - Pay special attention to modifiable (interactive) and non-modifiable (grayed out) elements
            - Systematically use the complete and precise name of modules and tests
            - Verify correct reading of buttons and menus (beware of confusions like PAZ/RAZ)
            - ALWAYS list URLs and links in a separate dedicated section
            """

        # The only placeholder in the template is {instructions}.
        system_template = """
            You are an expert in image analysis for BRG-Lab technical support for CBAO company.
            Your mission is to analyze screenshots related to the support ticket context.

            You must be extremely precise in your reading of interfaces and technical elements.
            Clients often use abbreviated terms (like "blue test") while the interface shows the full term ("Methylene blue test"). You must make the connection between these terms.

            Some elements in the interface may cause confusion:
            - "RAZ" buttons (reset) are sometimes difficult to read
            - Colored elements may be part of the standard interface (and not part of the problem)
            - Error messages are often at the bottom of the screen and contain crucial information
            - URLs and links must be explicitly captured and listed separately

            Structure your image analysis factually:
            {instructions}

            Your analysis will be used as a factual element for a more complete technical report and to link the client's vocabulary with the actual technical elements.

            IMPORTANT: All responses should be in English. Translation to French will be handled separately.
            """
        self.system_prompt = system_template.format(instructions=self.instructions_analyse)

        # Results accumulate here until sauvegarder_resultats() flushes them in one batch.
        self.resultats = []

        self._appliquer_config_locale()
        logger.info("AgentImageAnalyser initialized")

    def _appliquer_config_locale(self) -> None:
        """
        Applies local configuration to the LLM model.
        """
        if hasattr(self.llm, "prompt_system"):
            self.llm.prompt_system = self.system_prompt

        if hasattr(self.llm, "configurer"):
            self.llm.configurer(**self.params)

    def _verifier_image(self, image_path: str) -> bool:
        """
        Checks if the image exists and is accessible
        """
        try:
            if not os.path.exists(image_path) or not os.access(image_path, os.R_OK):
                return False

            with Image.open(image_path) as img:
                width, height = img.size
                return width > 0 and height > 0
        except Exception as e:
            logger.error(f"Verification failed for {image_path}: {e}")
            return False

    def _extraire_urls(self, texte: str) -> List[str]:
        """
        Extracts URLs from a text

        Args:
            texte: The text to analyze

        Returns:
            List of extracted URLs
        """
        # Pattern to detect URLs (more complete than a simple http:// search)
        url_pattern = r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+'

        # Search in the text with a broader pattern to capture context
        url_mentions = re.finditer(r'(?:URL|link|adresse|href|http)[^\n]*?(https?://[^\s\)\]\"\']+)', texte, re.IGNORECASE)

        # List to store URLs with their context
        urls = []

        # Add URLs extracted with the generic pattern
        for url in re.findall(url_pattern, texte):
            if url not in urls:
                urls.append(url)

        # Add URLs extracted from the broader context
        for match in url_mentions:
            url = match.group(1)
            if url not in urls:
                urls.append(url)

        return urls

    def _construire_prompt(self, image_path: str, contexte: Dict[str, Any]) -> str:
        """
        Construit le prompt pour l'analyse d'image avec contexte.

        Args:
            image_path: Chemin vers l'image à analyser
            contexte: Contexte d'analyse du ticket et OCR

        Returns:
            Prompt formaté avec instructions et contexte
        """
        image_name = os.path.basename(image_path)

        # Extraire le contexte du ticket (résumé en anglais)
        ticket_content_en = ""
        if isinstance(contexte, dict):
            # Vérifier les différentes structures possibles du contexte
            if "response_en" in contexte:
                ticket_content_en = contexte["response_en"]
            elif "response" in contexte:
                ticket_content_en = contexte["response"]
            elif "analyse" in contexte:
                # Structure de l'analyse de ticket
                ticket_content_en = contexte["analyse"].get("en", "") or contexte["analyse"].get("analyse_en", "")

            # Ajouter le texte OCR s'il est disponible
            ocr_text = ""
            if "ocr_text" in contexte:
                ocr_text = contexte["ocr_text"]
            elif "ocr_info" in contexte and isinstance(contexte["ocr_info"], dict):
                ocr_text = contexte["ocr_info"].get("texte_en", "")

        # Construire le prompt avec instructions précises
        prompt = f"""[ENGLISH RESPONSE REQUESTED]

Analyze this image in the context of a technical support ticket.

IMAGE: {image_name}

"""

        # Ajouter le texte OCR s'il est disponible
        if ocr_text and len(ocr_text.strip()) > 10:
            prompt += f"""OCR TEXT DETECTED IN IMAGE:
{ocr_text}

"""

        # Ajouter le contexte du ticket s'il est disponible
        if ticket_content_en:
            prompt += f"""SUPPORT TICKET CONTEXT:
{ticket_content_en[:1500]}

"""

        prompt += """INSTRUCTIONS:
1. Describe what is shown in this image in detail
2. Identify any error messages, technical information, or interface elements
3. Explain how this image relates to the support ticket context provided
4. Note any version numbers, status indicators, or dates visible
5. Extract specific technical details that might help diagnose the issue

If the image contains text, code, or error messages, transcribe all important parts.
Structure your analysis clearly with headers and bullet points.
"""

        logger.debug(f"Prompt construit pour {image_name} avec OCR: {bool(ocr_text)} et contexte ticket: {bool(ticket_content_en)}")
        return prompt

    def _extraire_ticket_id_depuis_path(self, path: str) -> str:
        """Extrait l'ID du ticket depuis le chemin de l'image.

        Args:
            path (str): Chemin de l'image

        Returns:
            str: ID du ticket ou 'unknown' si non trouvé
        """
        try:
            # Recherche un pattern comme T12345 dans le chemin
            match = re.search(r'T\d+', path)
            if match:
                return match.group(0)
        except Exception as e:
            logger.error(f"Erreur lors de l'extraction de l'ID du ticket: {e}")
        return "unknown"

    def executer(self, image_path: str, contexte: Optional[dict] = None) -> dict:
        """
        Analyse une image et extrait les informations pertinentes.

        Args:
            image_path: Chemin vers l'image à analyser
            contexte: Contexte optionnel (texte OCR, analyse ticket, etc)

        Returns:
            Dictionnaire contenant les résultats d'analyse
        """
        logger.info(f"Analyzing image: {image_path}")

        try:
            if not self._verifier_image(image_path):
                return self._erreur("Image inaccessible ou invalide", image_path)

            # Construire le prompt avec le contexte
            prompt = self._construire_prompt(image_path, contexte or {})

            # Analyser l'image avec le LLM
            if not hasattr(self.llm, "interroger_avec_image"):
                return self._erreur("Le modèle ne supporte pas l'analyse d'images", image_path)

            logger.info(f"[LANGUE] Envoi d'une requête en anglais au modèle avec une image: {os.path.basename(image_path)}")
            logger.info(f"[LANGUE] Taille du prompt en anglais: {len(prompt)} caractères")

            response = self.llm.interroger_avec_image(image_path, prompt)

            logger.info(f"[LANGUE] Réponse reçue du modèle en anglais: {len(response)} caractères")

            if self._verifier_reponse_invalide(response):
                return self._erreur("Réponse du modèle invalide", image_path, response)

            # Extraire le ticket_id
            ticket_id = self._extraire_ticket_id(image_path, contexte or {})

            # Nettoyer le nom du modèle pour éviter les doublons
            model_name = getattr(self.llm, "pipeline_normalized_name", None)
            if not model_name:
                # Si pipeline_normalized_name n'est pas disponible, utiliser le nom du modèle
                model_name = getattr(self.llm, "modele", "llama3-2-vision-90b-instruct")
                # Normaliser manuellement
                model_name = model_name.replace(".", "-").replace(":", "-").replace("_", "-")

            logger.info(f"Model name used for logging: {model_name}")
            logger.debug(f"Nom du modèle avant normalisation: {getattr(self.llm, 'modele', 'inconnu')}")
            logger.debug(f"Nom du modèle normalisé: {model_name}")


            # Traduire la réponse en français
            logger.info(f"[TRADUCTION] Traduction de la réponse d'analyse d'image de EN vers FR")
            logger.info(f"[TRADUCTION] Taille de la réponse en anglais: {len(response)} caractères")
            response_fr = en_to_fr(response)
            logger.info(f"[TRADUCTION] Taille de la réponse traduite en français: {len(response_fr)} caractères")

            # Construire le résultat
            result = {
                "timestamp": datetime.now().isoformat(),
                "image": os.path.basename(image_path),
                "ticket_id": ticket_id,
                "analyse": {
                    "en": response,
                    "fr": response_fr
                },
                "model_info": {
                    "model": model_name,
                    **self.params
                }
            }

            # Extraire les URLs trouvées dans la réponse
            urls = self._extraire_urls(response)
            if urls:
                logger.info(f"[ANALYSE] {len(urls)} URLs extraites de l'analyse: {urls}")
                result["urls"] = urls

            # Ajouter au collecteur de résultats
            self.resultats.append(result)

            logger.debug(f"Résultat de l'analyse pour l'image {image_path}: {result}")
            logger.info(f"[LANGUES] Résultat d'analyse disponible en deux langues: EN et FR")

            return result

        except Exception as e:
            logger.error(f"Error analyzing image {image_path}: {e}")
            return self._erreur(f"Erreur inattendue: {str(e)}", image_path)

    def _corriger_termes_courants(self, texte: str) -> str:
        """
        Corrects commonly misinterpreted terms by the model.
        """
        corrections = {
            "PAZ": "RAZ",
            "Essai bleu": "Essai au bleu de méthylène",
            "essai bleu": "essai au bleu de méthylène",
            "Essai au bleu": "Essai au bleu de méthylène",
            "Methylene blue test": "Essai au bleu de méthylène",
            "Blue test": "Essai au bleu de méthylène"
        }

        for terme_incorrect, terme_correct in corrections.items():
            texte = texte.replace(terme_incorrect, terme_correct)

        return texte

    def _erreur(self, message: str, path: str, details: Any = None) -> Dict[str, Any]:
        """
        Crée un dictionnaire d'erreur standardisé.

        Args:
            message: Message d'erreur
            path: Chemin du fichier concerné
            details: Détails supplémentaires de l'erreur (optionnel)

        Returns:
            Dictionnaire contenant les informations d'erreur
        """
        error_dict = {
            "timestamp": self._get_timestamp(),
            "success": False,
            "error": message,
            "image": os.path.basename(path),
            "metadata": {
                "error_details": details if details else {},
                "source_agent": self.nom
            }
        }
        logger.error(f"Erreur: {message} pour {path}")
        return error_dict

    def _get_timestamp(self) -> str:
        """Returns a timestamp in YYYYMMDD_HHMMSS format"""
        return datetime.now().strftime("%Y%m%d_%H%M%S")

    def _extraire_ticket_id(self, image_path: str, contexte: Dict[str, Any]) -> str:
        """
        Extrait l'ID du ticket à partir du chemin de l'image ou du contexte.

        Args:
            image_path: Chemin vers l'image
            contexte: Contexte d'analyse du ticket

        Returns:
            ID du ticket ou "UNKNOWN" si non trouvé
        """
        # D'abord, chercher dans le contexte
        if isinstance(contexte, dict):
            if "metadata" in contexte and "ticket_id" in contexte["metadata"]:
                return contexte["metadata"]["ticket_id"]
            if "ticket_id" in contexte:
                return contexte["ticket_id"]

        # Ensuite, chercher dans le chemin de l'image
        parts = image_path.split(os.path.sep)
        for part in parts:
            # Format T12345
            if part.startswith("T") and part[1:].isdigit():
                return part
            # Format ticket_T12345
            if part.startswith("ticket_T"):
                return part.replace("ticket_", "")

        return "UNKNOWN"

    def _error_response(self, message: str, ticket_id: str = "UNKNOWN") -> Dict[str, Any]:
        """
        Crée une réponse d'erreur standardisée.

        Args:
            message: Message d'erreur
            ticket_id: ID du ticket

        Returns:
            Dictionnaire avec la réponse d'erreur formatée
        """
        return {
            "analyse": f"ERREUR: {message}",
            "analyse_en": f"ERROR: {message}",
            "error": True,
            "metadata": {
                "timestamp": self._get_timestamp(),
                "error": True,
                "ticket_id": ticket_id,
                "source_agent": self.nom
            }
        }

    def _verifier_reponse_invalide(self, response: str) -> bool:
        """
        Vérifie si la réponse du modèle est invalide ou inappropriée

        Args:
            response: Réponse du modèle à analyser

        Returns:
            True si la réponse est invalide, False sinon
        """
        response_lower = response.lower()

        # Vérifier les marqueurs d'échec courants
        invalid_markers = [
            "i cannot", "unable to", "i'm unable", "i am unable",
            "i don't see", "i do not see", "i can't see", "cannot see",
            "sorry, i cannot", "i apologize", "not able to"
        ]

        # Si la réponse est vide ou trop courte
        if not response or len(response.strip()) < 20:
            return True

        # Si la réponse contient des marqueurs d'échec
        for marker in invalid_markers:
            if marker in response_lower:
                # Vérifier qu'il s'agit bien d'un échec global et non d'une réponse légitime
                # qui inclut ces termes dans un contexte différent
                context_words = ["but i can", "however", "nevertheless", "although", "can describe"]
                has_context = any(context in response_lower for context in context_words)

                if not has_context and marker in response_lower[:100]:
                    return True

        return False

    def sauvegarder_resultats(self) -> None:
        """
        Sauvegarde tous les résultats collectés en garantissant leur accumulation.
        Utilise un format de liste pour maintenir les multiples résultats.
        """
        logger.info(f"Sauvegarde de {len(self.resultats)} résultats d'analyse d'images")

        if not self.resultats:
            return

        # Récupérer le ticket_id du premier résultat
        ticket_id = self.resultats[0].get("ticket_id", self.resultats[0].get("metadata", {}).get("ticket_id", "UNKNOWN"))

        try:
            # Obtenir directement le nom normalisé du modèle depuis l'instance LLM
            normalized_model_name = getattr(self.llm, "pipeline_normalized_name", None)
            if normalized_model_name:
                logger.info(f"Utilisation du nom de modèle normalisé depuis LLM: {normalized_model_name}")
            else:
                # Fallback : utiliser le nom du modèle de l'instance LLM
                normalized_model_name = getattr(self.llm, "modele", "llama3-vision-90b-instruct")
                # Normaliser manuellement
                normalized_model_name = normalized_model_name.replace(".", "-").replace(":", "-").replace("_", "-")
                logger.info(f"Fallback : utilisation du nom de modèle normalisé manuellement: {normalized_model_name}")

            # Normaliser les noms de modèles dans tous les résultats
            for result in self.resultats:
                if "model_info" in result and "model" in result["model_info"]:
                    # Utiliser le nom de modèle normalisé pour tous les résultats
                    result["model_info"]["model"] = normalized_model_name
                    logger.debug(f"Nom de modèle défini pour un résultat: {normalized_model_name}")

            # Ajouter un log pour voir le premier résultat avec le modèle normalisé
            if self.resultats and "model_info" in self.resultats[0]:
                logger.info(f"Modèle utilisé pour sauvegarder les résultats: {self.resultats[0]['model_info'].get('model', 'non défini')}")

            # Sauvegarder en mode liste pour accumuler les résultats
            sauvegarder_donnees(
                ticket_id=ticket_id,
                step_name="analyse_image",
                data=self.resultats,
                base_dir=None,
                is_resultat=True
            )
            print(f"Sauvegarde groupée de {len(self.resultats)} résultats d'analyse d'images")

            # Vérifier si les fichiers ont été créés avec le bon nom
            from os import path, listdir
            rapport_dir = path.join("output", f"ticket_{ticket_id}")
            if path.exists(rapport_dir):
                extractions = [d for d in listdir(rapport_dir) if path.isdir(path.join(rapport_dir, d)) and d.startswith(ticket_id)]
                if extractions:
                    extraction_path = path.join(rapport_dir, sorted(extractions, reverse=True)[0])
                    pipeline_dir = path.join(extraction_path, f"{ticket_id}_rapports", "pipeline")
                    if path.exists(pipeline_dir):
                        files = [f for f in listdir(pipeline_dir) if f.startswith("analyse_image_") and f.endswith("_results.json")]
                        logger.info(f"Fichiers d'analyse d'images trouvés après sauvegarde: {files}")

            # Réinitialiser la liste après la sauvegarde
            self.resultats = []
        except Exception as e:
            logger.error(f"Erreur lors de la sauvegarde des résultats d'analyse d'images : {e}")
            logger.exception("Détails de l'erreur:")
            print(f"Erreur lors de la sauvegarde des résultats : {e}")