mirror of
https://github.com/Ladebeze66/llm_ticket3.git
synced 2025-12-13 15:46:52 +01:00
431 lines
18 KiB
Python
431 lines
18 KiB
Python
from ..base_agent import BaseAgent
|
|
import logging
|
|
import os
|
|
from typing import Dict, Any, List, Optional
|
|
from PIL import Image
|
|
from ..utils.pipeline_logger import sauvegarder_donnees
|
|
from utils.translate_utils import fr_to_en, en_to_fr
|
|
|
|
# Module-level logger shared by every method of the agent; it is registered
# under the fixed name "AgentImageAnalyser" rather than the module path.
logger = logging.getLogger("AgentImageAnalyser")
|
|
|
|
class AgentImageAnalyser(BaseAgent):
    """
    Agent for analyzing images and extracting relevant information.

    Works in English and translates to French for compatibility: the
    prompts sent to the model are written in English, and ``executer``
    returns the analysis under both an ``"en"`` and a ``"fr"`` key.
    """
|
|
def __init__(self, llm):
    """
    Set up the agent: sampling parameters, analysis instructions and the
    system prompt, then push that configuration onto the model.

    Args:
        llm: Model wrapper handed to ``BaseAgent``; it is configured by
            ``_appliquer_config_locale`` at the end of construction.
    """
    super().__init__("AgentImageAnalyser", llm)

    # Sampling settings forwarded to the model by _appliquer_config_locale
    self.params = dict(temperature=0.2, top_p=0.8, max_tokens=3000)

    # NOTE(review): the original indentation of the prompt lines is not
    # recoverable from this view of the file; the text is kept flush-left.
    # Confirm against the repository before merging.
    self.instructions_analyse = """
1. Objective Description
Describe precisely what the image shows:
- Software interface, menus, windows, tabs
- Error messages, system messages, code or script
- Software or module name/title if visible
- Clearly distinguish the complete name of tests/modules (for example, "Methylene blue test" instead of simply "blue test")

2. Key Technical Elements
Identify:
- Software versions or displayed modules
- Visible error codes
- Configurable parameters (text fields, sliders, dropdowns, checkboxes)
- Values displayed or pre-filled in fields
- Disabled, grayed out or hidden elements (often non-modifiable)
- Active/inactive buttons
- Reset or initialization buttons (often marked "RAZ" and not "PAZ")
- Specify if colored elements are part of the standard interface (e.g., always red button) or if they seem to be related to the problem

3. URLs and Links
- Identify and explicitly copy ALL URLs visible in the image
- Hyperlinks in blue or underlined text
- API endpoints, server addresses
- Format each URL on its own line for clarity: [URL] https://example.com
- For masked/shortened URLs, clearly indicate what text is displayed

4. Highlighted Elements
- Look for circled, framed, highlighted or arrowed areas
- These elements are often important for the client or support
- Explicitly mention their content and highlighting style
- Specifically check if error messages are visible at the bottom or top of the screen

5. Relationship with the Problem
- Establish the link between visible elements and the problem described in the ticket
- Indicate if components seem related to a misconfiguration or error
- Specify the complete name of the module/test concerned by the problem (for example "Methylene blue test (MB)" and not just "blue test")
- Identify if the user has access to the test screen but with errors, or if there is no access at all

6. Potential Answers
- Determine if the image provides elements of answer to a question asked in:
- The ticket title
- The problem description
- Try to extrapolate the precise technical context by observing the interface (e.g., the "blue test" mentioned by the client clearly corresponds to "methylene blue test (MB) - NF EN 933-9")

7. Link with the Discussion
- Check if the image echoes a step described in the discussion thread
- Note correspondences (e.g., same module, same error message as previously mentioned)
- Establish explicit connections between the vocabulary used by the client and what's visible in the interface

8. Broader Technical Context
- Identify the wider context of the application (laboratory, technical tests, standardized tests)
- Note any references to standards or norms (e.g., NF EN 933-9)
- Mention any visible codes or identifiers that might be useful (e.g., sample numbers)

Important Rules:
- Do NOT make ANY interpretation or diagnosis about possible causes
- Do NOT propose solutions or recommendations
- Remain strictly factual and objective, but make explicit links with terms used by the client
- Focus only on what is visible in the image
- Reproduce exact texts (e.g., error messages, parameter labels)
- Pay special attention to modifiable (interactive) and non-modifiable (grayed out) elements
- Systematically use the complete and precise name of modules and tests
- Verify correct reading of buttons and menus (beware of confusions like PAZ/RAZ)
- ALWAYS list URLs and links in a separate dedicated section
"""

    # Template for the system prompt; {instructions} receives the block above
    modele_systeme = """
You are an expert in image analysis for BRG-Lab technical support for CBAO company.
Your mission is to analyze screenshots related to the support ticket context.

You must be extremely precise in your reading of interfaces and technical elements.
Clients often use abbreviated terms (like "blue test") while the interface shows the full term ("Methylene blue test"). You must make the connection between these terms.

Some elements in the interface may cause confusion:
- "RAZ" buttons (reset) are sometimes difficult to read
- Colored elements may be part of the standard interface (and not part of the problem)
- Error messages are often at the bottom of the screen and contain crucial information
- URLs and links must be explicitly captured and listed separately

Structure your image analysis factually:
{instructions}

Your analysis will be used as a factual element for a more complete technical report and to link the client's vocabulary with the actual technical elements.

IMPORTANT: All responses should be in English. Translation to French will be handled separately.
"""
    self.system_prompt = modele_systeme.format(instructions=self.instructions_analyse)

    self._appliquer_config_locale()
    logger.info("AgentImageAnalyser initialized")
|
|
|
|
def _appliquer_config_locale(self) -> None:
|
|
"""
|
|
Applies local configuration to the LLM model.
|
|
"""
|
|
if hasattr(self.llm, "prompt_system"):
|
|
self.llm.prompt_system = self.system_prompt
|
|
|
|
if hasattr(self.llm, "configurer"):
|
|
self.llm.configurer(**self.params)
|
|
|
|
def _verifier_image(self, image_path: str) -> bool:
|
|
"""
|
|
Checks if the image exists and is accessible
|
|
"""
|
|
try:
|
|
if not os.path.exists(image_path) or not os.access(image_path, os.R_OK):
|
|
return False
|
|
|
|
with Image.open(image_path) as img:
|
|
width, height = img.size
|
|
return width > 0 and height > 0
|
|
except Exception as e:
|
|
logger.error(f"Verification failed for {image_path}: {e}")
|
|
return False
|
|
|
|
def _extraire_urls(self, texte: str) -> List[str]:
|
|
"""
|
|
Extracts URLs from a text
|
|
|
|
Args:
|
|
texte: The text to analyze
|
|
|
|
Returns:
|
|
List of extracted URLs
|
|
"""
|
|
import re
|
|
# Pattern to detect URLs (more complete than a simple http:// search)
|
|
url_pattern = r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+'
|
|
|
|
# Search in the text with a broader pattern to capture context
|
|
url_mentions = re.finditer(r'(?:URL|link|adresse|href|http)[^\n]*?(https?://[^\s\)\]\"\']+)', texte, re.IGNORECASE)
|
|
|
|
# List to store URLs with their context
|
|
urls = []
|
|
|
|
# Add URLs extracted with the generic pattern
|
|
for url in re.findall(url_pattern, texte):
|
|
if url not in urls:
|
|
urls.append(url)
|
|
|
|
# Add URLs extracted from the broader context
|
|
for match in url_mentions:
|
|
url = match.group(1)
|
|
if url not in urls:
|
|
urls.append(url)
|
|
|
|
return urls
|
|
|
|
def _construire_prompt(self, image_path: str, contexte: Dict[str, Any]) -> str:
|
|
"""
|
|
Construit le prompt pour l'analyse d'image avec contexte.
|
|
|
|
Args:
|
|
image_path: Chemin vers l'image à analyser
|
|
contexte: Contexte d'analyse du ticket
|
|
|
|
Returns:
|
|
Prompt formaté avec instructions et contexte
|
|
"""
|
|
image_name = os.path.basename(image_path)
|
|
|
|
# Extraire le contexte du ticket (résumé en anglais)
|
|
ticket_content_en = ""
|
|
if isinstance(contexte, dict):
|
|
if "response_en" in contexte:
|
|
ticket_content_en = contexte["response_en"]
|
|
elif "response" in contexte:
|
|
ticket_content_en = contexte["response"]
|
|
|
|
# Extraire le texte OCR si disponible dans le contexte
|
|
ocr_text = ""
|
|
if isinstance(contexte, dict) and "ocr_text" in contexte:
|
|
ocr_text = contexte["ocr_text"]
|
|
elif isinstance(contexte, dict) and "ocr_info" in contexte:
|
|
ocr_text = contexte["ocr_info"].get("texte_en", "")
|
|
|
|
# Construire le prompt avec instructions précises
|
|
prompt = f"""[ENGLISH RESPONSE REQUESTED]
|
|
|
|
Analyze this image in the context of a technical support ticket.
|
|
|
|
IMAGE: {image_name}
|
|
|
|
"""
|
|
|
|
# Ajouter le texte OCR s'il est disponible
|
|
if ocr_text and len(ocr_text.strip()) > 10:
|
|
prompt += f"""OCR TEXT DETECTED IN IMAGE:
|
|
{ocr_text}
|
|
|
|
"""
|
|
|
|
# Ajouter le contexte du ticket
|
|
prompt += f"""SUPPORT TICKET CONTEXT:
|
|
{ticket_content_en[:1500]}
|
|
|
|
INSTRUCTIONS:
|
|
1. Describe what is shown in this image in detail
|
|
2. Identify any error messages, technical information, or interface elements
|
|
3. Explain how this image relates to the support ticket context provided
|
|
4. Note any version numbers, status indicators, or dates visible
|
|
5. Extract specific technical details that might help diagnose the issue
|
|
|
|
If the image contains text, code, or error messages, transcribe all important parts.
|
|
Structure your analysis clearly with headers and bullet points.
|
|
"""
|
|
|
|
return prompt
|
|
|
|
def executer(self, image_path: str, contexte: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """
    Run the analysis of one image through the vision-capable LLM.

    Args:
        image_path: Path of the image to analyze.
        contexte: Optional ticket context passed on to the prompt builder
            and to the ticket-id lookup; defaults to an empty dict.

    Returns:
        On success, a dict with ``timestamp``, ``image``, ``ticket_id``,
        ``analyse`` (``{"en": ..., "fr": ...}``) and ``metadata``.
        On any failure (unreadable image, model without image support,
        invalid model answer, or an exception) the dict built by
        ``_erreur``.
    """
    # NOTE(review): the dict returned by _erreur stores "analyse" as a plain
    # string, whereas the success dict stores a {"en", "fr"} mapping;
    # callers have to handle both shapes.
    contexte = {} if contexte is None else contexte

    nom_image = os.path.basename(image_path)
    logger.info(f"Début de l'analyse de l'image {nom_image}")

    try:
        if not self._verifier_image(image_path):
            return self._erreur(f"Image invalide ou inaccessible: {image_path}", path=image_path)

        ticket_id = self._extraire_ticket_id(image_path, contexte)
        prompt = self._construire_prompt(image_path, contexte)

        if not self.llm.peut_analyser_images():
            return self._erreur("Le modèle ne supporte pas l'analyse d'images", path=image_path)

        reponse = self.llm.interroger_avec_image(image_path, prompt)
        if self._verifier_reponse_invalide(reponse):
            return self._erreur(f"Réponse invalide du modèle pour {image_path}", path=image_path)

        # The model answers in English; translate unless the model is
        # flagged as English-only
        reponse_en = reponse
        reponse_fr = None if self.llm.mode_anglais_uniquement else self.llm.traduire(reponse)

        # Prefer the normalized model name when the model exposes one
        model_name = getattr(self.llm, "pipeline_normalized_name", None)
        if model_name is None:
            model_name = getattr(self.llm, "modele", "llama3-vision-90b-instruct")

        horodatage = self._get_timestamp()
        analyse = {
            "en": reponse_en,
            # Fall back to the English text when no translation is available
            "fr": reponse_fr or reponse_en,
        }
        metadata = {
            "model_info": {
                "name": model_name,
                "type": "vision",
                "parameters": self.llm.params,
            },
            "image_path": image_path,
            "success": True,
        }
        resultat = {
            "timestamp": horodatage,
            "image": nom_image,
            "ticket_id": ticket_id,
            "analyse": analyse,
            "metadata": metadata,
        }

        logger.info(f"Analyse de {nom_image} terminée avec succès")
        return resultat

    except Exception as exc:
        logger.error(f"Erreur lors de l'analyse de {image_path}: {str(exc)}")
        return self._erreur(f"Erreur lors de l'analyse: {str(exc)}", path=image_path)
|
|
|
|
def _corriger_termes_courants(self, texte: str) -> str:
|
|
"""
|
|
Corrects commonly misinterpreted terms by the model.
|
|
"""
|
|
corrections = {
|
|
"PAZ": "RAZ",
|
|
"Essai bleu": "Essai au bleu de méthylène",
|
|
"essai bleu": "essai au bleu de méthylène",
|
|
"Essai au bleu": "Essai au bleu de méthylène",
|
|
"Methylene blue test": "Essai au bleu de méthylène",
|
|
"Blue test": "Essai au bleu de méthylène"
|
|
}
|
|
|
|
for terme_incorrect, terme_correct in corrections.items():
|
|
texte = texte.replace(terme_incorrect, terme_correct)
|
|
|
|
return texte
|
|
|
|
def _erreur(self, message: str, path: str, raw: str = "") -> Dict[str, Any]:
|
|
"""
|
|
Creates an error response dictionary
|
|
"""
|
|
return {
|
|
"analyse": f"ERROR: {message}",
|
|
"raw_response": raw,
|
|
"error": True,
|
|
"metadata": {
|
|
"image_path": path,
|
|
"image_name": os.path.basename(path),
|
|
"timestamp": self._get_timestamp(),
|
|
"error": True,
|
|
"source_agent": self.nom
|
|
}
|
|
}
|
|
|
|
def _get_timestamp(self) -> str:
|
|
"""Returns a timestamp in YYYYMMDD_HHMMSS format"""
|
|
from datetime import datetime
|
|
return datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
|
|
def _extraire_ticket_id(self, image_path: str, contexte: Dict[str, Any]) -> str:
|
|
"""
|
|
Extrait l'ID du ticket à partir du chemin de l'image ou du contexte.
|
|
|
|
Args:
|
|
image_path: Chemin vers l'image
|
|
contexte: Contexte d'analyse du ticket
|
|
|
|
Returns:
|
|
ID du ticket ou "UNKNOWN" si non trouvé
|
|
"""
|
|
# D'abord, chercher dans le contexte
|
|
if isinstance(contexte, dict):
|
|
if "metadata" in contexte and "ticket_id" in contexte["metadata"]:
|
|
return contexte["metadata"]["ticket_id"]
|
|
if "ticket_id" in contexte:
|
|
return contexte["ticket_id"]
|
|
|
|
# Ensuite, chercher dans le chemin de l'image
|
|
parts = image_path.split(os.path.sep)
|
|
for part in parts:
|
|
# Format T12345
|
|
if part.startswith("T") and part[1:].isdigit():
|
|
return part
|
|
# Format ticket_T12345
|
|
if part.startswith("ticket_T"):
|
|
return part.replace("ticket_", "")
|
|
|
|
return "UNKNOWN"
|
|
|
|
def _error_response(self, message: str, ticket_id: str = "UNKNOWN") -> Dict[str, Any]:
|
|
"""
|
|
Crée une réponse d'erreur standardisée.
|
|
|
|
Args:
|
|
message: Message d'erreur
|
|
ticket_id: ID du ticket
|
|
|
|
Returns:
|
|
Dictionnaire avec la réponse d'erreur formatée
|
|
"""
|
|
return {
|
|
"analyse": f"ERREUR: {message}",
|
|
"analyse_en": f"ERROR: {message}",
|
|
"error": True,
|
|
"metadata": {
|
|
"timestamp": self._get_timestamp(),
|
|
"error": True,
|
|
"ticket_id": ticket_id,
|
|
"source_agent": self.nom
|
|
}
|
|
}
|
|
|
|
def _verifier_reponse_invalide(self, response: str) -> bool:
|
|
"""
|
|
Vérifie si la réponse du modèle est invalide ou inappropriée
|
|
|
|
Args:
|
|
response: Réponse du modèle à analyser
|
|
|
|
Returns:
|
|
True si la réponse est invalide, False sinon
|
|
"""
|
|
response_lower = response.lower()
|
|
|
|
# Vérifier les marqueurs d'échec courants
|
|
invalid_markers = [
|
|
"i cannot", "unable to", "i'm unable", "i am unable",
|
|
"i don't see", "i do not see", "i can't see", "cannot see",
|
|
"sorry, i cannot", "i apologize", "not able to"
|
|
]
|
|
|
|
# Si la réponse est vide ou trop courte
|
|
if not response or len(response.strip()) < 20:
|
|
return True
|
|
|
|
# Si la réponse contient des marqueurs d'échec
|
|
for marker in invalid_markers:
|
|
if marker in response_lower:
|
|
# Vérifier qu'il s'agit bien d'un échec global et non d'une réponse légitime
|
|
# qui inclut ces termes dans un contexte différent
|
|
context_words = ["but i can", "however", "nevertheless", "although", "can describe"]
|
|
has_context = any(context in response_lower for context in context_words)
|
|
|
|
if not has_context and marker in response_lower[:100]:
|
|
return True
|
|
|
|
return False |