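"""Agent that generates the final cross-referenced report for a support ticket.

This module combines the textual analysis of a ticket with the analyses of its
screenshots, builds the report prompt (translated to English for
LlamaVision-style models), queries the LLM wrapper, and saves the resulting
report through the pipeline's saving helpers.
"""
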
import json
import logging
import os
import traceback
from datetime import datetime
from typing import Dict, Any, Optional

from ..base_agent import BaseAgent
from ..utils.pipeline_logger import sauvegarder_donnees
from utils.translate_utils import fr_to_en, en_to_fr

# Logging configuration
logger = logging.getLogger("AgentReportGenerator")
logger.setLevel(logging.DEBUG)  # Verbose logging while debugging the pipeline


class AgentReportGenerator(BaseAgent):
    def __init__(self, llm):
        super().__init__("AgentReportGenerator", llm)

        self.params = {
            "temperature": 0.2,
            "top_p": 0.8,
            "max_tokens": 10000
        }

        # System prompt in French (preserved for French-speaking models)
        self.system_prompt_fr = """Tu es un expert en support technique chargé de générer un rapport final à partir des analyses d'un ticket de support.
Ton rôle est de croiser les informations provenant :
- de l'analyse textuelle du ticket client
- des analyses détaillées de plusieurs captures d'écran

Tu dois structurer ta réponse en format question/réponse de manière claire, en gardant l'intégralité des points importants.

Ne propose jamais de solution. Ne reformule pas le contexte.
Ta seule mission est de croiser les données textuelles et visuelles et d'en tirer des observations claires, en listant les éléments factuels visibles dans les captures qui appuient ou complètent le texte du ticket.

Structure du rapport attendu :
1. Contexte général (résumé du ticket textuel en une phrase)
2. Problèmes ou questions identifiés (sous forme de questions claires)
3. Résumé croisé image/texte pour chaque question
4. Liste d'observations supplémentaires pertinentes (si applicable)
5. Tableau chronologique d'échanges
- Inclure un tableau structuré des échanges entre client et support
- Format : Émetteur | Type | Date | Contenu | Éléments visuels pertinents
- Ne pas mentionner les noms réels des personnes, utiliser "CLIENT" et "SUPPORT"
- Synthétiser le contenu tout en conservant les informations importantes
- Conserver les compléments d'informations apportés par les images (exemple: client: essai bleu, analyse image: Essai au bleu de méthylène (MB) - NF EN 933-9 (02-2022))
- Conserver les liens utiles (documentation, FAQ, manuel, liens vers des pages web, etc.)
- Associer les éléments visuels des captures d'écran aux échanges correspondants

Règles pour le tableau d'échanges :
- TYPE peut être : question, réponse, information, complément visuel
- Pour chaque échange du client mentionnant un problème, ajoute les éléments visuels des captures qui contextualisent ce problème
- Pour chaque réponse du support, ajoute les éléments visuels qui confirment ou infirment la réponse
- N'invente aucun contenu ni aucune date
- Utilise les données factuelles des images pour enrichir la compréhension des échanges

Reste strictement factuel. Ne fais aucune hypothèse. Ne suggère pas d'étapes ni d'interprétation."""

        # System prompt in English for LlamaVision
        self.system_prompt_en = """You are a technical support expert responsible for generating a final report from the analyses of a support ticket.
Your role is to cross-reference information from:
- textual analysis of the customer ticket
- detailed analyses of multiple screenshots

You must structure your response in a clear question/answer format, keeping all important points.

Never propose a solution. Do not reformulate the context.
Your only mission is to cross-reference textual and visual data to draw clear observations, listing factual elements visible in the screenshots that support or complement the ticket text.

Expected report structure:
1. General context (textual ticket summary in one sentence)
2. Identified problems or questions (in the form of clear questions)
3. Cross-referenced image/text summary for each question
4. List of additional relevant observations (if applicable)
5. Chronological exchange table
- Include a structured table of exchanges between client and support
- Format: Sender | Type | Date | Content | Relevant visual elements
- Do not mention real names, use "CLIENT" and "SUPPORT"
- Synthesize content while preserving important information
- Preserve additional information provided by images (example: client: blue test, image analysis: Methylene blue test (MB) - NF EN 933-9 (02-2022))
- Preserve useful links (documentation, FAQ, manual, links to web pages, etc.)
- Associate visual elements from screenshots with corresponding exchanges

Rules for the exchange table:
- TYPE can be: question, answer, information, visual complement
- For each client exchange mentioning a problem, add visual elements from screenshots that contextualize the problem
- For each support response, add visual elements that confirm or contradict the response
- Do not invent any content or date
- Use factual data from images to enrich understanding of exchanges

Stay strictly factual. Make no assumptions. Do not suggest steps or interpretation."""

        self._appliquer_config_locale()
        logger.info("AgentReportGenerator initialized")

    def _appliquer_config_locale(self) -> None:
        """
        Applies local configuration based on the model used (LlamaVision or other)
        """
        if hasattr(self.llm, "prompt_system"):
            model_name = getattr(self.llm, "modele", "").lower()
            # Use English prompt for LlamaVision
            if "llama" in model_name or "vision" in model_name:
                self.llm.prompt_system = self.system_prompt_en
                logger.info("LlamaVision mode detected: using English system prompt")
            else:
                self.llm.prompt_system = self.system_prompt_fr
                logger.info("Using French system prompt")

        if hasattr(self.llm, "configurer"):
            self.llm.configurer(**self.params)

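    # Illustrative note (an assumption inferred from how the methods below read
    # their input, not a schema enforced anywhere in this file): rapport_data is
    # expected to look roughly like
    #
    #   {
    #       "ticket_id": "12345",
    #       "ticket_analyse": "<textual analysis of the ticket>",
    #       "analyse_images": {
    #           "<path/to/capture.png>": {
    #               "sorting": {"is_relevant": True},
    #               "analysis": {"analyse": "<FR analysis>", "analyse_en": "<EN analysis>"},
    #           },
    #       },
    #   }
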
    def _verifier_donnees_entree(self, rapport_data: Dict[str, Any]) -> bool:
        """
        Verifies that the input data contains the necessary elements.

        Args:
            rapport_data: Data for generating the report

        Returns:
            bool: True if data is valid, False otherwise
        """
        ticket_id = rapport_data.get("ticket_id")
        if not ticket_id:
            logger.error("Validation error: missing ticket_id")
            return False

        ticket_analyse = rapport_data.get("ticket_analyse")
        if not ticket_analyse:
            logger.error(f"Validation error for {ticket_id}: missing ticket analysis")
            return False

        analyses_images = rapport_data.get("analyse_images", {})
        if not analyses_images:
            logger.warning(f"Warning for {ticket_id}: no image analysis available")
            # Continue anyway because we can generate a report without images

        # Check if at least one image has been analyzed
        images_analysees = 0
        for img_data in analyses_images.values():
            if img_data.get("analysis") and img_data["analysis"].get("analyse"):
                images_analysees += 1

        if images_analysees == 0 and analyses_images:
            logger.warning(f"Warning for {ticket_id}: {len(analyses_images)} images found but none analyzed")

        logger.info(f"Validation for {ticket_id}: OK, {images_analysees} images analyzed out of {len(analyses_images)} images")
        return True

    def executer(self, rapport_data: Dict[str, Any]) -> str:
        """
        Generates the final cross-referenced report for a ticket.

        Returns the report text (French version); on failure, returns a string
        prefixed with "ERROR:".
        """
        ticket_id = rapport_data.get("ticket_id", "Unknown")
        print(f"AgentReportGenerator: generating report for ticket {ticket_id}")

        try:
            # Check and log input data for debugging
            logger.debug(f"Data received for {ticket_id}: {json.dumps(rapport_data, default=str)[:500]}...")

            # Verify that the input data is valid
            if not self._verifier_donnees_entree(rapport_data):
                error_msg = f"Unable to generate report: invalid input data for {ticket_id}"
                print(f"ERROR: {error_msg}")
                return f"ERROR: {error_msg}"

            print(f"Preparing prompt for ticket {ticket_id}...")
            prompt = self._generer_prompt(rapport_data)
            logger.debug(f"Generated prompt ({len(prompt)} characters): {prompt[:500]}...")

            # Determine whether the model is LlamaVision and translate if necessary
            model_name = getattr(self.llm, "modele", "").lower()
            need_translation = "llama" in model_name or "vision" in model_name

            if need_translation:
                # Add an explicit marker and translate the prompt
                logger.info(f"[LANGUE] Le modèle '{model_name}' nécessite une entrée en anglais")
                logger.info(f"[TRADUCTION] Traduction du prompt FR → EN pour {ticket_id}")
                logger.info(f"[TRADUCTION] Taille du prompt original en français: {len(prompt)} caractères")
                translated_prompt = fr_to_en(prompt)
                prompt_en = f"[ENGLISH RESPONSE REQUESTED]\n\n{translated_prompt}"
                logger.info(f"[TRADUCTION] Taille du prompt traduit en anglais: {len(prompt_en)} caractères")
            else:
                logger.info(f"[LANGUE] Le modèle '{model_name}' accepte une entrée en français, aucune traduction nécessaire")
                prompt_en = prompt

            print(f"Analysis in progress for ticket {ticket_id}...")
            logger.info(f"[LANGUE] Envoi du prompt {'en anglais' if need_translation else 'en français'} au modèle")
            response = self.llm.interroger(prompt_en)
            logger.info(f"[LANGUE] Réponse reçue du modèle {'en anglais' if need_translation else 'en français'}: {len(response)} caractères")

            # Translate the response to French if necessary
            if need_translation:
                logger.info(f"[TRADUCTION] Traduction de la réponse EN → FR pour compatibilité")
                logger.info(f"[TRADUCTION] Taille de la réponse originale en anglais: {len(response)} caractères")
                response_fr = en_to_fr(response)
                logger.info(f"[TRADUCTION] Taille de la réponse traduite en français: {len(response_fr)} caractères")
            else:
                response_fr = response
                logger.info(f"[LANGUE] La réponse est déjà en français, aucune traduction nécessaire")

            print(f"Analysis completed: {len(response_fr)} characters")
            logger.debug(f"Response received ({len(response_fr)} characters): {response_fr[:500]}...")

            # Create the complete result with metadata
            result = {
                "prompt": prompt,
                "prompt_en": prompt_en if need_translation else None,
                "response": response_fr,  # French version for compatibility
                "response_en": response if need_translation else None,  # Original English version
                "metadata": {
                    "ticket_id": ticket_id,
                    "timestamp": self._get_timestamp(),
                    "source_agent": self.nom,
                    "model_info": {
                        "model": getattr(self.llm, "modele", str(type(self.llm))),
                        **getattr(self.llm, "params", {})
                    },
                    "language": "en-fr" if need_translation else "fr",  # Indicates the language used
                    "is_translated": need_translation
                }
            }

            # Use sauvegarder_donnees to centralize the saving logic
            try:
                # Retrieve the normalized model name
                model_name = getattr(self.llm, "pipeline_normalized_name", getattr(self.llm, "modele", "unknown"))

                # Update the metadata with the model name
                result["metadata"]["model_info"]["model"] = model_name

                # Save the report via the centralized function only
                sauvegarder_donnees(
                    ticket_id=ticket_id,
                    step_name="rapport_final",
                    data=result,
                    base_dir=None,
                    is_resultat=True
                )
                print(f"Rapport final généré et sauvegardé pour le ticket {ticket_id}")

                # Also create a standard path for the report directory (for compatibility)
                reports_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "reports"))
                ticket_reports_dir = os.path.join(reports_dir, ticket_id)
                os.makedirs(ticket_reports_dir, exist_ok=True)
                txt_report_path = os.path.join(ticket_reports_dir, f"rapport_final_{ticket_id}.txt")
                with open(txt_report_path, "w", encoding="utf-8") as f:
                    f.write(response_fr)
                print(f"Rapport également sauvegardé en texte dans {reports_dir}/{ticket_id}")

            except Exception as e:
                logger.error(f"Erreur lors de la sauvegarde des résultats: {str(e)}")
                logger.debug(traceback.format_exc())

            # Add to history
            self.ajouter_historique("rapport_final", {
                "ticket_id": ticket_id,
                "prompt": prompt,
                "timestamp": self._get_timestamp()
            }, response_fr)  # French version for the history

            print(f"Report processing completed for ticket {ticket_id}")
            return response_fr  # Return the French version for pipeline consistency

        except Exception as e:
            logger.error(f"Error generating report: {str(e)}")
            logger.error(traceback.format_exc())
            print(f"CRITICAL ERROR during report generation: {str(e)}")
            return f"ERROR: {str(e)}"

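    # The helper below scans an on-disk layout that, judging from the code, is
    # assumed to look like (illustrative names only):
    #   output/
    #     ticket_12345/
    #       12345_20250101_120000/   <- one directory per extraction, newest wins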
    def _trouver_repertoire_extraction(self, ticket_id: str) -> Optional[str]:
        """
        Finds the most recent extraction directory for a ticket.

        Args:
            ticket_id: Ticket ID

        Returns:
            Path of the extraction directory, or None if not found
        """
        base_dir = "output"
        ticket_dir = os.path.join(base_dir, f"ticket_{ticket_id}")

        if not os.path.exists(ticket_dir):
            return None

        # Find the most recent extraction
        extractions = []
        for extraction in os.listdir(ticket_dir):
            extraction_path = os.path.join(ticket_dir, extraction)
            if os.path.isdir(extraction_path) and extraction.startswith(ticket_id):
                extractions.append(extraction_path)

        if not extractions:
            return None

        # Sort by modification time (most recent first)
        extractions.sort(key=os.path.getmtime, reverse=True)
        return extractions[0]

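    # For orientation: the prompt assembled below has roughly this shape
    # (illustrative content, not produced by this file):
    #
    #   Voici les données d'analyse pour un ticket de support :
    #
    #   === ANALYSE DU TICKET ===
    #   <textual ticket analysis>
    #
    #   === ANALYSES D'IMAGES ===
    #   --- IMAGE : capture_01.png ---
    #   <analysis of that screenshot>
    #
    #   ... followed by the table-format instructions defined below.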
    def _generer_prompt(self, rapport_data: Dict[str, Any]) -> str:
        """
        Generates the prompt for the report generator.

        Args:
            rapport_data: Report data containing the ticket analysis and image analyses

        Returns:
            str: Generated prompt
        """
        ticket_text = rapport_data.get("ticket_analyse", "")
        image_blocs = []
        analyses_images = rapport_data.get("analyse_images", {})

        # Log the image data for verification
        logger.info(f"Number of images to analyze: {len(analyses_images)}")

        for chemin_image, analyse_obj in analyses_images.items():
            # Check whether the image was flagged as relevant
            # (currently informational only: every analyzed image is included below)
            is_relevant = analyse_obj.get("sorting", {}).get("is_relevant", False)

            # Get the analysis if it exists
            analyse = ""
            if "analysis" in analyse_obj and analyse_obj["analysis"]:
                # Check whether an English or French version of the analysis exists
                if "analyse_en" in analyse_obj["analysis"]:
                    # Use the English version directly if the model is LlamaVision
                    model_name = getattr(self.llm, "modele", "").lower()
                    if "llama" in model_name or "vision" in model_name:
                        analyse = analyse_obj["analysis"].get("analyse_en", "")
                    else:
                        analyse = analyse_obj["analysis"].get("analyse", "")
                else:
                    # Fall back to the standard analysis if no language-specific versions exist
                    analyse = analyse_obj["analysis"].get("analyse", "")

            if analyse:
                image_blocs.append(f"--- IMAGE : {os.path.basename(chemin_image)} ---\n{analyse}\n")
                logger.info(f"Adding analysis of image {os.path.basename(chemin_image)} ({len(analyse)} characters)")
            else:
                logger.warning(f"Image {os.path.basename(chemin_image)} without analysis")

        bloc_images = "\n".join(image_blocs)

        # Log data sizes for verification
        logger.info(f"Size of ticket analysis: {len(ticket_text)} characters")
        logger.info(f"Size of image block: {len(bloc_images)} characters")

        # Keep the prompt in French: the output will either be in French or translated back from English
        prompt = (
            f"Voici les données d'analyse pour un ticket de support :\n\n"
            f"=== ANALYSE DU TICKET ===\n{ticket_text}\n\n"
            f"=== ANALYSES D'IMAGES ===\n{bloc_images}\n\n"
            f"Génère un rapport croisé en suivant les instructions précédentes, incluant un tableau chronologique des échanges entre CLIENT et SUPPORT. "
            f"Utilise le format suivant pour le tableau :\n"
            f"| ÉMETTEUR | TYPE | DATE | CONTENU | ÉLÉMENTS VISUELS |\n"
            f"| --- | --- | --- | --- | --- |\n"
            f"| CLIENT | question | date | texte de la question | éléments pertinents des images |\n"
            f"| SUPPORT | réponse | date | texte de la réponse | éléments pertinents des images |\n\n"
            f"Ce tableau doit synthétiser les échanges tout en intégrant les données pertinentes des images avec le maximum de contexte technique."
        )

        return prompt

    def _get_timestamp(self) -> str:
        """Returns a timestamp in YYYYMMDD_HHMMSS format."""
        # datetime is already imported at module level
        return datetime.now().strftime("%Y%m%d_%H%M%S")
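
# Minimal usage sketch (illustrative only, not part of the pipeline): the agent
# expects an LLM wrapper exposing the attributes read in this file (modele,
# prompt_system, configurer, interroger). Because of the relative imports and
# the file I/O in executer(), this module is meant to be driven by the
# orchestrating pipeline rather than run directly; FakeLLM below is a
# hypothetical stand-in, and the French model name keeps the no-translation path.
#
#     class FakeLLM:
#         modele = "modele-fr"
#         prompt_system = ""
#
#         def configurer(self, **params):
#             self.params = params
#
#         def interroger(self, prompt: str) -> str:
#             return "stub report"
#
#     agent = AgentReportGenerator(FakeLLM())
#     rapport = agent.executer({
#         "ticket_id": "12345",
#         "ticket_analyse": "Le client signale un problème d'affichage ...",
#         "analyse_images": {},
#     })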