# Mirror of https://github.com/Ladebeze66/llm_ticket3.git
# Synced 2025-12-13 12:36:50 +01:00
from ..base_agent import BaseAgent
|
|
from typing import Dict, Any, List, Union
|
|
import logging
|
|
import json
|
|
import os
|
|
from datetime import datetime
|
|
from loaders.ticket_data_loader import TicketDataLoader
|
|
from ..utils.pipeline_logger import sauvegarder_donnees
|
|
from utils.translate_utils import fr_to_en, en_to_fr
|
|
import re
|
|
|
|
# Module-level logger, named after the agent class defined below.
logger = logging.getLogger("AgentTicketAnalyser")
|
|
|
|
class AgentTicketAnalyser(BaseAgent):
    """Agent that analyses a CBAO support ticket and produces a bilingual summary.

    The agent makes sure the ticket content is in English (translating it when
    needed), queries the LLM with a structured analysis prompt, extracts every
    URL from the answer, translates the answer back to French for the rest of
    the pipeline and persists the result in the ticket's extraction directory.
    """

    def __init__(self, llm):
        """Initialise the agent, its prompts and its sampling parameters.

        Args:
            llm: LLM client; may expose ``prompt_system`` and ``configurer``.
        """
        super().__init__("AgentTicketAnalyser", llm)

        # Low temperature / top_p: the analysis must stay factual, not creative.
        self.params = {
            "temperature": 0.1,
            "top_p": 0.5,
            "max_tokens": 4000
        }

        # Main analysis guidelines, injected verbatim into the system prompt.
        self.instructions = """
You will analyze a technical support ticket from the CBAO company.

The ticket consists of an initial message from the client and subsequent exchanges between the client and technical support.

Follow these guidelines:

1. Overview:
- Summarize the main issue reported in the ticket
- Identify the product or service concerned

2. Detailed Analysis:
- Analyze each message chronologically
- Extract key information, error messages, and details about the problem
- Note any attached images or files mentioned

3. Links and Technical Details:
- Identify ALL links (URLs) mentioned in the ticket
- Extract technical terminology, specific error codes, or reference numbers
- If a URL appears in the ticket, always include it in your summary
- Note system configurations or version information

4. Conversation Flow:
- Identify questions asked by support and client responses
- Highlight information requests that remain unanswered
- Note any action items or next steps mentioned

5. Resolution:
- Determine if the issue was resolved
- Summarize the solution if provided
- Identify if the ticket was escalated or needed additional input

IMPORTANT: Your analysis should:
- Be factual and objective
- Extract ALL URLs and links
- Avoid speculation or technical diagnosis
- Be structured chronologically
- Focus on the exchange of information
- Distinguish clearly between client and support statements
"""

        # System prompt: role definition + the guidelines above.
        self.system_prompt = f"""
You are an expert in support ticket analysis at CBAO, tasked with extracting and organizing information from BRG-Lab technical support tickets.

Your goal is to produce a clear, factual summary of the support interaction that will help the technical team understand:
- The issue reported
- Information exchanged
- Current status
- Any URLs or technical details that need attention

Specifically regarding URLs and technical details:
- You MUST preserve ALL links (URLs) that appear in the ticket
- Format each URL on its own line, prefixed with [URL] for visibility
- Flag any technical parameters, configuration details, or version information

{self.instructions}

Your output will be used by the technical team to understand the ticket context, so factual accuracy is essential.

IMPORTANT: All responses should be in English. Translation to French will be handled separately.
"""

        self.ticket_loader = TicketDataLoader()
        self._appliquer_config_locale()
        logger.info("AgentTicketAnalyser initialisé")

    def _appliquer_config_locale(self) -> None:
        """Apply this agent's system prompt and sampling parameters to the LLM.

        Both attributes are optional on the LLM client, hence the hasattr guards.
        """
        if hasattr(self.llm, "prompt_system"):
            self.llm.prompt_system = self.system_prompt

        if hasattr(self.llm, "configurer"):
            self.llm.configurer(**self.params)

    def _extraire_urls(self, texte: str) -> List[str]:
        """Extract all URLs from a text.

        Args:
            texte: Text to scan.

        Returns:
            De-duplicated list of URLs, in order of first appearance.
        """
        # FIX: the previous pattern, r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+',
        # had no '/' in its character class and therefore truncated every URL at
        # the end of the host name, dropping paths and query strings. Match up
        # to whitespace or a closing delimiter instead, then strip trailing
        # sentence punctuation.
        url_pattern = r'https?://[^\s<>)\]"\']+'

        # Wider pattern to also catch URLs introduced by contextual keywords
        # (e.g. "URL: ...", "see link http://...").
        url_mentions = re.finditer(
            r'(?:URL|link|adresse|href|http)[^\n]*?(https?://[^\s\)\]\"\']+)',
            texte,
            re.IGNORECASE,
        )

        urls: List[str] = []

        # Generic pattern first, preserving order of appearance.
        for url in re.findall(url_pattern, texte):
            url = url.rstrip('.,;:')
            if url and url not in urls:
                urls.append(url)

        # Then any URL only found through the contextual pattern.
        for match in url_mentions:
            url = match.group(1).rstrip('.,;:')
            if url and url not in urls:
                urls.append(url)

        return urls

    def _resoudre_ticket_id(self, ticket_data: Dict[str, Any]) -> str:
        """Return a usable ticket id.

        Falls back to parsing the JSON file path (components like ``T123`` or
        ``ticket_T123``) and finally to ``"UNKNOWN"``.

        Args:
            ticket_data: Raw ticket payload.

        Returns:
            The resolved ticket id, never empty.
        """
        ticket_id = ticket_data.get("ticket_id", "")
        if (not ticket_id or ticket_id == "UNKNOWN") and "file_path" in ticket_data:
            for part in ticket_data["file_path"].split(os.path.sep):
                if part.startswith("T") and len(part) >= 2 and part[1:].isdigit():
                    ticket_id = part
                    break
                if part.startswith("ticket_T"):
                    ticket_id = part.replace("ticket_", "")
                    break
        return ticket_id if ticket_id else "UNKNOWN"

    def _obtenir_contenu_anglais(
        self,
        ticket_id: str,
        ticket_data: Dict[str, Any],
        ticket_content: str,
    ) -> tuple:
        """Return the ticket content in English.

        Args:
            ticket_id: Resolved ticket id (for logging only).
            ticket_data: Raw ticket payload (may carry ``content_en`` or
                ``is_english``).
            ticket_content: Original content.

        Returns:
            Tuple ``(content_en, is_translated)``.
        """
        # Prefer a translation already provided upstream.
        if "content_en" in ticket_data:
            logger.info(f"[LANGUE] Utilisation du contenu déjà traduit en anglais pour le ticket {ticket_id}")
            return ticket_data["content_en"], True

        # Content explicitly flagged as English: no translation needed.
        if ticket_data.get("is_english", False):
            logger.info(f"[LANGUE] Le contenu du ticket {ticket_id} est déjà en anglais")
            return ticket_content, False

        # Otherwise translate FR -> EN for the analysis.
        logger.info(f"[TRADUCTION] Nécessité de traduire le contenu du ticket {ticket_id} en anglais")
        logger.info(f"[TRADUCTION] Taille du contenu original à traduire: {len(ticket_content)} caractères")
        content_en = fr_to_en(ticket_content)
        logger.info(f"[TRADUCTION] Traduction terminée, résultat: {len(content_en)} caractères")
        return content_en, True

    def _metadata(self, ticket_id: str, model_name: str, **extra: Any) -> Dict[str, Any]:
        """Build the metadata payload attached to every result.

        Args:
            ticket_id: Resolved ticket id.
            model_name: Normalised model name.
            **extra: Additional metadata entries appended after the base keys.

        Returns:
            Metadata dictionary.
        """
        metadata = {
            "timestamp": self._get_timestamp(),
            "source_agent": self.nom,
            "ticket_id": ticket_id,
            "model_info": {
                "model": model_name,
                **self.params
            },
        }
        metadata.update(extra)
        return metadata

    def _sauvegarder_resultat(self, ticket_id: str, result: Dict[str, Any], response_fr: str) -> None:
        """Persist the analysis result in the ticket's most recent extraction dir.

        Falls back to pipeline_logger's default directory resolution when no
        extraction directory can be found. Never raises: save failures are
        logged and must not abort the analysis.

        Args:
            ticket_id: Resolved ticket id.
            result: Full analysis result to save.
            response_fr: French response (used only for the progress message).
        """
        try:
            extraction_dir = None
            ticket_dir = os.path.join("output", f"ticket_{ticket_id}")

            if os.path.exists(ticket_dir):
                # Collect extraction sub-directories for this ticket.
                extractions = [
                    os.path.join(ticket_dir, entry)
                    for entry in os.listdir(ticket_dir)
                    if os.path.isdir(os.path.join(ticket_dir, entry))
                    and entry.startswith(ticket_id)
                ]
                if extractions:
                    # Most recently modified extraction first.
                    extractions.sort(key=os.path.getmtime, reverse=True)
                    extraction_dir = extractions[0]

            if extraction_dir:
                rapport_dir = os.path.join(extraction_dir, f"{ticket_id}_rapports")
                os.makedirs(rapport_dir, exist_ok=True)

                sauvegarder_donnees(
                    ticket_id=ticket_id,
                    step_name="analyse_ticket",
                    data=result,
                    base_dir=rapport_dir,
                    is_resultat=True
                )
                print(f" Analyse terminée: {len(response_fr)} caractères")
            else:
                logger.error(f"Impossible de déterminer le répertoire d'extraction pour {ticket_id}")
                # Let pipeline_logger pick a directory so the data is not lost.
                sauvegarder_donnees(ticket_id, "analyse_ticket", result, is_resultat=True)
        except Exception as save_err:
            logger.error(f"Erreur lors de la sauvegarde des données: {save_err}")
            print(f" Erreur de sauvegarde: {save_err}")

    def executer(self, ticket_data: Dict[str, Any]) -> Dict[str, Any]:
        """Analyse a support ticket and extract the important information.

        Args:
            ticket_data: Ticket payload; expected keys include ``ticket_id``,
                ``content`` and optionally ``content_en``, ``is_english``,
                ``file_path``.

        Returns:
            Dictionary with the analysis in both languages, the extracted URLs
            and metadata; on failure, an error dictionary with ``error: True``.
        """
        # Normalise the model name up-front so it is available even on error.
        model_name = getattr(self.llm, "modele", "llama3-vision-90b-instruct")
        if model_name:
            model_name = model_name.replace(".", "-").replace(":", "-").replace("_", "-")

        ticket_id = self._resoudre_ticket_id(ticket_data)
        ticket_content = ticket_data.get("content", "")

        print(f" AgentTicketAnalyser: analyse du ticket {ticket_id}")

        # Guard: nothing meaningful to analyse.
        if not ticket_content or len(ticket_content) < 10:
            logger.warning(f"Contenu du ticket {ticket_id} vide ou trop court")
            return {
                "response": "Contenu du ticket insuffisant pour analyse",
                "response_en": "Ticket content insufficient for analysis",
                "error": True,
                "metadata": self._metadata(ticket_id, model_name),
            }

        try:
            ticket_content_original = ticket_content
            ticket_content_en, is_translated = self._obtenir_contenu_anglais(
                ticket_id, ticket_data, ticket_content
            )

            # Build the analysis prompt from the English content.
            logger.info("[LANGUE] Génération du prompt d'analyse en anglais")
            prompt = self._generer_prompt({"ticket_id": ticket_id, "content": ticket_content_en})
            logger.info(f"[LANGUE] Taille du prompt en anglais: {len(prompt)} caractères")

            # Query the LLM.
            logger.info(f"[LANGUE] Envoi du prompt en anglais au modèle pour analyse du ticket {ticket_id}")
            response_en = self.llm.interroger(prompt)
            logger.info(f"[LANGUE] Réponse reçue du modèle en anglais: {len(response_en)} caractères")

            # Extract URLs from the model's answer.
            urls = self._extraire_urls(response_en)
            if urls:
                logger.info(f"[ANALYSE] {len(urls)} URLs extraites de l'analyse: {urls}")

            # Translate the answer EN -> FR for pipeline consistency.
            logger.info("[TRADUCTION] Traduction de la réponse d'analyse du ticket de EN vers FR")
            logger.info(f"[TRADUCTION] Taille de la réponse en anglais: {len(response_en)} caractères")
            response_fr = en_to_fr(response_en)
            logger.info(f"[TRADUCTION] Taille de la réponse traduite en français: {len(response_fr)} caractères")

            # NOTE(review): the previous version also built "<!-- ... START/END -->"
            # marked variants of both responses but never used them, while the
            # metadata below still advertises translation_markers=True — the two
            # should probably be reconciled; behaviour kept as-is here.
            result = {
                "prompt": ticket_content_original,
                "prompt_en": ticket_content_en,
                "response": response_fr,
                "response_en": response_en,
                "urls_extracted": urls,
                "metadata": self._metadata(
                    ticket_id,
                    model_name,
                    language="en-fr",
                    translation_markers=True,
                    is_translated=is_translated,
                ),
            }

            logger.info("[LANGUES] Résultat d'analyse disponible en deux langues: EN et FR")
            logger.info(f"[LANGUES] Traduction du contenu original: {is_translated}")

            # Persist the result (best-effort, never raises).
            self._sauvegarder_resultat(ticket_id, result, response_fr)

            # Record the interaction in the agent history.
            self.ajouter_historique(
                "analyse_ticket",
                {"ticket_id": ticket_id, "prompt": prompt},
                result
            )

            return result

        except Exception as e:
            # Top-level boundary: log and return a structured error payload.
            logger.error(f"Erreur lors de l'analyse du ticket {ticket_id}: {str(e)}")
            return {
                "response": f"Erreur lors de l'analyse du ticket: {str(e)}",
                "response_en": f"Error analyzing ticket: {str(e)}",
                "error": True,
                "metadata": self._metadata(ticket_id, model_name, error=True),
            }

    def _generer_prompt(self, ticket_data: Dict[str, Any]) -> str:
        """Generate the analysis prompt for a ticket.

        Args:
            ticket_data: Dictionary with ``ticket_id`` and (English) ``content``.

        Returns:
            Prompt formatted for the LLM.
        """
        ticket_id = ticket_data.get("ticket_id", "UNKNOWN")
        content = ticket_data.get("content", "")

        # Explicit instructions to capture every URL are repeated here on
        # purpose, in addition to the system prompt.
        prompt = f"""[ENGLISH RESPONSE REQUESTED]

### TICKET {ticket_id}

{content}

Analyze this support ticket and provide:
1. A chronological summary of the exchanges
2. Extraction of all important technical details
3. Clear identification of ALL URLs mentioned (prefix each with "[URL]")
4. Analysis of whether the issue was resolved

Present your analysis in a clear, concise format that would be helpful for a technical support team.
Focus on FACTS only, avoid interpretation or diagnosis.
"""
        return prompt

    def _formater_date(self, date_str: str) -> str:
        """Normalise a date string to ``YYYY-MM-DD HH:MM[:SS]``.

        Handles two input shapes:
        - ``DD/MM/YYYY [HH:MM]``
        - ``YYYY-MM-DD [HH:MM:SS[.ffffff]]`` (fractional seconds dropped)

        Args:
            date_str: Date string to format.

        Returns:
            Normalised date string; the input unchanged on any parse error.
        """
        try:
            if "/" in date_str:
                # DD/MM/YYYY, optionally followed by a time part.
                parts = date_str.split(" ")
                date_parts = parts[0].split("/")
                time_part = parts[1] if len(parts) > 1 else "00:00"
                return f"{date_parts[2]}-{date_parts[1]}-{date_parts[0]} {time_part}"
            else:
                # YYYY-MM-DD, optionally followed by a time part.
                if " " in date_str:
                    date_part, time_part = date_str.split(" ", 1)
                    # Drop fractional seconds.
                    time_part = time_part.split(".", 1)[0]
                    return f"{date_part} {time_part}"
                return date_str
        except Exception:
            # Best effort: return the original string on any failure.
            return date_str

    def _get_timestamp(self) -> str:
        """Return the current local time as a ``YYYYMMDD_HHMMSS`` string."""
        return datetime.now().strftime("%Y%m%d_%H%M%S")