diff --git a/.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md b/.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md
index 9303f37..55c6ca1 100644
--- a/.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md
+++ b/.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md
@@ -6444,6 +6444,8410 @@ _**Assistant**_
Read file: /home/fgras-ca/llm-ticket3/utils/message_manager.py
+---
+
+Read file: /home/fgras-ca/llm-ticket3/utils/ticket_manager.py
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/utils/attachment_manager.py
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/utils/utils.py
+
+---
+
+```diff
+ import json
++ import re
+ from typing import Any
++ from html import unescape
++ from bs4 import BeautifulSoup
+
+ def save_json(data: Any, path: str):
+ with open(path, "w", encoding="utf-8") as f:
+ json.dump(data, f, indent=2, ensure_ascii=False)
+
++ def is_important_image(tag, message_text: str) -> bool:
++ """
++ Détermine si une image est importante ou s'il s'agit d'un logo/signature.
++
++ Args:
++ tag: La balise d'image à analyser
++ message_text: Le texte complet du message pour contexte
++
++ Returns:
++ True si l'image semble importante, False sinon
++ """
++ # Vérifier les attributs de l'image
++ src = tag.get('src', '')
++ alt = tag.get('alt', '')
++ title = tag.get('title', '')
++ css_class = tag.get('class', '')
++
++ # Patterns pour les images inutiles
++ useless_img_patterns = [
++ 'logo', 'signature', 'outlook', 'footer', 'header', 'icon',
++ 'emoticon', 'emoji', 'cid:', 'pixel', 'spacer', 'vignette',
++ 'banner', 'separator', 'decoration', 'mail_signature'
++ ]
++
++ # Vérifier si c'est une image inutile
++ for pattern in useless_img_patterns:
++ if (pattern in src.lower() or
++ pattern in alt.lower() or
++ pattern in title.lower() or
++ (css_class and any(pattern in c.lower() for c in css_class if isinstance(c, str)))):
++ return False
++
++ # Vérifier la taille
++ width = tag.get('width', '')
++ height = tag.get('height', '')
++ try:
++ width = int(width) if width and str(width).isdigit() else None
++ height = int(height) if height and str(height).isdigit() else None
++ if width and height and width <= 50 and height <= 50:
++ return False
++ except (ValueError, TypeError):
++ pass
++
++ # Vérifier si l'image est mentionnée dans le texte
++ image_indicators = [
++ 'capture', 'screenshot', 'image', 'photo', 'illustration',
++ 'voir', 'regarder', 'ci-joint', 'écran', 'erreur', 'problème',
++ 'bug', 'pièce jointe', 'attachment', 'veuillez trouver'
++ ]
++
++ for indicator in image_indicators:
++ if indicator in message_text.lower():
++ return True
++
++ return True
++
+ def clean_html(html_content: str) -> str:
+- import re
+- from html import unescape
+- from bs4 import BeautifulSoup
+-
+- if BeautifulSoup:
+- soup = BeautifulSoup(html_content, "html.parser")
+- text = soup.get_text()
+- else:
++ """
++ Nettoie le contenu HTML en supprimant toutes les balises mais en préservant le texte important.
++ Traite spécifiquement les images pour garder uniquement celles pertinentes.
++
++ Args:
++ html_content: Contenu HTML à nettoyer
++
++ Returns:
++ Texte nettoyé sans balises HTML
++ """
++ if not html_content:
++ return ""
++
++ try:
++ # Utiliser BeautifulSoup pour manipuler le HTML
++ soup = BeautifulSoup(html_content, 'html.parser')
++
++ # Supprimer les éléments de signature
++ signature_elements = [
++ 'div.signature', '.gmail_signature', '.signature',
++ 'hr + div', 'hr + p', '.footer', '.mail-signature'
++ ]
++
++ for selector in signature_elements:
++ for element in soup.select(selector):
++ element.decompose()
++
++ # Supprimer les scripts et les styles
++ for tag in soup(['script', 'style']):
++ tag.decompose()
++
++ # Récupérer le texte complet pour analyse
++ full_text = soup.get_text(' ', strip=True)
++
++ # Traiter les images
++ for img in soup.find_all('img'):
++ if is_important_image(img, full_text):
++ alt_text = img.get('alt', '') or img.get('title', '') or '[Image importante]'
++ img.replace_with(f" [Image: {alt_text}] ")
++ else:
++ img.decompose()
++
++ # Traiter les liens vers des pièces jointes
++ for a in soup.find_all('a', href=True):
++ href = a.get('href', '').lower()
++ if 'attachment' in href or 'download' in href or 'file' in href:
++ a.replace_with(f" [Pièce jointe: {a.get_text()}] ")
++
++ # Supprimer les lignes horizontales qui séparent souvent les signatures
++ for hr in soup.find_all('hr'):
++ hr.decompose()
++
++ # Récupérer le texte sans balises HTML
++ text = soup.get_text(separator=' ', strip=True)
++
++ # Décodage des entités HTML
++ text = unescape(text)
++
++ # Nettoyer les espaces multiples
++ text = re.sub(r'\s+', ' ', text)
++
++ # Nettoyer les lignes vides multiples
++ text = re.sub(r'\n\s*\n', '\n\n', text)
++
++ # Supprimer les disclaimers et signatures standards
++ footer_patterns = [
++ r'Sent from my .*',
++ r'Envoyé depuis mon .*',
++ r'Ce message .*confidentiel.*',
++ r'This email .*confidential.*',
++ r'DISCLAIMER.*',
++ r'CONFIDENTIAL.*',
++ r'CONFIDENTIEL.*',
++ r'Le contenu de ce courriel est confidentiel.*',
++ r'This message and any attachments.*',
++ r'Ce message et ses pièces jointes.*',
++ r'AVIS DE CONFIDENTIALITÉ.*',
++ r'PRIVACY NOTICE.*',
++ r'Droit à la déconnexion.*',
++ r'tél :.*',
++ r'mobile :.*',
++ r'email :.*',
++ r'Cordialement,.*',
++ r'Bonne réception.*',
++ r'---.*'
++ ]
++
++ for pattern in footer_patterns:
++ text = re.sub(pattern, '', text, flags=re.IGNORECASE | re.DOTALL)
++
++ return text.strip()
++ except Exception as e:
++ print(f"Erreur lors du nettoyage HTML: {e}")
++ # Fallback au nettoyage simple
+ text = re.sub(r'<.*?>', '', html_content)
+-
+- text = unescape(text)
+- text = re.sub(r'\s+', ' ', text).strip()
+-
+- return text
++ text = unescape(text)
++ return re.sub(r'\s+', ' ', text).strip()
+
+```
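+
+A minimal usage sketch for the helpers above (the import path is assumed from the project layout; the sample HTML is illustrative only):
+
+```python
+# Hypothetical example: clean an Odoo message body with the clean_html() shown above.
+from utils.utils import clean_html
+
+raw_body = (
+    "<div>Bonjour,<br/>voir la capture d'écran ci-jointe."
+    '<img src="cid:logo123" alt="logo"/>'
+    '<img src="screenshot.png" alt="capture erreur" width="800" height="600"/>'
+    '<hr/><div class="signature">Cordialement, Jean</div></div>'
+)
+
+print(clean_html(raw_body))
+# Expected behaviour: the cid/logo image and the signature block are dropped,
+# while the screenshot is kept as "[Image: capture erreur]".
+```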
+
+---
+
+```diff
+ from typing import List, Dict, Any
+ from .auth_manager import AuthManager
+- from .utils import clean_html
++ from .utils import clean_html, save_json
++ import os
++ import re
+
+ class MessageManager:
+ def __init__(self, auth: AuthManager):
+ self.auth = auth
++ self.model_name = "project.task"
+
+ def get_ticket_messages(self, ticket_id: int) -> List[Dict[str, Any]]:
++ """
++ Récupère tous les messages associés à un ticket.
++
++ Args:
++ ticket_id: ID du ticket
++
++ Returns:
++ Liste des messages associés au ticket
++ """
+ params = {
+ "model": "mail.message",
+ "method": "search_read",
+- "args": [[[ "res_id", "=", ticket_id], ["model", "=", "project.task"]]],
+- "kwargs": {"fields": ["id", "body", "author_id", "date"]}
+- }
+- return self.auth._rpc_call("/web/dataset/call_kw", params)
++ "args": [[["res_id", "=", ticket_id], ["model", "=", self.model_name]]],
++ "kwargs": {
++ "fields": ["id", "body", "date", "author_id", "email_from", "message_type",
++ "parent_id", "subtype_id", "subject", "tracking_value_ids", "attachment_ids"],
++ "order": "date asc"
++ }
++ }
++ messages = self.auth._rpc_call("/web/dataset/call_kw", params)
++ return messages if isinstance(messages, list) else []
++
++ def is_system_message(self, message: Dict[str, Any]) -> bool:
++ """
++ Vérifie si le message est un message système ou OdooBot.
++
++ Args:
++ message: Le message à vérifier
++
++ Returns:
++ True si c'est un message système, False sinon
++ """
++ is_system = False
++
++ # Vérifier le nom de l'auteur
++ if 'author_id' in message and isinstance(message['author_id'], list) and len(message['author_id']) > 1:
++ author_name = message['author_id'][1].lower()
++ if 'odoobot' in author_name or 'bot' in author_name or 'système' in author_name:
++ is_system = True
++
++ # Vérifier le type de message
++ if message.get('message_type') == 'notification':
++ is_system = True
++
++ # Vérifier le sous-type du message
++ if 'subtype_id' in message and isinstance(message['subtype_id'], list) and len(message['subtype_id']) > 1:
++ subtype = message['subtype_id'][1].lower()
++ if 'notification' in subtype or 'system' in subtype:
++ is_system = True
++
++ return is_system
++
++ def is_stage_change_message(self, message: Dict[str, Any]) -> bool:
++ """
++ Vérifie si le message est un changement d'état.
++
++ Args:
++ message: Le message à vérifier
++
++ Returns:
++ True si c'est un message de changement d'état, False sinon
++ """
++ if not isinstance(message.get('body', ''), str):
++ return False
++
++ body = message.get('body', '').lower()
++
++ # Patterns pour les changements d'état
++ stage_patterns = [
++ 'étape changée', 'stage changed', 'modifié l\'étape',
++ 'changed the stage', 'ticket transféré', 'ticket transferred',
++ 'statut modifié', 'status changed', 'état du ticket'
++ ]
++
++ return any(pattern in body for pattern in stage_patterns)
++
++ def is_forwarded_message(self, message: Dict[str, Any]) -> bool:
++ """
++ Détecte si un message est un message transféré.
++
++ Args:
++ message: Le message à analyser
++
++ Returns:
++ True si le message est transféré, False sinon
++ """
++ if not message.get('body'):
++ return False
++
++ # Indicateurs de message transféré
++ forwarded_indicators = [
++ "message transféré", "forwarded message",
++ "transféré de", "forwarded from",
++ "début du message transféré", "begin forwarded message",
++ "message d'origine", "original message",
++ "from:", "de:", "to:", "à:", "subject:", "objet:",
++ "envoyé:", "sent:", "date:", "cc:"
++ ]
++
++ # Vérifier le contenu du message
++ body_lower = message.get('body', '').lower() if isinstance(message.get('body', ''), str) else ""
++
++ # Vérifier la présence d'indicateurs de transfert
++ for indicator in forwarded_indicators:
++ if indicator in body_lower:
++ return True
++
++ # Vérifier si le sujet contient des préfixes courants de transfert
++ subject_value = message.get('subject', '')
++ if not isinstance(subject_value, str):
++ subject_value = str(subject_value) if subject_value is not None else ""
++
++ subject_lower = subject_value.lower()
++ forwarded_prefixes = ["tr:", "fwd:", "fw:"]
++ for prefix in forwarded_prefixes:
++ if subject_lower.startswith(prefix):
++ return True
++
++ return False
++
++ def process_messages(self, ticket_id: int, ticket_code: str, ticket_name: str, output_dir: str) -> Dict[str, Any]:
++ """
++ Traite tous les messages d'un ticket, nettoie le contenu et génère des fichiers structurés.
++
++ Args:
++ ticket_id: ID du ticket
++ ticket_code: Code du ticket
++ ticket_name: Nom du ticket
++ output_dir: Répertoire de sortie
++
++ Returns:
++ Dictionnaire avec les chemins des fichiers créés
++ """
++ # Récupérer les messages
++ messages = self.get_ticket_messages(ticket_id)
++
++ # Nettoyer et structurer les messages
++ processed_messages = []
++
++ for message in messages:
++ # Ne pas inclure les messages système sans intérêt
++ if self.is_system_message(message) and not self.is_stage_change_message(message):
++ continue
++
++ # Créer une copie du message pour éviter de modifier l'original
++ message_copy = message.copy()
++
++ # Nettoyer le corps du message
++ if message_copy.get('body'):
++ if self.is_forwarded_message(message_copy):
++ message_copy['is_forwarded'] = True
++ # Traiter différemment les messages transférés
++ # Pour l'instant on utilise le même traitement mais à l'avenir on peut le spécialiser
++
++ message_copy['body_original'] = message_copy.get('body', '')
++ message_copy['body'] = clean_html(message_copy.get('body', ''))
++
++ # Vérifier si c'est un changement d'état
++ if self.is_stage_change_message(message_copy):
++ message_copy['is_stage_change'] = True
++
++ # Récupérer les détails de l'auteur
++ if message_copy.get('author_id') and isinstance(message_copy.get('author_id'), list) and len(message_copy.get('author_id', [])) > 0:
++ author_id = message_copy.get('author_id')[0]
++ params = {
++ "model": "res.partner",
++ "method": "read",
++ "args": [[author_id]],
++ "kwargs": {"fields": ['name', 'email', 'phone', 'function', 'company_id']}
++ }
++ author_details = self.auth._rpc_call("/web/dataset/call_kw", params)
++ if author_details and isinstance(author_details, list) and len(author_details) > 0:
++ message_copy['author_details'] = author_details[0]
++
++ processed_messages.append(message_copy)
++
++ # Trier les messages par date
++ processed_messages.sort(key=lambda x: x.get('date', ''))
++
++ # Créer la structure pour le JSON
++ messages_with_summary = {
++ "ticket_summary": {
++ "id": ticket_id,
++ "code": ticket_code,
++ "name": ticket_name
++ },
++ "messages": processed_messages
++ }
++
++ # Sauvegarder les messages en JSON
++ all_messages_path = os.path.join(output_dir, "all_messages.json")
++ save_json(messages_with_summary, all_messages_path)
++
++ # Sauvegarder également les messages bruts
++ raw_messages_path = os.path.join(output_dir, "messages_raw.json")
++ save_json({"ticket_id": ticket_id, "messages": messages}, raw_messages_path)
++
++ # Créer un fichier texte pour une lecture plus facile
++ try:
++ with open(os.path.join(output_dir, "all_messages.txt"), 'w', encoding='utf-8') as f:
++ # Informations sur le ticket
++ f.write(f"TICKET: {ticket_code} - {ticket_name}\n\n")
++
++ # Parcourir les messages filtrés
++ for msg in processed_messages:
++ author = msg.get('author_details', {}).get('name', msg.get('email_from', 'Inconnu'))
++ date = msg.get('date', '')
++ subject = msg.get('subject', 'Sans objet')
++ body = msg.get('body', '')
++
++ # Formater différemment les messages spéciaux
++ if msg.get('is_stage_change'):
++ f.write("*"*80 + "\n")
++ f.write("*** CHANGEMENT D'ÉTAT ***\n")
++ f.write("*"*80 + "\n\n")
++ elif msg.get('is_forwarded'):
++ f.write("*"*80 + "\n")
++ f.write("*** MESSAGE TRANSFÉRÉ ***\n")
++ f.write("*"*80 + "\n\n")
++
++ # En-tête du message
++ f.write(f"DATE: {date}\n")
++ f.write(f"DE: {author}\n")
++ if subject:
++ f.write(f"OBJET: {subject}\n")
++ f.write("\n")
++ f.write(f"{body}\n\n")
++ f.write("-"*80 + "\n\n")
++ except Exception as e:
++ print(f"Erreur lors de la création du fichier texte: {e}")
++
++ return {
++ "all_messages_path": all_messages_path,
++ "raw_messages_path": raw_messages_path,
++ "messages_count": len(processed_messages)
++ }
+
+```
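+
+A sketch of how the new `process_messages` entry point would be wired up (credentials and ticket values are placeholders, not taken from the project):
+
+```python
+# Hypothetical driver code for MessageManager.process_messages().
+import os
+from utils.auth_manager import AuthManager
+from utils.message_manager import MessageManager
+
+auth = AuthManager(url="https://odoo.example.com", db="prod", username="api@example.com", api_key="***")
+if auth.login():
+    output_dir = "ticket_structure/T-1234"
+    os.makedirs(output_dir, exist_ok=True)  # save_json() does not create directories at this stage
+    result = MessageManager(auth).process_messages(
+        ticket_id=1234,
+        ticket_code="T-1234",
+        ticket_name="Exemple de ticket",
+        output_dir=output_dir,
+    )
+    print(result["messages_count"], "messages written to", result["all_messages_path"])
+```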
+
+---
+
+```diff
+ import json
+ from datetime import datetime
+- from typing import Dict, Any
++ from typing import Dict, List, Any, Optional
+ from .auth_manager import AuthManager
+ from .message_manager import MessageManager
+
+ def get_ticket_by_code(self, ticket_code: str) -> Dict[str, Any]:
++ """
++ Récupère un ticket par son code.
++
++ Args:
++ ticket_code: Code du ticket à rechercher
++
++ Returns:
++ Dictionnaire contenant les informations du ticket
++ """
+ params = {
+ "model": self.model_name,
+ "method": "search_read",
+ "args": [[["code", "=", ticket_code]],
+ ["id", "name", "description", "stage_id", "project_id", "partner_id",
+- "user_id", "date_start", "date_end", "create_date", "write_date",
+- "message_ids", "message_follower_ids", "attachment_ids", "timesheet_ids"]],
++ "user_id", "date_start", "date_end", "date_deadline", "create_date", "write_date",
++ "tag_ids", "priority", "email_from", "email_cc", "message_ids",
++ "message_follower_ids", "attachment_ids", "timesheet_ids"]],
+ "kwargs": {"limit": 1}
+ }
+ result = self.auth_manager._rpc_call("/web/dataset/call_kw", params)
+
+ if isinstance(result, list) and len(result) > 0:
+- return result[0]
++ # Résoudre les champs relationnels
++ return self.resolve_relation_fields(result[0])
+ else:
+ print(f"Aucun ticket trouvé avec le code {ticket_code}")
+ return {}
++
++ def resolve_relation_fields(self, ticket: Dict[str, Any]) -> Dict[str, Any]:
++ """
++ Résout les champs relationnels d'un ticket pour obtenir les noms au lieu des IDs.
++
++ Args:
++ ticket: Dictionnaire contenant les données du ticket
++
++ Returns:
++ Ticket avec champs relationnels résolus
++ """
++ relation_fields = {
++ "stage_id": "res.stage",
++ "project_id": "project.project",
++ "partner_id": "res.partner",
++ "user_id": "res.users",
++ "tag_ids": "project.tags"
++ }
++
++ # Traiter les champs many2one
++ for field, model in relation_fields.items():
++ if field in ticket and ticket[field] and field != "tag_ids":
++ if isinstance(ticket[field], list) and len(ticket[field]) >= 2:
++ # Le format est déjà [id, name]
++ ticket[f"{field}_name"] = ticket[field][1]
++ elif isinstance(ticket[field], int):
++ # Récupérer le nom depuis l'API
++ params = {
++ "model": model,
++ "method": "name_get",
++ "args": [[ticket[field]]],
++ "kwargs": {}
++ }
++ result = self.auth_manager._rpc_call("/web/dataset/call_kw", params)
++ if result and isinstance(result, list) and result[0] and len(result[0]) >= 2:
++ ticket[f"{field}_name"] = result[0][1]
++
++ # Traiter les tags (many2many)
++ if "tag_ids" in ticket and ticket["tag_ids"] and isinstance(ticket["tag_ids"], list):
++ if all(isinstance(tag_id, int) for tag_id in ticket["tag_ids"]):
++ params = {
++ "model": "project.tags",
++ "method": "name_get",
++ "args": [ticket["tag_ids"]],
++ "kwargs": {}
++ }
++ result = self.auth_manager._rpc_call("/web/dataset/call_kw", params)
++ if result and isinstance(result, list):
++ ticket["tag_names"] = [tag[1] for tag in result]
++
++ return ticket
+
+ def extract_ticket_data(self, ticket_code: str, output_dir: str):
++ """
++ Extrait toutes les données d'un ticket et les sauvegarde dans une structure organisée.
++
++ Args:
++ ticket_code: Code du ticket à extraire
++ output_dir: Répertoire de sortie
++
++ Returns:
++ Dictionnaire avec les chemins des fichiers créés ou None en cas d'erreur
++ """
+ os.makedirs(output_dir, exist_ok=True)
+
++ # Récupérer les données du ticket
+ ticket_data = self.get_ticket_by_code(ticket_code)
+
+
+ ticket_id = ticket_data["id"]
++ ticket_name = ticket_data.get("name", "Sans nom")
+
+ # Sauvegarder ticket_info.json
+ ticket_info_path = os.path.join(output_dir, "ticket_info.json")
+ save_json(ticket_data, ticket_info_path)
+
+- # Sauvegarde des messages
+- messages_data = self.message_manager.get_ticket_messages(ticket_id)
+- all_messages_path = os.path.join(output_dir, "all_messages.json")
+- save_json(messages_data, all_messages_path)
++ # Sauvegarder le résumé du ticket
++ ticket_summary = {
++ "id": ticket_id,
++ "code": ticket_code,
++ "name": ticket_name,
++ "description": ticket_data.get("description", ""),
++ "stage": ticket_data.get("stage_id_name", ""),
++ "project": ticket_data.get("project_id_name", ""),
++ "partner": ticket_data.get("partner_id_name", ""),
++ "assigned_to": ticket_data.get("user_id_name", ""),
++ "tags": ticket_data.get("tag_names", []),
++ "create_date": ticket_data.get("create_date", ""),
++ "write_date": ticket_data.get("write_date", ""),
++ "deadline": ticket_data.get("date_deadline", "")
++ }
++ summary_path = os.path.join(output_dir, "ticket_summary.json")
++ save_json(ticket_summary, summary_path)
++
++ # Traiter et sauvegarder les messages
++ messages_result = self.message_manager.process_messages(
++ ticket_id,
++ ticket_code,
++ ticket_name,
++ output_dir
++ )
+
+- # Sauvegarde des pièces jointes
+- attachments_data = self.attachment_manager.save_attachments(ticket_id, output_dir)
+- attachments_path = os.path.join(output_dir, "attachments_info.json")
++ # Récupérer et sauvegarder les pièces jointes
++ attachments_info = self.attachment_manager.save_attachments(ticket_id, output_dir)
++ attachments_info_path = os.path.join(output_dir, "attachments_info.json")
++
++ # Récupérer les followers si disponibles
++ follower_ids = ticket_data.get("message_follower_ids", [])
++ followers_path = None
++ if follower_ids:
++ params = {
++ "model": "mail.followers",
++ "method": "read",
++ "args": [follower_ids, ["id", "partner_id", "name", "email"]],
++ "kwargs": {}
++ }
++ followers = self.auth_manager._rpc_call("/web/dataset/call_kw", params)
++ if followers:
++ followers_path = os.path.join(output_dir, "followers.json")
++ save_json(followers, followers_path)
+
+- # Génération de structure.json
++ # Génération de structure.json avec toutes les informations
+ structure = {
+ "date_extraction": datetime.now().isoformat(),
+- "ticket_dir": output_dir,
+- "fichiers_json": [
+- "ticket_info.json",
+- "all_messages.json",
+- "attachments_info.json"
+- ]
++ "ticket_id": ticket_id,
++ "ticket_code": ticket_code,
++ "ticket_name": ticket_name,
++ "output_dir": output_dir,
++ "files": {
++ "ticket_info": "ticket_info.json",
++ "ticket_summary": "ticket_summary.json",
++ "messages": "all_messages.json",
++ "messages_raw": "messages_raw.json",
++ "messages_text": "all_messages.txt",
++ "attachments": "attachments_info.json",
++ "followers": "followers.json" if followers_path else None
++ },
++ "stats": {
++ "messages_count": messages_result.get("messages_count", 0),
++ "attachments_count": len(attachments_info)
++ }
+ }
+ structure_path = os.path.join(output_dir, "structure.json")
+ return {
+ "ticket_info": ticket_info_path,
+- "messages_file": all_messages_path,
++ "ticket_summary": summary_path,
++ "messages_file": messages_result.get("all_messages_path"),
++ "messages_count": messages_result.get("messages_count", 0),
+ "ticket_data_file": structure_path,
+- "attachments": attachments_data
++ "attachments": attachments_info,
++ "attachments_count": len(attachments_info)
+ }
+
+```
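+
+For reference, a hedged sketch of calling the updated extractor end to end (construction mirrors the usage in retrieve_ticket.py; values are placeholders):
+
+```python
+# Hypothetical call to TicketManager.extract_ticket_data().
+from utils.auth_manager import AuthManager
+from utils.ticket_manager import TicketManager
+
+auth = AuthManager(url="https://odoo.example.com", db="prod", username="api@example.com", api_key="***")
+if auth.login():
+    result = TicketManager(auth).extract_ticket_data("T-1234", "ticket_structure/T-1234")
+    # The returned dict points at ticket_info.json, ticket_summary.json,
+    # the processed messages file, structure.json and the attachments list.
+    print(result)
+```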
+
+---
+
+```diff
+-
++ #!/usr/bin/env python3
++ import os
++ import sys
++ import json
++ import logging
++ import argparse
++ from datetime import datetime
++ from utils.auth_manager import AuthManager
++ from utils.ticket_manager import TicketManager
++ from utils.utils import setup_logging, log_separator
++
++ def parse_arguments():
++ parser = argparse.ArgumentParser(description="Récupère un ticket Odoo par son code et extrait ses données.")
++ parser.add_argument("ticket_code", help="Code du ticket à extraire")
++ parser.add_argument("--output", "-o", help="Répertoire de sortie", default="ticket_structure")
++ parser.add_argument("--config", "-c", help="Fichier de configuration", default="config.json")
++ parser.add_argument("--verbose", "-v", action="store_true", help="Mode verbeux")
++ return parser.parse_args()
++
++ def load_config(config_file):
++ try:
++ with open(config_file, 'r') as f:
++ return json.load(f)
++ except Exception as e:
++ logging.error(f"Erreur lors du chargement du fichier de configuration: {e}")
++ sys.exit(1)
++
++ def main():
++ args = parse_arguments()
++ config = load_config(args.config)
++
++ # Configurer la journalisation
++ log_level = logging.DEBUG if args.verbose else logging.INFO
++ setup_logging(log_level, "retrieve_ticket.log")
++
++ # Créer le répertoire de sortie spécifique au ticket
++ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
++ ticket_dir = os.path.join(args.output, f"{args.ticket_code}_{timestamp}")
++ os.makedirs(ticket_dir, exist_ok=True)
++
++ logging.info(f"Extraction du ticket {args.ticket_code}")
++ log_separator()
++
++ try:
++ # Initialiser les gestionnaires
++ auth_manager = AuthManager(
++ url=config.get("odoo_url"),
++ db=config.get("odoo_db"),
++ username=config.get("odoo_username"),
++ api_key=config.get("odoo_api_key")
++ )
++
++ if not auth_manager.login():
++ logging.error("Échec de l'authentification à Odoo")
++ sys.exit(1)
++
++ # Extraire les données du ticket
++ ticket_manager = TicketManager(auth_manager)
++ result = ticket_manager.extract_ticket_data(args.ticket_code, ticket_dir)
++
++ if not result:
++ logging.error(f"Échec de l'extraction du ticket {args.ticket_code}")
++ sys.exit(1)
++
++ # Afficher le résumé
++ log_separator()
++ logging.info(f"Extraction terminée avec succès")
++ logging.info(f"Ticket: {args.ticket_code}")
++ logging.info(f"Répertoire: {ticket_dir}")
++ logging.info(f"Messages traités: {result.get('messages_count', 0)}")
++ logging.info(f"Pièces jointes: {result.get('attachments_count', 0)}")
++ log_separator()
++
++ # Générer un rapport de fin
++ summary = {
++ "timestamp": timestamp,
++ "ticket_code": args.ticket_code,
++ "output_directory": ticket_dir,
++ "message_count": result.get("messages_count", 0),
++ "attachment_count": result.get("attachments_count", 0),
++ "files_created": [
++ os.path.basename(result.get("ticket_info", "")),
++ os.path.basename(result.get("ticket_summary", "")),
++ os.path.basename(result.get("messages_file", "")),
++ os.path.basename(result.get("ticket_data_file", ""))
++ ]
++ }
++
++ summary_path = os.path.join(ticket_dir, "extraction_summary.json")
++ with open(summary_path, 'w', encoding='utf-8') as f:
++ json.dump(summary, f, indent=2, ensure_ascii=False)
++
++ print(f"\nExtraction du ticket {args.ticket_code} terminée avec succès.")
++ print(f"Les données ont été sauvegardées dans: {ticket_dir}")
++
++ except Exception as e:
++ logging.exception(f"Une erreur est survenue: {e}")
++ sys.exit(1)
++
++ if __name__ == "__main__":
++ main()
+```
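+
+The script reads its credentials from a config.json (default path); the keys below are the ones referenced in main(), with placeholder values:
+
+```python
+# Hypothetical helper to produce a config.json accepted by retrieve_ticket.py.
+import json
+
+config = {
+    "odoo_url": "https://odoo.example.com",
+    "odoo_db": "production",
+    "odoo_username": "api.user@example.com",
+    "odoo_api_key": "YOUR_API_KEY",
+}
+with open("config.json", "w", encoding="utf-8") as f:
+    json.dump(config, f, indent=2)
+
+# Then run, for example:
+#   python retrieve_ticket.py T-1234 --output ticket_structure --verbose
+```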
+
+---
+
+```diff
++ import os
+ import json
++ import logging
+ import re
+- from typing import Any
++ from typing import Dict, Any, List, Optional, Union
+ from html import unescape
+ from bs4 import BeautifulSoup
++ import html2text
+
+- def save_json(data: Any, path: str):
+- with open(path, "w", encoding="utf-8") as f:
++ def setup_logging(level: int = logging.INFO, log_file: Optional[str] = None) -> None:
++ """
++ Configure la journalisation avec un format spécifique et éventuellement un fichier de logs.
++
++ Args:
++ level: Niveau de journalisation (default: logging.INFO)
++ log_file: Chemin du fichier de log (default: None)
++ """
++ log_format = '%(asctime)s - %(levelname)s - %(message)s'
++ date_format = '%Y-%m-%d %H:%M:%S'
++
++ # Configuration de base
++ logging.basicConfig(
++ level=level,
++ format=log_format,
++ datefmt=date_format
++ )
++
++ # Ajout d'un gestionnaire de fichier si log_file est spécifié
++ if log_file:
++ # S'assurer que le répertoire existe
++ log_dir = os.path.dirname(log_file)
++ if log_dir and not os.path.exists(log_dir):
++ os.makedirs(log_dir, exist_ok=True)
++
++ file_handler = logging.FileHandler(log_file, encoding='utf-8')
++ file_handler.setLevel(level)
++ file_formatter = logging.Formatter(log_format, date_format)
++ file_handler.setFormatter(file_formatter)
++ logging.getLogger().addHandler(file_handler)
++
++ def log_separator(length: int = 60) -> None:
++ """
++ Ajoute une ligne de séparation dans les logs.
++
++ Args:
++ length: Longueur de la ligne (default: 60)
++ """
++ logging.info("-" * length)
++
++ def save_json(data: Any, file_path: str) -> bool:
++ """
++ Sauvegarde des données au format JSON dans un fichier.
++
++ Args:
++ data: Données à sauvegarder
++ file_path: Chemin du fichier
++
++ Returns:
++ True si la sauvegarde a réussi, False sinon
++ """
++ try:
++ # S'assurer que le répertoire existe
++ directory = os.path.dirname(file_path)
++ if directory and not os.path.exists(directory):
++ os.makedirs(directory, exist_ok=True)
++
++ with open(file_path, 'w', encoding='utf-8') as f:
+ json.dump(data, f, indent=2, ensure_ascii=False)
++ return True
++ except Exception as e:
++ logging.error(f"Erreur lors de la sauvegarde du fichier JSON {file_path}: {e}")
++ return False
++
++ def save_text(text: str, file_path: str) -> bool:
++ """
++ Sauvegarde du texte dans un fichier.
++
++ Args:
++ text: Texte à sauvegarder
++ file_path: Chemin du fichier
++
++ Returns:
++ True si la sauvegarde a réussi, False sinon
++ """
++ try:
++ # S'assurer que le répertoire existe
++ directory = os.path.dirname(file_path)
++ if directory and not os.path.exists(directory):
++ os.makedirs(directory, exist_ok=True)
++
++ with open(file_path, 'w', encoding='utf-8') as f:
++ f.write(text)
++ return True
++ except Exception as e:
++ logging.error(f"Erreur lors de la sauvegarde du fichier texte {file_path}: {e}")
++ return False
+
+ def is_important_image(tag, message_text: str) -> bool:
+ return True
+
+- def clean_html(html_content: str) -> str:
++ def clean_html(html_content: str,
++ strategy: str = "html2text",
++ preserve_links: bool = False,
++ preserve_images: bool = False) -> str:
+ """
+- Nettoie le contenu HTML en supprimant toutes les balises mais en préservant le texte important.
+- Traite spécifiquement les images pour garder uniquement celles pertinentes.
++ Nettoie le contenu HTML et le convertit en texte selon la stratégie spécifiée.
+
+ Args:
+ html_content: Contenu HTML à nettoyer
++ strategy: Stratégie de nettoyage ('strip_tags', 'html2text', 'soup') (default: 'html2text')
++ preserve_links: Conserver les liens dans la version texte (default: False)
++ preserve_images: Conserver les références aux images (default: False)
+
+ Returns:
+- Texte nettoyé sans balises HTML
++ Texte nettoyé
+ """
+ if not html_content:
+ return ""
+
+- try:
+- # Utiliser BeautifulSoup pour manipuler le HTML
++ # Remplacer les balises br par des sauts de ligne
++ html_content = re.sub(r'<br\s*/?>', '\n', html_content)
++
++ if strategy == "strip_tags":
++ # Solution simple: suppression des balises HTML
++ text = re.sub(r'<[^>]+>', '', html_content)
++ # Nettoyer les espaces multiples et les lignes vides multiples
++ text = re.sub(r'\s+', ' ', text)
++ text = re.sub(r'\n\s*\n', '\n\n', text)
++ return text.strip()
++
++ elif strategy == "html2text":
++ # Utiliser html2text pour une meilleure conversion
++ h = html2text.HTML2Text()
++ h.ignore_links = not preserve_links
++ h.ignore_images = not preserve_images
++ h.body_width = 0 # Ne pas limiter la largeur du texte
++ return h.handle(html_content).strip()
++
++ elif strategy == "soup":
++ # Utiliser BeautifulSoup pour un nettoyage plus avancé
+ soup = BeautifulSoup(html_content, 'html.parser')
+
+- # Supprimer les éléments de signature
+- signature_elements = [
+- 'div.signature', '.gmail_signature', '.signature',
+- 'hr + div', 'hr + p', '.footer', '.mail-signature'
+- ]
+-
+- for selector in signature_elements:
+- for element in soup.select(selector):
+- element.decompose()
+-
+- # Supprimer les scripts et les styles
+- for tag in soup(['script', 'style']):
+- tag.decompose()
+-
+- # Récupérer le texte complet pour analyse
+- full_text = soup.get_text(' ', strip=True)
+-
+- # Traiter les images
+- for img in soup.find_all('img'):
+- if is_important_image(img, full_text):
+- alt_text = img.get('alt', '') or img.get('title', '') or '[Image importante]'
+- img.replace_with(f" [Image: {alt_text}] ")
++ # Préserver les liens si demandé
++ if preserve_links:
++ for a_tag in soup.find_all('a', href=True):
++ a_tag.string = f"{a_tag.text} [{a_tag['href']}]"
++
++ # Préserver les images si demandé
++ if preserve_images:
++ for img_tag in soup.find_all('img', src=True):
++ img_tag.string = f"[Image: {img_tag.get('alt', '')} - {img_tag['src']}]"
++
++ # Convertir les listes en texte formaté
++ for ul in soup.find_all('ul'):
++ for li in ul.find_all('li'):
++ li.string = f"• {li.text}"
++
++ for ol in soup.find_all('ol'):
++ for i, li in enumerate(ol.find_all('li')):
++ li.string = f"{i+1}. {li.text}"
++
++ text = soup.get_text()
++ # Nettoyer les espaces et les lignes vides
++ text = re.sub(r'\n\s*\n', '\n\n', text)
++ return text.strip()
++
+ else:
+- img.decompose()
+-
+- # Traiter les liens vers des pièces jointes
+- for a in soup.find_all('a', href=True):
+- href = a.get('href', '').lower()
+- if 'attachment' in href or 'download' in href or 'file' in href:
+- a.replace_with(f" [Pièce jointe: {a.get_text()}] ")
+-
+- # Supprimer les lignes horizontales qui séparent souvent les signatures
+- for hr in soup.find_all('hr'):
+- hr.decompose()
+-
+- # Récupérer le texte sans balises HTML
+- text = soup.get_text(separator=' ', strip=True)
+-
+- # Décodage des entités HTML
+- text = unescape(text)
+-
+- # Nettoyer les espaces multiples
+- text = re.sub(r'\s+', ' ', text)
+-
+- # Nettoyer les lignes vides multiples
+- text = re.sub(r'\n\s*\n', '\n\n', text)
+-
+- # Supprimer les disclaimers et signatures standards
+- footer_patterns = [
+- r'Sent from my .*',
+- r'Envoyé depuis mon .*',
+- r'Ce message .*confidentiel.*',
+- r'This email .*confidential.*',
+- r'DISCLAIMER.*',
+- r'CONFIDENTIAL.*',
+- r'CONFIDENTIEL.*',
+- r'Le contenu de ce courriel est confidentiel.*',
+- r'This message and any attachments.*',
+- r'Ce message et ses pièces jointes.*',
+- r'AVIS DE CONFIDENTIALITÉ.*',
+- r'PRIVACY NOTICE.*',
+- r'Droit à la déconnexion.*',
+- r'tél :.*',
+- r'mobile :.*',
+- r'email :.*',
+- r'Cordialement,.*',
+- r'Bonne réception.*',
+- r'---.*'
+- ]
+-
+- for pattern in footer_patterns:
+- text = re.sub(pattern, '', text, flags=re.IGNORECASE | re.DOTALL)
+-
+- return text.strip()
+- except Exception as e:
+- print(f"Erreur lors du nettoyage HTML: {e}")
+- # Fallback au nettoyage simple
+- text = re.sub(r'<.*?>', '', html_content)
+- text = unescape(text)
+- return re.sub(r'\s+', ' ', text).strip()
++ # Stratégie par défaut
++ logging.warning(f"Stratégie de nettoyage '{strategy}' inconnue, utilisation de 'strip_tags'")
++ return clean_html(html_content, "strip_tags")
++
++ def detect_duplicate_content(messages: List[Dict[str, Any]]) -> List[int]:
++ """
++ Détecte les messages avec un contenu dupliqué et retourne leurs indices.
++
++ Args:
++ messages: Liste de messages à analyser
++
++ Returns:
++ Liste des indices des messages dupliqués
++ """
++ content_map = {}
++ duplicate_indices = []
++
++ for idx, message in enumerate(messages):
++ body = message.get("body", "")
++ if not body:
++ continue
++
++ # Nettoyer le contenu HTML pour la comparaison
++ cleaned_content = clean_html(body, "strip_tags")
++ # Considérer uniquement les messages avec du contenu significatif
++ if len(cleaned_content.strip()) < 10:
++ continue
++
++ # Vérifier si le contenu existe déjà
++ if cleaned_content in content_map:
++ duplicate_indices.append(idx)
++ else:
++ content_map[cleaned_content] = idx
++
++ return duplicate_indices
++
++ def normalize_filename(name: str) -> str:
++ """
++ Normalise un nom de fichier en remplaçant les caractères non autorisés.
++
++ Args:
++ name: Nom à normaliser
++
++ Returns:
++ Nom normalisé
++ """
++ # Remplacer les caractères non autorisés par des underscores
++ sanitized = re.sub(r'[\\/*?:"<>|]', '_', name)
++ # Limiter la longueur du nom à 100 caractères
++ if len(sanitized) > 100:
++ sanitized = sanitized[:97] + "..."
++ return sanitized.strip()
+
+```
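+
+A quick comparison of the three cleaning strategies on the same fragment (import path assumed; html2text must be installed):
+
+```python
+from utils.utils import clean_html
+
+html = '<p>Bonjour,</p><p>Voir la <a href="https://example.com/doc">documentation</a>.</p>'
+
+print(clean_html(html, strategy="strip_tags"))                      # plain text, the link target is dropped
+print(clean_html(html, strategy="html2text", preserve_links=True))  # Markdown-style "[documentation](https://example.com/doc)"
+print(clean_html(html, strategy="soup", preserve_links=True))       # text with " [https://example.com/doc]" appended to the link label
+```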
+
+---
+
+```diff
++ import json
++ import logging
+ import requests
+- from typing import Dict, Any
++ from typing import Dict, Any, Optional
+
+ class AuthManager:
++ """
++ Gestionnaire d'authentification pour l'API Odoo.
++ Gère la connexion et les appels RPC à l'API Odoo.
++ """
++
+ def __init__(self, url: str, db: str, username: str, api_key: str):
+- self.url = url
++ """
++ Initialise le gestionnaire d'authentification.
++
++ Args:
++ url: URL de l'instance Odoo
++ db: Nom de la base de données Odoo
++ username: Nom d'utilisateur pour la connexion
++ api_key: Clé API ou mot de passe pour l'authentification
++ """
++ self.url = url.rstrip('/')
+ self.db = db
+ self.username = username
+ self.api_key = api_key
+ self.uid = None
+- self.session_id = None
+-
++ self.session = requests.Session()
++ self.session.headers.update({
++ 'Content-Type': 'application/json',
++ 'Accept': 'application/json'
++ })
++ self.max_retries = 3
++ self.timeout = 30 # secondes
++
+ def login(self) -> bool:
+- login_url = f"{self.url}/web/session/authenticate"
+- login_data = {
+- "jsonrpc": "2.0",
+- "params": {
+- "db": self.db,
+- "login": self.username,
+- "password": self.api_key
+- }
+- }
+- response = requests.post(login_url, json=login_data)
+- result = response.json()
+-
+- if result.get("error"):
+- print(f"Erreur de connexion: {result['error']['message']}")
++ """
++ Se connecte à l'API Odoo en utilisant les identifiants fournis.
++
++ Returns:
++ True si l'authentification réussie, False sinon
++ """
++ try:
++ logging.info(f"Tentative de connexion à {self.url} avec l'utilisateur {self.username}")
++ endpoint = '/web/session/authenticate'
++
++ payload = {
++ "jsonrpc": "2.0",
++ "params": {
++ "db": self.db,
++ "login": self.username,
++ "password": self.api_key
++ }
++ }
++
++ response = self.session.post(
++ f"{self.url}{endpoint}",
++ data=json.dumps(payload),
++ timeout=self.timeout
++ )
++ response.raise_for_status()
++
++ result = response.json()
++ if 'error' in result:
++ error = result['error']
++ logging.error(f"Erreur d'authentification: {error.get('message', 'Erreur inconnue')}")
++ return False
++
++ self.uid = result.get('result', {}).get('uid')
++ if not self.uid:
++ logging.error("Erreur: UID non trouvé dans la réponse d'authentification")
++ return False
++
++ logging.info(f"Authentification réussie. UID: {self.uid}")
++ return True
++
++ except requests.RequestException as e:
++ logging.error(f"Erreur de connexion à l'API Odoo: {e}")
++ return False
++ except json.JSONDecodeError as e:
++ logging.error(f"Erreur de décodage JSON: {e}")
++ return False
++ except Exception as e:
++ logging.error(f"Erreur inattendue lors de l'authentification: {e}")
+ return False
+-
+- self.uid = result.get("result", {}).get("uid")
+- self.session_id = response.cookies.get("session_id")
+- return bool(self.uid)
+-
+- def _rpc_call(self, endpoint: str, params: Dict[str, Any]) -> Dict[str, Any]:
+- full_url = f"{self.url}{endpoint}"
+- headers = {"Content-Type": "application/json"}
+- data = {"jsonrpc": "2.0", "method": "call", "params": params}
+-
+- response = requests.post(full_url, json=data, headers=headers, cookies={"session_id": self.session_id})
+- return response.json().get("result", {})
++
++ def _rpc_call(self, endpoint: str, params: Dict[str, Any], retry_count: int = 0) -> Any:
++ """
++ Effectue un appel RPC à l'API Odoo.
++
++ Args:
++ endpoint: Point de terminaison de l'API
++ params: Paramètres de l'appel
++ retry_count: Nombre de tentatives actuelles (pour les nouvelles tentatives)
++
++ Returns:
++ Résultat de l'appel RPC ou None en cas d'erreur
++ """
++ if not self.uid and endpoint != '/web/session/authenticate':
++ logging.warning("Tentative d'appel RPC sans être authentifié. Reconnexion...")
++ if not self.login():
++ logging.error("Échec de la reconnexion")
++ return None
++
++ try:
++ payload = {
++ "jsonrpc": "2.0",
++ "params": params
++ }
++
++ response = self.session.post(
++ f"{self.url}{endpoint}",
++ data=json.dumps(payload),
++ timeout=self.timeout
++ )
++ response.raise_for_status()
++
++ result = response.json()
++ if 'error' in result:
++ error = result['error']
++ error_msg = error.get('message', 'Erreur inconnue')
++ error_data = error.get('data', {})
++ error_name = error_data.get('name', 'UnknownError')
++ logging.error(f"Erreur RPC: {error_name} - {error_msg}")
++
++ # Gérer les erreurs d'authentification
++ if "session expired" in error_msg or "Access denied" in error_msg:
++ if retry_count < self.max_retries:
++ logging.info("Session expirée, nouvelle tentative d'authentification...")
++ if self.login():
++ return self._rpc_call(endpoint, params, retry_count + 1)
++
++ return None
++
++ return result.get('result')
++
++ except requests.RequestException as e:
++ logging.error(f"Erreur de requête RPC: {e}")
++ if retry_count < self.max_retries:
++ logging.info(f"Nouvelle tentative ({retry_count + 1}/{self.max_retries})...")
++ return self._rpc_call(endpoint, params, retry_count + 1)
++ return None
++ except json.JSONDecodeError as e:
++ logging.error(f"Erreur de décodage JSON dans la réponse RPC: {e}")
++ return None
++ except Exception as e:
++ logging.error(f"Erreur inattendue lors de l'appel RPC: {e}")
++ return None
++
++ def search_read(self, model: str, domain: list, fields: list, **kwargs) -> list:
++ """
++ Effectue une recherche et lecture sur le modèle spécifié.
++
++ Args:
++ model: Nom du modèle Odoo
++ domain: Domaine de recherche (filtres)
++ fields: Liste des champs à récupérer
++ **kwargs: Arguments supplémentaires (limit, offset, etc.)
++
++ Returns:
++ Liste des enregistrements trouvés
++ """
++ params = {
++ "model": model,
++ "method": "search_read",
++ "args": [domain, fields],
++ "kwargs": kwargs
++ }
++
++ return self._rpc_call("/web/dataset/call_kw", params) or []
++
++ def read(self, model: str, ids: list, fields: list) -> list:
++ """
++ Lit les enregistrements spécifiés par leurs IDs.
++
++ Args:
++ model: Nom du modèle Odoo
++ ids: Liste des IDs des enregistrements à lire
++ fields: Liste des champs à récupérer
++
++ Returns:
++ Liste des enregistrements lus
++ """
++ if not ids:
++ return []
++
++ params = {
++ "model": model,
++ "method": "read",
++ "args": [ids, fields],
++ "kwargs": {}
++ }
++
++ return self._rpc_call("/web/dataset/call_kw", params) or []
++
++ def get_fields(self, model: str) -> Dict[str, Any]:
++ """
++ Récupère les informations sur les champs d'un modèle.
++
++ Args:
++ model: Nom du modèle Odoo
++
++ Returns:
++ Dictionnaire avec les informations sur les champs
++ """
++ params = {
++ "model": model,
++ "method": "fields_get",
++ "args": [],
++ "kwargs": {}
++ }
++
++ return self._rpc_call("/web/dataset/call_kw", params) or {}
+
+```
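+
+A hedged smoke test of the reworked AuthManager and its search_read helper (connection values are placeholders):
+
+```python
+import logging
+from utils.auth_manager import AuthManager
+
+logging.basicConfig(level=logging.INFO)
+auth = AuthManager(url="https://odoo.example.com", db="prod", username="api@example.com", api_key="***")
+if auth.login():
+    # search_read wraps /web/dataset/call_kw and falls back to [] on errors
+    tasks = auth.search_read(
+        model="project.task",
+        domain=[["active", "=", True]],
+        fields=["id", "name", "stage_id"],
+        limit=5,
+    )
+    for task in tasks:
+        print(task["id"], task["name"])
+```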
+
+---
+
+
+ Searched codebase "Trouvez le code du message_manager.py pour que je puisse l'améliorer" • **25** results
+
+| File | Lines |
+|------|-------|
+| `llm-ticket3/utils/message_manager.py` | L1-16 |
+| `llm-ticket3/utils/ticket_manager.py` | L33-77 |
+| `llm-ticket3/utils/ticket_manager.py` | L1-33 |
+| `llm-ticket3/retrieve_ticket.py` | L53-102 |
+| `llm-ticket3/.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L4957-5003 |
+| `llm-ticket3/.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L2451-2506 |
+| `llm-ticket3/.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L97-179 |
+| `llm-ticket3/.specstory/history/2025-04-01_13-55-problème-d'exécution-du-programme.md` | L3925-3960 |
+| `llm-ticket3/.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L2506-2595 |
+| `llm-ticket3/.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L3535-3596 |
+| `llm-ticket3/.specstory/history/2025-04-01_13-55-problème-d'exécution-du-programme.md` | L5124-5204 |
+| `llm-ticket3/.specstory/history/2025-04-01_13-55-problème-d'exécution-du-programme.md` | L11254-11295 |
+| `llm-ticket3/.specstory/history/2025-04-01_13-55-problème-d'exécution-du-programme.md` | L3408-3533 |
+| `llm-ticket3/.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L2401-2451 |
+| `llm-ticket3/.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L2024-2108 |
+| `llm-ticket3/.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L3857-3896 |
+| `llm-ticket3/.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L2665-2755 |
+| `llm-ticket3/retrieve_ticket.py` | L1-53 |
+| `llm-ticket3/.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L4291-4459 |
+| `llm-ticket3/.specstory/history/2025-04-01_13-55-problème-d'exécution-du-programme.md` | L3960-4007 |
+| `llm-ticket3/.specstory/history/2025-04-01_13-55-problème-d'exécution-du-programme.md` | L10729-10785 |
+| `llm-ticket3/.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L4459-4518 |
+| `llm-ticket3/.specstory/history/2025-04-01_13-55-problème-d'exécution-du-programme.md` | L13114-13160 |
+| `llm-ticket3/.specstory/history/2025-04-01_13-55-problème-d'exécution-du-programme.md` | L3305-3351 |
+| `llm-ticket3/.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L3946-4185 |
+
+
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/utils/message_manager.py
+
+---
+
+```diff
+- from typing import List, Dict, Any
++ from typing import List, Dict, Any, Optional, Tuple
+ from .auth_manager import AuthManager
+- from .utils import clean_html, save_json
++ from .utils import clean_html, save_json, save_text, detect_duplicate_content, normalize_filename
+ import os
+ import re
++ import logging
++ from datetime import datetime
+
+ class MessageManager:
++ """
++ Gestionnaire de messages pour traiter les messages associés aux tickets.
++ """
++
+ def __init__(self, auth: AuthManager):
++ """
++ Initialise le gestionnaire de messages.
++
++ Args:
++ auth: Gestionnaire d'authentification
++ """
+ self.auth = auth
+ self.model_name = "project.task"
+-
+- def get_ticket_messages(self, ticket_id: int) -> List[Dict[str, Any]]:
++ self.cleaning_strategies = {
++ "simple": {"preserve_links": False, "preserve_images": False, "strategy": "strip_tags"},
++ "standard": {"preserve_links": True, "preserve_images": True, "strategy": "html2text"},
++ "advanced": {"preserve_links": True, "preserve_images": True, "strategy": "soup"},
++ "raw": {"preserve_links": False, "preserve_images": False, "strategy": "none"}
++ }
++ self.default_strategy = "standard"
++
++ def get_ticket_messages(self, ticket_id: int, fields: Optional[List[str]] = None) -> List[Dict[str, Any]]:
+ """
+ Récupère tous les messages associés à un ticket.
+
+ Args:
+ ticket_id: ID du ticket
++ fields: Liste des champs à récupérer (facultatif)
+
+ Returns:
+ Liste des messages associés au ticket
+ """
++ if fields is None:
++ fields = ["id", "body", "date", "author_id", "email_from", "message_type",
++ "parent_id", "subtype_id", "subject", "tracking_value_ids", "attachment_ids"]
++
+ params = {
+ "model": "mail.message",
+ "method": "search_read",
+ "args": [[["res_id", "=", ticket_id], ["model", "=", self.model_name]]],
+ "kwargs": {
+- "fields": ["id", "body", "date", "author_id", "email_from", "message_type",
+- "parent_id", "subtype_id", "subject", "tracking_value_ids", "attachment_ids"],
++ "fields": fields,
+ "order": "date asc"
+ }
+ }
++
+ messages = self.auth._rpc_call("/web/dataset/call_kw", params)
+ return messages if isinstance(messages, list) else []
+ if 'author_id' in message and isinstance(message['author_id'], list) and len(message['author_id']) > 1:
+ author_name = message['author_id'][1].lower()
+- if 'odoobot' in author_name or 'bot' in author_name or 'système' in author_name:
++ if 'odoobot' in author_name or 'bot' in author_name or 'système' in author_name or 'system' in author_name:
+ is_system = True
+
+ # Vérifier le type de message
+- if message.get('message_type') == 'notification':
++ if message.get('message_type') in ['notification', 'auto_comment']:
+ is_system = True
+
+ # Vérifier le sous-type du message
+ if 'subtype_id' in message and isinstance(message['subtype_id'], list) and len(message['subtype_id']) > 1:
+ subtype = message['subtype_id'][1].lower()
+- if 'notification' in subtype or 'system' in subtype:
++ if 'notification' in subtype or 'system' in subtype or 'note' in subtype:
+ is_system = True
+
+ 'statut modifié', 'status changed', 'état du ticket'
+ ]
++
++ # Vérifier aussi les valeurs de tracking si disponibles
++ if message.get('tracking_value_ids'):
++ try:
++ tracking_values = self.auth.read("mail.tracking.value", message.get('tracking_value_ids', []),
++ ["field", "field_desc", "old_value_char", "new_value_char"])
++ for value in tracking_values:
++ if value.get("field") == "stage_id" or "stage" in value.get("field_desc", "").lower():
++ return True
++ except Exception as e:
++ logging.warning(f"Erreur lors de la vérification des valeurs de tracking: {e}")
+
+ return any(pattern in body for pattern in stage_patterns)
+ if subject_lower.startswith(prefix):
+ return True
++
++ # Patterns typiques dans les messages transférés
++ patterns = [
++ r"-{3,}Original Message-{3,}",
++ r"_{3,}Original Message_{3,}",
++ r">{3,}", # Plusieurs signes > consécutifs indiquent souvent un message cité
++ r"Le .* a écrit :"
++ ]
++ for pattern in patterns:
++ if re.search(pattern, body_lower):
++ return True
+
+ return False
+
+- def process_messages(self, ticket_id: int, ticket_code: str, ticket_name: str, output_dir: str) -> Dict[str, Any]:
++ def get_message_author_details(self, message: Dict[str, Any]) -> Dict[str, Any]:
++ """
++ Récupère les détails de l'auteur d'un message.
++
++ Args:
++ message: Le message dont il faut récupérer l'auteur
++
++ Returns:
++ Dictionnaire avec les détails de l'auteur
++ """
++ author_details = {
++ "name": "Inconnu",
++ "email": message.get('email_from', ''),
++ "is_system": False
++ }
++
++ try:
++ if message.get('author_id') and isinstance(message.get('author_id'), list) and len(message.get('author_id', [])) > 0:
++ author_id = message.get('author_id')[0]
++ params = {
++ "model": "res.partner",
++ "method": "read",
++ "args": [[author_id]],
++ "kwargs": {"fields": ['name', 'email', 'phone', 'function', 'company_id']}
++ }
++ author_data = self.auth._rpc_call("/web/dataset/call_kw", params)
++ if author_data and isinstance(author_data, list) and len(author_data) > 0:
++ author_details.update(author_data[0])
++
++ # Vérifier si c'est un auteur système
++ if author_details.get('name'):
++ author_name = author_details['name'].lower()
++ if 'odoobot' in author_name or 'bot' in author_name or 'système' in author_name:
++ author_details['is_system'] = True
++ except Exception as e:
++ logging.warning(f"Erreur lors de la récupération des détails de l'auteur: {e}")
++
++ return author_details
++
++ def process_messages(self, ticket_id: int, ticket_code: str, ticket_name: str, output_dir: str,
++ strategy: str = "standard") -> Dict[str, Any]:
+ """
+ Traite tous les messages d'un ticket, nettoie le contenu et génère des fichiers structurés.
+ ticket_name: Nom du ticket
+ output_dir: Répertoire de sortie
++ strategy: Stratégie de nettoyage (simple, standard, advanced, raw)
+
+ Returns:
+ Dictionnaire avec les chemins des fichiers créés
+ """
++ # Validation de la stratégie
++ if strategy not in self.cleaning_strategies:
++ logging.warning(f"Stratégie de nettoyage '{strategy}' inconnue, utilisation de la stratégie par défaut '{self.default_strategy}'")
++ strategy = self.default_strategy
++
++ cleaning_config = self.cleaning_strategies[strategy]
++
+ # Récupérer les messages
+ messages = self.get_ticket_messages(ticket_id)
++
++ # Détecter les messages dupliqués
++ duplicate_indices = detect_duplicate_content(messages)
+
+ # Nettoyer et structurer les messages
+ processed_messages = []
+
+- for message in messages:
+- # Ne pas inclure les messages système sans intérêt
+- if self.is_system_message(message) and not self.is_stage_change_message(message):
+- continue
++ # Créer un dictionnaire de métadonnées pour chaque message
++ message_metadata = {}
++
++ for index, message in enumerate(messages):
++ message_id = message.get('id')
++
++ # Ajouter des métadonnées au message
++ message_metadata[message_id] = {
++ "is_system": self.is_system_message(message),
++ "is_stage_change": self.is_stage_change_message(message),
++ "is_forwarded": self.is_forwarded_message(message),
++ "is_duplicate": index in duplicate_indices
++ }
+
+ # Créer une copie du message pour éviter de modifier l'original
+ message_copy = message.copy()
+
+- # Nettoyer le corps du message
++ # Ajouter les métadonnées au message copié
++ for key, value in message_metadata[message_id].items():
++ message_copy[key] = value
++
++ # Nettoyer le corps du message selon la stratégie choisie
+ if message_copy.get('body'):
+- if self.is_forwarded_message(message_copy):
+- message_copy['is_forwarded'] = True
+- # Traiter différemment les messages transférés
+- # Pour l'instant on utilise le même traitement mais à l'avenir on peut le spécialiser
+-
++ # Toujours conserver l'original
+ message_copy['body_original'] = message_copy.get('body', '')
+- message_copy['body'] = clean_html(message_copy.get('body', ''))
+-
+- # Vérifier si c'est un changement d'état
+- if self.is_stage_change_message(message_copy):
+- message_copy['is_stage_change'] = True
++
++ # Appliquer la stratégie de nettoyage, sauf si raw
++ if strategy != "raw":
++ message_copy['body'] = clean_html(
++ message_copy.get('body', ''),
++ strategy=cleaning_config['strategy'],
++ preserve_links=cleaning_config['preserve_links'],
++ preserve_images=cleaning_config['preserve_images']
++ )
+
+ # Récupérer les détails de l'auteur
+- if message_copy.get('author_id') and isinstance(message_copy.get('author_id'), list) and len(message_copy.get('author_id', [])) > 0:
+- author_id = message_copy.get('author_id')[0]
+- params = {
+- "model": "res.partner",
+- "method": "read",
+- "args": [[author_id]],
+- "kwargs": {"fields": ['name', 'email', 'phone', 'function', 'company_id']}
+- }
+- author_details = self.auth._rpc_call("/web/dataset/call_kw", params)
+- if author_details and isinstance(author_details, list) and len(author_details) > 0:
+- message_copy['author_details'] = author_details[0]
++ message_copy['author_details'] = self.get_message_author_details(message_copy)
++
++ # Ne pas inclure les messages système sans intérêt
++ if message_copy.get('is_system') and not message_copy.get('is_stage_change'):
++ # Enregistrer l'exclusion dans les métadonnées
++ message_metadata[message_id]['excluded'] = "system_message"
++ continue
++
++ # Ignorer les messages dupliqués si demandé
++ if message_copy.get('is_duplicate'):
++ # Enregistrer l'exclusion dans les métadonnées
++ message_metadata[message_id]['excluded'] = "duplicate_content"
++ continue
+
+ processed_messages.append(message_copy)
+ "id": ticket_id,
+ "code": ticket_code,
+- "name": ticket_name
++ "name": ticket_name,
++ "date_extraction": datetime.now().isoformat()
++ },
++ "metadata": {
++ "message_count": {
++ "total": len(messages),
++ "processed": len(processed_messages),
++ "excluded": len(messages) - len(processed_messages)
++ },
++ "cleaning_strategy": strategy,
++ "cleaning_config": cleaning_config
+ },
+ "messages": processed_messages
+ # Sauvegarder également les messages bruts
+ raw_messages_path = os.path.join(output_dir, "messages_raw.json")
+- save_json({"ticket_id": ticket_id, "messages": messages}, raw_messages_path)
++ save_json({
++ "ticket_id": ticket_id,
++ "ticket_code": ticket_code,
++ "message_metadata": message_metadata,
++ "messages": messages
++ }, raw_messages_path)
+
+ # Créer un fichier texte pour une lecture plus facile
+- try:
+- with open(os.path.join(output_dir, "all_messages.txt"), 'w', encoding='utf-8') as f:
++ messages_text_path = os.path.join(output_dir, "all_messages.txt")
++
++ try:
++ text_content = self._generate_messages_text(ticket_code, ticket_name, processed_messages)
++ save_text(text_content, messages_text_path)
++ except Exception as e:
++ logging.error(f"Erreur lors de la création du fichier texte: {e}")
++
++ return {
++ "all_messages_path": all_messages_path,
++ "raw_messages_path": raw_messages_path,
++ "messages_text_path": messages_text_path,
++ "messages_count": len(processed_messages),
++ "total_messages": len(messages)
++ }
++
++ def _generate_messages_text(self, ticket_code: str, ticket_name: str,
++ processed_messages: List[Dict[str, Any]]) -> str:
++ """
++ Génère un fichier texte formaté à partir des messages traités.
++
++ Args:
++ ticket_code: Code du ticket
++ ticket_name: Nom du ticket
++ processed_messages: Liste des messages traités
++
++ Returns:
++ Contenu du fichier texte
++ """
++ content = []
++
+ # Informations sur le ticket
+- f.write(f"TICKET: {ticket_code} - {ticket_name}\n\n")
++ content.append(f"TICKET: {ticket_code} - {ticket_name}")
++ content.append(f"Date d'extraction: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
++ content.append(f"Nombre de messages: {len(processed_messages)}")
++ content.append("\n" + "="*80 + "\n")
+
+ # Parcourir les messages filtrés
+ # Formater différemment les messages spéciaux
+ if msg.get('is_stage_change'):
+- f.write("*"*80 + "\n")
+- f.write("*** CHANGEMENT D'ÉTAT ***\n")
+- f.write("*"*80 + "\n\n")
++ content.append("*"*80)
++ content.append("*** CHANGEMENT D'ÉTAT ***")
++ content.append("*"*80 + "\n")
+ elif msg.get('is_forwarded'):
+- f.write("*"*80 + "\n")
+- f.write("*** MESSAGE TRANSFÉRÉ ***\n")
+- f.write("*"*80 + "\n\n")
++ content.append("*"*80)
++ content.append("*** MESSAGE TRANSFÉRÉ ***")
++ content.append("*"*80 + "\n")
+
+ # En-tête du message
+- f.write(f"DATE: {date}\n")
+- f.write(f"DE: {author}\n")
++ content.append(f"DATE: {date}")
++ content.append(f"DE: {author}")
+ if subject:
+- f.write(f"OBJET: {subject}\n")
+- f.write("\n")
+- f.write(f"{body}\n\n")
+- f.write("-"*80 + "\n\n")
+- except Exception as e:
+- print(f"Erreur lors de la création du fichier texte: {e}")
++ content.append(f"OBJET: {subject}")
++ content.append("")
++ content.append(f"{body}")
++ content.append("\n" + "-"*80 + "\n")
+
+- return {
+- "all_messages_path": all_messages_path,
+- "raw_messages_path": raw_messages_path,
+- "messages_count": len(processed_messages)
+- }
++ return "\n".join(content)
+
+```
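+
+The refactor adds a per-call cleaning strategy; a short sketch of selecting one (placeholders again, "standard" being the declared default):
+
+```python
+from utils.auth_manager import AuthManager
+from utils.message_manager import MessageManager
+
+auth = AuthManager(url="https://odoo.example.com", db="prod", username="api@example.com", api_key="***")
+if auth.login():
+    result = MessageManager(auth).process_messages(
+        ticket_id=1234,
+        ticket_code="T-1234",
+        ticket_name="Exemple de ticket",
+        output_dir="ticket_structure/T-1234",
+        strategy="advanced",  # one of "simple", "standard", "advanced", "raw"
+    )
+    print(f"{result['messages_count']}/{result['total_messages']} messages kept")
+```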
+
+---
+
+```diff
+ import os
+ import base64
+- import json
+- from typing import List, Dict, Any
+- from datetime import datetime
++ import logging
++ from typing import List, Dict, Any, Optional
+ from .auth_manager import AuthManager
++ from .utils import save_json, normalize_filename
+
+ class AttachmentManager:
+- def __init__(self, auth_manager: AuthManager, model_name: str = "project.task"):
+- self.auth_manager = auth_manager
+- self.model_name = model_name
++ """
++ Gestionnaire de pièces jointes pour extraire et sauvegarder les fichiers attachés aux tickets.
++ """
++
++ def __init__(self, auth: AuthManager):
++ """
++ Initialise le gestionnaire de pièces jointes.
++
++ Args:
++ auth: Gestionnaire d'authentification
++ """
++ self.auth = auth
++ self.model_name = "project.task"
++ self.excluded_mime_types = [] # Types MIME à exclure si nécessaire
+
+- def fetch_attachments(self, ticket_id: int) -> List[Dict[str, Any]]:
++ def get_ticket_attachments(self, ticket_id: int) -> List[Dict[str, Any]]:
++ """
++ Récupère les pièces jointes associées à un ticket.
++
++ Args:
++ ticket_id: ID du ticket
++
++ Returns:
++ Liste des pièces jointes avec leurs métadonnées
++ """
+ params = {
+ "model": "ir.attachment",
+ "method": "search_read",
+- "args": [[
+- ('res_model', '=', self.model_name),
+- ('res_id', '=', ticket_id)
+- ]],
++ "args": [[["res_id", "=", ticket_id], ["res_model", "=", self.model_name]]],
+ "kwargs": {
+- "fields": ['id', 'name', 'datas', 'mimetype', 'create_date', 'description']
++ "fields": ["id", "name", "mimetype", "file_size", "create_date",
++ "create_uid", "datas", "description", "res_name"]
+ }
+ }
+- attachments = self.auth_manager._rpc_call("/web/dataset/call_kw", params)
++
++ attachments = self.auth._rpc_call("/web/dataset/call_kw", params)
++
++ # Résoudre les informations sur le créateur
++ for attachment in attachments:
++ if "create_uid" in attachment and isinstance(attachment["create_uid"], list) and len(attachment["create_uid"]) >= 2:
++ attachment["creator_name"] = attachment["create_uid"][1]
++ attachment["creator_id"] = attachment["create_uid"][0]
++ elif "create_uid" in attachment and isinstance(attachment["create_uid"], int):
++ # Récupérer le nom du créateur
++ params = {
++ "model": "res.users",
++ "method": "name_get",
++ "args": [[attachment["create_uid"]]],
++ "kwargs": {}
++ }
++ result = self.auth._rpc_call("/web/dataset/call_kw", params)
++ if result and isinstance(result, list) and result[0] and len(result[0]) >= 2:
++ attachment["creator_name"] = result[0][1]
++ attachment["creator_id"] = result[0][0]
++
+ return attachments if isinstance(attachments, list) else []
+
+- def save_attachments(self, ticket_id: int, ticket_dir: str) -> List[Dict[str, Any]]:
++ def download_attachment(self, attachment: Dict[str, Any], output_dir: str) -> Dict[str, Any]:
++ """
++ Télécharge et sauvegarde une pièce jointe dans le répertoire spécifié.
++
++ Args:
++ attachment: Dictionnaire contenant les métadonnées de la pièce jointe
++ output_dir: Répertoire où sauvegarder la pièce jointe
++
++ Returns:
++ Dictionnaire avec les informations sur le fichier sauvegardé
++ """
++ result = {
++ "id": attachment.get("id"),
++ "name": attachment.get("name", "Sans nom"),
++ "mimetype": attachment.get("mimetype", "application/octet-stream"),
++ "file_size": attachment.get("file_size", 0),
++ "create_date": attachment.get("create_date"),
++ "creator": attachment.get("creator_name", "Inconnu"),
++ "status": "error",
++ "file_path": "",
++ "error": ""
++ }
++
++ if not attachment.get("datas"):
++ result["error"] = "Données de pièce jointe manquantes"
++ return result
++
++ try:
++ # Créer le dossier attachments s'il n'existe pas
++ attachments_dir = os.path.join(output_dir, "attachments")
++ os.makedirs(attachments_dir, exist_ok=True)
++
++ # Construire un nom de fichier sécurisé
++ safe_filename = normalize_filename(attachment.get("name", f"attachment_{attachment.get('id')}.bin"))
++ file_path = os.path.join(attachments_dir, safe_filename)
++
++ # Vérifier si un fichier avec le même nom existe déjà
++ if os.path.exists(file_path):
++ base, ext = os.path.splitext(safe_filename)
++ counter = 1
++ while os.path.exists(file_path):
++ new_filename = f"{base}_{counter}{ext}"
++ file_path = os.path.join(attachments_dir, new_filename)
++ counter += 1
++
++ # Décoder et sauvegarder le contenu
++ file_content = base64.b64decode(attachment["datas"])
++ with open(file_path, "wb") as f:
++ f.write(file_content)
++
++ result["status"] = "success"
++ result["file_path"] = file_path
++ return result
++
++ except Exception as e:
++ logging.error(f"Erreur lors du téléchargement de la pièce jointe {attachment.get('name', '')}: {e}")
++ result["error"] = str(e)
++ return result
++
++ def save_attachments(self, ticket_id: int, output_dir: str, download: bool = True) -> List[Dict[str, Any]]:
++ """
++ Récupère et sauvegarde toutes les pièces jointes d'un ticket.
++
++ Args:
++ ticket_id: ID du ticket
++ output_dir: Répertoire de sortie
++ download: Si True, télécharge les pièces jointes, sinon récupère seulement les métadonnées
++
++ Returns:
++ Liste des informations sur les pièces jointes
++ """
+ # Récupérer les pièces jointes
+- attachments = self.fetch_attachments(ticket_id)
++ attachments = self.get_ticket_attachments(ticket_id)
+
+- attachment_dir = os.path.join(ticket_dir, "attachments")
+- os.makedirs(attachment_dir, exist_ok=True)
+-
+- attachment_info_list = []
+-
+- for attachment in attachments:
+- if attachment.get("datas"):
+- # Nettoyage du nom de fichier pour éviter des problèmes
+- clean_name = attachment['name'].replace('/', '_').replace('\\', '_')
+- attachment_name = f"{attachment['id']}_{clean_name}"
+- file_path = os.path.join(attachment_dir, attachment_name)
+-
+- try:
+- with open(file_path, "wb") as f:
+- f.write(base64.b64decode(attachment["datas"]))
+-
+- # Ne pas inclure les données binaires dans le JSON
+- attachment_info = {k: v for k, v in attachment.items() if k != "datas"}
+- attachment_info["file_path"] = file_path
+- attachment_info_list.append(attachment_info)
+-
+- print(f"Pièce jointe sauvegardée: {file_path}")
+- except Exception as e:
+- print(f"Erreur lors de l'enregistrement de l'attachement {attachment['name']}: {e}")
++ if not attachments:
++ logging.info(f"Aucune pièce jointe trouvée pour le ticket {ticket_id}")
++ return []
++
++ logging.info(f"Traitement de {len(attachments)} pièces jointes pour le ticket {ticket_id}")
++
++ # Préparer les résultats
++ attachments_info = []
++
++ # Télécharger chaque pièce jointe
++ for i, attachment in enumerate(attachments):
++ # Ne pas inclure le contenu binaire dans les métadonnées
++ attachment_meta = {key: value for key, value in attachment.items() if key != "datas"}
++
++ if download:
++ # Télécharger et sauvegarder la pièce jointe
++ download_result = self.download_attachment(attachment, output_dir)
++ attachment_meta.update({
++ "download_status": download_result.get("status"),
++ "local_path": download_result.get("file_path", ""),
++ "error": download_result.get("error", "")
++ })
++
++ if download_result.get("status") == "success":
++ logging.info(f"Pièce jointe téléchargée: {attachment_meta.get('name')} ({i+1}/{len(attachments)})")
++ else:
++ logging.warning(f"Échec du téléchargement de la pièce jointe: {attachment_meta.get('name')} - {download_result.get('error')}")
++ else:
++ # Seulement récupérer les métadonnées
++ attachment_meta.update({
++ "download_status": "not_attempted",
++ "local_path": "",
++ "error": ""
++ })
++
++ attachments_info.append(attachment_meta)
+
+- # Sauvegarde des métadonnées dans un fichier JSON
+- attachments_info_path = os.path.join(ticket_dir, "attachments_info.json")
+- with open(attachments_info_path, "w", encoding="utf-8") as f:
+- json.dump(attachment_info_list, f, indent=4, ensure_ascii=False)
++ # Sauvegarder les informations sur les pièces jointes
++ attachments_info_path = os.path.join(output_dir, "attachments_info.json")
++ save_json(attachments_info, attachments_info_path)
+
+- return attachment_info_list
++ return attachments_info
+
+```
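+
+For reference, here is a minimal usage sketch of the reworked `AttachmentManager` (not part of the project code; the connection values are the placeholders from config.template.json and should be replaced with real ones):
+
+```python
+from utils.auth_manager import AuthManager
+from utils.attachment_manager import AttachmentManager
+
+# Placeholder connection values; the real ones come from config.json
+auth = AuthManager(url="https://exemple.odoo.com", db="exemple_db",
+                   username="utilisateur@exemple.com", api_key="votre_clé_api")
+
+if auth.login():
+    manager = AttachmentManager(auth)
+    # Download every attachment of the ticket into output_dir/attachments/
+    infos = manager.save_attachments(ticket_id=179, output_dir="output/ticket_T0167")
+    for info in infos:
+        print(info["name"], info["download_status"], info["local_path"])
+```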
+
+---
+
+```bash
+python -m utils.retrieve_ticket
+```
+
+```
+fgras-ca@PC-DEV:~/llm-ticket3$ python -m utils.retrieve_ticket
+Command 'python' not found, did you mean:
+ command 'python3' from deb python3
+ command 'python' from deb python-is-python3
+fgras-ca@PC-DEV:~/llm-ticket3$
+```
+
+---
+
+```bash
+python3 -m utils.retrieve_ticket
+```
+
+```
+fgras-ca@PC-DEV:~/llm-ticket3$ python3 -m utils.retrieve_ticket
+Traceback (most recent call last):
+ File "", line 189, in _run_module_as_main
+ File "", line 112, in _get_module_details
+ File "/home/fgras-ca/llm-ticket3/utils/__init__.py", line 5, in
+ from .ticket_manager import TicketManager
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ File "/home/fgras-ca/llm-ticket3/utils/ticket_manager.py", line 6, in
+ from .message_manager import MessageManager
+ File "/home/fgras-ca/llm-ticket3/utils/message_manager.py", line 3, in
+ from .utils import clean_html, save_json, save_text, detect_duplicate_content, normalize_filename
+ File "/home/fgras-ca/llm-ticket3/utils/utils.py", line 8, in
+ import html2text
+ModuleNotFoundError: No module named 'html2text'
+fgras-ca@PC-DEV:~/llm-ticket3$
+```
+
+---
+
+```bash
+pip3 install html2text beautifulsoup4
+```
+
+```
+fgras-ca@PC-DEV:~/llm-ticket3$ pip3 install html2text beautifulsoup4
+error: externally-managed-environment
+
+× This environment is externally managed
+╰─> To install Python packages system-wide, try apt install
+ python3-xyz, where xyz is the package you are trying to
+ install.
+
+ If you wish to install a non-Debian-packaged Python package,
+ create a virtual environment using python3 -m venv path/to/venv.
+ Then use path/to/venv/bin/python and path/to/venv/bin/pip. Make
+ sure you have python3-full installed.
+
+ If you wish to install a non-Debian packaged Python application,
+ it may be easiest to use pipx install xyz, which will manage a
+ virtual environment for you. Make sure you have pipx installed.
+
+ See /usr/share/doc/python3.12/README.venv for more information.
+
+note: If you believe this is a mistake, please contact your Python installation or OS distribution provider.
+ You can override this, at the risk of breaking your Python installation or OS, by passing --break-system-pa
+ckages.
+hint: See PEP 668 for the detailed specification.
+fgras-ca@PC-DEV:~/llm-ticket3$
+```
+
+---
+
+```bash
+python3 -m venv venv && source venv/bin/activate && pip install html2text beautifulsoup4
+```
+
+```
+1.2 (from beautifulsoup4)
+ Using cached soupsieve-2.6-py3-none-any.whl.metadata (4.6 kB)
+Collecting typing-extensions>=4.0.0 (from beautifulsoup4)
+ Using cached typing_extensions-4.13.0-py3-none-any.whl.metadata (3.0 kB)
+Using cached beautifulsoup4-4.13.3-py3-none-any.whl (186 kB)
+Using cached soupsieve-2.6-py3-none-any.whl (36 kB)
+Using cached typing_extensions-4.13.0-py3-none-any.whl (45 kB)
+Installing collected packages: typing-extensions, soupsieve, html2text, beautifulsoup4
+Successfully installed beautifulsoup4-4.13.3 html2text-2024.2.26 soupsieve-2.6 typing-extensions-4.13.0
+(venv) fgras-ca@PC-DEV:~/llm-ticket3$
+```
+
+---
+
+```diff
+-
++
+```
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/requirements.txt
+
+---
+
+```diff
+-
++ requests>=2.25.0
++ beautifulsoup4>=4.9.0
++ html2text>=2020.0.0
+```
+
+---
+
+```bash
+pip install -r requirements.txt
+```
+
+```
+(venv) fgras-ca@PC-DEV:~/llm-ticket3$ pip install -r requirements.txt
+Collecting requests>=2.25.0 (from -r requirements.txt (line 1))
+ Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
+Requirement already satisfied: beautifulsoup4>=4.9.0 in ./venv/lib/python3.12/site-packages (from -r require
+ments.txt (line 2)) (4.13.3)
+Requirement already satisfied: html2text>=2020.0.0 in ./venv/lib/python3.12/site-packages (from -r requireme
+nts.txt (line 3)) (2024.2.26)
+Collecting charset-normalizer<4,>=2 (from requests>=2.25.0->-r requirements.txt (line 1))
+ Using cached charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
+(35 kB)
+Collecting idna<4,>=2.5 (from requests>=2.25.0->-r requirements.txt (line 1))
+ Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)
+Collecting urllib3<3,>=1.21.1 (from requests>=2.25.0->-r requirements.txt (line 1))
+ Using cached urllib3-2.3.0-py3-none-any.whl.metadata (6.5 kB)
+Collecting certifi>=2017.4.17 (from requests>=2.25.0->-r requirements.txt (line 1))
+ Using cached certifi-2025.1.31-py3-none-any.whl.metadata (2.5 kB)
+Requirement already satisfied: soupsieve>1.2 in ./venv/lib/python3.12/site-packages (from beautifulsoup4>=4.
+9.0->-r requirements.txt (line 2)) (2.6)
+Requirement already satisfied: typing-extensions>=4.0.0 in ./venv/lib/python3.12/site-packages (from beautif
+ulsoup4>=4.9.0->-r requirements.txt (line 2)) (4.13.0)
+Using cached requests-2.32.3-py3-none-any.whl (64 kB)
+Using cached certifi-2025.1.31-py3-none-any.whl (166 kB)
+Using cached charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (145 kB)
+Using cached idna-3.10-py3-none-any.whl (70 kB)
+Using cached urllib3-2.3.0-py3-none-any.whl (128 kB)
+Installing collected packages: urllib3, idna, charset-normalizer, certifi, requests
+Successfully installed certifi-2025.1.31 charset-normalizer-3.4.1 idna-3.10 requests-2.32.3 urllib3-2.3.0
+(venv) fgras-ca@PC-DEV:~/llm-ticket3$
+```
+
+---
+
+```diff
+ {
+- "odoo": {
+- "url": "https://odoo.cbao.fr",
+- "db": "production_cbao",
+- "username": "fernand@cbao.fr",
+- "api_key": "Lestat66!"
+- },
+- "llm": {
+- "api_key": "your_mistral_api_key"
+- },
+- "output_dir": "output"
+- }
++ "odoo_url": "https://exemple.odoo.com",
++ "odoo_db": "exemple_db",
++ "odoo_username": "utilisateur@exemple.com",
++ "odoo_api_key": "votre_clé_api",
++ "output_dir": "ticket_structure"
++ }
+```
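+
+A sketch of how these flattened keys might then be read (the lines that actually extract them are elided in the script diff further below, so treat the exact variable names as an assumption):
+
+```python
+import json
+
+# Hypothetical loader mirroring the flat keys of config.template.json
+with open("config.json", "r", encoding="utf-8") as f:
+    config = json.load(f)
+
+url = config["odoo_url"]
+db = config["odoo_db"]
+username = config["odoo_username"]
+api_key = config["odoo_api_key"]
+output_dir = config.get("output_dir", "output")
+```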
+
+---
+
+```bash
+cp config.json config.template.json && python3 -m utils.retrieve_ticket --help
+```
+
+```diff
+- def is_important_image(tag, message_text: str) -> bool:
++ def is_important_image(tag: Tag, message_text: str) -> bool:
+ """
+ Détermine si une image est importante ou s'il s'agit d'un logo/signature.
+ elif strategy == "soup":
+ # Utiliser BeautifulSoup pour un nettoyage plus avancé
++ try:
+ soup = BeautifulSoup(html_content, 'html.parser')
+
+ # Préserver les liens si demandé
+ if preserve_links:
+ for a_tag in soup.find_all('a', href=True):
+- a_tag.string = f"{a_tag.text} [{a_tag['href']}]"
++ if isinstance(a_tag, Tag):
++ href = a_tag.get('href', '')
++ new_text = f"{a_tag.get_text()} [{href}]"
++ new_tag = soup.new_string(new_text)
++ a_tag.replace_with(new_tag)
+
+ # Préserver les images si demandé
+ if preserve_images:
+- for img_tag in soup.find_all('img', src=True):
+- img_tag.string = f"[Image: {img_tag.get('alt', '')} - {img_tag['src']}]"
++ for img_tag in soup.find_all('img'):
++ if isinstance(img_tag, Tag):
++ src = img_tag.get('src', '')
++ alt = img_tag.get('alt', '')
++ new_text = f"[Image: {alt} - {src}]"
++ new_tag = soup.new_string(new_text)
++ img_tag.replace_with(new_tag)
+
+ # Convertir les listes en texte formaté
+ for ul in soup.find_all('ul'):
++ if isinstance(ul, Tag):
+ for li in ul.find_all('li'):
+- li.string = f"• {li.text}"
++ if isinstance(li, Tag):
++ li_text = li.get_text()
++ new_text = f"• {li_text}"
++ new_tag = soup.new_string(new_text)
++ li.replace_with(new_tag)
+
+ for ol in soup.find_all('ol'):
++ if isinstance(ol, Tag):
+ for i, li in enumerate(ol.find_all('li')):
+- li.string = f"{i+1}. {li.text}"
++ if isinstance(li, Tag):
++ li_text = li.get_text()
++ new_text = f"{i+1}. {li_text}"
++ new_tag = soup.new_string(new_text)
++ li.replace_with(new_tag)
+
+ text = soup.get_text()
+ # Nettoyer les espaces et les lignes vides
+ text = re.sub(r'\n\s*\n', '\n\n', text)
+ return text.strip()
++ except Exception as e:
++ logging.warning(f"Erreur lors du nettoyage HTML avec BeautifulSoup: {e}")
++ # En cas d'erreur, utiliser une méthode de secours
++ return clean_html(html_content, "strip_tags")
+
+ else:
+```
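+
+As a quick standalone check of this tag-to-text substitution approach (a sketch, independent of the project code):
+
+```python
+from bs4 import BeautifulSoup, Tag
+
+html = '<p>Voir <a href="https://odoo.cbao.fr">le ticket</a> <img src="cid:image001.png" alt="capture"></p>'
+soup = BeautifulSoup(html, "html.parser")
+
+# Replace links and images with plain-text markers, as in the diff above
+for a_tag in soup.find_all('a', href=True):
+    if isinstance(a_tag, Tag):
+        a_tag.replace_with(soup.new_string(f"{a_tag.get_text()} [{a_tag.get('href', '')}]"))
+
+for img_tag in soup.find_all('img'):
+    if isinstance(img_tag, Tag):
+        img_tag.replace_with(soup.new_string(f"[Image: {img_tag.get('alt', '')} - {img_tag.get('src', '')}]"))
+
+print(soup.get_text())
+# Voir le ticket [https://odoo.cbao.fr] [Image: capture - cid:image001.png]
+```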
+
+---
+
+```diff
+
+ try:
+- if message.get('author_id') and isinstance(message.get('author_id'), list) and len(message.get('author_id', [])) > 0:
+- author_id = message.get('author_id')[0]
++ author_id_field = message.get('author_id')
++ if author_id_field and isinstance(author_id_field, list) and len(author_id_field) > 0:
++ author_id = author_id_field[0]
+ params = {
+ "model": "res.partner",
+```
+
+---
+
+```diff
+ """
+ # Vérifier les attributs de l'image
+- src = tag.get('src', '')
+- alt = tag.get('alt', '')
+- title = tag.get('title', '')
+- css_class = tag.get('class', '')
++ src = str(tag.get('src', ''))
++ alt = str(tag.get('alt', ''))
++ title = str(tag.get('title', ''))
+
+ # Patterns pour les images inutiles
+ if (pattern in src.lower() or
+ pattern in alt.lower() or
+- pattern in title.lower() or
+- (css_class and any(pattern in c.lower() for c in css_class if isinstance(c, str)))):
++ pattern in title.lower()):
+ return False
+
+ # Vérifier la taille
+- width = tag.get('width', '')
+- height = tag.get('height', '')
++ width_str = str(tag.get('width', ''))
++ height_str = str(tag.get('height', ''))
++
+ try:
+- width = int(width) if width and str(width).isdigit() else None
+- height = int(height) if height and str(height).isdigit() else None
+- if width and height and width <= 50 and height <= 50:
++ if width_str.isdigit() and height_str.isdigit():
++ width = int(width_str)
++ height = int(height_str)
++ if width <= 50 and height <= 50:
+ return False
+ except (ValueError, TypeError):
+```
+
+---
+
+```bash
+python3 -m venv venv && source venv/bin/activate && pip install -r requirements.txt
+```
+
+```
+=2.25.0 in ./venv/lib/python3.12/site-packages (from -r requirements
+.txt (line 1)) (2.32.3)
+Requirement already satisfied: beautifulsoup4>=4.9.0 in ./venv/lib/python3.12/site-packages (from -r require
+ments.txt (line 2)) (4.13.3)
+Requirement already satisfied: html2text>=2020.0.0 in ./venv/lib/python3.12/site-packages (from -r requireme
+nts.txt (line 3)) (2024.2.26)
+Requirement already satisfied: charset-normalizer<4,>=2 in ./venv/lib/python3.12/site-packages (from request
+s>=2.25.0->-r requirements.txt (line 1)) (3.4.1)
+Requirement already satisfied: idna<4,>=2.5 in ./venv/lib/python3.12/site-packages (from requests>=2.25.0->-
+r requirements.txt (line 1)) (3.10)
+Requirement already satisfied: urllib3<3,>=1.21.1 in ./venv/lib/python3.12/site-packages (from requests>=2.2
+5.0->-r requirements.txt (line 1)) (2.3.0)
+Requirement already satisfied: certifi>=2017.4.17 in ./venv/lib/python3.12/site-packages (from requests>=2.2
+5.0->-r requirements.txt (line 1)) (2025.1.31)
+Requirement already satisfied: soupsieve>1.2 in ./venv/lib/python3.12/site-packages (from beautifulsoup4>=4.
+9.0->-r requirements.txt (line 2)) (2.6)
+Requirement already satisfied: typing-extensions>=4.0.0 in ./venv/lib/python3.12/site-packages (from beautif
+ulsoup4>=4.9.0->-r requirements.txt (line 2)) (4.13.0)
+(venv) fgras-ca@PC-DEV:~/llm-ticket3$
+```
+
+---
+
+```bash
+source venv/bin/activate && python3 -m utils.retrieve_ticket --help
+```
+
+```
+(venv) fgras-ca@PC-DEV:~/llm-ticket3$ source venv/bin/activate && python3 -m utils.retrieve_ticket --help
+usage: retrieve_ticket.py [-h] [--output OUTPUT] [--config CONFIG] [--verbose] ticket_code
+
+Récupère un ticket Odoo par son code et extrait ses données.
+
+positional arguments:
+ ticket_code Code du ticket à extraire
+
+options:
+ -h, --help show this help message and exit
+ --output OUTPUT, -o OUTPUT
+ Répertoire de sortie
+ --config CONFIG, -c CONFIG
+ Fichier de configuration
+ --verbose, -v Mode verbeux
+(venv) fgras-ca@PC-DEV:~/llm-ticket3$
+```
+
+---
+
+```bash
+python3 -m utils.retrieve_ticket T0167 -v
+```
+
+```
+(venv) fgras-ca@PC-DEV:~/llm-ticket3$ python3 -m utils.retrieve_ticket T0167 -v
+2025-04-03 15:34:18 - INFO - Extraction du ticket T0167
+2025-04-03 15:34:18 - INFO - ------------------------------------------------------------
+2025-04-03 15:34:18 - INFO - Tentative de connexion à https://odoo.cbao.fr avec l'utilisateur fernand@cbao.f
+r
+2025-04-03 15:34:18 - DEBUG - Starting new HTTPS connection (1): odoo.cbao.fr:443
+2025-04-03 15:34:18 - DEBUG - https://odoo.cbao.fr:443 "POST /web/session/authenticate HTTP/1.1" 200 474
+2025-04-03 15:34:18 - INFO - Authentification réussie. UID: 33
+2025-04-03 15:34:18 - DEBUG - https://odoo.cbao.fr:443 "POST /web/dataset/call_kw HTTP/1.1" 200 760
+2025-04-03 15:34:18 - DEBUG - https://odoo.cbao.fr:443 "POST /web/dataset/call_kw HTTP/1.1" 200 63
+2025-04-03 15:34:18 - DEBUG - https://odoo.cbao.fr:443 "POST /web/dataset/call_kw HTTP/1.1" 200 None
+2025-04-03 15:34:18 - DEBUG - https://odoo.cbao.fr:443 "POST /web/dataset/call_kw HTTP/1.1" 200 194
+2025-04-03 15:34:19 - DEBUG - https://odoo.cbao.fr:443 "POST /web/dataset/call_kw HTTP/1.1" 200 312
+2025-04-03 15:34:19 - DEBUG - https://odoo.cbao.fr:443 "POST /web/dataset/call_kw HTTP/1.1" 200 153
+2025-04-03 15:34:19 - DEBUG - https://odoo.cbao.fr:443 "POST /web/dataset/call_kw HTTP/1.1" 200 165
+2025-04-03 15:34:19 - DEBUG - https://odoo.cbao.fr:443 "POST /web/dataset/call_kw HTTP/1.1" 200 None
+2025-04-03 15:34:19 - DEBUG - https://odoo.cbao.fr:443 "POST /web/dataset/call_kw HTTP/1.1" 200 None
+2025-04-03 15:34:19 - DEBUG - https://odoo.cbao.fr:443 "POST /web/dataset/call_kw HTTP/1.1" 200 None
+2025-04-03 15:34:19 - DEBUG - https://odoo.cbao.fr:443 "POST /web/dataset/call_kw HTTP/1.1" 200 145
+2025-04-03 15:34:19 - DEBUG - https://odoo.cbao.fr:443 "POST /web/dataset/call_kw HTTP/1.1" 200 194
+2025-04-03 15:34:19 - DEBUG - https://odoo.cbao.fr:443 "POST /web/dataset/call_kw HTTP/1.1" 200 None
+2025-04-03 15:34:19 - DEBUG - https://odoo.cbao.fr:443 "POST /web/dataset/call_kw HTTP/1.1" 200 194
+2025-04-03 15:34:19 - DEBUG - https://odoo.cbao.fr:443 "POST /web/dataset/call_kw HTTP/1.1" 200 None
+2025-04-03 15:34:19 - DEBUG - https://odoo.cbao.fr:443 "POST /web/dataset/call_kw HTTP/1.1" 200 194
+2025-04-03 15:34:19 - DEBUG - https://odoo.cbao.fr:443 "POST /web/dataset/call_kw HTTP/1.1" 200 None
+2025-04-03 15:34:20 - DEBUG - https://odoo.cbao.fr:443 "POST /web/dataset/call_kw HTTP/1.1" 200 None
+2025-04-03 15:34:20 - DEBUG - https://odoo.cbao.fr:443 "POST /web/dataset/call_kw HTTP/1.1" 200 None
+2025-04-03 15:34:20 - DEBUG - https://odoo.cbao.fr:443 "POST /web/dataset/call_kw HTTP/1.1" 200 None
+2025-04-03 15:34:20 - DEBUG - https://odoo.cbao.fr:443 "POST /web/dataset/call_kw HTTP/1.1" 200 None
+2025-04-03 15:34:20 - INFO - Traitement de 2 pièces jointes pour le ticket 179
+2025-04-03 15:34:20 - INFO - Pièce jointe téléchargée: Problème partie.png (1/2)
+2025-04-03 15:34:20 - INFO - Pièce jointe téléchargée: image001.png (2/2)
+2025-04-03 15:34:20 - DEBUG - https://odoo.cbao.fr:443 "POST /web/dataset/call_kw HTTP/1.1" 200 145
+2025-04-03 15:34:20 - INFO - ------------------------------------------------------------
+2025-04-03 15:34:20 - INFO - Extraction terminée avec succès
+2025-04-03 15:34:20 - INFO - Ticket: T0167
+2025-04-03 15:34:20 - INFO - Répertoire: output/ticket_T0167/T0167_20250403_153418
+2025-04-03 15:34:20 - INFO - Messages traités: 5
+2025-04-03 15:34:20 - INFO - Pièces jointes: 2
+2025-04-03 15:34:20 - INFO - ------------------------------------------------------------
+
+Extraction du ticket T0167 terminée avec succès.
+Les données ont été sauvegardées dans: output/ticket_T0167/T0167_20250403_153418
+(venv) fgras-ca@PC-DEV:~/llm-ticket3$
+```
+
+---
+
+```bash
+ls -la output/ticket_T0167/T0167_20250403_153418/
+```
+
+```
+(venv) fgras-ca@PC-DEV:~/llm-ticket3$ ls -la output/ticket_T0167/T0167_20250403_153418/
+total 72
+drwxr-xr-x 3 fgras-ca fgras-ca 4096 Apr 3 15:34 .
+drwxr-xr-x 3 fgras-ca fgras-ca 4096 Apr 3 15:34 ..
+-rw-r--r-- 1 fgras-ca fgras-ca 13314 Apr 3 15:34 all_messages.json
+-rw-r--r-- 1 fgras-ca fgras-ca 4160 Apr 3 15:34 all_messages.txt
+drwxr-xr-x 2 fgras-ca fgras-ca 4096 Apr 3 15:34 attachments
+-rw-r--r-- 1 fgras-ca fgras-ca 1043 Apr 3 15:34 attachments_info.json
+-rw-r--r-- 1 fgras-ca fgras-ca 299 Apr 3 15:34 extraction_summary.json
+-rw-r--r-- 1 fgras-ca fgras-ca 132 Apr 3 15:34 followers.json
+-rw-r--r-- 1 fgras-ca fgras-ca 11694 Apr 3 15:34 messages_raw.json
+-rw-r--r-- 1 fgras-ca fgras-ca 604 Apr 3 15:34 structure.json
+-rw-r--r-- 1 fgras-ca fgras-ca 1576 Apr 3 15:34 ticket_info.json
+-rw-r--r-- 1 fgras-ca fgras-ca 931 Apr 3 15:34 ticket_summary.json
+(venv) fgras-ca@PC-DEV:~/llm-ticket3$
+```
+
+---
+
+```bash
+cat output/ticket_T0167/T0167_20250403_153418/all_messages.txt | head -20
+```
+
+```
+](mailto:Guillaume.LENEVEU@calvados.fr)
+Pour : | Youness BENDEQ [](mailto:youness.bendeq@cbao.fr)
+
+(venv) fgras-ca@PC-DEV:~/llm-ticket3$
+```
+
+---
+
+```bash
+cat output/ticket_T0167/T0167_20250403_153418/structure.json
+```
+
+```
+(venv) fgras-ca@PC-DEV:~/llm-ticket3$ cat output/ticket_T0167/T0167_20250403_153418/structure.json
+{
+ "date_extraction": "2025-04-03T15:34:20.622327",
+ "ticket_id": 179,
+ "ticket_code": "T0167",
+ "ticket_name": "Pb d'affaire/chantier/partie dans un programme d'essai",
+ "output_dir": "output/ticket_T0167/T0167_20250403_153418",
+ "files": {
+ "ticket_info": "ticket_info.json",
+ "ticket_summary": "ticket_summary.json",
+ "messages": "all_messages.json",
+ "messages_raw": "messages_raw.json",
+ "messages_text": "all_messages.txt",
+ "attachments": "attachments_info.json",
+ "followers": "followers.json"
+ },
+ "stats": {
+ "messages_count": 5,
+ "attachments_count": 2
+ }
+(venv) fgras-ca@PC-DEV:~/llm-ticket3$
+```
+
+---
+
+```bash
+ls -la output/ticket_T0167/T0167_20250403_153418/attachments/
+```
+
+```
+(venv) fgras-ca@PC-DEV:~/llm-ticket3$ ls -la output/ticket_T0167/T0167_20250403_153418/attachments/
+total 288
+drwxr-xr-x 2 fgras-ca fgras-ca 4096 Apr 3 15:34 .
+drwxr-xr-x 3 fgras-ca fgras-ca 4096 Apr 3 15:34 ..
+-rw-r--r-- 1 fgras-ca fgras-ca 255246 Apr 3 15:34 'Problème partie.png'
+-rw-r--r-- 1 fgras-ca fgras-ca 25085 Apr 3 15:34 image001.png
+(venv) fgras-ca@PC-DEV:~/llm-ticket3$
+```
+
+---
+
+```diff
+ #!/usr/bin/env python3
++ """
++ Script principal pour récupérer un ticket depuis Odoo et extraire ses données.
++ Ce script fait appel aux utilitaires dans le package utils pour effectuer l'extraction.
++ """
+
+ import os
+ import sys
+ import json
+- import base64
+ import argparse
+ from datetime import datetime
+ from utils.auth_manager import AuthManager
+ from utils.ticket_manager import TicketManager
+- from utils.attachment_manager import AttachmentManager
+- from utils.message_manager import MessageManager
+- from utils.utils import save_json
+-
+- def main():
+- parser = argparse.ArgumentParser(description="Extraction de tickets Odoo")
+- parser.add_argument("ticket_code", help="Code du ticket à extraire (ex: T0167)")
+- parser.add_argument("--config", default="config.json", help="Chemin vers le fichier de configuration")
+- parser.add_argument("--output-dir", help="Répertoire de sortie (par défaut: output/ticket_CODE)")
+- parser.add_argument("--verbose", "-v", action="store_true", help="Afficher plus d'informations")
+- args = parser.parse_args()
++ from utils.utils import setup_logging, log_separator
++
++ def parse_arguments():
++ """Parse les arguments de ligne de commande."""
++ parser = argparse.ArgumentParser(description="Récupère un ticket Odoo par son code et extrait ses données.")
++ parser.add_argument("ticket_code", help="Code du ticket à extraire")
++ parser.add_argument("--output", "-o", help="Répertoire de sortie", default=None)
++ parser.add_argument("--config", "-c", help="Fichier de configuration", default="config.json")
++ parser.add_argument("--verbose", "-v", action="store_true", help="Mode verbeux")
++ return parser.parse_args()
+
+- # Charger la configuration
++ def load_config(config_file):
++ """Charge le fichier de configuration."""
+ try:
+- with open(args.config, "r", encoding="utf-8") as f:
+- config = json.load(f)
+-
+- if args.verbose:
+- print(f"Configuration chargée depuis {args.config}")
++ with open(config_file, 'r') as f:
++ return json.load(f)
+ except Exception as e:
+- print(f"Erreur lors du chargement de la configuration: {e}")
++ print(f"Erreur lors du chargement du fichier de configuration: {e}")
+ sys.exit(1)
++
++ def main():
++ """Fonction principale du script."""
++ args = parse_arguments()
++ config = load_config(args.config)
++
++ # Configurer la journalisation
++ log_level = "DEBUG" if args.verbose else "INFO"
++ setup_logging(log_level)
+
+ # Extraire les informations de connexion
+
+ # Définir le répertoire de sortie
+- output_dir = args.output_dir or os.path.join(config.get("output_dir", "output"), f"ticket_{args.ticket_code}")
+- os.makedirs(output_dir, exist_ok=True)
+-
+- # Authentification Odoo
+- auth = AuthManager(url, db, username, api_key)
+- if not auth.login():
+- print("Échec de connexion à Odoo")
++ output_dir = args.output or os.path.join(config.get("output_dir", "output"), f"ticket_{args.ticket_code}")
++
++ # Créer le répertoire de sortie spécifique au ticket avec horodatage
++ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
++ ticket_dir = os.path.join(output_dir, f"{args.ticket_code}_{timestamp}")
++ os.makedirs(ticket_dir, exist_ok=True)
++
++ print(f"Extraction du ticket {args.ticket_code}...")
++
++ try:
++ # Initialiser l'authentification
++ auth_manager = AuthManager(
++ url=url,
++ db=db,
++ username=username,
++ api_key=api_key
++ )
++
++ if not auth_manager.login():
++ print("Échec de l'authentification à Odoo")
+ sys.exit(1)
+
+- # Initialiser les gestionnaires
+- ticket_manager = TicketManager(auth)
+- message_manager = MessageManager(auth)
+- attachment_manager = AttachmentManager(auth)
+-
+- # Récupérer le ticket
+- ticket = ticket_manager.get_ticket_by_code(args.ticket_code)
+- if not ticket:
+- print(f"Ticket {args.ticket_code} non trouvé")
++ # Extraire les données du ticket
++ ticket_manager = TicketManager(auth_manager)
++ result = ticket_manager.extract_ticket_data(args.ticket_code, ticket_dir)
++
++ if not result:
++ print(f"Échec de l'extraction du ticket {args.ticket_code}")
+ sys.exit(1)
+
+- ticket_id = ticket.get('id')
+-
+- # Sauvegarder ticket_info.json
+- ticket_info_path = os.path.join(output_dir, "ticket_info.json")
+- save_json(ticket, ticket_info_path)
+-
+- if args.verbose:
+- print(f"Ticket {args.ticket_code} trouvé (ID: {ticket_id})")
+- print(f"Extraction des données vers {output_dir}...")
+-
+- # Récupérer et sauvegarder les messages
+- messages = message_manager.get_ticket_messages(ticket_id)
+- all_messages_path = os.path.join(output_dir, "all_messages.json")
+- save_json(messages, all_messages_path)
+-
+- # Récupérer et sauvegarder les pièces jointes
+- attachments_info = attachment_manager.save_attachments(ticket_id, output_dir)
+- attachments_info_path = os.path.join(output_dir, "attachments_info.json")
+-
+- # Génération de structure.json
+- structure = {
+- "date_extraction": datetime.now().isoformat(),
+- "ticket_dir": output_dir,
+- "fichiers_json": [
+- "ticket_info.json",
+- "all_messages.json",
+- "attachments_info.json"
+- ]
+- }
+- structure_path = os.path.join(output_dir, "structure.json")
+- save_json(structure, structure_path)
+-
+- print("Extraction terminée avec succès")
+- print(f"- Informations du ticket: {ticket_info_path}")
+- print(f"- Messages: {all_messages_path}")
+- print(f"- Pièces jointes: {len(attachments_info)} fichiers")
+- print(f"- Structure: {structure_path}")
++ # Générer un rapport de fin
++ summary = {
++ "timestamp": timestamp,
++ "ticket_code": args.ticket_code,
++ "output_directory": ticket_dir,
++ "message_count": result.get("messages_count", 0),
++ "attachment_count": result.get("attachments_count", 0),
++ "files_created": [
++ os.path.basename(result.get("ticket_info", "")),
++ os.path.basename(result.get("ticket_summary", "")),
++ os.path.basename(result.get("messages_file", "")),
++ os.path.basename(result.get("ticket_data_file", ""))
++ ]
++ }
++
++ # Afficher le résumé
++ print("\nExtraction terminée avec succès")
++ print(f"Ticket: {args.ticket_code}")
++ print(f"Répertoire: {ticket_dir}")
++ print(f"Messages traités: {result.get('messages_count', 0)}")
++ print(f"Pièces jointes: {result.get('attachments_count', 0)}")
++
++ except Exception as e:
++ print(f"Une erreur est survenue: {e}")
++ sys.exit(1)
+
+ if __name__ == "__main__":
+```
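+
+The timestamped directory naming used above can be checked in isolation (illustrative sketch; the printed path is an example):
+
+```python
+import os
+from datetime import datetime
+
+output_dir = os.path.join("output", "ticket_T0167")
+timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+ticket_dir = os.path.join(output_dir, f"T0167_{timestamp}")
+print(ticket_dir)  # e.g. output/ticket_T0167/T0167_20250403_153418
+```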
+
+---
+
+```diff
++ """
++ Utilitaires généraux pour l'extraction de tickets.
++ """
++
+ import os
+ import json
+ from bs4 import BeautifulSoup, Tag
+ import html2text
++ import unicodedata
+
+ def setup_logging(level: int = logging.INFO, log_file: Optional[str] = None) -> None:
+ log_file: Chemin du fichier de log (default: None)
+ """
+- log_format = '%(asctime)s - %(levelname)s - %(message)s'
+- date_format = '%Y-%m-%d %H:%M:%S'
++ numeric_level = getattr(logging, level.upper(), None)
++ if not isinstance(numeric_level, int):
++ raise ValueError(f"Niveau de journalisation invalide: {level}")
+
+- # Configuration de base
+ logging.basicConfig(
+- level=level,
+- format=log_format,
+- datefmt=date_format
++ level=numeric_level,
++ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
++ datefmt='%Y-%m-%d %H:%M:%S'
+ )
+
+ file_handler = logging.FileHandler(log_file, encoding='utf-8')
+ file_handler.setLevel(level)
+- file_formatter = logging.Formatter(log_format, date_format)
++ file_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s', '%Y-%m-%d %H:%M:%S')
+ file_handler.setFormatter(file_formatter)
+ logging.getLogger().addHandler(file_handler)
+ """
+ try:
+- # S'assurer que le répertoire existe
+- directory = os.path.dirname(file_path)
+- if directory and not os.path.exists(directory):
+- os.makedirs(directory, exist_ok=True)
+-
+ with open(file_path, 'w', encoding='utf-8') as f:
+- json.dump(data, f, indent=2, ensure_ascii=False)
++ json.dump(data, f, ensure_ascii=False, indent=2)
+ return True
+ except Exception as e:
+ Nom normalisé
+ """
++ # Enlever les accents
++ name = unicodedata.normalize('NFKD', name).encode('ASCII', 'ignore').decode('ASCII')
++
++ # Remplacer les caractères non alphanumériques par des underscores
++ name = re.sub(r'[^\w\.-]', '_', name)
++
++ # Limiter la longueur à 255 caractères (limitation commune des systèmes de fichiers)
+ # Remplacer les caractères non autorisés par des underscores
+ sanitized = re.sub(r'[\\/*?:"<>|]', '_', name)
+```
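+
+A standalone sketch of the accent-stripping step (it mirrors the new logic rather than calling the project's normalize_filename itself):
+
+```python
+import re
+import unicodedata
+
+def strip_accents_and_sanitize(name: str) -> str:
+    # Decompose accented characters, drop the combining marks, then
+    # replace anything that is not alphanumeric, dot or dash with "_"
+    name = unicodedata.normalize('NFKD', name).encode('ASCII', 'ignore').decode('ASCII')
+    return re.sub(r'[^\w\.-]', '_', name)
+
+print(strip_accents_and_sanitize("Problème partie.png"))  # Probleme_partie.png
+```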
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/utils/utils.py
+
+---
+
+```diff
+ import unicodedata
+
+- def setup_logging(level: int = logging.INFO, log_file: Optional[str] = None) -> None:
++ def setup_logging(level: Union[str, int] = logging.INFO, log_file: Optional[str] = None) -> None:
+ """
+ Configure la journalisation avec un format spécifique et éventuellement un fichier de logs.
+
+ Args:
+- level: Niveau de journalisation (default: logging.INFO)
++ level: Niveau de journalisation en tant que chaîne (ex: "INFO", "DEBUG") ou valeur entière (default: logging.INFO)
+ log_file: Chemin du fichier de log (default: None)
+ """
++ # Convertir le niveau de log si c'est une chaîne
++ if isinstance(level, str):
+ numeric_level = getattr(logging, level.upper(), None)
+ if not isinstance(numeric_level, int):
+ raise ValueError(f"Niveau de journalisation invalide: {level}")
++ else:
++ numeric_level = level
+
+ logging.basicConfig(
+
+ file_handler = logging.FileHandler(log_file, encoding='utf-8')
+- file_handler.setLevel(level)
++ file_handler.setLevel(numeric_level)
+ file_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s', '%Y-%m-%d %H:%M:%S')
+ file_handler.setFormatter(file_formatter)
+```
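+
+With that change, both call styles should be accepted; a minimal sketch (assuming the `utils.utils` import path used in retrieve_ticket.py):
+
+```python
+import logging
+
+from utils.utils import setup_logging
+
+# Either form is now accepted:
+setup_logging("DEBUG")          # string name, as passed by retrieve_ticket.py
+# setup_logging(logging.INFO)   # integer level, the original behaviour
+```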
+
+---
+
+```bash
+cd /home/fgras-ca/llm-ticket3 && python retrieve_ticket.py T0167 --verbose
+```
+
+```
+
+ Listed directory output/ticket_T0167/T0167_20250403_153713 • **9** results
+
+| Name |
+|-------|
+| 📄 `followers.json` |
+| 📄 `structure.json` |
+| 📁 `attachments` |
+| 📄 `attachments_info.json` |
+| 📄 `all_messages.json` |
+| 📄 `all_messages.txt` |
+| 📄 `messages_raw.json` |
+| 📄 `ticket_info.json` |
+| 📄 `ticket_summary.json` |
+
+
+
+---
+
+
+ Listed directory output/ticket_T0167/T0167_20250403_153713/attachments • **2** results
+
+| Name |
+|-------|
+| 📄 `Probleme_partie.png` |
+| 📄 `image001.png` |
+
+
+
+---
+
+Read file: output/ticket_T0167/T0167_20250403_153713/structure.json
+
+---
+
+The retrieval script has been fixed. I modified `retrieve_ticket.py` to use the new version of the ticket manager, and I adjusted the `setup_logging` function in `utils.py` so that it accepts both string names and integer logging levels.
+
+The test run on ticket T0167 shows that everything works as expected:
+
+1. The script retrieves the ticket information correctly
+2. It extracts the 5 messages attached to the ticket
+3. It downloads the 2 attachments (Probleme_partie.png and image001.png)
+4. It generates all the structured JSON files in the output directory
+
+The output directory is now well organised, with:
+- The data files (ticket_info.json, all_messages.json, etc.)
+- The structure.json file, which documents the contents of the extraction
+- The attachments in a dedicated subdirectory
+
+These changes make the system more robust and consistent while keeping the essential functionality of the previous versions.
+
+---
+
+_**User**_
+
+I would like to understand what the point is of having one retrieve_ticket.py file at the project root and another one in utils. Do they serve different purposes?
+
+---
+
+_**Assistant**_
+
+Read file: /home/fgras-ca/llm-ticket3/utils/retrieve_ticket.py
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/retrieve_ticket.py
+
+---
+
+Tool run_terminal_cmd not found
+
+---
+
+
+ Listed directory /home/fgras-ca/llm-ticket3 • **12** results
+
+| Name |
+|-------|
+| 📁 `.git` |
+| 📄 `retrieve_ticket.py` |
+| 📄 `retrieve_ticket.log` |
+| 📁 `output` |
+| 📄 `config.json` |
+| 📄 `README.md` |
+| 📄 `config.template.json` |
+| 📄 `requirements.txt` |
+| 📁 `venv` |
+| 📁 `utils` |
+| 📁 `.specstory` |
+| 📁 `data` |
+
+
+
+---
+
+
+ Listed directory /home/fgras-ca/llm-ticket3/utils • **9** results
+
+| Name |
+|-------|
+| 📁 `__pycache__` |
+| 📄 `utils.py` |
+| 📄 `message_manager.py` |
+| 📄 `retrieve_ticket.py` |
+| 📄 `attachment_manager.py` |
+| 📄 `auth_manager.py` |
+| 📄 `ticket_manager.py` |
+| 📄 `notes` |
+| 📄 `__init__.py` |
+
+
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/utils/__init__.py
+
+---
+
+The two `retrieve_ticket.py` files play complementary but slightly different roles:
+
+1. **retrieve_ticket.py at the project root**:
+   - It is the main entry point of the application, the one you run directly
+   - It provides a simple command-line interface
+   - It is designed to be used as a standalone script
+   - Its job is to orchestrate the overall ticket extraction process
+   - It is more user-facing, with friendlier messages
+
+2. **utils/retrieve_ticket.py**:
+   - It is a module that is part of the `utils` package
+   - It can be imported and used by other Python modules
+   - It is geared towards programmatic use
+   - It has more thorough logging
+   - It produces an additional `extraction_summary.json` file
+
+The main technical differences:
+
+- The file in `utils/` makes heavy use of Python's `logging` module, while the one at the root mostly uses `print()` to talk to the user
+- The file in `utils/` is meant to be run as a module (`python -m utils.retrieve_ticket`), whereas the one at the root is meant to be run directly (`python retrieve_ticket.py`)
+- Their output formats differ slightly
+
+In practice, you can:
+1. Use the file at the root for simple day-to-day use
+2. Use the module in utils if you want to integrate the extraction into a larger program
+
+To simplify maintenance, you could:
+1. Keep a single file (preferably the one at the root)
+2. Use the `utils.retrieve_ticket` module as a library and turn the root file into a thin wrapper that imports and calls the module's main function, as sketched below
+
+In short, this is a classic "script versus module" setup: one file is aimed at the end user, the other at developers and integration.
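+
+A sketch of that wrapper approach (a sketch only; it assumes `utils/retrieve_ticket.py` exposes a `main()` function, which should be verified before adopting it):
+
+```python
+#!/usr/bin/env python3
+"""Thin wrapper around the package version of the extractor."""
+
+import sys
+
+# Assumes utils/retrieve_ticket.py defines a main() entry point
+from utils.retrieve_ticket import main
+
+if __name__ == "__main__":
+    sys.exit(main())
+```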
+
+---
+
+_**User**_
+
+@all_messages.json I still have HTML code inside the messages in my JSON files and I would like to fix that. I would also like to add the project_id and the stage_id to the ticket_summary in all_messages.json to give it more context.
+
+---
+
+_**Assistant**_
+
+Could not find file '/home/fgras-ca/llm-ticket3/output/ticket_T0167/T0167_20250403_153713/all_messages.json' in the workspace.
+
+---
+
+
+ Listed directory /home/fgras-ca/llm-ticket3/output • **1** results
+
+| Name |
+|-------|
+| 📁 `ticket_T11067` |
+
+
+
+---
+
+
+ Listed directory /home/fgras-ca/llm-ticket3/output/ticket_T11067 • **1** results
+
+| Name |
+|-------|
+| 📁 `T11067_20250403_154323` |
+
+
+
+---
+
+
+ Listed directory /home/fgras-ca/llm-ticket3/output/ticket_T11067/T11067_20250403_154323 • **9** results
+
+| Name |
+|-------|
+| 📄 `followers.json` |
+| 📄 `structure.json` |
+| 📁 `attachments` |
+| 📄 `attachments_info.json` |
+| 📄 `all_messages.json` |
+| 📄 `all_messages.txt` |
+| 📄 `messages_raw.json` |
+| 📄 `ticket_info.json` |
+| 📄 `ticket_summary.json` |
+
+
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/output/ticket_T11067/T11067_20250403_154323/all_messages.json
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/output/ticket_T11067/T11067_20250403_154323/ticket_summary.json
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/output/ticket_T11067/T11067_20250403_154323/ticket_info.json
+
+---
+
+
+ Listed directory /home/fgras-ca/llm-ticket3/utils • **9** results
+
+| Name |
+|-------|
+| 📁 `__pycache__` |
+| 📄 `utils.py` |
+| 📄 `message_manager.py` |
+| 📄 `retrieve_ticket.py` |
+| 📄 `attachment_manager.py` |
+| 📄 `auth_manager.py` |
+| 📄 `ticket_manager.py` |
+| 📄 `notes` |
+| 📄 `__init__.py` |
+
+
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/utils/message_manager.py
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/utils/message_manager.py
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/utils/message_manager.py
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/utils/ticket_manager.py
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/utils/utils.py
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/utils/utils.py
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/utils/utils.py
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/retrieve_ticket.py
+
+---
+
+To address your requests, I will make two modifications:
+
+1. Clean the HTML code out of the JSON messages
+2. Add the project_id and the stage_id to the all_messages.json file
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/utils/message_manager.py
+
+---
+
+
+ Searched codebase "get_ticket_by_code function in ticket_manager.py" • **25** results
+
+| File | Lines |
+|------|-------|
+| `utils/ticket_manager.py` | L1-44 |
+| `utils/retrieve_ticket.py` | L1-18 |
+| `utils/message_manager.py` | L29-57 |
+| `utils/ticket_manager.py` | L44-80 |
+| `utils/ticket_manager.py` | L80-122 |
+| `retrieve_ticket.py` | L62-109 |
+| `retrieve_ticket.py` | L1-23 |
+| `utils/ticket_manager.py` | L122-167 |
+| `utils/retrieve_ticket.py` | L56-107 |
+| `utils/message_manager.py` | L339-374 |
+| `utils/attachment_manager.py` | L44-64 |
+| `.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L3803-3857 |
+| `utils/attachment_manager.py` | L123-147 |
+| `utils/retrieve_ticket.py` | L18-26 |
+| `utils/message_manager.py` | L217-251 |
+| `utils/ticket_manager.py` | L167-201 |
+| `.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L3749-3803 |
+| `utils/retrieve_ticket.py` | L26-56 |
+| `retrieve_ticket.py` | L32-62 |
+| `.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L5424-5554 |
+| `utils/message_manager.py` | L374-402 |
+| `.specstory/history/2025-04-01_13-55-problème-d'exécution-du-programme.md` | L10785-10827 |
+| `retrieve_ticket.py` | L23-32 |
+| `.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L5243-5274 |
+| `.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L5274-5361 |
+
+
+
+---
+
+I will now implement the requested modifications. Here are the changes to make:
+
+1. Clean the HTML code out of the JSON messages
+2. Add the project_id and the stage_id to the ticket_summary in all_messages.json
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/utils/message_manager.py
+
+---
+
+Here are the modifications to make to the code:
+
+---
+
+Tool edit_file not found
+
+---
+
+I will try again with a different approach, using the available tools:
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/utils/message_manager.py
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/utils/message_manager.py
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/utils/utils.py
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/utils/utils.py
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/utils/ticket_manager.py
+
+---
+
+
+ Searched codebase "Where in message_manager.py does it define process_messages method?" • **25** results
+
+| File | Lines |
+|------|-------|
+| `utils/message_manager.py` | L217-251 |
+| `utils/message_manager.py` | L374-402 |
+| `utils/message_manager.py` | L339-374 |
+| `utils/message_manager.py` | L292-339 |
+| `utils/message_manager.py` | L251-292 |
+| `utils/message_manager.py` | L1-29 |
+| `utils/message_manager.py` | L57-87 |
+| `utils/message_manager.py` | L29-57 |
+| `utils/message_manager.py` | L122-172 |
+| `utils/message_manager.py` | L172-217 |
+| `utils/message_manager.py` | L87-122 |
+| `utils/ticket_manager.py` | L122-167 |
+| `.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L2024-2108 |
+| `utils/ticket_manager.py` | L1-44 |
+| `.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L2506-2595 |
+| `.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L3946-4185 |
+| `.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L4291-4459 |
+| `.specstory/history/2025-04-01_13-55-problème-d'exécution-du-programme.md` | L3925-3960 |
+| `.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L4459-4518 |
+| `.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L3535-3596 |
+| `.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L4957-5003 |
+| `.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L97-179 |
+| `.specstory/history/2025-04-01_13-55-amélioration-de-l'extraction-des-pièces-jointes.md` | L4683-4729 |
+| `utils/ticket_manager.py` | L167-201 |
+| `retrieve_ticket.py` | L62-109 |
+
+
+
+---
+
+Read file: /home/fgras-ca/llm-ticket3/utils/message_manager.py
+
+---
+
+Here are the modifications to make to `message_manager.py` in order to clean the HTML code out of the messages and add the `project_id` and `stage_id` fields to the ticket summary:
+
+```python:utils/message_manager.py
+# ... existing code ...
+
+def process_messages(self, ticket_id: int, ticket_code: str, ticket_name: str, output_dir: str,
+ strategy: str = "standard") -> Dict[str, Any]:
+ """
+ Traite tous les messages d'un ticket, nettoie le contenu et génère des fichiers structurés.
+
+ Args:
+ ticket_id: ID du ticket
+ ticket_code: Code du ticket
+ ticket_name: Nom du ticket
+ output_dir: Répertoire de sortie
+ strategy: Stratégie de nettoyage (simple, standard, advanced, raw)
+
+ Returns:
+ Dictionnaire avec les chemins des fichiers créés
+ """
+ # Validation de la stratégie
+ if strategy not in self.cleaning_strategies:
+ logging.warning(f"Stratégie de nettoyage '{strategy}' inconnue, utilisation de la stratégie par défaut '{self.default_strategy}'")
+ strategy = self.default_strategy
+
+ cleaning_config = self.cleaning_strategies[strategy]
+
+ # Récupérer les messages
+ messages = self.get_ticket_messages(ticket_id)
+
+ # Détecter les messages dupliqués
+ duplicate_indices = detect_duplicate_content(messages)
+
+ # Nettoyer et structurer les messages
+ processed_messages = []
+
+ # Créer un dictionnaire de métadonnées pour chaque message
+ message_metadata = {}
+
+ for index, message in enumerate(messages):
+ message_id = message.get('id')
+
+ # Ajouter des métadonnées au message
+ message_metadata[message_id] = {
+ "is_system": self.is_system_message(message),
+ "is_stage_change": self.is_stage_change_message(message),
+ "is_forwarded": self.is_forwarded_message(message),
+ "is_duplicate": index in duplicate_indices
+ }
+
+ # Créer une copie du message pour éviter de modifier l'original
+ message_copy = message.copy()
+
+ # Ajouter les métadonnées au message copié
+ for key, value in message_metadata[message_id].items():
+ message_copy[key] = value
+
+ # Nettoyer le corps du message selon la stratégie choisie
+ if message_copy.get('body'):
+ # Toujours conserver l'original
+ message_copy['body_original'] = message_copy.get('body', '')
+
+ # Appliquer la stratégie de nettoyage, sauf si raw
+ if strategy != "raw":
+ cleaned_body = clean_html(
+ message_copy.get('body', ''),
+ strategy=cleaning_config['strategy'],
+ preserve_links=cleaning_config['preserve_links'],
+ preserve_images=cleaning_config['preserve_images']
+ )
+
+ # Nettoyer davantage le code HTML qui pourrait rester
+ if cleaned_body:
+ # Supprimer les balises style et script avec leur contenu
+ cleaned_body = re.sub(r'<style[^>]*>.*?</style>', '', cleaned_body, flags=re.DOTALL)
+ cleaned_body = re.sub(r'<script[^>]*>.*?</script>', '', cleaned_body, flags=re.DOTALL)
+ # Supprimer les balises HTML restantes
+ cleaned_body = re.sub(r'<[^>]+>', '', cleaned_body)
+
+ message_copy['body'] = cleaned_body
+
+ # Récupérer les détails de l'auteur
+ message_copy['author_details'] = self.get_message_author_details(message_copy)
+
+ # Ne pas inclure les messages système sans intérêt
+ if message_copy.get('is_system') and not message_copy.get('is_stage_change'):
+ # Enregistrer l'exclusion dans les métadonnées
+ message_metadata[message_id]['excluded'] = "system_message"
+ continue
+
+ # Ignorer les messages dupliqués si demandé
+ if message_copy.get('is_duplicate'):
+ # Enregistrer l'exclusion dans les métadonnées
+ message_metadata[message_id]['excluded'] = "duplicate_content"
+ continue
+
+ processed_messages.append(message_copy)
+
+ # Trier les messages par date
+ processed_messages.sort(key=lambda x: x.get('date', ''))
+
+ # Récupérer les informations supplémentaires du ticket
+ try:
+ ticket_data = self.auth._rpc_call("/web/dataset/call_kw", {
+ "model": "project.task",
+ "method": "read",
+ "args": [[ticket_id]],
+ "kwargs": {"fields": ["project_id", "stage_id"]}
+ })
+
+ project_id = None
+ stage_id = None
+ project_name = None
+ stage_name = None
+
+ if ticket_data and isinstance(ticket_data, list) and len(ticket_data) > 0:
+ if "project_id" in ticket_data[0] and ticket_data[0]["project_id"]:
+ project_id = ticket_data[0]["project_id"][0] if isinstance(ticket_data[0]["project_id"], list) else ticket_data[0]["project_id"]
+ project_name = ticket_data[0]["project_id"][1] if isinstance(ticket_data[0]["project_id"], list) else None
+
+ if "stage_id" in ticket_data[0] and ticket_data[0]["stage_id"]:
+ stage_id = ticket_data[0]["stage_id"][0] if isinstance(ticket_data[0]["stage_id"], list) else ticket_data[0]["stage_id"]
+ stage_name = ticket_data[0]["stage_id"][1] if isinstance(ticket_data[0]["stage_id"], list) else None
+ except Exception as e:
+ logging.error(f"Erreur lors de la récupération des informations du ticket: {e}")
+ project_id = None
+ stage_id = None
+ project_name = None
+ stage_name = None
+
+ # Créer la structure pour le JSON
+ messages_with_summary = {
+ "ticket_summary": {
+ "id": ticket_id,
+ "code": ticket_code,
+ "name": ticket_name,
+ "project_id": project_id,
+ "project_name": project_name,
+ "stage_id": stage_id,
+ "stage_name": stage_name,
+ "date_extraction": datetime.now().isoformat()
+ },
+ "metadata": {
+ "message_count": {
+ "total": len(messages),
+ "processed": len(processed_messages),
+ "excluded": len(messages) - len(processed_messages)
+ },
+ "cleaning_strategy": strategy,
+ "cleaning_config": cleaning_config
+ },
+ "messages": processed_messages
+ }
+
+ # ... existing code ...
+```
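+
+As a reminder of why the `isinstance(..., list)` checks are needed: Odoo's `read` returns many2one fields as `[id, display_name]` pairs. A small sketch with illustrative values only:
+
+```python
+# Illustrative shape of the data returned by the read call above
+ticket_data = [{
+    "id": 179,
+    "project_id": [12, "Support"],   # many2one -> [id, display name]
+    "stage_id": [3, "En cours"],
+}]
+
+field = ticket_data[0]["project_id"]
+project_id = field[0] if isinstance(field, list) else field
+project_name = field[1] if isinstance(field, list) else None
+print(project_id, project_name)  # 12 Support
+```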
+
+This modification:
+
+1. Improves the HTML cleaning by adding an extra pass to strip the `