mirror of
https://github.com/Ladebeze66/llm_ticket3.git
synced 2025-12-15 22:06:50 +01:00
438 lines
19 KiB
Python
438 lines
19 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Script pour convertir les fichiers JSON de tickets en Markdown formaté.
|
|
Ce script prend les données JSON des tickets extraits et crée un fichier Markdown structuré.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import json
|
|
import argparse
|
|
from datetime import datetime
|
|
|
|
def clean_html(html_content):
    """
    Convert an HTML fragment taken from a ticket into plain Markdown text.

    The conversion is regex based and runs in this order:
    headings and bullet lists are turned into Markdown, block tags become
    newlines, known support footers / automatic notices and embedded images
    are removed, any remaining tag is stripped, HTML entities are decoded
    and finally the whitespace is normalised.

    Args:
        html_content: HTML text to clean. Any falsy value (None, "", False)
            is treated as "no content".

    Returns:
        "" when the input is falsy, the placeholder "*Contenu vide*" when
        nothing is left after cleaning, otherwise the cleaned text.
    """
    if not html_content:
        return ""

    # Function-scope imports: neither module is imported at file level.
    import html
    import re

    text = html_content

    # <h1> headings become level-3 Markdown titles.
    text = re.sub(r'<h1>(.*?)</h1>', r'### \1', text)

    # Bullet lists: unwrap <ul>, then turn each <li> into a "- " item
    # (bold-only items first so the bold markers are preserved).
    text = re.sub(r'<ul>(.*?)</ul>', r'\1', text, flags=re.DOTALL)
    text = re.sub(r'<li><b>(.*?)</b></li>', r'- **\1**\n', text)
    text = re.sub(r'<li>(.*?)</li>', r'- \1\n', text)

    # Line breaks and simple block tags become newlines.
    text = re.sub(r'<br\s*/?>|<p>|</p>|<div>|</div>', '\n', text)

    # Footers, automatic support notices and the signature image: each
    # marker is removed together with everything up to the next blank line
    # (or the end of the text). The order matters and is kept as-is.
    boilerplate_markers = (
        r'Droit à la déconnexion :',
        r'\*\s*\*\s*\*',
        r'Ce message électronique et tous les fichiers',
        r"Afin d'assurer une meilleure traçabilité",
        r'_Confidentialité :',
        r'Support technique',
        r'!\[CBAO - développeur de rentabilité',
    )
    for marker in boilerplate_markers:
        text = re.sub(marker + r'.*?(?=\n\n|\Z)', '', text, flags=re.DOTALL)

    # Remaining Markdown images that point at /web/image/ URLs.
    text = re.sub(r'!\[.*?\]\(/web/image/.*?\)', '', text)
    text = re.sub(r'!\[cid:.*?\]\(/web/image/.*?\)', '', text)

    # Strip whatever HTML tags are left.
    text = re.sub(r'<.*?>', '', text)

    # Decode HTML entities in a single pass (so "&amp;lt;" yields "&lt;",
    # not "<"), then turn non-breaking spaces into regular spaces.
    text = html.unescape(text).replace('\xa0', ' ')

    # Drop lines that only contain leftover bold markers.
    text = re.sub(r'^\s*\*\*\s*\*\*\s*$', '', text, flags=re.MULTILINE)
    text = re.sub(r'^\s*\*\*\s*$', '', text, flags=re.MULTILINE)

    # Drop a trailing "\---" separator and a trailing dangling "[".
    text = re.sub(r'\\---\s*$', '', text)
    text = re.sub(r'\[\s*$', '', text)

    # Collapse runs of blank lines into a single blank line and trim.
    text = re.sub(r'\n\s*\n', '\n\n', text).strip()

    # Nothing meaningful left: return the explicit placeholder.
    if not text or text.lower() == "*contenu vide*":
        return "*Contenu vide*"

    return text
|
|
|
|
def format_date(date_str):
    """
    Format an ISO 8601 date string as "DD/MM/YYYY HH:MM:SS".

    Args:
        date_str: Date to format, e.g. "2025-04-03T15:51:34Z" or
            "2025-04-03 15:51:34".

    Returns:
        "" when the input is falsy, the formatted date when it can be
        parsed, otherwise the input unchanged (converted to str if it was
        not a string) so the caller can still display it.
    """
    if not date_str:
        return ""

    # Non-string values cannot be parsed; hand them back as text instead of
    # failing on the string operations below.
    if not isinstance(date_str, str):
        return str(date_str)

    # datetime.fromisoformat() only accepts a trailing "Z" from Python 3.11
    # on, so rewrite it as an explicit UTC offset. Only the final character
    # is touched; a "Z" elsewhere is left for the parser to reject.
    iso_text = date_str[:-1] + '+00:00' if date_str.endswith('Z') else date_str

    try:
        parsed = datetime.fromisoformat(iso_text)
    except ValueError:
        return date_str

    return parsed.strftime("%d/%m/%Y %H:%M:%S")
|
|
|
|
def _load_optional_json(path, default):
    """Return the parsed JSON at *path*; *default* if it is missing or unreadable (with a warning)."""
    if not os.path.exists(path):
        return default
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        print(f"Avertissement: Impossible de lire {os.path.basename(path)}: {e}")
        return default


def _as_dict(value):
    """Return *value* if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _pair_label(value):
    """Return the second element of an `[id, label]` list, or "" for anything else."""
    if isinstance(value, list) and len(value) > 1:
        return value[1]
    return ""


def _message_type_label(message):
    """Return the display label for a message, based on its boolean flags."""
    if message.get("is_stage_change", False):
        return "Changement d'état"
    if message.get("is_system", False):
        return "Système"
    if message.get("is_note", False):
        return "Commentaire"
    if message.get("email_from", False):
        return "E-mail"
    return ""


def _load_attachment_index(ticket_dir):
    """Index the entries of attachments_info.json by their integer `id` (file order preserved)."""
    infos = _load_optional_json(os.path.join(ticket_dir, "attachments_info.json"), [])
    index = {}
    if isinstance(infos, list):
        for info in infos:
            if isinstance(info, dict) and isinstance(info.get("id"), int):
                index.setdefault(info["id"], []).append(info)
    return index


def create_markdown_from_json(json_file, output_file):
    """
    Build a Markdown report from a ticket's messages JSON file.

    Besides *json_file*, the following optional files are read from the same
    directory when they exist: ticket_info.json, ticket_summary.json (only
    when the main file has no "ticket_summary" key), structure.json,
    attachments_info.json and attachments.json. An unreadable optional file
    only produces a warning.

    Args:
        json_file: Path to the JSON file containing the messages.
        output_file: Path of the Markdown file to create.

    Returns:
        True when the report was written, False when *json_file* could not
        be read or *output_file* could not be written.
    """
    # Function-scope import: `re` is not imported at file level.
    import re

    try:
        with open(json_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        print(f"Erreur : {e}")
        return False
    data = _as_dict(data)

    # The ticket directory gives access to the companion files.
    ticket_dir = os.path.dirname(json_file)

    ticket_info = _as_dict(
        _load_optional_json(os.path.join(ticket_dir, "ticket_info.json"), {})
    )

    # The summary embedded in the main file wins over ticket_summary.json.
    if "ticket_summary" in data:
        ticket_summary = _as_dict(data["ticket_summary"])
    else:
        ticket_summary = _as_dict(
            _load_optional_json(os.path.join(ticket_dir, "ticket_summary.json"), {})
        )

    structure = _as_dict(
        _load_optional_json(os.path.join(ticket_dir, "structure.json"), {})
    )

    # ---- Document header and ticket metadata -------------------------------
    # Without a "code" in the summary, fall back to the directory name prefix.
    ticket_code = ticket_summary.get("code", os.path.basename(ticket_dir).split('_')[0])
    ticket_name = ticket_summary.get("name", "")
    ticket_id = ticket_summary.get("id", ticket_info.get("id", ""))
    assigned_to = _pair_label(structure.get("user_id"))

    md_content = [
        f"# Ticket {ticket_code}: {ticket_name}",
        "",
        "## Informations du ticket",
        "",
        f"- **ID** (`id`): {ticket_id}",
        f"- **Référence** (`code`): {ticket_code}",
        f"- **Nom** (`name`): {ticket_name}",
        f"- **Projet** (`project_name`): {ticket_summary.get('project_name', '')}",
        f"- **État** (`stage_name`): {ticket_summary.get('stage_name', '')}",
        f"- **Assigné à** (`user_id`): {assigned_to}",
    ]

    # Customer: partner label, followed by the e-mail address when present.
    partner = _pair_label(ticket_info.get("partner_id"))
    partner_email = ticket_info.get("email_from") or ""
    if partner_email:
        partner = f"{partner}, {partner_email}" if partner else partner_email
    md_content.append(f"- **Client** (`partner_id`, `email_from`): {partner}")

    # Tags, only listed when at least one has a label.
    tag_ids = ticket_info.get("tag_ids") or []
    if isinstance(tag_ids, list):
        tags = [str(tag[1]) for tag in tag_ids if isinstance(tag, list) and len(tag) > 1]
        if tags:
            md_content.append(f"- **Tags** (`tag_ids`): {', '.join(tags)}")

    # Dates.
    md_content.append(f"- **Créé le** (`create_date`): {format_date(ticket_info.get('create_date', ''))}")
    md_content.append(f"- **Dernière modification** (`write_date`): {format_date(ticket_info.get('write_date', ''))}")
    if ticket_info.get("date_deadline"):
        md_content.append(f"- **Date limite** (`date_deadline`): {format_date(ticket_info.get('date_deadline', ''))}")

    md_content.append("")

    # ---- Ticket description -------------------------------------------------
    description = ticket_info.get("description", "")
    if description:
        cleaned_description = clean_html(description)
        if cleaned_description and cleaned_description != "*Contenu vide*":
            md_content.append("## Description")
            md_content.append("")

            # Put every heading on its own line.
            cleaned_description = cleaned_description.replace("### ", "\n### ")
            # Start the bullet that directly follows a heading on a new line.
            # NOTE(review): this splits at the first "-" after "###", so a
            # hyphen inside the heading text is affected too - confirm intent.
            cleaned_description = re.sub(r'(###.*?)\n?-', r'\1\n-', cleaned_description)

            md_content.append(cleaned_description)
            md_content.append("")

    # ---- Messages -------------------------------------------------------------
    messages = data.get("messages") or []

    if not messages:
        md_content.append("## Messages")
        md_content.append("")
        md_content.append("*Aucun message disponible*")
    else:
        # Keep every well-formed message except system messages without a
        # body. Entries that are not dicts are skipped here, before any
        # `.get` call, so they can neither crash the filter nor leave a gap
        # in the message numbering.
        filtered_messages = []
        if isinstance(messages, list):
            for msg in messages:
                if not isinstance(msg, dict):
                    continue
                # The body may be null/false in the JSON; treat that as empty.
                body_text = str(msg.get("body") or "").strip()
                if msg.get("is_system", False) and not body_text:
                    continue
                filtered_messages.append(msg)

        if filtered_messages:
            md_content.append("## Messages")
            md_content.append("")

            # Chronological order; a missing/null date sorts first, and the
            # str() keeps the keys comparable whatever the JSON type was.
            filtered_messages.sort(key=lambda m: str(m.get("date") or ""))

            # Built on first use so attachments_info.json is read at most
            # once instead of once per message.
            attachment_index = None

            for number, message in enumerate(filtered_messages, start=1):
                # Author: detailed name first, then the `[id, label]` pair.
                author_details = _as_dict(message.get("author_details"))
                author = (
                    author_details.get("name")
                    or _pair_label(message.get("author_id"))
                    or "Système"
                )

                date = format_date(message.get("date", ""))
                message_type = _message_type_label(message)
                subject = message.get("subject", "")

                # Message header.
                md_content.append(f"### Message {number}")
                md_content.append(f"**De** (`author_id`): {author}")
                md_content.append(f"**Date** (`date`): {date}")
                md_content.append(f"**Type** (`message_type`): {message_type}")
                if subject:
                    md_content.append(f"**Sujet** (`subject`): {subject}")

                message_id = message.get("id", "")
                if message_id:
                    md_content.append(f"**ID** (`id`): {message_id}")

                # Message body.
                cleaned_body = clean_html(message.get("body", ""))
                md_content.append("")
                if not cleaned_body or cleaned_body == "*Contenu vide*":
                    md_content.append("*Contenu vide*")
                else:
                    md_content.append(cleaned_body)

                # Attachments: either plain integer ids, resolved through
                # attachments_info.json, or `[id, label]` pairs used directly.
                attachment_ids = message.get("attachment_ids", [])
                valid_attachments = []
                if attachment_ids and isinstance(attachment_ids, list):
                    if all(isinstance(att_id, int) for att_id in attachment_ids):
                        if attachment_index is None:
                            attachment_index = _load_attachment_index(ticket_dir)
                        for att_id in attachment_ids:
                            valid_attachments.extend(attachment_index.get(att_id, []))
                    else:
                        valid_attachments = [
                            att for att in attachment_ids
                            if isinstance(att, list) and len(att) > 1
                        ]

                if valid_attachments:
                    md_content.append("")
                    md_content.append("**Pièces jointes** (`attachment_ids`):")
                    for att in valid_attachments:
                        if isinstance(att, list):
                            md_content.append(f"- {att[1]}")
                        else:
                            att_id = att.get("id", "")
                            name = att.get("name", "Pièce jointe sans nom")
                            mimetype = att.get("mimetype", "Type inconnu")
                            md_content.append(f"- {name} ({mimetype}) [ID: {att_id}]")

                md_content.append("")
                md_content.append("---")
                md_content.append("")

    # ---- Ticket-level attachments table ---------------------------------------
    attachment_data = _as_dict(
        _load_optional_json(os.path.join(ticket_dir, "attachments.json"), {})
    )
    attachments = attachment_data.get("attachments") or []
    if isinstance(attachments, list):
        attachments = [att for att in attachments if isinstance(att, dict)]
    else:
        attachments = []

    if attachments:
        md_content.append("## Pièces jointes")
        md_content.append("")
        md_content.append("| Nom | Type | Taille | Date |")
        md_content.append("|-----|------|--------|------|")

        for att in attachments:
            name = att.get("name", "")
            mimetype = att.get("mimetype", "")
            file_size = att.get("file_size", 0)
            size_str = f"{file_size / 1024:.1f} KB" if file_size else ""
            create_date = format_date(att.get("create_date", ""))
            md_content.append(f"| {name} | {mimetype} | {size_str} | {create_date} |")

        md_content.append("")

    # ---- Extraction information -----------------------------------------------
    extract_time = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
    md_content.append("## Informations sur l'extraction")
    md_content.append("")
    md_content.append(f"- **Date d'extraction**: {extract_time}")
    md_content.append(f"- **Répertoire**: {ticket_dir}")

    # ---- Write the report -------------------------------------------------------
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write("\n".join(md_content))
    except (OSError, ValueError) as e:
        print(f"Erreur lors de l'écriture du fichier Markdown: {e}")
        return False

    print(f"Rapport Markdown créé : {output_file}")
    return True
|
|
|
|
def main():
    """
    Command-line entry point: convert one ticket's JSON export to Markdown.

    The ticket directory is `<input_dir>/ticket_<code>/<code>_<date_dir>`.
    When no date directory is given, the sub-directory whose name sorts last
    among `<code>_*` is used. The process exits with status 1 when the
    ticket code is missing, when all_messages.json does not exist or when
    the report cannot be created.
    """
    # Function-scope import: `glob` is not imported at file level.
    import glob

    parser = argparse.ArgumentParser(description="Convertir les fichiers JSON de tickets en Markdown")
    parser.add_argument("--ticket_code", "-t", help="Code du ticket à convertir (ex: T11067)")
    parser.add_argument("--date_dir", "-d", help="Dossier spécifique par date, optionnel (ex: 20250403_155134)")
    parser.add_argument("--input_dir", "-i", default="output", help="Dossier racine contenant les tickets")
    parser.add_argument("--output_name", "-o", default="rapport.md", help="Nom du fichier Markdown à générer")

    args = parser.parse_args()

    if not args.ticket_code:
        print("Erreur : Vous devez spécifier un code de ticket. Exemple : -t T11067")
        sys.exit(1)

    # Build the input path.
    ticket_dir = os.path.join(args.input_dir, f"ticket_{args.ticket_code}")

    if args.date_dir:
        ticket_dir = os.path.join(ticket_dir, f"{args.ticket_code}_{args.date_dir}")
    else:
        # Pick the most recent export: the names sort alphabetically, so the
        # greatest path wins. The user-supplied parts are escaped so glob
        # metacharacters in them are matched literally, and only directories
        # are considered.
        pattern = os.path.join(
            glob.escape(ticket_dir), f"{glob.escape(args.ticket_code)}_*"
        )
        date_dirs = [path for path in glob.glob(pattern) if os.path.isdir(path)]
        if date_dirs:
            ticket_dir = max(date_dirs)

    json_file = os.path.join(ticket_dir, "all_messages.json")
    output_file = os.path.join(ticket_dir, args.output_name)

    if not os.path.exists(json_file):
        print(f"Erreur : Le fichier {json_file} n'existe pas.")
        sys.exit(1)

    # create_markdown_from_json() already prints the success message, so it
    # is not repeated here; only the failure is reported.
    if not create_markdown_from_json(json_file, output_file):
        print("Échec de la création du rapport Markdown")
        sys.exit(1)


if __name__ == "__main__":
    main()