#!/usr/bin/env python3
"""
Script to convert ticket JSON files into formatted Markdown.

This script takes the extracted ticket JSON data and creates a structured Markdown file.
"""

import os
import sys
import json
import argparse
import html
import subprocess
import re
from datetime import datetime

# Make the script's directory importable so the local formatters package resolves
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from formatters.clean_html import clean_html, format_date


def clean_newlines(text):
    """
    Clean up excessive line breaks in the text.

    Args:
        text: Text to clean

    Returns:
        Text with normalized line breaks
    """
    if not text:
        return text

    # Step 1: normalize all line endings
    text = text.replace("\r\n", "\n").replace("\r", "\n")

    # Step 2: collapse runs of blank lines (more than 2 consecutive line breaks)
    text = re.sub(r'\n{3,}', '\n\n', text)

    # Step 3: strip leading and trailing whitespace on each line
    lines = text.split('\n')
    cleaned_lines = [line.strip() for line in lines]

    # Step 4: drop lines that contain only whitespace or formatting characters
    meaningful_lines = []
    for line in cleaned_lines:
        # Skip lines made up only of special formatting characters
        if line and not re.match(r'^[\s_\-=\.]+$', line):
            meaningful_lines.append(line)
        elif line:  # Separator lines are kept but normalized
            if re.match(r'^_{3,}$', line):  # Underscore rule
                meaningful_lines.append("___")
            elif re.match(r'^-{3,}$', line):  # Dash rule
                meaningful_lines.append("---")
            elif re.match(r'^={3,}$', line):  # Equals-sign rule
                meaningful_lines.append("===")
            else:
                meaningful_lines.append(line)

    # Rejoin the lines
    return '\n'.join(meaningful_lines)
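
# Illustrative example (not from the original source):
#     clean_newlines("Hello\r\n\r\n\r\n____\n   world   ")
# normalizes the line endings, collapses the run of blank lines, strips each line,
# drops the empty line and rewrites the underscore rule, returning "Hello\n___\nworld".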


def create_markdown_from_json(json_file, output_file):
    """
    Create a Markdown file from a JSON file of messages.

    Args:
        json_file: Path to the JSON file containing the messages
        output_file: Unused; it is overwritten with a path derived from the ticket code
    """
    # Get the ticket directory so the other extracted files can be read
    ticket_dir = os.path.dirname(json_file)

    ticket_summary = {}
    try:
        with open(json_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
            ticket_summary = data.get("ticket_summary", {})
    except Exception as e:
        print(f"Erreur : {e}")
        return False

    ticket_code = ticket_summary.get("code", "inconnu")

    # Create the reports folder if it does not exist
    reports_dir = os.path.join(ticket_dir, f"{ticket_code}_rapports")
    os.makedirs(reports_dir, exist_ok=True)

    output_file = os.path.join(reports_dir, f"{ticket_code}_rapport.md")
    json_output_file = os.path.join(reports_dir, f"{ticket_code}_rapport.json")

    # Try to read ticket_info.json if available
    ticket_info = {}
    ticket_info_path = os.path.join(ticket_dir, "ticket_info.json")
    if os.path.exists(ticket_info_path):
        try:
            with open(ticket_info_path, 'r', encoding='utf-8') as f:
                ticket_info = json.load(f)
        except Exception as e:
            print(f"Avertissement: Impossible de lire ticket_info.json: {e}")

    # Retrieve the ticket summary, falling back to ticket_summary.json on disk
    ticket_summary = {}
    if "ticket_summary" in data:
        ticket_summary = data.get("ticket_summary", {})
    else:
        summary_path = os.path.join(ticket_dir, "ticket_summary.json")
        if os.path.exists(summary_path):
            try:
                with open(summary_path, 'r', encoding='utf-8') as f:
                    ticket_summary = json.load(f)
            except Exception as e:
                print(f"Avertissement: Impossible de lire ticket_summary.json: {e}")

    # Try to read structure.json
    structure = {}
    structure_path = os.path.join(ticket_dir, "structure.json")
    if os.path.exists(structure_path):
        try:
            with open(structure_path, 'r', encoding='utf-8') as f:
                structure = json.load(f)
        except Exception as e:
            print(f"Avertissement: Impossible de lire structure.json: {e}")
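    # At this point data (all_messages.json) is loaded, and ticket_info, ticket_summary
    # and structure hold the contents of the optional ticket_info.json, ticket_summary.json
    # and structure.json files (each defaults to {} when missing or unreadable).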
    # Start building the Markdown content
    md_content = []

    # Add the document header with the ticket information
    ticket_code = ticket_summary.get("code", os.path.basename(ticket_dir).split('_')[0])
    ticket_name = ticket_summary.get("name", "")

    md_content.append(f"# Ticket {ticket_code}: {ticket_name}")
    md_content.append("")

    # Add the ticket metadata
    md_content.append("## Informations du ticket")
    md_content.append("")
    # Add the ticket ID
    ticket_id = ticket_summary.get("id", ticket_info.get("id", ""))
    md_content.append(f"- **id**: {ticket_id}")
    md_content.append(f"- **code**: {ticket_code}")
    md_content.append(f"- **name**: {ticket_name}")
    md_content.append(f"- **project_name**: {ticket_summary.get('project_name', '')}")
    md_content.append(f"- **stage_name**: {ticket_summary.get('stage_name', '')}")

    # Look up the assigned user in the metadata
    assigned_to = ""
    if "user_id" in structure and structure["user_id"]:
        user_id = structure["user_id"]
        if isinstance(user_id, list) and len(user_id) > 1:
            assigned_to = user_id[1]

    md_content.append(f"- **user_id**: {assigned_to}")
    # Add the customer if available
    partner = ""
    if "partner_id" in ticket_info:
        partner_id = ticket_info.get("partner_id", [])
        if isinstance(partner_id, list) and len(partner_id) > 1:
            partner = partner_id[1]

    # Add the customer's email if available
    partner_email = ""
    if "email_from" in ticket_info and ticket_info["email_from"]:
        partner_email = ticket_info["email_from"]
        if partner:
            partner += f", {partner_email}"
        else:
            partner = partner_email

    md_content.append(f"- **partner_id/email_from**: {partner}")

    # Add the tags if available
    tags = []
    if "tag_ids" in ticket_info:
        tag_ids = ticket_info.get("tag_ids", []) or []
        for tag in tag_ids:
            if isinstance(tag, list) and len(tag) > 1:
                tags.append(tag[1])

    if tags:
        md_content.append(f"- **tag_ids**: {', '.join(tags)}")

    # Add the dates
    md_content.append(f"- **create_date**: {format_date(ticket_info.get('create_date', ''))}")
    md_content.append(f"- **write_date/last modification**: {format_date(ticket_info.get('write_date', ''))}")
    if "date_deadline" in ticket_info and ticket_info.get("date_deadline"):
        md_content.append(f"- **date_deadline**: {format_date(ticket_info.get('date_deadline', ''))}")

    md_content.append("")

    # Add the ticket description
    description = ticket_info.get("description", "")
    md_content.append("- **description**:")
    md_content.append("")  # blank line

    if description:
        cleaned_description = clean_html(description, is_description=True)
        if cleaned_description and cleaned_description != "*Contenu vide*":
            cleaned_description = html.unescape(cleaned_description)
            md_content.append(cleaned_description)
        else:
            md_content.append("*Aucune description fournie*")
    else:
        md_content.append("*Aucune description fournie*")
    md_content.append("")  # blank line
    # Add the messages
    messages = []
    if "messages" in data:
        messages = data.get("messages", [])

    if not messages:
        md_content.append("## Messages")
        md_content.append("")
        md_content.append("*Aucun message disponible*")
    else:
        # Filter out irrelevant system messages
        filtered_messages = []
        for msg in messages:
            # Skip system messages with an empty body
            if msg.get("is_system", False) and not msg.get("body", "").strip():
                continue

            # Stage changes with an empty body are kept anyway, for traceability
            if msg.get("is_stage_change", False) and not msg.get("body", "").strip():
                filtered_messages.append(msg)
                continue

            filtered_messages.append(msg)

        # If there is at least one meaningful message
        if filtered_messages:
            md_content.append("## Messages")
            md_content.append("")

            # Sort the messages by date
            filtered_messages.sort(key=lambda x: x.get("date", ""))
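            # Each message is a dict; the loop below reads the fields consumed here from
            # all_messages.json: author_details/author_id, date, subject, id, body,
            # body_original, is_system, is_stage_change, is_note, email_from and
            # attachment_ids.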
            for i, message in enumerate(filtered_messages):
                if not isinstance(message, dict):
                    continue

                # Determine the message author
                author = "Système"
                author_details = message.get("author_details", {})
                if author_details and author_details.get("name"):
                    author = author_details.get("name")
                else:
                    author_id = message.get("author_id", [])
                    if isinstance(author_id, list) and len(author_id) > 1:
                        author = author_id[1]

                # Format the date
                date = format_date(message.get("date", ""))

                # Get the message body, preferring body_original (HTML) when available
                if "body_original" in message and message["body_original"]:
                    body = message["body_original"]
                    # Clean up the HTML body with clean_html
                    cleaned_body = clean_html(body, is_description=False)
                else:
                    # Use body as-is (already text/markdown), no clean_html needed
                    body = message.get("body", "")
                    cleaned_body = body  # Already plain text

                # Determine the message type
                message_type = ""
                if message.get("is_stage_change", False):
                    message_type = "Changement d'état"
                elif message.get("is_system", False):
                    message_type = "Système"
                elif message.get("is_note", False):
                    message_type = "Commentaire"
                elif message.get("email_from", False):
                    message_type = "E-mail"
                # Get the message subject
                subject = message.get("subject", "")

                # Build the message header
                md_content.append(f"### Message {i+1}")
                md_content.append(f"**author_id**: {author}")
                md_content.append(f"**date**: {date}")
                md_content.append(f"**message_type**: {message_type}")
                if subject:
                    md_content.append(f"**subject**: {subject}")

                # Add the message ID if available
                message_id = message.get("id", "")
                if message_id:
                    md_content.append(f"**id**: {message_id}")

                # Add the cleaned message body
                if cleaned_body:
                    cleaned_body = clean_newlines(cleaned_body)
                    md_content.append(cleaned_body)
                else:
                    md_content.append("*Contenu vide*")
                # Add the attachments if there are any
                attachment_ids = message.get("attachment_ids", [])
                has_attachments = False

                # Check that the attachments exist and are not empty
                if attachment_ids:
                    # Gather the attachment details
                    valid_attachments = []
                    if isinstance(attachment_ids, list) and all(isinstance(att_id, int) for att_id in attachment_ids):
                        # Look up the attachment details in attachments_info.json
                        attachments_info_path = os.path.join(ticket_dir, "attachments_info.json")
                        if os.path.exists(attachments_info_path):
                            try:
                                with open(attachments_info_path, 'r', encoding='utf-8') as f:
                                    attachments_info = json.load(f)
                                for attachment_id in attachment_ids:
                                    for attachment_info in attachments_info:
                                        if attachment_info.get("id") == attachment_id:
                                            valid_attachments.append(attachment_info)
                            except Exception as e:
                                print(f"Avertissement: Impossible de lire attachments_info.json: {e}")
                    elif isinstance(attachment_ids, list):
                        for att in attachment_ids:
                            if isinstance(att, list) and len(att) > 1:
                                valid_attachments.append(att)

                    if valid_attachments:
                        has_attachments = True
                        md_content.append("")
                        md_content.append("**attachment_ids**:")
                        for att in valid_attachments:
                            if isinstance(att, list) and len(att) > 1:
                                md_content.append(f"- {att[1]}")
                            elif isinstance(att, dict):
                                att_id = att.get("id", "")
                                name = att.get("name", "Pièce jointe sans nom")
                                mimetype = att.get("mimetype", "Type inconnu")
                                md_content.append(f"- {name} ({mimetype}) [ID: {att_id}]")

                md_content.append("")
                md_content.append("---")
                md_content.append("")
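    # Assumed shape of attachments.json, inferred from the fields read below rather than
    # from a documented schema:
    #   {"attachments": [{"name": ..., "mimetype": ..., "file_size": ..., "create_date": ...}, ...]}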
    # Add a section for the ticket-level attachments if any exist
    attachment_data = {}
    attachment_path = os.path.join(ticket_dir, "attachments.json")
    if os.path.exists(attachment_path):
        try:
            with open(attachment_path, 'r', encoding='utf-8') as f:
                attachment_data = json.load(f)
        except Exception as e:
            print(f"Avertissement: Impossible de lire attachments.json: {e}")

    if attachment_data and "attachments" in attachment_data:
        attachments = attachment_data.get("attachments", [])
        if attachments:
            md_content.append("## Pièces jointes")
            md_content.append("")
            md_content.append("| Nom | Type | Taille | Date |")
            md_content.append("|-----|------|--------|------|")

            for att in attachments:
                name = att.get("name", "")
                mimetype = att.get("mimetype", "")
                file_size = att.get("file_size", 0)
                size_str = f"{file_size / 1024:.1f} KB" if file_size else ""
                create_date = format_date(att.get("create_date", ""))

                md_content.append(f"| {name} | {mimetype} | {size_str} | {create_date} |")

            md_content.append("")
    # Add information about the extraction itself
    extract_time = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
    md_content.append("## Informations sur l'extraction")
    md_content.append("")
    md_content.append(f"- **Date d'extraction**: {extract_time}")
    md_content.append(f"- **Répertoire**: {ticket_dir}")

    # Write the content to the output file
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write("\n".join(md_content))
        print(f"Rapport Markdown créé : {output_file}")
        # Call the markdown_to_json.py script to convert the report to JSON
        subprocess.run(['python', 'utils/markdown_to_json.py', output_file, json_output_file], check=True)
        print(f"Fichier JSON créé : {json_output_file}")
        return True
    except Exception as e:
        print(f"Erreur lors de l'écriture du fichier Markdown: {e}")
        return False
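
# Illustrative direct call (the paths are hypothetical, following the layout expected by
# the CLI below):
#     create_markdown_from_json("output/ticket_T11067/T11067_20250403_155134/all_messages.json", None)
# The report is written next to the input, under <ticket_dir>/<code>_rapports/.
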
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Convertir les fichiers JSON de tickets en Markdown")
    parser.add_argument("--ticket_code", "-t", help="Code du ticket à convertir (ex: T11067)")
    parser.add_argument("--date_dir", "-d", help="Dossier spécifique par date, optionnel (ex: 20250403_155134)")
    parser.add_argument("--input_dir", "-i", default="output", help="Dossier racine contenant les tickets")
    parser.add_argument("--output_name", "-o", default="rapport.md", help="Nom du fichier Markdown à générer")

    args = parser.parse_args()

    if not args.ticket_code:
        print("Erreur : Vous devez spécifier un code de ticket. Exemple : -t T11067")
        sys.exit(1)

    # Build the input path
    ticket_dir = f"{args.input_dir}/ticket_{args.ticket_code}"

    if args.date_dir:
        ticket_dir = f"{ticket_dir}/{args.ticket_code}_{args.date_dir}"
    else:
        # Find the most recent dated directory
        import glob
        date_dirs = glob.glob(f"{ticket_dir}/{args.ticket_code}_*")
        if date_dirs:
            ticket_dir = max(date_dirs)  # Most recent, by alphabetical order of the timestamped name

    json_file = f"{ticket_dir}/all_messages.json"

    if not os.path.exists(json_file):
        print(f"Erreur : Le fichier {json_file} n'existe pas.")
        sys.exit(1)

    if create_markdown_from_json(json_file, None):
        print("Rapport Markdown créé.")
    else:
        print("Échec de la création du rapport Markdown")
        sys.exit(1)
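
# Example invocation (the script name is illustrative; -d is optional and defaults to the
# most recent extraction directory found under the ticket folder):
#     python json_to_markdown.py -t T11067 -d 20250403_155134 -i output
# Note: --output_name is parsed but not used; the report name is derived from the ticket code.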