mirror of
https://github.com/Ladebeze66/llm_ticket3.git
synced 2025-12-13 10:46:51 +01:00
180 lines
5.6 KiB
Python
180 lines
5.6 KiB
Python
import os
|
|
import re
|
|
import json
|
|
import sys
|
|
|
|
def parse_markdown(md_content):
    """Parse a Markdown ticket report into an ordered dictionary.

    The text is cut into sections at every level-2 heading (``## ``) and
    each recognised section is handed to its dedicated parser:

    * ``Informations du ticket``        -> ``parse_ticket_info`` (merged in)
    * ``Messages``                      -> ``parse_messages`` (stored under
      the ``"messages"`` key)
    * ``Informations sur l'extraction`` -> ``parse_extraction_info`` (merged in)

    Sections with any other title are ignored.

    Args:
        md_content: Full Markdown text of the report.

    Returns:
        dict: Parsed data with ``id``, ``code``, ``name`` and ``description``
        first (when present, in that order), followed by every other key in
        the order it was parsed.
    """
    data = {}

    # Split on headings anchored at the start of a line rather than on
    # "\n## ": this way a heading sitting on the very first line of the
    # document is recognised too. "### " sub-headings are not affected
    # because the pattern requires a space right after the second "#".
    sections = re.split(r"^## ", md_content, flags=re.MULTILINE)

    # Dispatch each section to its parser based on its title.
    for section in sections:
        if section.startswith("Informations du ticket"):
            data.update(parse_ticket_info(section))
        elif section.startswith("Messages"):
            data["messages"] = parse_messages(section)
        elif section.startswith("Informations sur l'extraction"):
            data.update(parse_extraction_info(section))

    # Reorder the fields so that the description comes right after "name":
    # first the fields with a fixed position...
    ordered_fields = ["id", "code", "name", "description"]
    ordered_data = {
        field: data[field] for field in ordered_fields if field in data
    }

    # ...then every remaining field, in parse order.
    for key, value in data.items():
        if key not in ordered_data:
            ordered_data[key] = value

    return ordered_data
|
|
|
|
def parse_ticket_info(section):
    """Extract the ``- **key**: value`` fields of the ticket-information section.

    Keys are lower-cased and have ``/`` and spaces replaced by ``_``; values
    are stripped of surrounding whitespace. The ``description`` field is
    special because it may span several lines: text following the
    ``- **description**:`` marker (on the same line or on the next ones) is
    collected until a line starting a new field (``- **``) or a level-2
    heading (``## ``) is reached.

    Args:
        section: Text of the section (title line included or not).

    Returns:
        dict: The parsed fields. A ``"description"`` key is always present;
        it is an empty string when the section has no description.
    """
    info = {}
    description = []
    capturing_description = False

    for line in section.strip().split("\n"):
        if capturing_description:
            if not line.startswith(("## ", "- **")):
                description.append(line)
                continue
            # The CURRENT line opens a new field or section: close the
            # description here and fall through so that this line is parsed
            # as a regular field. Testing the current line (instead of
            # looking one line ahead) keeps the last description line and
            # stops a field that directly follows the marker from being
            # swallowed into the description.
            capturing_description = False
            info["description"] = "\n".join(description).strip()

        # Detect the start of the description.
        desc_match = re.match(r"- \*\*description\*\*:(.*)", line)
        if desc_match:
            capturing_description = True
            # Keep text written on the same line as the marker.
            inline_text = desc_match.group(1).strip()
            if inline_text:
                description.append(inline_text)
            continue

        # Handle every other field normally.
        match = re.match(r"- \*\*(.*?)\*\*: (.*)", line)
        if match:
            key, value = match.groups()
            key = key.lower().replace("/", "_").replace(" ", "_")
            info[key] = value.strip()

    # The section ended while the description was still being captured.
    if capturing_description:
        info["description"] = "\n".join(description).strip()

    # Guarantee the key for callers that rely on it.
    info.setdefault("description", "")

    return info
|
|
|
|
def parse_messages(section):
    """Parse the ``Messages`` section into a list of message dictionaries.

    A message starts at each line beginning with ``### Message``. Inside a
    message, lines of the form ``**key**: value`` are stored as metadata
    (key lower-cased, ``/`` and spaces replaced by ``_``, value stripped);
    every other line is appended, followed by a newline, to the message's
    ``"content"`` string. Lines located before the first message heading
    are ignored.

    Args:
        section: Text of the section (title line included or not).

    Returns:
        list[dict]: One dictionary per non-empty message, in document order.
        A message with neither metadata nor content is not emitted.
    """
    messages = []
    current_message = {}
    in_message = False

    for line in section.strip().split("\n"):
        if line.startswith("### Message"):
            # A new heading closes the message being built, if any.
            if current_message:
                messages.append(current_message)
            current_message = {}
            in_message = True
            continue

        if not in_message:
            continue

        match = re.match(r"\*\*(.*?)\*\*: (.*)", line)
        if match:
            key, value = match.groups()
            key = key.lower().replace("/", "_").replace(" ", "_")
            current_message[key] = value.strip()
        else:
            # Anything that is not metadata belongs to the body. This
            # includes lines that merely START with bold text ("**...")
            # without being a "**key**: value" pair; they must not be
            # silently dropped.
            current_message["content"] = current_message.get("content", "") + line + "\n"

    if current_message:
        messages.append(current_message)

    return messages
|
|
|
|
|
|
def parse_extraction_info(section):
    """Collect the ``- **key**: value`` fields of the extraction-information section.

    Keys are lower-cased with ``/`` and spaces replaced by ``_``; values are
    stripped of surrounding whitespace. Lines that do not follow the field
    pattern are skipped. When a key appears more than once, the last value
    wins.

    Args:
        section: Text of the section (title line included or not).

    Returns:
        dict: Mapping of normalised field names to their values.
    """
    field_pattern = re.compile(r"- \*\*(.*?)\*\*: (.*)")
    candidates = (field_pattern.match(row) for row in section.strip().split("\n"))

    return {
        found.group(1).lower().replace("/", "_").replace(" ", "_"): found.group(2).strip()
        for found in candidates
        if found
    }
|
|
|
|
|
|
def convert_markdown_to_json(md_file_path, output_file_path):
    """Read a Markdown ticket report and write its parsed form as JSON.

    The file is read as UTF-8, parsed with ``parse_markdown``, and the
    result is written to ``output_file_path`` as UTF-8 JSON (4-space
    indent, non-ASCII characters kept as-is). A confirmation message is
    printed once the file has been written.

    Args:
        md_file_path: Path of the Markdown file to convert.
        output_file_path: Path of the JSON file to create.
    """
    with open(md_file_path, encoding='utf-8') as source:
        data = parse_markdown(source.read())

    # Make sure a description is present; when possible it is placed
    # immediately after "name", otherwise it goes at the end.
    if "description" not in data:
        if "name" not in data:
            data["description"] = ""
        else:
            pairs = list(data.items())
            split_at = [key for key, _ in pairs].index("name") + 1
            data = dict(pairs[:split_at] + [("description", "")] + pairs[split_at:])

    with open(output_file_path, 'w', encoding='utf-8') as target:
        json.dump(data, target, indent=4, ensure_ascii=False)

    print(f"Conversion terminée. Fichier JSON créé : {output_file_path}")
|
|
|
|
# Simplified entry point intended to be used when this module is imported
def markdown_to_json(md_file_path, output_file_path):
    """Convert a Markdown file to a JSON file without raising.

    Delegates to ``convert_markdown_to_json``; any exception is caught and
    reported on standard output instead of being propagated.

    Args:
        md_file_path: Path of the Markdown file to convert.
        output_file_path: Path of the JSON file to create.

    Returns:
        bool: True if the conversion succeeded, False otherwise.
    """
    succeeded = False
    try:
        convert_markdown_to_json(md_file_path, output_file_path)
    except Exception as e:
        print(f"Erreur lors de la conversion Markdown → JSON: {e}")
    else:
        succeeded = True
    return succeeded
|
|
|
|
if __name__ == "__main__":
    # Command-line use: expects the input Markdown path and the output
    # JSON path; prints a usage line and exits with status 1 otherwise.
    if len(sys.argv) <= 2:
        print("Utilisation : python markdown_to_json.py <fichier_markdown.md> <fichier_sortie.json>")
        sys.exit(1)

    md_file, output_file = sys.argv[1:3]

    convert_markdown_to_json(md_file, output_file)