llm_ticket3/formatters/markdown_to_json.py
2025-04-09 13:36:42 +02:00

180 lines
5.6 KiB
Python

import os
import re
import json
import sys
def parse_markdown(md_content):
    """Parse a ticket Markdown export into an ordered dictionary.

    The document is split on level-2 headings (``## ``). Three sections are
    recognised by their title prefix; any other section is ignored:

    * ``Informations du ticket`` - fields merged into the result,
    * ``Messages`` - stored as a list under the ``"messages"`` key,
    * ``Informations sur l'extraction`` - fields merged into the result.

    Args:
        md_content: Full text of the Markdown document.

    Returns:
        dict: Parsed data with ``id``, ``code``, ``name`` and ``description``
        first (when present), followed by the remaining keys in the order
        they were parsed.
    """
    data = {}

    # Anchor the split at the start of a line instead of on "\n## " so that a
    # heading on the very first line of the document is recognised as well.
    sections = re.split(r"^## ", md_content, flags=re.MULTILINE)

    for section in sections:
        if section.startswith("Informations du ticket"):
            data.update(parse_ticket_info(section))
        elif section.startswith("Messages"):
            data["messages"] = parse_messages(section)
        elif section.startswith("Informations sur l'extraction"):
            data.update(parse_extraction_info(section))

    # Put the main identifying fields first so "description" directly
    # follows "name" in the output.
    ordered_fields = ["id", "code", "name", "description"]
    ordered_data = {
        field: data[field] for field in ordered_fields if field in data
    }

    # Then append every remaining field, keeping its parsed order.
    for key, value in data.items():
        ordered_data.setdefault(key, value)

    return ordered_data
def parse_ticket_info(section):
    """Parse the ticket information section into a flat dictionary.

    Each ``- **Key**: value`` bullet becomes an entry whose key is
    lower-cased with ``/`` and spaces replaced by ``_``. The
    ``- **description**:`` bullet is special: its value is any text on the
    bullet line itself plus all following lines, up to the next bullet field
    (``- **``) or heading (``## ``).

    Args:
        section: Text of the section, title line included.

    Returns:
        dict: Parsed fields. A ``"description"`` key is always present
        (empty string when the section has none).
    """
    info = {}
    description_lines = []
    capturing_description = False

    for line in section.strip().split("\n"):
        if capturing_description:
            # Test the CURRENT line for the end of the description. Looking
            # ahead at the next line would drop the last description line
            # and would swallow a field that directly follows an empty
            # description header.
            if line.startswith(("## ", "- **")):
                capturing_description = False
                info["description"] = "\n".join(description_lines).strip()
                # Fall through: this line is handled as a regular field.
            else:
                description_lines.append(line)
                continue

        # Start of the multi-line description; keep any text that sits on
        # the same line as the field name.
        desc_match = re.match(r"- \*\*description\*\*:(.*)", line)
        if desc_match:
            capturing_description = True
            inline_text = desc_match.group(1).strip()
            if inline_text:
                description_lines.append(inline_text)
            continue

        # Regular single-line field.
        match = re.match(r"- \*\*(.*?)\*\*: (.*)", line)
        if match:
            key, value = match.groups()
            key = key.lower().replace("/", "_").replace(" ", "_")
            info[key] = value.strip()

    # The description may run until the end of the section.
    if capturing_description and description_lines:
        info["description"] = "\n".join(description_lines).strip()
    else:
        info.setdefault("description", "")

    return info
def parse_messages(section):
    """Parse the messages section into a list of message dictionaries.

    A message starts at each ``### Message`` heading. Inside a message,
    ``**Key**: value`` lines become entries (key lower-cased, ``/`` and
    spaces replaced by ``_``); every other line is appended, followed by a
    newline, to the message's ``"content"`` entry. Lines before the first
    message heading are ignored.

    Args:
        section: Text of the section, title line included.

    Returns:
        list[dict]: One dictionary per non-empty message, in document order.
    """
    messages = []
    current_message = {}
    in_message = False

    for line in section.strip().split("\n"):
        if line.startswith("### Message"):
            # A new heading closes the previous message, if it has any data.
            if current_message:
                messages.append(current_message)
            current_message = {}
            in_message = True
            continue

        if not in_message:
            continue

        match = re.match(r"\*\*(.*?)\*\*: (.*)", line)
        if match:
            key, value = match.groups()
            key = key.lower().replace("/", "_").replace(" ", "_")
            current_message[key] = value.strip()
        else:
            # Everything that is not a metadata line belongs to the body.
            # This includes lines that merely start with bold text, which
            # would otherwise be lost.
            current_message["content"] = (
                current_message.get("content", "") + line + "\n"
            )

    if current_message:
        messages.append(current_message)

    return messages
def parse_extraction_info(section):
    """Collect the ``- **Key**: value`` bullets of the extraction section.

    Keys are lower-cased with ``/`` and spaces replaced by ``_``; values are
    stripped of surrounding whitespace. Lines that are not such bullets are
    ignored.

    Args:
        section: Text of the section, title line included.

    Returns:
        dict: Normalised key mapped to its value.
    """
    field_pattern = re.compile(r"- \*\*(.*?)\*\*: (.*)")
    parsed = {}
    for raw_line in section.strip().split("\n"):
        found = field_pattern.match(raw_line)
        if found is None:
            continue
        label, text = found.groups()
        normalized = label.lower().replace("/", "_").replace(" ", "_")
        parsed[normalized] = text.strip()
    return parsed
def convert_markdown_to_json(md_file_path, output_file_path):
    """Read a Markdown ticket file, parse it and write the result as JSON.

    When the parsed data has no ``"description"`` key, an empty one is
    added: right after ``"name"`` if that key exists, otherwise at the end.
    The JSON is written as UTF-8 with a 4-space indent and non-ASCII
    characters kept as-is. A confirmation message is printed on success.

    Args:
        md_file_path: Path of the Markdown file to read.
        output_file_path: Path of the JSON file to create or overwrite.
    """
    with open(md_file_path, 'r', encoding='utf-8') as md_file:
        markdown_text = md_file.read()

    data = parse_markdown(markdown_text)

    # Guarantee that a description entry exists in the output.
    if "description" not in data:
        if "name" not in data:
            data["description"] = ""
        else:
            # Rebuild the mapping so the empty description sits directly
            # after "name".
            with_description = {}
            for key in data:
                with_description[key] = data[key]
                if key == "name":
                    with_description["description"] = ""
            data = with_description

    with open(output_file_path, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=4, ensure_ascii=False)

    print(f"Conversion terminée. Fichier JSON créé : {output_file_path}")
# Simplified wrapper meant to be called when this module is imported
def markdown_to_json(md_file_path, output_file_path):
    """
    Convert a Markdown file to a JSON file without raising on failure.

    Args:
        md_file_path: Path of the Markdown file to convert
        output_file_path: Path of the JSON file to create

    Returns:
        bool: True if the conversion succeeded, False otherwise (the error
        is printed)
    """
    try:
        convert_markdown_to_json(md_file_path, output_file_path)
    except Exception as e:
        # Report the failure and signal it through the return value.
        print(f"Erreur lors de la conversion Markdown → JSON: {e}")
        return False
    else:
        return True
if __name__ == "__main__":
    # Command-line entry point: the first argument is the Markdown input
    # path, the second is the JSON output path. Extra arguments are ignored.
    if len(sys.argv) >= 3:
        md_file, output_file = sys.argv[1:3]
        convert_markdown_to_json(md_file, output_file)
    else:
        print("Utilisation : python markdown_to_json.py <fichier_markdown.md> <fichier_sortie.json>")
        sys.exit(1)