mirror of
https://github.com/Ladebeze66/llm_ticket3.git
synced 2025-12-15 20:56:52 +01:00
76 lines
2.3 KiB
Python
76 lines
2.3 KiB
Python
# utils/translate_utils.py
|
|
|
|
from deep_translator import GoogleTranslator
|
|
import json
|
|
import os
|
|
from datetime import datetime
|
|
import logging
|
|
|
|
# Module-level logger shared by the functions in this file; it is registered
# under the fixed name "Translate" rather than the module's dotted path.
logger = logging.getLogger("Translate")
|
|
|
|
def fr_to_en(text: str) -> str:
    """Translate French text to English with ``GoogleTranslator``.

    Args:
        text: French text to translate. ``None`` is tolerated and treated
            like an empty string.

    Returns:
        The value returned by ``GoogleTranslator.translate``, or ``""`` when
        ``text`` is ``None``, empty or whitespace-only, or when anything in
        the translation attempt raises.
    """
    try:
        # Nothing to translate: return early without building a translator.
        # The explicit None check keeps a missing input from raising
        # AttributeError on .strip() and being logged as a failed translation.
        if text is None or not text.strip():
            return ""
        return GoogleTranslator(source="fr", target="en").translate(text)
    except Exception as e:
        # Best-effort by design: the failure is logged and callers receive ""
        # instead of an exception.
        logger.error(f"Traduction FR->EN échouée: {e}")
        return ""
|
|
|
|
def en_to_fr(text: str) -> str:
    """Translate English text to French with ``GoogleTranslator``.

    Args:
        text: English text to translate. ``None`` is tolerated and treated
            like an empty string.

    Returns:
        The value returned by ``GoogleTranslator.translate``, or ``""`` when
        ``text`` is ``None``, empty or whitespace-only, or when anything in
        the translation attempt raises.
    """
    try:
        # Nothing to translate: return early without building a translator.
        # The explicit None check keeps a missing input from raising
        # AttributeError on .strip() and being logged as a failed translation.
        if text is None or not text.strip():
            return ""
        return GoogleTranslator(source="en", target="fr").translate(text)
    except Exception as e:
        # Best-effort by design: the failure is logged and callers receive ""
        # instead of an exception.
        logger.error(f"Traduction EN->FR échouée: {e}")
        return ""
|
|
|
|
def sauvegarder_ocr_traduction(
    image_path: str,
    ticket_id: str,
    ocr_fr: str,
    ocr_en: str,
    ocr_en_back_fr: str = "",  # optional EN -> FR back-translation
    base_dir: str = "reports"
) -> None:
    """Persist the OCR text and its translations for one image.

    Two files are written under
    ``<base_dir>/<ticket_id>/pipeline/ocr_traduction`` (created if missing):

    * ``<image file name>.json`` -- the texts plus a metadata section,
      overwritten on every call for the same image;
    * ``ocr_traduction.txt`` -- a text log to which one entry is appended
      per call.

    Args:
        image_path: Path of the source image; only its base name is used.
        ticket_id: Ticket identifier, used as a directory name and stored in
            the metadata.
        ocr_fr: OCR text, stored under ``ocr_fr``.
        ocr_en: English translation, stored under ``translation_en``.
        ocr_en_back_fr: Optional EN -> FR back-translation.
        base_dir: Root directory of the reports tree.

    Returns:
        None. Any exception is logged through the module logger and
        swallowed, so a failed save never propagates to the caller.
    """
    try:
        file_name = os.path.basename(image_path)
        out_dir = os.path.join(base_dir, ticket_id, "pipeline", "ocr_traduction")
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        os.makedirs(out_dir, exist_ok=True)

        metadata = {
            "ticket_id": ticket_id,
            "timestamp": stamp,
            "source_module": "ocr_utils + translate_utils",
            "lang_detected": "fr",
        }
        payload = {
            "image_name": file_name,
            "ocr_fr": ocr_fr,
            "translation_en": ocr_en,
            "translation_en_back_fr": ocr_en_back_fr,
            "metadata": metadata,
        }

        # One JSON document per image.
        json_path = os.path.join(out_dir, f"{file_name}.json")
        with open(json_path, "w", encoding="utf-8") as json_file:
            json.dump(payload, json_file, ensure_ascii=False, indent=2)

        # Shared text log: "_" stands in for any empty value, and the
        # trailing blank line separates consecutive entries.
        entry_parts = [
            f"{file_name}\n",
            f"[FR] {ocr_fr or '_'}\n",
            f"[EN] {ocr_en or '_'}\n",
            f"[EN→FR] {ocr_en_back_fr or '_'}\n\n",
        ]
        log_path = os.path.join(out_dir, "ocr_traduction.txt")
        with open(log_path, "a", encoding="utf-8") as log_file:
            log_file.write("".join(entry_parts))

    except Exception as e:
        logger.error(f"Erreur sauvegarde OCR+TRAD pour {image_path}: {e}")
|