mirror of
https://github.com/Ladebeze66/llm_ticket3.git
synced 2025-12-13 10:46:51 +01:00
32 lines
993 B
Python
32 lines
993 B
Python
import json
|
|
from pathlib import Path
|
|
|
|
# Location of the JSON cleaning dictionary: a file named
# "ocr_clean_dict.json" sitting next to this module.
DICT_PATH = Path(__file__).with_name("ocr_clean_dict.json")
|
|
|
|
def load_cleaning_dict(path=DICT_PATH):
    """Load the cleaning dictionary from a JSON file.

    Args:
        path: Location of the JSON file, given either as a ``pathlib.Path``
            or as a plain string. Defaults to ``DICT_PATH``.

    Returns:
        The decoded JSON content of the file, or an empty dict when the
        file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain
            valid JSON.
    """
    # Open the file directly rather than checking ``path.exists()`` first:
    # this works for string paths too, and the file cannot disappear
    # between the existence check and the open.
    try:
        f = open(path, "r", encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        # A missing file (or a missing path component) means "no
        # corrections recorded yet".
        return {}
    with f:
        return json.load(f)
|
|
|
|
def clean_ocr_text(text, cleaning_dict=None):
    """Apply the dictionary's corrections to an OCR'd or translated text.

    Args:
        text: The string to clean.
        cleaning_dict: Mapping of erroneous substring to its replacement.
            When ``None``, the mapping is obtained from
            ``load_cleaning_dict()``.

    Returns:
        The text after every replacement has been applied. Replacements
        run one after another in the mapping's iteration order, so a later
        entry also sees the output of earlier ones. Empty (or ``None``)
        text is returned unchanged.
    """
    # Nothing to clean: return early and avoid loading the dictionary.
    if not text:
        return text

    if cleaning_dict is None:
        cleaning_dict = load_cleaning_dict()

    for wrong, correct in cleaning_dict.items():
        # ``str.replace("", x)`` inserts ``x`` between every character, so
        # an empty key would corrupt the whole text; ignore such entries.
        if not wrong:
            continue
        text = text.replace(wrong, correct)

    return text
|
|
|
|
def add_to_cleaning_dict(wrong, correct, path=DICT_PATH):
    """Add a new error/correction pair to the dictionary file.

    The current dictionary is loaded from ``path``, the pair is inserted
    (overwriting any existing entry for ``wrong``), and the result is
    written back as indented UTF-8 JSON.

    Args:
        wrong: The erroneous text to look for.
        correct: The text that should replace it.
        path: Location of the JSON file, given either as a ``pathlib.Path``
            or as a plain string. Defaults to ``DICT_PATH``.

    Raises:
        TypeError: If the updated dictionary cannot be serialized to JSON.
            The file on disk is left untouched in that case.
    """
    # Normalize so that string paths are accepted as well.
    path = Path(path)

    data = load_cleaning_dict(path)
    data[wrong] = correct

    # Serialize before opening the file: mode "w" truncates immediately,
    # so a serialization failure after the open would wipe out the
    # existing dictionary.
    payload = json.dumps(data, indent=2, ensure_ascii=False)
    with open(path, "w", encoding="utf-8") as f:
        f.write(payload)
|