# Mirror of https://github.com/Ladebeze66/llm_ticket3.git
# Synced 2025-12-16 03:37:47 +01:00
# 69 lines, 2.6 KiB, Python
import os
|
||
from pathlib import Path
|
||
from shutil import copyfile
|
||
from PIL import Image
|
||
from ocr_utils import pytesseract, AUTO_DETECT_LANGUAGE, completer_mots_tronques
|
||
from ocr_preprocessor import preprocess_image, PREPROCESSING_PROFILES
|
||
|
||
# === CONFIGURATION ===

# Directory scanned for input images when the script is run directly.
DOSSIER_IMAGES = "images_test"

# Root of the output tree: <results>/<image stem>/<profile>/...
DOSSIER_RESULTATS = "ocr_test_results"

# File-name endings accepted as images (compared against the lower-cased name).
EXTENSIONS_AUTORISEES = [
    ".jpg",
    ".jpeg",
    ".png",
    ".tiff",
    ".bmp",
]

# Keys looked up in PREPROCESSING_PROFILES, one OCR pass per entry.
PROFILS_A_TESTER = [
    "default",
    "document",
    "aggressive",
]

# Tesseract page segmentation modes passed as `--psm`:
# 3 = fully automatic, 4 = single column, 6 = single uniform block,
# 11 = sparse text, 12 = sparse text with OSD (per the Tesseract manual).
PSM_MODES = [3, 4, 6, 11, 12]

# Tesseract OCR engine mode passed as `--oem` (3 = default engine selection).
OEM = 3
|
||
|
||
def est_image_valide(fichier):
    """Tell whether a file name ends with one of the allowed image extensions.

    Args:
        fichier: File name or path as a string.

    Returns:
        True if the lower-cased name ends with any entry of
        EXTENSIONS_AUTORISEES, False otherwise. Only the name is inspected;
        the file itself is never opened.
    """
    for extension in EXTENSIONS_AUTORISEES:
        if fichier.lower().endswith(extension):
            return True
    return False
|
||
|
||
def detect_lang(img: Image.Image):
    """Choose the Tesseract language string for an image.

    Args:
        img: The image about to be recognised. It is accepted for interface
            purposes but not inspected by this implementation.

    Returns:
        "fra+eng" when AUTO_DETECT_LANGUAGE is truthy, otherwise "eng".
    """
    if AUTO_DETECT_LANGUAGE:
        return "fra+eng"
    return "eng"
|
||
|
||
def lancer_tests(dossier_images):
    """Run the OCR comparison grid over every valid image in a directory.

    For each image, every profile in PROFILS_A_TESTER is applied through
    preprocess_image, and the result is recognised once per mode in PSM_MODES.
    Output goes to DOSSIER_RESULTATS/<image stem>/<profile>/:

    * image.png     - byte copy of the original image
    * optimized.png - the preprocessed image
    * psm<N>.txt    - recognised text for page segmentation mode N

    Args:
        dossier_images: Path (str or Path) of the directory to scan. Only its
            direct children are considered.

    Returns:
        None. Progress and errors are reported on standard output.

    Any Exception raised while handling one profile (unknown profile key,
    unwritable output path, preprocessing or OCR failure) is printed and the
    run continues with the next profile.
    """
    # is_file() is checked first so a directory whose name happens to end in
    # an image extension is never handed to copyfile; sorting makes the
    # processing order reproducible regardless of filesystem listing order.
    images = sorted(
        f
        for f in Path(dossier_images).glob("*")
        if f.is_file() and est_image_valide(str(f))
    )

    if not images:
        print("❌ Aucune image valide trouvée.")
        return

    for image_path in images:
        base_name = image_path.stem
        print(f"\n🖼️ Traitement de l'image : {image_path.name}")

        for profil in PROFILS_A_TESTER:
            print(f" 🔧 Profil de prétraitement : {profil}")

            try:
                # Copy the settings so a profile is never mutated in place.
                settings = PREPROCESSING_PROFILES[profil].copy()

                dossier_profil = Path(DOSSIER_RESULTATS) / base_name / profil
                dossier_profil.mkdir(parents=True, exist_ok=True)

                # Copy of the original image (once per profile).
                # NOTE: this is a byte-for-byte copy, so the file keeps its
                # original encoding even though it is named "image.png".
                copyfile(image_path, dossier_profil / "image.png")

                # Presumably a PIL image: it is saved and then passed to
                # pytesseract below.
                img_prep = preprocess_image(str(image_path), **settings)
                img_prep.save(dossier_profil / "optimized.png")

                ocr_lang = detect_lang(img_prep)

                for psm in PSM_MODES:
                    print(f" ⚙️ PSM={psm}...", end="")
                    config = f"--psm {psm} --oem {OEM} -l {ocr_lang} -c preserve_interword_spaces=1"
                    texte = pytesseract.image_to_string(img_prep, config=config)

                    # Keep the post-processed text only if it retains at least
                    # 90% of the raw length; otherwise fall back to raw output.
                    texte_corrige = completer_mots_tronques(texte)
                    if len(texte_corrige) >= len(texte) * 0.9:
                        texte = texte_corrige

                    texte = texte.strip()

                    # Write the text file for this PSM mode.
                    with open(dossier_profil / f"psm{psm}.txt", "w", encoding="utf-8") as f:
                        f.write(texte)

                    print(f" ✅ {len(texte)} caractères.")
            except Exception as e:
                # Best effort: report and move on to the next profile.
                print(f" ❌ Erreur avec profil {profil}: {e}")
|
||
|
||
if __name__ == "__main__":
    # Make sure the results root exists before any per-image output is written,
    # then run the full grid over the default image directory.
    Path(DOSSIER_RESULTATS).mkdir(parents=True, exist_ok=True)
    lancer_tests(DOSSIER_IMAGES)
|