mirror of
https://github.com/Ladebeze66/llm_ticket3.git
synced 2025-12-13 10:46:51 +01:00
166 lines
6.2 KiB
Python
166 lines
6.2 KiB
Python
"""
|
|
!!! FICHIER OBSOLÈTE !!!
|
|
Ce fichier est maintenu uniquement pour la compatibilité.
|
|
Veuillez utiliser les modules dans utils/ocr_brut/ à la place.
|
|
"""
|
|
|
|
# ocr_preprocessor.py
|
|
|
|
import os
|
|
from PIL import Image, ImageEnhance
|
|
import cv2
|
|
import numpy as np
|
|
import time
|
|
|
|
# === 🎛️ PROFILS DE TRAITEMENT D'IMAGE OCR ===
|
|
# Named settings bundles for OCR image preprocessing.
# preprocess_image_with_profile() looks a bundle up by name and forwards its
# entries to preprocess_image() as keyword settings.
PREPROCESSING_PROFILES = dict(
    default=dict(
        # Images whose shorter side is below this many pixels are upscaled
        # proportionally.
        resize_min_dim=1000,
        # Turn contrast enhancement on/off.
        enhance_contrast=True,
        # > 1 means more contrast; typical values are 1.2 to 1.8.
        contrast_factor=1.2,
        # Turn sharpening on/off.
        enhance_sharpness=False,
        # > 1 means sharper; typical values are 1.2 to 2.0.
        sharpness_factor=1.0,
        # Turn noise reduction on/off.
        apply_denoising=False,
        denoise_strength=dict(
            # 0 to 15: luminance smoothing strength.
            h=0,
            # 0 to 15: chroma smoothing strength.
            hColor=0,
            # Size of the patch being compared (typically 7).
            templateWindowSize=7,
            # Size of the area searched around the patch (typically 21).
            searchWindowSize=21,
        ),
        # Invert colours, for light text on a dark background.
        invert_colors=False,
        # Local contrast equalisation (useful with very dark/bright areas).
        apply_clahe=False,
        save_debug_output=True,
        debug_output_dir="debug_ocr_preprocessed",
    ),
    aggressive=dict(
        resize_min_dim=1400,
        enhance_contrast=True,
        contrast_factor=1.8,
        enhance_sharpness=True,
        sharpness_factor=1.5,
        apply_denoising=True,
        denoise_strength=dict(
            h=10,
            hColor=10,
            templateWindowSize=7,
            searchWindowSize=21,
        ),
        invert_colors=False,
        apply_clahe=False,
        save_debug_output=True,
        debug_output_dir="debug_ocr_preprocessed",
    ),
    document=dict(
        resize_min_dim=1100,
        enhance_contrast=True,
        contrast_factor=1.2,
        enhance_sharpness=False,
        sharpness_factor=1.0,
        apply_denoising=False,
        denoise_strength=dict(
            h=0,
            hColor=0,
            templateWindowSize=7,
            searchWindowSize=21,
        ),
        invert_colors=False,
        apply_clahe=False,
        save_debug_output=True,
        debug_output_dir="debug_ocr_preprocessed",
    ),
    clahe_high=dict(
        resize_min_dim=1200,
        enhance_contrast=True,
        contrast_factor=1.4,
        enhance_sharpness=True,
        sharpness_factor=1.3,
        apply_denoising=True,
        denoise_strength=dict(
            h=7,
            hColor=7,
            # Size of the local patch used by the filter.
            templateWindowSize=7,
            # Search area of the filter.
            searchWindowSize=21,
        ),
        invert_colors=False,
        apply_clahe=True,
        save_debug_output=True,
        debug_output_dir="debug_ocr_preprocessed",
    ),
    # This profile carries no "denoise_strength" entry (denoising is off).
    invert_light=dict(
        resize_min_dim=1200,
        enhance_contrast=True,
        contrast_factor=1.3,
        enhance_sharpness=True,
        sharpness_factor=1.4,
        apply_denoising=False,
        invert_colors=True,
        apply_clahe=False,
        save_debug_output=True,
        debug_output_dir="debug_ocr_preprocessed",
    ),
)
|
|
|
|
def preprocess_image(image_path: str, **settings) -> Image.Image:
    """Load an image and apply the configured OCR preprocessing steps.

    The steps run in a fixed order: upscaling, contrast, sharpness,
    denoising, CLAHE, colour inversion, then an optional debug dump.
    Every step is skipped unless its setting enables it.

    Args:
        image_path: Path of the image file to load.
        **settings: Optional processing settings:
            resize_min_dim: When > 0 and the image's shorter side is
                smaller, the image is upscaled proportionally (bicubic)
                so the shorter side reaches this size. Default 0 (off).
            enhance_contrast / contrast_factor: Enable contrast
                enhancement and its factor (factor defaults to 1.5).
            enhance_sharpness / sharpness_factor: Enable sharpening and
                its factor (factor defaults to 1.5).
            apply_denoising / denoise_strength: Enable non-local-means
                colour denoising; ``denoise_strength`` is a dict with the
                optional keys ``h`` (10), ``hColor`` (10),
                ``templateWindowSize`` (7) and ``searchWindowSize`` (21).
            apply_clahe: Apply CLAHE to the lightness channel in LAB space.
            invert_colors: Invert every pixel value.
            save_debug_output: Write the processed image to
                ``<debug_output_dir>/<profile_name>/optimized_<file name>``.
            debug_output_dir: Debug root directory
                (default ``"debug_ocr_preprocessed"``).
            profile_name: Debug sub-directory name (default ``"default"``).

    Returns:
        The processed image as an RGB PIL image.
    """
    # convert() forces the pixel data to be read, so the file can be closed
    # right away instead of leaking the handle until garbage collection.
    with Image.open(image_path) as source:
        img = source.convert("RGB")

    # Upscaling
    target_min_dim = settings.get("resize_min_dim", 0)
    if target_min_dim > 0:
        width, height = img.size
        min_dim = min(width, height)
        if min_dim < target_min_dim:
            scale = target_min_dim / min_dim
            # round() rather than int(): float error in width * scale could
            # otherwise truncate the short side to one pixel below target.
            new_size = (round(width * scale), round(height * scale))
            img = img.resize(new_size, Image.Resampling.BICUBIC)

    # Contrast
    if settings.get("enhance_contrast", False):
        img = ImageEnhance.Contrast(img).enhance(
            settings.get("contrast_factor", 1.5)
        )

    # Sharpness
    if settings.get("enhance_sharpness", False):
        img = ImageEnhance.Sharpness(img).enhance(
            settings.get("sharpness_factor", 1.5)
        )

    # The remaining steps use OpenCV, which works on BGR arrays.
    img_cv = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

    # Denoising
    if settings.get("apply_denoising", False):
        strength = settings.get("denoise_strength", {})
        img_cv = cv2.fastNlMeansDenoisingColored(
            img_cv,
            None,
            h=strength.get("h", 10),
            hColor=strength.get("hColor", 10),
            templateWindowSize=strength.get("templateWindowSize", 7),
            searchWindowSize=strength.get("searchWindowSize", 21),
        )

    # CLAHE: equalise only the lightness channel so the colour channels
    # are passed through unchanged.
    if settings.get("apply_clahe", False):
        lab = cv2.cvtColor(img_cv, cv2.COLOR_BGR2LAB)
        lightness, chan_a, chan_b = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        equalized = clahe.apply(lightness)
        img_cv = cv2.cvtColor(
            cv2.merge((equalized, chan_a, chan_b)), cv2.COLOR_LAB2BGR
        )

    # Inversion
    if settings.get("invert_colors", False):
        img_cv = cv2.bitwise_not(img_cv)

    # Debug dump of the preprocessed image. The directory is only created
    # when a dump is actually requested, so disabling debug output leaves
    # the filesystem untouched.
    if settings.get("save_debug_output", False):
        debug_dir = settings.get("debug_output_dir", "debug_ocr_preprocessed")
        profile_name = settings.get("profile_name", "default")
        debug_profile_dir = os.path.join(debug_dir, profile_name)
        os.makedirs(debug_profile_dir, exist_ok=True)
        base_name = os.path.basename(image_path)
        debug_path = os.path.join(debug_profile_dir, f"optimized_{base_name}")
        cv2.imwrite(debug_path, img_cv)

    return Image.fromarray(cv2.cvtColor(img_cv, cv2.COLOR_BGR2RGB))
|
|
|
|
def preprocess_image_with_profile(image_path: str, profile_name="default") -> Image.Image:
    """Preprocess an image using one of the named PREPROCESSING_PROFILES.

    Args:
        image_path: Path of the image file to preprocess.
        profile_name: Key into PREPROCESSING_PROFILES; an unknown name
            raises KeyError.

    Returns:
        The image returned by preprocess_image() for that profile.
    """
    # Build a fresh dict so the shared profile table is never modified; the
    # profile name travels along with the other settings.
    profile_settings = {
        **PREPROCESSING_PROFILES[profile_name],
        "profile_name": profile_name,
    }
    return preprocess_image(image_path, **profile_settings)
|