0705-17:12

2026-02-04 08:50:24 +01:00 · 2025-05-07 17:12:50 +02:00 · 2025-05-07 17:12:50 +02:00 · 6b96513438
commit 6b96513438
parent 211a570559
26 changed files with 725 additions and 162 deletions
--- a/agents/llama_vision/agent_vision_ocr.py
+++ b/agents/llama_vision/agent_vision_ocr.py
@ -7,9 +7,9 @@ from pathlib import Path
 from ..base_agent import BaseAgent
 from ..utils.pipeline_logger import sauvegarder_donnees
-from utils.ocr_cleaner import clean_text_with_profiles  # AJOUT
+from utils.ocr_avance.ocr_cleaner import clean_text_with_profiles
-from utils.ocr_utils import extraire_texte
+from utils.ocr_brut.ocr_utils import extraire_texte
-from utils.image_preparer import prepare_image_for_llama_vision
+from utils.ocr_avance.image_preparer import prepare_image_for_llama_vision
 logger = logging.getLogger("AgentVisionOCR")
@ -161,7 +161,9 @@ This prompt is designed to generalize across all web portals, technical forms, o
            # ✅ Étape 3 : Préparation de l'image pour le modèle Vision
            image_stem = Path(image_path).stem
-            vision_ready_path = os.path.join("debug_ocr", f"vision_ready_{image_stem}.png")
+            # Utiliser le nouveau chemin pour les résultats OCR avancé
            os.makedirs("results/ocr_avance", exist_ok=True)
            vision_ready_path = os.path.join("results/ocr_avance", f"vision_ready_{image_stem}.png")
            prepare_image_for_llama_vision(image_path, vision_ready_path)
            # Étape 4 : Appel au modèle avec image traitée
@ -176,6 +178,15 @@ This prompt is designed to generalize across all web portals, technical forms, o
                        getattr(self.llm, "modele", "llama3-vision-90b-instruct"))
            model_name = model_name.replace(".", "-").replace(":", "-").replace("_", "-")
            # Sauvegarde du résultat dans results/ocr_avance
            try:
                result_dir = "results/ocr_avance"
                os.makedirs(result_dir, exist_ok=True)
                with open(f"{result_dir}/ocr_{image_stem}.txt", "w", encoding="utf-8") as f:
                    f.write(cleaned_text)
            except Exception as e:
                logger.error(f"[OCR-LLM] Erreur sauvegarde texte: {e}")
            result = {
                "extracted_text": cleaned_text,
                "image_name": image_name,
--- a/docs/ARCHITECTURE_OCR.md
+++ b/docs/ARCHITECTURE_OCR.md
@ -0,0 +1,63 @@
 # Architecture OCR Réorganisée
 ## Structure des Répertoires
 ```
 project/
 │
 ├── utils/
 │   ├── ocr_brut/                  # OCR basique (Tesseract)
 │   │   ├── ocr_utils.py           # Fonctions d'extraction de texte 
 │   │   ├── ocr_preprocessor.py    # Prétraitement d'images pour OCR
 │   │   └── README.md              # Documentation
 │   │
 │   ├── ocr_avance/                # OCR avancé (Llama Vision)
 │   │   ├── image_preparer.py      # Préparation pour modèle Llama Vision
 │   │   ├── ocr_cleaner.py         # Nettoyage et amélioration du texte
 │   │   ├── ocr_clean_dict.json    # Dictionnaire de correction OCR
 │   │   ├── hallucination_filter.json  # Filtres pour hallucinations
 │   │   ├── translation_clean_dict.json  # Corrections pour traduction
 │   │   └── README.md              # Documentation
 │   │
 │   └── __init__.py                # Compatibilité avec imports existants
 │
 ├── agents/
 │   └── llama_vision/
 │       └── agent_vision_ocr.py    # Agent principal utilisant les modules OCR
 │
 ├── results/
 │   ├── ocr_brut/                  # Résultats OCR Tesseract
 │   ├── ocr_avance/                # Résultats OCR Llama Vision
 │   └── README.md                  # Documentation
 │
 └── test_agent_ocr.py              # Script de test principal
 ```
 ## Circuit de Traitement OCR
 1. **OCR Brut (Tesseract)**
   - Sélection d'un profil de prétraitement (default, document, aggressive, clahe_high, invert_light)
   - Prétraitement de l'image avec `ocr_preprocessor.py`
   - Extraction du texte avec Tesseract via `ocr_utils.py`
   - Sauvegarde des résultats dans `results/ocr_brut/`
 2. **OCR Avancé (Llama Vision)**
   - Préparation de l'image pour le modèle avec `image_preparer.py`
   - Envoi au modèle par l'agent `AgentVisionOCR`
   - Nettoyage du texte extrait avec `ocr_cleaner.py`
   - Sauvegarde des résultats dans `results/ocr_avance/`
 ## Configuration Actuelle
 - **OCR Brut** : Profil "document" avec PSM=11, OEM=3
 - **OCR Avancé** : Modèle Llama Vision avec image redimensionnée à 672x672
 ## Tests
 Le script `test_agent_ocr.py` permet de tester l'ensemble du circuit avec une image de test.
 ## Notes Importantes
 1. Les fichiers dans `utils/` servent uniquement à la compatibilité avec les imports existants
 2. Les résultats sont stockés dans des répertoires séparés pour plus de clarté
 3. Le répertoire `ocr_brut_test/` a été conservé pour des tests ultérieurs.
--- a/docs/RESUME_OCR.md
+++ b/docs/RESUME_OCR.md
@ -0,0 +1,54 @@
 # Résumé des Modifications du Système OCR
 ## 1. Optimisation de l'OCR Brut (Tesseract)
 - **Configuration optimale** : 
  - Profil de prétraitement "document" adapté aux documents administratifs
  - PSM=11 (sparse text) pour une meilleure extraction
  - OEM=3 (mode par défaut)
 - **Simplification du code** :
  - Suppression du redimensionnement redondant dans ocr_utils.py
  - Uniformisation des chemins de sortie vers results/ocr_brut/
  - Amélioration de la gestion des erreurs
 ## 2. Optimisation de l'OCR Avancé (Llama Vision)
 - **Préparation des images** :
  - Standardisation de toutes les images à 672x672 pixels
  - Conservation des proportions avec padding
  - Sortie unique vers results/ocr_avance/
 - **Nettoyage du texte** :
  - Dictionnaires de nettoyage séparés par profil
  - Système modulaire pour activer différents niveaux de correction
 ## 3. Réorganisation de l'Architecture
 - **Séparation claire des modules** :
  - OCR brut (Tesseract) dans utils/ocr_brut/
  - OCR avancé (Llama Vision) dans utils/ocr_avance/
  - Résultats dans results/ocr_brut/ et results/ocr_avance/
 - **Documentation complète** :
  - README pour chaque module
  - Documentation ARCHITECTURE_OCR.md pour la vue d'ensemble
  - Avertissements dans les fichiers obsolètes
 ## 4. Tests et Compatibilité
 - **Maintien de la compatibilité** :
  - Module utils/__init__.py pour assurer la transition
  - Conservation des fichiers originaux avec avertissements
  - Duplication des fichiers critiques dans utils/ocr_brut/ pour archivage
 - **Script `test_agent_ocr.py`** :
  - Mise à jour pour utiliser les nouveaux chemins
  - Création automatique des répertoires de résultats
  - Affichage clair des chemins de sortie
 ## 5. Prochaines Étapes Possibles
 - Suppression des fichiers obsolètes une fois la transition terminée
 - Optimisation supplémentaire des profils de prétraitement
 - Développement de nouveaux dictionnaires de correction 
--- a/155
+++ b/155
@ -1,155 +0,0 @@
 2025-05-07 08:46:02 OpenVPN 2.6.12 [git:v2.6.12/038a94bae57a446c] Windows [SSL (OpenSSL)] [LZO] [LZ4] [PKCS11] [AEAD] [DCO] built on Jul 18 2024
 2025-05-07 08:46:02 Windows version 10.0 (Windows 10 or greater), amd64 executable
 2025-05-07 08:46:02 library versions: OpenSSL 3.3.1 4 Jun 2024, LZO 2.10
 2025-05-07 08:46:02 DCO version: 1.2.1
 2025-05-07 08:46:02 TCP/UDP: Preserving recently used remote address: [AF_INET]37.71.248.18:1111
 2025-05-07 08:46:02 UDPv4 link local: (not bound)
 2025-05-07 08:46:02 UDPv4 link remote: [AF_INET]37.71.248.18:1111
 2025-05-07 08:46:02 [vpn.cbao.fr] Peer Connection Initiated with [AF_INET]37.71.248.18:1111
 2025-05-07 08:46:04 open_tun
 2025-05-07 08:46:04 tap-windows6 device [Connexion au réseau local] opened
 2025-05-07 08:46:04 Set TAP-Windows TUN subnet mode network/local/netmask = 10.8.1.0/10.8.1.12/255.255.255.0 [SUCCEEDED]
 2025-05-07 08:46:04 Notified TAP-Windows driver to set a DHCP IP/netmask of 10.8.1.12/255.255.255.0 on interface {2CCBC607-261F-452C-8E9E-893870FF0EAE} [DHCP-serv: 10.8.1.0, lease-time: 31536000]
 2025-05-07 08:46:04 Successful ARP Flush on interface [6] {2CCBC607-261F-452C-8E9E-893870FF0EAE}
 2025-05-07 08:46:04 IPv4 MTU set to 1500 on interface 6 using service
 2025-05-07 08:46:09 Initialization Sequence Completed
 2025-05-07 13:46:02 [vpn.cbao.fr] Inactivity timeout (--ping-restart), restarting
 2025-05-07 13:46:02 SIGUSR1[soft,ping-restart] received, process restarting
 2025-05-07 13:46:03 TCP/UDP: Preserving recently used remote address: [AF_INET]37.71.248.18:1111
 2025-05-07 13:46:03 UDPv4 link local: (not bound)
 2025-05-07 13:46:03 UDPv4 link remote: [AF_INET]37.71.248.18:1111
 2025-05-07 13:46:05 [vpn.cbao.fr] Peer Connection Initiated with [AF_INET]37.71.248.18:1111
 2025-05-07 13:46:05 Preserving previous TUN/TAP instance: Connexion au réseau local
 2025-05-07 13:46:05 Initialization Sequence Completed
 dev tun
 persist-tun
 persist-key
 data-ciphers AES-256-GCM:AES-256-CBC
 data-ciphers-fallback AES-256-CBC
 auth SHA256
 tls-client
 client
 resolv-retry infinite
 remote 37.71.248.18 1111 udp4
 nobind
 auth-user-pass
 remote-cert-tls server
 explicit-exit-notify
 auth-nocache
 <ca>
 -----BEGIN CERTIFICATE-----
 MIIFBTCCA22gAwIBAgIIejdwM+00dLcwDQYJKoZIhvcNAQENBQAwWzEPMA0GA1UE
 AxQGQ0FfVlBOMQswCQYDVQQGEwJGUjELMAkGA1UECBMCUE8xEjAQBgNVBAcTCVBl
 cnBpZ25hbjENMAsGA1UEChMEQ0JBTzELMAkGA1UECxMCSVQwHhcNMjMwOTI5MDgx
 MjM2WhcNMzMwOTI2MDgxMjM2WjBbMQ8wDQYDVQQDFAZDQV9WUE4xCzAJBgNVBAYT
 AkZSMQswCQYDVQQIEwJQTzESMBAGA1UEBxMJUGVycGlnbmFuMQ0wCwYDVQQKEwRD
 QkFPMQswCQYDVQQLEwJJVDCCAaIwDQYJKoZIhvcNAQEBBQADggGPADCCAYoCggGB
 AKEpm8PH2IOLzN3YeTEw17uIxipWNXDhtG7eqWgzXrus2r4UGeH692zb59I5Krrn
 4JIWIEvAxylDkGucaRj1fAB2kHfDoiT/13ivbje0PjDNlCYNScaN0brz5/Q+CHzn
 eFA6LyjXXT/IdpduRW5pL5BsEFWGHwgCT68YG0rfIqRMfEqYHCwd16yH2kywNgqd
 xC61CEmp3plIlng+8DYaSZ6Si4UoJ2xtYWzFyH0EafH2R9Vhd6xgw9HqVD5LkTNS
 qj7imL2oqIcjx1xbBKi1jgXrv2emCYJKmU2yE4DGxnHIR4tHwgrw2rsA6EobTg3c
 GM/DwW6IchRO6AgxThvpTglDerSI3eZ7P+ezCLu1DVzDSpC4bUp5ZFLB7tR01EqK
 8LxFXdOFF9ynJNc5v6GjMF5yi1P65bYUG9gFDFF4NDx0ZYUTyNlyzAJgtuJPivRD
 milL+MQYEsrA4cdc1Em9ssSK1AGwX/987DxlGM/KRilNLGGUpRVr0Bn7S3YJTEkk
 IwIDAQABo4HMMIHJMB0GA1UdDgQWBBRNjdxfpJmKOG2uv4aIxfQDOSH3HzCBjAYD
 VR0jBIGEMIGBgBRNjdxfpJmKOG2uv4aIxfQDOSH3H6FfpF0wWzEPMA0GA1UEAxQG
 Q0FfVlBOMQswCQYDVQQGEwJGUjELMAkGA1UECBMCUE8xEjAQBgNVBAcTCVBlcnBp
 Z25hbjENMAsGA1UEChMEQ0JBTzELMAkGA1UECxMCSVSCCHo3cDPtNHS3MAwGA1Ud
 EwQFMAMBAf8wCwYDVR0PBAQDAgEGMA0GCSqGSIb3DQEBDQUAA4IBgQBqxHDMi2cn
 UHc25JH1cEqOaiBfPKOyQuDXFeXQuc6lIuWOoiH4C/XGdMwxx9zP/WLpliVV4Wfq
 PicjadSr6T77m5M55qPTFL6zG/oeKbHg+YHGIEdHMnMJPEon1nDx9lQsQFFoz9F/
 cAKMTGjxuCQbTqRnOf4mCSsi0vtn0SgYwcoNuJhAOpP4OJHu9nbUaLlx8VJMONku
 4P2EHWro/2UEqldrp1xkH2kXwx7u4LJr916z6IHdfTu4pMkr+yTcrM0EM1aVdC08
 LqKj+WXcefcP6YZqajMgVbrAmq5JPLEmY4IiAl52b+kMEHp+mBfI+gvrJMMFZSz1
 /n1U949EVAafklr/FqD9HBlgesZtbNsflybhrFF4+CE9/9Mp9YZ/nvBGkxiaXSBA
 Xr4Ftq48GMk/abpA3MCuH4UWlMO3RZLSD727umoOko2BPbNqMmkvEL4hTvCTliAr
 ThV23Aasyc9zy977HivaLeJpsKNCMC+C83LYATMab7hhQ9c2BwIj4Fs=
 -----END CERTIFICATE-----
 </ca>
 <cert>
 -----BEGIN CERTIFICATE-----
 MIIE1jCCAz6gAwIBAgIBETANBgkqhkiG9w0BAQsFADBbMQ8wDQYDVQQDFAZDQV9W
 UE4xCzAJBgNVBAYTAkZSMQswCQYDVQQIEwJQTzESMBAGA1UEBxMJUGVycGlnbmFu
 MQ0wCwYDVQQKEwRDQkFPMQswCQYDVQQLEwJJVDAeFw0yNTAzMTMwODI5NTJaFw0z
 NTAzMTEwODI5NTJaMFoxDjAMBgNVBAMTBWZncmFzMQswCQYDVQQGEwJGUjELMAkG
 A1UECBMCUE8xEjAQBgNVBAcTCVBlcnBpZ25hbjENMAsGA1UEChMEQ0JBTzELMAkG
 A1UECxMCSVQwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQChIgHQkTkw
 QePtBYlMc0lFtL1AUQSX3k7BV41BjDVIBo/k3BF/TkNzFm+dpFRVFsgWP62XNRXO
 pIIujFVRNfmckjby8r3dcFdFVJjb0uimjAUxFf2tf5PPLgBojZX/6QVpcsApJQp9
 5D+oSVPudHqqCC2raBVW7ov9d9B+2GgBc9DwNd6gJplZNbUaxTgqwBrdC6sCO45/
 nzKdXSKan7cdfWY5N8VbYR89rzwdkBnoI7qp8csKONymcXIgypHjj1MC7LdvOAYW
 6/PV8DJuzFneQmClRBRQBrCIu0lyGvAys5drdzllBllZ6xjm/cr9l4ShHGfGWtr1
 D7FaJ4umVyflAgMBAAGjggEkMIIBIDAJBgNVHRMEAjAAMAsGA1UdDwQEAwIF4DAx
 BglghkgBhvhCAQ0EJBYiT3BlblNTTCBHZW5lcmF0ZWQgVXNlciBDZXJ0aWZpY2F0
 ZTAdBgNVHQ4EFgQUmb4rkgQoE6tQM2FxUpnPPcuc1A4wgYwGA1UdIwSBhDCBgYAU
 TY3cX6SZijhtrr+GiMX0Azkh9x+hX6RdMFsxDzANBgNVBAMUBkNBX1ZQTjELMAkG
 A1UEBhMCRlIxCzAJBgNVBAgTAlBPMRIwEAYDVQQHEwlQZXJwaWduYW4xDTALBgNV
 BAoTBENCQU8xCzAJBgNVBAsTAklUggh6N3Az7TR0tzATBgNVHSUEDDAKBggrBgEF
 BQcDAjAQBgNVHREECTAHggVmZ3JhczANBgkqhkiG9w0BAQsFAAOCAYEAPhSURAu3
 XNV+gV5r6OGHXZMRmgUNjpXyJVo5MjSEgnBBqRwGfIcBd0VpNfZnrWqy1DMROGkI
 g5aJ04Az5aD3CzUPfKcB8tAM4wT8+DteRZkGcMl7ZOqX++KoLsnA8AAejFinJ5FC
 ZFnb16r/HpOw4tEZQfvGZ/FS8IyU8urLPk1IgOIKAqD0xZQNh73eeQnCIeS7RMpS
 XVWDIGY/FlO+vBHqsusg3HlqRd4BQkxg22eKQFag01F9qCuu7VtRqiFH6G9RF0yU
 UqkOCJU5HR+0CnGhKVM7SKIinGxPB1XskgrHdlpUckf4rlSDKxnX5OD+ooPz70Ex
 L9hm0FmrHWVfCWjvZ0yDD8Sn2RslGT2XXZK57pRrh321SBFQDxIJgGlEmgBpjCaV
 OqTw67AxG0ay9R2dQjVz1P/iTaiWLaeCQnMfNpFX0H9P+XPigw0xy6zepW6bxRDg
 OYVgf+XV0yArTrnSJx9+/jW9xKRm+2DSRypxLWbZROJ5SErfpmGRX1NQ
 -----END CERTIFICATE-----
 </cert>
 <key>
 -----BEGIN PRIVATE KEY-----
 MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQChIgHQkTkwQePt
 BYlMc0lFtL1AUQSX3k7BV41BjDVIBo/k3BF/TkNzFm+dpFRVFsgWP62XNRXOpIIu
 jFVRNfmckjby8r3dcFdFVJjb0uimjAUxFf2tf5PPLgBojZX/6QVpcsApJQp95D+o
 SVPudHqqCC2raBVW7ov9d9B+2GgBc9DwNd6gJplZNbUaxTgqwBrdC6sCO45/nzKd
 XSKan7cdfWY5N8VbYR89rzwdkBnoI7qp8csKONymcXIgypHjj1MC7LdvOAYW6/PV
 8DJuzFneQmClRBRQBrCIu0lyGvAys5drdzllBllZ6xjm/cr9l4ShHGfGWtr1D7Fa
 J4umVyflAgMBAAECggEBAIMkZWu1tmDTT8lJ9zv0nI8SEfF1vxJHibuMIVhW38qW
 JKj0f30oWnchrIgWBdkW6JRLEWJ+fxgnBhkSeCHXlydaTUSgUe0XEMBJoPtQha9/
 SH5x0nxR2CAH5acyjQGySohPL5yNHCPoD/NMcvYhcPBQJbNay/trvR33VQbX5JZA
 vMtZ822PcnklWUV+JUyk4McW8sAOw7UTCYaJMObwjAdkvi0cTpXOC6kp2Cn9leXW
 Qvd6z26mXfk2gFgidkEKQkMNy5Np3/Nxssrth1+36WqQpMawZhWvrBtLnF21TUJW
 AuwOtAmaaTver38X81doLVKNjr/oV4MTKmBC8iklLMECgYEA06Z/SlznvQaTolS2
 hs9fu7En5jEgt+6b/chUUpRePPsCMG44T7CJNGtJ6rNnaJ3pC+zmvVh+xO9Sk6IB
 OWgyNJ5I/R5tXrSyTwSTJXoUEwq394mjG/b7nfcw0PrQumHbuFj/q6o3NIvNiDui
 KtfnzX4tVftaKCeOcEtJKuaznlECgYEAwuWciItvbmqaRHHAme5/yYYED4BsRbCY
 Kk8WOZ4NSGSfyMhonPXSpSuwF66yZmk864olSbWsTjXzGs13KX9NOeXCjUkohAY5
 tbps2CLoo/aDprjg4ZaWmXkFi6ceTzdoNIgSGf+Gro3P82fRPWLqcXfmWihPJh47
 3tvEVrKxZ1UCgYArgRrDD0u3CAYKpP0Lws57xxNbdpeyFwLLbIUgoEyqnjG6AL3k
 a3YYZ0E/U/cagvLnN5/KJcmQ81x26iL0SN2hATQhi0KR5/SK13bjii9cJqTf5dO4
 KNFZi/jly9hhp0HBp9GN2KQWUfJCYXeY9N452Ai7lrnWbSMTI/Z7MgaTsQKBgQCA
 GfOdPCjt5luniS0TAFQ13URl+/8ufzhE9t6g2GXao2jyy+cW4+yka+a+ajEQZzOH
 jbGclTC/5232u/4K5IWZ/I631tIulPjxnatVFPzcaHd36iTFofkyvv0KIbomT0DK
 5nUfaXjY2pVIY4CAXLfEBQ7/S2daopyviruuUJ2SUQKBgD1t4JloeG5yxXZmWLYY
 QIXAJifNx3/U64mcXoRWZQukxMe3NB169f5Kbu2aAWS0Y+wa6WTFp+q6UwkHOqOm
 dsFXc4IaYJICZQDpiuhqZi80c+TOJS5qUSHK5wKs4b1y2hxPeHNPoAWni+8292PB
 ICZWzYno1a07gWGJeBhWSmsm
 -----END PRIVATE KEY-----
 </key>
 key-direction 1
 <tls-auth>
 #
 # 2048 bit OpenVPN static key
 #
 -----BEGIN OpenVPN Static key V1-----
 ad7131884e4148812622148d14af6d13
 6ea1e84f92132eb8e5a18c56d908ddf2
 97ae8b7b47d47e673b78b660dc91dace
 7a97ce5934c20f49824bf8b7017b35ed
 3a2fd7af967b2af243c1ca9482bb34d1
 6e93478e695b9c86b29b5b79f472e67c
 740a09a341be6c2c1b13d4468e705768
 5dced700e2339a1e6f8b95165c869ec6
 9e24c48b4c9127ec99a9a9e7c7c63ac5
 aba05723e611ebbf76e343dcd1822b41
 245413c9398ee4b5d96087f3b360cb7b
 046576c1a45c3a804e9b958439c7f4cd
 2cc93666db736b8d9d62b522b61c6b01
 1d024d1b2c90fcee6c549a50e6e31062
 1e096b1d41ee6b22e53488c2d071429d
 e42476b191c7f9767e157aa9e45798b2
 -----END OpenVPN Static key V1-----
 </tls-auth>
--- a/results/README.md
+++ b/results/README.md
@ -0,0 +1,31 @@
 # Résultats des Traitements OCR
 Ce répertoire contient les résultats des différents traitements OCR effectués sur les images.
 ## Structure
 - `ocr_brut/` : Résultats de l'OCR basique (Tesseract)
  - Images prétraitées par les différents profils
  - Fichiers texte extraits des images
 - `ocr_avance/` : Résultats de l'OCR avancé (Llama Vision)
  - Images préparées pour le modèle Llama Vision
  - Texte extrait par le modèle avec structure enrichie
 ## Organisation des Fichiers
 ### OCR Brut
 - `preprocessed/[profil]/*.png` : Images prétraitées par profil
 - `optimized_*.png` : Version finale prétraitée
 - `ocr_*.txt` : Texte extrait par Tesseract
 ### OCR Avancé
 - `vision_ready_*.png` : Images préparées pour Llama Vision (672x672)
 - `ocr_*.txt` : Texte structuré extrait par le modèle
 ## Utilisation
 Ces résultats peuvent être utilisés pour :
 - Comparer les performances des différentes méthodes d'OCR
 - Vérifier les étapes de prétraitement des images
 - Fournir des données d'entraînement pour améliorer les dictionnaires de correction.
--- a/results/ocr_avance/vision_ready_image_145435.png
+++ b/results/ocr_avance/vision_ready_image_145435.png
--- a/results/ocr_brut/ocr_image_145435.png.txt
+++ b/results/ocr_brut/ocr_image_145435.png.txt
@ -0,0 +1,45 @@
 =
 [e)
 “
 giraudibrg-lsb.com/BRG-LAB/PAGE programmetssai/zE4AAHEVNGOAA
 BAGLAB C9 Béton C9 Fouméseu labo © Masse
 FAT :1 Essai au bleu de méthylène (MB) - NF EN 933-9 (02-2022)
 £E Victor
 Echantilion _n°25-00075 réceptionné le 02/04/2028 par BOLLÉE Victor - prélevé le 02/04/2025 por BOLLEE Victor, n° prélbvement : 2500078
 Matériau Sable 0/2 C - CARRIERE ADCEG
 V HISTORIQUE
 72
 [VU Essai [ve marée ][M For Uo |[v osservanons
 [
 ]
 NREGISTRER
 MPRMER
 le l'essai
 2025
 (RE GIRAUD
 les statistiques
 lessai
 A
 impossible de trouver adresse IP du serveur de zk1.brg-lab.com.
--- a/results/ocr_brut/optimized_image_145435.png
+++ b/results/ocr_brut/optimized_image_145435.png
--- a/results/ocr_brut/preprocessed/document/optimized_image_145435.png
+++ b/results/ocr_brut/preprocessed/document/optimized_image_145435.png
--- a/test_agent_ocr.py
+++ b/test_agent_ocr.py
@ -1,6 +1,11 @@
 from agents.llama_vision.agent_vision_ocr import AgentVisionOCR
 from llm_classes.llama_vision import LlamaVision
 import json
 import os
 # Créer les répertoires de résultats s'ils n'existent pas
 os.makedirs("results/ocr_brut", exist_ok=True)
 os.makedirs("results/ocr_avance", exist_ok=True)
 # Instanciation du modèle
 model = LlamaVision()
@ -17,3 +22,7 @@ image_path = "output/ticket_T11143/T11143_20250422_084617/attachments/image_1454
 res = agent.executer(image_path)
 print(f"\n🔵 Résultat pour l'image {image_path}:")
 print(f"Texte extrait:\n{res['extracted_text']}\n")
 # Affichage des chemins de sortie
 print(f"✅ Résultats OCR brut sauvegardés dans: results/ocr_brut/")
 print(f"✅ Résultats OCR avancé sauvegardés dans: results/ocr_avance/")
--- a/utils/init.py
+++ b/utils/init.py
@ -5,5 +5,21 @@ Package utils contenant des utilitaires pour le traitement d'images et autres fo
 # Pas besoin de préfixer avec 'utils.' quand on est déjà dans le package
 from .image_dedup import filtrer_images_uniques
 # Compatibilité avec les anciens imports après la réorganisation des fichiers OCR
 from utils.ocr_brut import extraire_texte, extraire_texte_fr, preprocess_image, preprocess_image_with_profile, PREPROCESSING_PROFILES
 from utils.ocr_avance import prepare_image_for_llama_vision, clean_text_with_profiles
 # Exposer les fonctions principales
-__all__ = ['filtrer_images_uniques'] 
+__all__ = [
    # OCR Brut (Tesseract)
    'extraire_texte',
    'extraire_texte_fr',
    'preprocess_image',
    'preprocess_image_with_profile',
    'PREPROCESSING_PROFILES',
    # OCR Avancé (Llama Vision)
    'prepare_image_for_llama_vision',
    'clean_text_with_profiles',
    'filtrer_images_uniques'
 ] 
--- a/utils/image_preparer.py
+++ b/utils/image_preparer.py
@ -1,5 +1,12 @@
 """
 !!! FICHIER OBSOLÈTE !!!
 Ce fichier est maintenu uniquement pour la compatibilité.
 Veuillez utiliser les modules dans utils/ocr_avance/ à la place.
 """
 from PIL import Image, ImageOps
 from pathlib import Path
 import os
 BICUBIC = Image.Resampling.BICUBIC  # Nouvelle façon d'accéder à BICUBIC
--- a/utils/ocr_avance/README.md
+++ b/utils/ocr_avance/README.md
@ -0,0 +1,32 @@
 # Module OCR Avancé (Llama Vision)
 Ce module contient les outils pour l'extraction de texte avancée utilisant le modèle Llama Vision.
 ## Fichiers principaux
 - `image_preparer.py` : Prépare les images pour être utilisées avec le modèle Llama Vision
 - `ocr_cleaner.py` : Nettoie et améliore le texte extrait par l'OCR
 - Dictionnaires de nettoyage :
  - `ocr_clean_dict.json` : Corrections de base pour l'OCR
  - `hallucination_filter.json` : Filtres pour les hallucinations du modèle
  - `translation_clean_dict.json` : Corrections pour les erreurs de traduction
 ## Utilisation
 ```python
 from utils.ocr_avance import prepare_image_for_llama_vision, clean_text_with_profiles
 # Préparation d'une image pour Llama Vision
 chemin_image_preparee = prepare_image_for_llama_vision("chemin/vers/image.jpg")  # retourne le chemin du PNG généré
 # Nettoyage du texte extrait
 texte_propre = clean_text_with_profiles(texte_brut, active_profiles=("ocr", "hallucination"))
 ```
 ## Résultats
 Les images prétraitées et les résultats de l'OCR sont sauvegardés dans le répertoire `results/ocr_avance/`.
 ## Intégration avec l'agent
 Ce module est utilisé par l'agent `AgentVisionOCR` situé dans `agents/llama_vision/agent_vision_ocr.py`. 
--- a/utils/ocr_avance/init.py
+++ b/utils/ocr_avance/init.py
@ -0,0 +1,8 @@
 from utils.ocr_avance.image_preparer import prepare_image_for_llama_vision
 from utils.ocr_avance.ocr_cleaner import clean_text_with_profiles
 # Compatibilité rétroactive
 __all__ = [
    'prepare_image_for_llama_vision',
    'clean_text_with_profiles'
 ] 
--- a/utils/ocr_avance/hallucination_filter.json
+++ b/utils/ocr_avance/hallucination_filter.json
@ -0,0 +1,10 @@
 {
    "The following information is fictional": "",
    "This content is autogenerated and may not reflect reality": "",
    "Lorem ipsum": "",
    "As an AI language model": "",
    "Note: The above is a sample output": "",
    "BRG-LAB is a fictional laboratory": "BRG-LAB",
    "This is a placeholder text": ""
  }
--- a/utils/ocr_avance/image_preparer.py
+++ b/utils/ocr_avance/image_preparer.py
@ -0,0 +1,32 @@
 from PIL import Image, ImageOps
 from pathlib import Path
 import os
 from typing import Optional
 BICUBIC = Image.Resampling.BICUBIC  # Nouvelle façon d'accéder à BICUBIC
 def prepare_image_for_llama_vision(input_path: str, output_path: Optional[str] = None, size=(672, 672)) -> str:
    """Prepare an image for the Llama Vision model and save it as PNG.

    The image is converted to RGB, reduced to fit within ``size`` while
    keeping its aspect ratio, then centred on a white canvas of exactly
    ``size``.

    Args:
        input_path: Path of the source image.
        output_path: Destination of the PNG. When ``None``, the file is
            written to ``results/ocr_avance/vision_ready_<stem>.png``.
        size: ``(width, height)`` of the output canvas.

    Returns:
        The path the PNG was written to.
    """
    # No explicit destination: fall back to the default results directory.
    if output_path is None:
        image_name = Path(input_path).stem
        output_path = f"results/ocr_avance/vision_ready_{image_name}.png"
    # Create the destination directory for explicit paths too, so that
    # save() does not fail on a directory that does not exist yet.
    target_dir = os.path.dirname(output_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    # convert() returns an independent, loaded copy, so the source file
    # can be closed as soon as the block exits.
    with Image.open(input_path) as source:
        img = source.convert("RGB")
    # Resize in place, preserving the aspect ratio.
    img.thumbnail(size, Image.Resampling.BICUBIC)
    # Pad with white to reach exactly `size`, image centred.
    canvas = Image.new("RGB", size, (255, 255, 255))
    offset = ((size[0] - img.width) // 2, (size[1] - img.height) // 2)
    canvas.paste(img, offset)
    canvas.save(output_path, format="PNG", optimize=True)
    return output_path
--- a/utils/ocr_avance/ocr_clean_dict.json
+++ b/utils/ocr_avance/ocr_clean_dict.json
@ -0,0 +1,8 @@
 {
    "zkt1.brg-lab.com": "zk1.brg-lab.com",
    "ADEO": "ADCEG",
    "ADEIG": "ADCEG",
    "RA.Z.": "RAZ",
    "NF EN 9933-9": "NF EN 933-9"
  }
--- a/utils/ocr_avance/ocr_cleaner.py
+++ b/utils/ocr_avance/ocr_cleaner.py
@ -0,0 +1,59 @@
 import json
 from pathlib import Path
 # Available cleaning dictionaries: profile key -> JSON file name.
 # The keys are the profile names that can be activated.
 CLEAN_DICT_FILES = {
    "ocr": "ocr_clean_dict.json",
    "translation": "translation_clean_dict.json",
    "hallucination": "hallucination_filter.json"
 }
 # Root directory of all dictionary files (the directory of this module).
 BASE_PATH = Path(__file__).parent
 def load_cleaning_dict(path):
    """Load a JSON cleaning dictionary.

    Returns the parsed content of ``path`` (a ``pathlib.Path``), or an
    empty dict when the file does not exist.
    """
    if path.exists():
        with open(path, "r", encoding="utf-8") as handle:
            return json.loads(handle.read())
    return {}
 def load_multiple_dicts(active_keys):
    """Load and merge the dictionaries of the selected profiles.

    Profiles are processed in the given order; when two dictionaries
    share a key, the later profile wins. Keys that are not in
    CLEAN_DICT_FILES are ignored.
    """
    combined = {}
    for profile in active_keys:
        dict_file = CLEAN_DICT_FILES.get(profile)
        if not dict_file:
            continue  # unknown profile: nothing to load
        combined.update(load_cleaning_dict(BASE_PATH / dict_file))
    return combined
 def clean_ocr_text(text, cleaning_dict=None):
    """Apply the corrections of a dictionary to a text.

    Each key of ``cleaning_dict`` is replaced by its value wherever it
    occurs as a substring (plain ``str.replace``, no word boundaries),
    following the dictionary's iteration order.

    Args:
        text: Text to clean. Empty or ``None`` text is returned as is.
        cleaning_dict: Mapping ``wrong -> correct``. ``None`` or an empty
            mapping leaves the text untouched.

    Returns:
        The corrected text.
    """
    if not text or not cleaning_dict:
        return text
    for wrong, correct in cleaning_dict.items():
        # An empty key would make str.replace insert `correct` between
        # every character of the text, so such entries are skipped.
        if not wrong:
            continue
        text = text.replace(wrong, correct)
    return text
 def clean_text_with_profiles(text, active_profiles=("ocr",)):
    """Clean a text using one or more activated profiles.

    The dictionaries of ``active_profiles`` are merged, then applied to
    ``text``. Possible profiles: "ocr", "translation", "hallucination".
    """
    return clean_ocr_text(text, load_multiple_dicts(active_profiles))
 def add_to_cleaning_dict(wrong, correct, profile="ocr"):
    """Add an (error, correction) pair to the dictionary of one profile.

    The profile's JSON file is loaded, updated with ``wrong -> correct``
    and written back in full.

    Args:
        wrong: Erroneous string to look for. Must not be empty.
        correct: Replacement string.
        profile: Key of CLEAN_DICT_FILES selecting the dictionary file.

    Raises:
        ValueError: If ``profile`` is unknown or ``wrong`` is empty.
    """
    filename = CLEAN_DICT_FILES.get(profile)
    if not filename:
        raise ValueError(f"Profil inconnu : {profile}")
    if not wrong:
        # An empty key stored in the dictionary would later make
        # str.replace insert the correction between every character.
        raise ValueError("La chaîne à corriger ne peut pas être vide")
    path = BASE_PATH / filename
    data = load_cleaning_dict(path)
    data[wrong] = correct
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
--- a/utils/ocr_avance/translation_clean_dict.json
+++ b/utils/ocr_avance/translation_clean_dict.json
@ -0,0 +1,12 @@
 {
    "bearing capacity": "capacité portante",
    "liquid limit": "limite de liquidité",
    "air voids": "vides d'air",
    "CEMENT": "ciment",
    "AGGREGATE": "granulat",
    "IT IS NOT RELEVANT": "NON APPLICABLE",
    "SPECIMEN": "ÉCHANTILLON",
    "trial mixture": "mélange d'essai",
    "test": "essai"
  }
--- a/utils/ocr_brut/README.md
+++ b/utils/ocr_brut/README.md
@ -0,0 +1,30 @@
 # Module OCR Brut (Tesseract)
 Ce module contient les outils pour l'extraction de texte basique à partir d'images en utilisant Tesseract OCR.
 ## Fichiers principaux
 - `ocr_utils.py` : Fonctions principales d'extraction de texte avec Tesseract
 - `ocr_preprocessor.py` : Prétraitement d'images pour améliorer la qualité de l'OCR
 ## Configuration par défaut
 - Profil de prétraitement : "document"
 - PSM (Page Segmentation Mode) : 11 (Texte sparse)
 - OEM (OCR Engine Mode) : 3 (par défaut)
 ## Utilisation
 ```python
 from utils.ocr_brut import extraire_texte, extraire_texte_fr
 # OCR multilingue avec détection automatique
 texte, image_optimisee = extraire_texte("chemin/vers/image.jpg")
 # OCR français optimisé
 texte_fr = extraire_texte_fr("chemin/vers/image.jpg")
 ```
 ## Résultats
 Les résultats du prétraitement et de l'OCR sont sauvegardés dans le répertoire `results/ocr_brut/`. 
--- a/utils/ocr_brut/init.py
+++ b/utils/ocr_brut/init.py
@ -0,0 +1,11 @@
 from utils.ocr_brut.ocr_utils import extraire_texte, extraire_texte_fr
 from utils.ocr_brut.ocr_preprocessor import preprocess_image, preprocess_image_with_profile, PREPROCESSING_PROFILES
 # Compatibilité rétroactive
 __all__ = [
    'extraire_texte', 
    'extraire_texte_fr',
    'preprocess_image', 
    'preprocess_image_with_profile',
    'PREPROCESSING_PROFILES'
 ] 
--- a/utils/ocr_brut/ocr_preprocessor.py
+++ b/utils/ocr_brut/ocr_preprocessor.py
@ -0,0 +1,159 @@
 # ocr_preprocessor.py
 import os
 from PIL import Image, ImageEnhance
 import cv2
 import numpy as np
 import time
 # === OCR IMAGE PREPROCESSING PROFILES ===
 # Each entry maps a profile name to the keyword settings passed to
 # preprocess_image() by preprocess_image_with_profile().
 PREPROCESSING_PROFILES = {
    "default": {
        "resize_min_dim": 1000,        # If the smaller side is below this many pixels, the image is upscaled proportionally
        "enhance_contrast": True,      # Enables contrast enhancement
        "contrast_factor": 1.2,        # >1 = more contrast, typical: 1.2 to 1.8
        "enhance_sharpness": False,    # Enables sharpening
        "sharpness_factor": 1.0,       # >1 = sharper, typical: 1.2 to 2.0
        "apply_denoising": False,      # Noise reduction
        "denoise_strength": {
            "h": 0,                    # 0 to 15: luminance smoothing strength
            "hColor": 0,               # 0 to 15: chroma smoothing
            "templateWindowSize": 7,   # Size of the patch being compared (typically 7)
            "searchWindowSize": 21     # Search area around the patch (typically 21)
        },
        "invert_colors": False,        # Inversion, for light text on a dark background
        "apply_clahe": False,          # Local contrast equalisation (useful with very dark/bright areas)
        "save_debug_output": True,
        "debug_output_dir": "results/ocr_brut/preprocessed"
    },
    "aggressive": {
        "resize_min_dim": 1400,
        "enhance_contrast": True,
        "contrast_factor": 1.8,
        "enhance_sharpness": True,
        "sharpness_factor": 1.5,
        "apply_denoising": True,
        "denoise_strength": {
            "h": 10,
            "hColor": 10,
            "templateWindowSize": 7,
            "searchWindowSize": 21
        },
        "invert_colors": False,
        "apply_clahe": False,
        "save_debug_output": True,
        "debug_output_dir": "results/ocr_brut/preprocessed"
    },
    "document": {
        "resize_min_dim": 1100,
        "enhance_contrast": True,
        "contrast_factor": 1.2,
        "enhance_sharpness": False,
        "sharpness_factor": 1.0,
        "apply_denoising": False,
        "denoise_strength": {"h": 0, "hColor": 0, "templateWindowSize": 7, "searchWindowSize": 21},
        "invert_colors": False,
        "apply_clahe": False,
        "save_debug_output": True,
        "debug_output_dir": "results/ocr_brut/preprocessed"
    },
    "clahe_high": {
        "resize_min_dim": 1200,
        "enhance_contrast": True,
        "contrast_factor": 1.4,
        "enhance_sharpness": True,
        "sharpness_factor": 1.3,
        "apply_denoising": True,
        "denoise_strength": {
            "h": 7,
            "hColor": 7,
            "templateWindowSize": 7,     # Size of the local patch used
            "searchWindowSize": 21       # Search area of the filter
        },
        "invert_colors": False,
        "apply_clahe": True,
        "save_debug_output": True,
        "debug_output_dir": "results/ocr_brut/preprocessed"
    },
    # This profile defines no "denoise_strength"; denoising is disabled for it.
    "invert_light": {
        "resize_min_dim": 1200,
        "enhance_contrast": True,
        "contrast_factor": 1.3,
        "enhance_sharpness": True,
        "sharpness_factor": 1.4,
        "apply_denoising": False,
        "invert_colors": True,
        "apply_clahe": False,
        "save_debug_output": True,
        "debug_output_dir": "results/ocr_brut/preprocessed"
    }
 }
 def preprocess_image(image_path: str, **settings) -> Image.Image:
    """Preprocess an image for OCR according to keyword settings.

    Steps, applied in this order when enabled: upscale, contrast,
    sharpness, denoising, CLAHE, colour inversion. The result can also be
    written to a per-profile debug directory.

    Args:
        image_path: Path of the image to load.
        **settings: Processing options. Recognised keys:
            ``resize_min_dim``, ``enhance_contrast``, ``contrast_factor``,
            ``enhance_sharpness``, ``sharpness_factor``,
            ``apply_denoising``, ``denoise_strength``, ``apply_clahe``,
            ``invert_colors``, ``save_debug_output``,
            ``debug_output_dir`` and ``profile_name``. Missing keys fall
            back to the defaults used below.

    Returns:
        The preprocessed image as an RGB PIL image.
    """
    # convert() returns a loaded copy, so the file handle can be released
    # as soon as the block exits.
    with Image.open(image_path) as source:
        img = source.convert("RGB")
    # Upscale when the smaller side is below the requested minimum.
    min_target = settings.get("resize_min_dim", 0)
    if min_target > 0:
        width, height = img.size
        min_dim = min(width, height)
        if min_dim < min_target:
            scale = min_target / min_dim
            new_size = (int(width * scale), int(height * scale))
            img = img.resize(new_size, Image.Resampling.BICUBIC)
    # Contrast
    if settings.get("enhance_contrast", False):
        enhancer = ImageEnhance.Contrast(img)
        img = enhancer.enhance(settings.get("contrast_factor", 1.5))
    # Sharpness
    if settings.get("enhance_sharpness", False):
        enhancer = ImageEnhance.Sharpness(img)
        img = enhancer.enhance(settings.get("sharpness_factor", 1.5))
    # The remaining steps work on an OpenCV (BGR) array.
    img_cv = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    # Denoising
    if settings.get("apply_denoising", False):
        strength = settings.get("denoise_strength", {})
        img_cv = cv2.fastNlMeansDenoisingColored(
            img_cv,
            None,
            h=strength.get("h", 10),
            hColor=strength.get("hColor", 10),
            templateWindowSize=strength.get("templateWindowSize", 7),
            searchWindowSize=strength.get("searchWindowSize", 21)
        )
    # CLAHE, applied to the L channel of the LAB representation.
    if settings.get("apply_clahe", False):
        lab = cv2.cvtColor(img_cv, cv2.COLOR_BGR2LAB)
        l, a, b = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        cl = clahe.apply(l)
        img_cv = cv2.merge((cl, a, b))
        img_cv = cv2.cvtColor(img_cv, cv2.COLOR_LAB2BGR)
    # Inversion
    if settings.get("invert_colors", False):
        img_cv = cv2.bitwise_not(img_cv)
    # Save the preprocessed image (debug). The directory is created only
    # here, so nothing is written to disk when debug output is disabled.
    if settings.get("save_debug_output", False):
        debug_dir = settings.get("debug_output_dir", "results/ocr_brut/preprocessed")
        profile_name = settings.get("profile_name", "default")
        debug_profile_dir = os.path.join(debug_dir, profile_name)
        os.makedirs(debug_profile_dir, exist_ok=True)
        base_name = os.path.basename(image_path)
        debug_path = os.path.join(debug_profile_dir, f"optimized_{base_name}")
        cv2.imwrite(debug_path, img_cv)
    return Image.fromarray(cv2.cvtColor(img_cv, cv2.COLOR_BGR2RGB))
 def preprocess_image_with_profile(image_path: str, profile_name="default") -> Image.Image:
    """Run preprocess_image with the settings of a named profile.

    The profile's settings are copied, so the shared
    PREPROCESSING_PROFILES entry is not modified, and the profile name is
    added to them under the ``profile_name`` key. An unknown
    ``profile_name`` raises ``KeyError``.
    """
    profile_settings = dict(PREPROCESSING_PROFILES[profile_name], profile_name=profile_name)
    return preprocess_image(image_path, **profile_settings)
--- a/utils/ocr_brut/ocr_utils.py
+++ b/utils/ocr_brut/ocr_utils.py
@ -0,0 +1,103 @@
 import pytesseract
 import cv2
 import numpy as np
 import os
 from pathlib import Path
 from PIL import Image
 from langdetect import detect
 import re
 from utils.ocr_brut.ocr_preprocessor import preprocess_image_with_profile
 # ⬇️ CENTRAL FEATURE TOGGLES ⬇️
 USE_PREPROCESSING = True         # Run the image through ocr_preprocessor.py before OCR
 USE_TEXT_CORRECTION = True       # Fix truncated words after OCR
 SAVE_DEBUG_OUTPUT = True        # Save the processed image and OCR text under results/ocr_brut/
 AUTO_DETECT_LANGUAGE = True      # Detect the language automatically
 # Tesseract settings found optimal in tests
 OCR_DEFAULT_PSM = 11             # Page segmentation mode 11 (sparse text)
 OCR_DEFAULT_OEM = 3              # OCR Engine mode 3 (default, based on what is available)
 OCR_DEFAULT_PROFILE = "document"  # Preprocessing profile found optimal in tests
 # Truncated-word completion (rudimentary but useful)
 def completer_mots_tronques(texte):
    """Repair words that were hyphen-split across line breaks.

    A line whose stripped content ends with ``-`` has that hyphen (and its
    surrounding whitespace) removed. When the remaining fragment ends with a
    letter and the following line starts with a lowercase letter, the two are
    treated as halves of one word and merged onto a single line. In every
    other case the fragment stays on its own line, so only the hyphen is
    dropped.

    Args:
        texte: Raw OCR text, possibly multi-line.

    Returns:
        The corrected text, with lines joined by ``"\\n"`` (a trailing
        newline in the input is not preserved).
    """
    lignes_corrigees = []
    fragment = None  # hyphen-stripped line still waiting for its continuation
    for ligne in texte.splitlines():
        if fragment is not None:
            suite = ligne.lstrip()
            # Merge only when it really looks like a split word; otherwise
            # keep the fragment as its own line.
            if fragment[-1:].isalpha() and suite[:1].islower():
                ligne = fragment + suite
            else:
                lignes_corrigees.append(fragment)
            fragment = None
        if ligne.strip().endswith("-"):
            fragment = ligne.strip()[:-1]
        else:
            lignes_corrigees.append(ligne)
    # A hyphenated last line has nothing to merge with.
    if fragment is not None:
        lignes_corrigees.append(fragment)
    return "\n".join(lignes_corrigees)
 # Automatic language detection (when enabled)
 def detect_language_tesseract(image_cv):
    """Guess the Tesseract language code to use for an image.

    Runs a quick OCR pass (``--psm 6``) on the image and passes the extracted
    text to ``langdetect.detect``.

    Args:
        image_cv: Image object handed to ``pytesseract.image_to_string``.

    Returns:
        ``"fra"`` or ``"eng"`` when French or English is detected, otherwise
        ``"fra+eng"`` (also used when the OCR pass or detection fails).
    """
    try:
        text_sample = pytesseract.image_to_string(image_cv, config="--psm 6")
        lang = detect(text_sample)
    except Exception:
        # Best-effort detection: any OCR/langdetect failure falls back to both
        # languages. KeyboardInterrupt/SystemExit are deliberately not caught.
        return "fra+eng"
    return {
        "fr": "fra",
        "en": "eng"
    }.get(lang, "fra+eng")
 # Main OCR entry point
 def extraire_texte(image_path, lang="auto"):
    """Run Tesseract OCR on an image file and return the text and the image used.

    Steps: optional preprocessing with ``OCR_DEFAULT_PROFILE``, optional
    language detection, OCR with the default PSM/OEM, optional truncated-word
    correction, and an optional debug dump to ``results/ocr_brut``.

    Args:
        image_path: Path of the image to read.
        lang: Tesseract language code, or ``"auto"`` to detect it (falls back
            to ``"fra+eng"`` when detection is disabled).

    Returns:
        ``(text, image)``. ``("", None)`` when the file does not exist.
        The image is the PIL image that was OCR'd, unless the debug dump ran,
        in which case it has been converted to a NumPy array (BGR for 3- or
        4-channel images).
    """
    # Guard: nothing to do without a file on disk.
    if not os.path.exists(image_path):
        print(f"[OCR] Image non trouvée: {image_path}")
        return "", None

    # Image to feed to Tesseract: preprocessed with the default profile, or raw.
    img_optimized = (
        preprocess_image_with_profile(image_path, profile_name=OCR_DEFAULT_PROFILE)
        if USE_PREPROCESSING
        else Image.open(image_path)
    )

    # Resolve the language code.
    wants_detection = lang == "auto" and AUTO_DETECT_LANGUAGE
    ocr_lang = detect_language_tesseract(img_optimized) if wants_detection else lang
    if ocr_lang == "auto":
        ocr_lang = "fra+eng"

    # OCR pass with the default settings.
    config = f"--psm {OCR_DEFAULT_PSM} --oem {OCR_DEFAULT_OEM} -l {ocr_lang}"
    texte = pytesseract.image_to_string(img_optimized, config=config)

    # Accept the correction only if it kept at least 90% of the text length.
    if USE_TEXT_CORRECTION:
        candidat = completer_mots_tronques(texte)
        if len(candidat) >= len(texte) * 0.9:
            texte = candidat

    # The debug dump is skipped when disabled or when OCR produced no text.
    if not (SAVE_DEBUG_OUTPUT and texte):
        return texte, img_optimized

    try:
        debug_dir = "results/ocr_brut"
        os.makedirs(debug_dir, exist_ok=True)
        image_name = Path(image_path).stem

        # cv2.imwrite needs an array; PIL images are converted to BGR order.
        if isinstance(img_optimized, Image.Image):
            img_optimized = np.array(img_optimized)
            if img_optimized.ndim == 3:
                conversion = {
                    3: cv2.COLOR_RGB2BGR,
                    4: cv2.COLOR_RGBA2BGR,
                }.get(img_optimized.shape[2])
                if conversion is not None:
                    img_optimized = cv2.cvtColor(img_optimized, conversion)

        if isinstance(img_optimized, np.ndarray):
            cv2.imwrite(f"{debug_dir}/optimized_{image_name}.png", img_optimized)

        with open(f"{debug_dir}/ocr_{image_name}.png.txt", "w", encoding="utf-8") as sortie:
            sortie.write(texte)
    except Exception as e:
        # Debug output is best-effort; a failure here must not break OCR.
        print(f"[OCR DEBUG] Erreur de sauvegarde debug: {e}")

    return texte, img_optimized
 # Convenience shortcut: French-only OCR that returns just the text
 def extraire_texte_fr(image_path):
    """Return only the OCR text of ``image_path``, forcing the ``"fra"`` language.

    Thin wrapper around ``extraire_texte`` that discards the returned image.
    """
    return extraire_texte(image_path, lang="fra")[0]
--- a/utils/ocr_cleaner.py
+++ b/utils/ocr_cleaner.py
@ -1,3 +1,9 @@
 """
 !!! FICHIER OBSOLÈTE !!!
 Ce fichier est maintenu uniquement pour la compatibilité.
 Veuillez utiliser les modules dans utils/ocr_avance/ à la place.
 """
 import json
 from pathlib import Path
@ -30,7 +36,7 @@ def load_multiple_dicts(active_keys):
    return merged_dict
 def clean_ocr_text(text, cleaning_dict=None):
-    """Applique les corrections d’un dictionnaire sur un texte."""
+    """Applique les corrections d'un dictionnaire sur un texte."""
    if cleaning_dict is None:
        return text
    for wrong, correct in cleaning_dict.items():
--- a/utils/ocr_preprocessor.py
+++ b/utils/ocr_preprocessor.py
@ -1,3 +1,9 @@
 """
 !!! FICHIER OBSOLÈTE !!!
 Ce fichier est maintenu uniquement pour la compatibilité.
 Veuillez utiliser les modules dans utils/ocr_brut/ à la place.
 """
 # ocr_preprocessor.py
 import os
--- a/utils/ocr_utils.py
+++ b/utils/ocr_utils.py
@ -1,3 +1,9 @@
 """
 !!! FICHIER OBSOLÈTE !!!
 Ce fichier est maintenu uniquement pour la compatibilité.
 Veuillez utiliser les modules dans utils/ocr_brut/ à la place.
 """
 import pytesseract
 import cv2
 import numpy as np
@ -6,7 +12,7 @@ from pathlib import Path
 from PIL import Image
 from langdetect import detect
 import re
-from utils.ocr_preprocessor import preprocess_image_with_profile
+from utils.ocr_brut.ocr_preprocessor import preprocess_image_with_profile
 # ⬇️ PARAMÈTRES CENTRAUX D'ACTIVATION ⬇️
 USE_PREPROCESSING = True         # Active le prétraitement de l'image via ocr_preprocessor.py