mirror of
https://github.com/Ladebeze66/llm_ticket3.git
synced 2025-12-13 09:06:51 +01:00
2304-14:27triok
This commit is contained in:
parent
06b31b8663
commit
3190c5dd02
@ -6,15 +6,15 @@ from datetime import datetime
|
||||
|
||||
from ..base_agent import BaseAgent
|
||||
from ..utils.pipeline_logger import sauvegarder_donnees
|
||||
from ..utils.ocr_utils import extraire_texte_fr
|
||||
from ..utils.translate_utils import fr_to_en, en_to_fr, sauvegarder_ocr_traduction
|
||||
from utils.ocr_utils import extraire_texte_fr
|
||||
from utils.translate_utils import fr_to_en, en_to_fr, sauvegarder_ocr_traduction
|
||||
|
||||
logger = logging.getLogger("AgentImageSorter")
|
||||
|
||||
class AgentImageSorter(BaseAgent):
|
||||
"""
|
||||
Agent de tri d’image optimisé pour llama_vision.
|
||||
Réalise un OCR en français, le traduit en anglais, génère un prompt enrichi, et analyse l’image.
|
||||
Agent de tri d'image optimisé pour llama_vision.
|
||||
Réalise un OCR en français, le traduit en anglais, génère un prompt enrichi, et analyse l'image.
|
||||
"""
|
||||
|
||||
def __init__(self, llm):
|
||||
@ -46,24 +46,36 @@ class AgentImageSorter(BaseAgent):
|
||||
|
||||
def _generer_prompt(self, ocr_fr: str, ocr_en: str) -> str:
|
||||
return (
|
||||
"[ENGLISH RESPONSE REQUESTED]\n\n"
|
||||
"The following image is from a technical support ticket at CBAO "
|
||||
"for the BRG_Lab software system.\n\n"
|
||||
f"OCR detected French text:\n[FR] {ocr_fr or '—'}\n[EN] {ocr_en or '—'}\n\n"
|
||||
"Please analyze the image and determine:\n"
|
||||
"- Is it relevant for a technical support issue?\n"
|
||||
"- Answer only 'oui' or 'non', then briefly explain in French."
|
||||
"- Answer only 'yes' or 'no', then briefly explain why in English."
|
||||
)
|
||||
|
||||
def _analyser_reponse(self, response: str) -> Tuple[bool, str]:
|
||||
r = response.lower()
|
||||
first_line = r.split('\n')[0] if '\n' in r else r.strip()[:50]
|
||||
if first_line.startswith("non"):
|
||||
|
||||
# Détection pour réponses en anglais
|
||||
if first_line.startswith("yes"):
|
||||
return True, response.strip()
|
||||
if first_line.startswith("no"):
|
||||
return False, response.strip()
|
||||
|
||||
# Détection pour réponses en français (fallback)
|
||||
if first_line.startswith("oui"):
|
||||
return True, response.strip()
|
||||
if first_line.startswith("non"):
|
||||
return False, response.strip()
|
||||
|
||||
pos_keywords = ["pertinent", "utile", "interface", "message", "diagnostic"]
|
||||
neg_keywords = ["inutile", "photo", "irrelevant", "hors sujet"]
|
||||
# Analyse basée sur mots-clés (anglais et français)
|
||||
pos_keywords = ["pertinent", "utile", "interface", "message", "diagnostic",
|
||||
"useful", "relevant", "interface", "message", "diagnostic", "helpful"]
|
||||
neg_keywords = ["inutile", "photo", "irrelevant", "hors sujet",
|
||||
"useless", "irrelevant", "unrelated", "not relevant"]
|
||||
score = sum(kw in r for kw in pos_keywords) - sum(kw in r for kw in neg_keywords)
|
||||
return score > 0, response.strip()
|
||||
|
||||
@ -86,10 +98,37 @@ class AgentImageSorter(BaseAgent):
|
||||
return datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
|
||||
def _extraire_ticket_id_depuis_path(self, image_path: str) -> str:
|
||||
parts = image_path.split(os.sep)
|
||||
"""
|
||||
Extrait l'ID du ticket depuis le chemin de l'image.
|
||||
Cherche d'abord dans les segments du chemin, puis examine la structure de dossiers.
|
||||
Supporte les formats comme:
|
||||
- output/ticket_T12345/T12345_date/...
|
||||
- .../T12345/...
|
||||
|
||||
Args:
|
||||
image_path: Chemin de l'image
|
||||
|
||||
Returns:
|
||||
ID du ticket ou "unknown_ticket" si non trouvé
|
||||
"""
|
||||
# Normaliser le chemin pour éviter les problèmes de séparateurs
|
||||
norm_path = os.path.normpath(image_path)
|
||||
parts = norm_path.split(os.sep)
|
||||
|
||||
# Première passe: chercher directement un segment "Txxxx"
|
||||
for part in parts:
|
||||
if part.startswith("T") and part[1:].isdigit():
|
||||
return part
|
||||
|
||||
# Deuxième passe: chercher dans la structure de dossiers output/ticket_Txxxx/
|
||||
for i, part in enumerate(parts):
|
||||
if part == "output" and i+1 < len(parts):
|
||||
next_part = parts[i+1]
|
||||
if next_part.startswith("ticket_T"):
|
||||
return next_part.replace("ticket_", "")
|
||||
|
||||
# Pas d'ID de ticket trouvé
|
||||
logger.warning(f"Impossible d'extraire l'ID de ticket depuis le chemin: {image_path}")
|
||||
return "unknown_ticket"
|
||||
|
||||
def executer(self, image_path: str) -> Dict[str, Any]:
|
||||
@ -107,7 +146,7 @@ class AgentImageSorter(BaseAgent):
|
||||
ocr_en_back_fr = en_to_fr(ocr_en) if ocr_en else ""
|
||||
|
||||
# Sauvegarde OCR + Traductions
|
||||
sauvegarder_ocr_traduction(image_path, ticket_id, ocr_fr, ocr_en, ocr_en_back_fr)
|
||||
sauvegarder_ocr_traduction(image_path, ticket_id, ocr_fr, ocr_en, ocr_en_back_fr, base_dir=None)
|
||||
|
||||
# Prompt en anglais enrichi
|
||||
prompt = self._generer_prompt(ocr_fr, ocr_en)
|
||||
@ -143,7 +182,7 @@ class AgentImageSorter(BaseAgent):
|
||||
}
|
||||
}
|
||||
|
||||
sauvegarder_donnees(ticket_id, "tri_image", result, base_dir="reports", is_resultat=True)
|
||||
sauvegarder_donnees(ticket_id, "tri_image", result, base_dir=None, is_resultat=True)
|
||||
self.ajouter_historique("tri_image", {"image_path": image_path, "prompt": prompt}, result)
|
||||
return result
|
||||
|
||||
|
||||
106
check_tesseract.py
Normal file
106
check_tesseract.py
Normal file
@ -0,0 +1,106 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import sys
|
||||
import os
|
||||
import subprocess
|
||||
import pytesseract
|
||||
from PIL import Image
|
||||
import io
|
||||
|
||||
def check_tesseract_installation():
    """
    Print a diagnostic report about the local Tesseract-OCR setup.

    The report covers, in order: the pytesseract module and its version, the
    configured tesseract command and whether that binary can be found, the
    version reported by the binary, the installed language packs (French in
    particular), and a small end-to-end OCR run on a generated image.

    Nothing is returned and no exception is raised for a failed check; each
    failure is reported on standard output and the next check still runs.
    """
    import shutil  # local import: used only for the PATH lookup below

    print("=== VÉRIFICATION DE L'INSTALLATION TESSERACT ===\n")

    # Report item 1: the pytesseract module.
    # NOTE: this function references pytesseract as a module-level name, so
    # the membership test can only ever print "INSTALLÉ" once we get here.
    print("1. Module pytesseract:", "INSTALLÉ" if "pytesseract" in sys.modules else "NON INSTALLÉ")
    print(f" Version: {pytesseract.__version__}")

    # Report item 2: the configured command, and whether it exists on disk.
    tesseract_cmd = pytesseract.pytesseract.tesseract_cmd
    print(f"2. Chemin tesseract configuré: {tesseract_cmd}")

    try:
        if os.path.exists(tesseract_cmd):
            print(" Le binaire existe: OUI")
        else:
            print(" Le binaire existe: NON")

            # Look tesseract up on the PATH. shutil.which is portable,
            # unlike spawning the Unix-only `which` executable.
            found = shutil.which("tesseract")
            if found:
                print(f" Tesseract trouvé dans le PATH: {found}")
            else:
                print(" Tesseract non trouvé dans le PATH")
    except (TypeError, ValueError, OSError):
        # e.g. tesseract_cmd set to something that is not a usable path.
        print(" Erreur lors de la vérification du binaire tesseract")

    # Report item 3: run the binary and show its version banner.
    # OSError covers a missing binary as well as one that is not executable.
    try:
        version = subprocess.check_output([tesseract_cmd, "--version"],
                                          stderr=subprocess.STDOUT,
                                          universal_newlines=True)
    except (subprocess.CalledProcessError, OSError):
        print("\n3. Impossible d'exécuter tesseract")

        # Retry with the bare command name, resolved through the PATH.
        try:
            version = subprocess.check_output(["tesseract", "--version"],
                                              stderr=subprocess.STDOUT,
                                              universal_newlines=True)
        except (subprocess.CalledProcessError, OSError):
            print(" Tesseract n'est pas installé ou n'est pas dans le PATH")
        else:
            print(" Tesseract est disponible avec la commande 'tesseract'")
            print(" " + version.split("\n")[0])
            print("\n SUGGESTION: Modifiez pytesseract.pytesseract.tesseract_cmd = 'tesseract'")
    else:
        print("\n3. Version de Tesseract:")
        print(" " + version.split("\n")[0])

    # Report item 4: installed language packs.
    try:
        langs = pytesseract.get_languages()
    except Exception:
        # Diagnostic boundary: whatever pytesseract raises, keep reporting.
        # Unlike a bare `except:`, Ctrl-C still interrupts the script.
        print("\n4. Impossible de récupérer les langues disponibles")
    else:
        print("\n4. Langues disponibles:")
        print(" " + ", ".join(langs))

        # The OCR run below is performed with lang='fra'.
        if "fra" in langs:
            print(" Le français est disponible: OUI")
        else:
            print(" Le français est disponible: NON")
            print(" ERREUR: Le pack de langue français (fra) n'est pas installé!")

    # Report item 5: end-to-end OCR on a generated image.
    print("\n5. Test de base avec une image générée:")
    try:
        from PIL import Image, ImageDraw

        # White canvas with black text, drawn with the default font.
        img = Image.new('RGB', (200, 50), color=(255, 255, 255))
        ImageDraw.Draw(img).text((10, 10), "TEST OCR 123", fill=(0, 0, 0))

        # The image is written to the current directory and left there so it
        # can be inspected after the run.
        test_image_path = "test_ocr_image.png"
        img.save(test_image_path)

        # Context manager so the file handle is closed after the OCR call.
        with Image.open(test_image_path) as saved:
            text = pytesseract.image_to_string(saved, lang='fra')
        print(f" Image créée: {test_image_path}")
        print(f" Texte détecté: '{text.strip()}'")

        # "TESTOCR" contains "TEST", so one membership test is enough.
        if "TEST" in text:
            print(" Test réussi: OUI")
        else:
            print(" Test réussi: NON")
    except Exception as e:
        print(f" Erreur lors du test: {str(e)}")

    print("\n=== FIN DE LA VÉRIFICATION ===")


if __name__ == "__main__":
    check_tesseract_installation()
|
||||
BIN
debug_ocr/pretreated_image.png
Normal file
BIN
debug_ocr/pretreated_image.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 199 KiB |
BIN
debug_ocr/pretreated_image_145435.png
Normal file
BIN
debug_ocr/pretreated_image_145435.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 93 KiB |
@ -48,12 +48,9 @@ class LlamaVision(BaseLLM):
|
||||
"""
|
||||
Prépare le contenu de la requête spécifique pour Ollama avec le modèle Llama Vision.
|
||||
"""
|
||||
# Ajout d'instructions pour réponses concises
|
||||
prompt_prefixe = "Réponds de manière concise et directe à la question suivante: "
|
||||
|
||||
contenu = {
|
||||
"model": self.modele,
|
||||
"prompt": prompt_prefixe + question,
|
||||
"prompt": question,
|
||||
"options": {
|
||||
"temperature": self.params["temperature"],
|
||||
"top_p": self.params["top_p"],
|
||||
@ -144,12 +141,10 @@ class LlamaVision(BaseLLM):
|
||||
def _optimiser_prompt(self, question: str) -> str:
|
||||
"""
|
||||
Optimise le prompt pour des réponses plus rapides.
|
||||
Ne modifie pas la langue ou le contenu de la question.
|
||||
"""
|
||||
# Ajouter des instructions pour limiter la longueur et être direct
|
||||
optimised_question = f"""Réponds à cette question de façon concise et directe. Limite ta réponse à 3-4 phrases maximum.
|
||||
|
||||
Question: {question}"""
|
||||
return optimised_question
|
||||
# On retourne la question telle quelle sans imposer de format
|
||||
return question
|
||||
|
||||
def interroger_avec_image(self, image_path: str, question: str) -> str:
|
||||
"""
|
||||
|
||||
@ -0,0 +1,12 @@
|
||||
{
|
||||
"image_name": "543d7da1b54c29ff43ce5712d1a9aa4962ed21795c4e943fcb8cb84fd4d7465a.jpg",
|
||||
"ocr_fr": "",
|
||||
"translation_en": "",
|
||||
"translation_en_back_fr": "",
|
||||
"metadata": {
|
||||
"ticket_id": "T11143",
|
||||
"timestamp": "20250423_142039",
|
||||
"source_module": "ocr_utils + translate_utils",
|
||||
"lang_detected": "fr"
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,12 @@
|
||||
{
|
||||
"image_name": "a20f7697fd5e1d1fca3296c6d01228220e0e112c46b4440cc938f74d10934e98.gif",
|
||||
"ocr_fr": "",
|
||||
"translation_en": "",
|
||||
"translation_en_back_fr": "",
|
||||
"metadata": {
|
||||
"ticket_id": "T11143",
|
||||
"timestamp": "20250423_142058",
|
||||
"source_module": "ocr_utils + translate_utils",
|
||||
"lang_detected": "fr"
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,12 @@
|
||||
{
|
||||
"image_name": "image.png",
|
||||
"ocr_fr": "Apache Tomcat x +\n\nCG A ‘3 zkl.brg-lab.com\n\n@ Andre 7 Demo 7% Devmat @ Base modèle\n\nIt works !\n\nIf you're seeing this page via a web browser, it means you've setup Tomcat successfully. Congratulations!\n\nThis is the default Tomcat home page. It can be found on the local filesystem at: /var/lib/tomcat7/webapps/ROOT/index.html\n\nTomcat}? veterans might be pleased to learn that this system instance of Tomcat is installed with CATALINA_HOME in /usr/share/tomcat7 and CATALINA BASE in /var/lib/tomcat7, following the rules from /usr/share/doc/tomcat7-common/RUNNING. txt. gz.\nYou might consider installing the following packages, if you haven't already done so:\n\ntomcat7-docs: This package installs a web application that allows to browse the Tomcat 7 documentation locally. Once installed, you can access it by clicking here.\n\ntomcat7-examples: This package installs a web application that allows to access the Tomcat 7 Servlet and JSP examples. Once installed, you can access it by clicking here.\n\ntomcat7-admin: This package installs two web applications that can help managing this Tomcat instance. Once installed, you can access the manager webapp and the host-manager webapp.\n\nNOTE: For security reasons, using the manager webapp is restricted to users with role \"“manager-gui\". The host-manager webapp is restricted to users with role \"admin-gui\". Users are defined in /etc/tomcat7/tomcat-users.xml.",
|
||||
"translation_en": "Apache Tomcat x +\n\nCG A ‘3 zkl.brg-lab.com\n\n@ Andre 7 Demo 7% Devmat @ model base\n\nIt works!\n\nIf you are seeing this page via a web browser, it means you've setup tomcat successfully. Congratulations!\n\nThis is the Default Tomcat Home Page. It can be found on the local Filesystem at: /var/lib/tomcat7/webapps/root/index.html\n\nTomcat}? veterans might be pleased to read this system instance of tomcat is installed with catalina_home in/usr/tomcat7 and catalina base in/var/lib/tomcat7, following the rules from/usr/share/doc/tomcat7-common/Running. TXT. Gz.\nYou might consider installing the following packages, if you have alreni done so:\n\nTomcat7-Docs: This Package Installes A Web Application that Allows to Browse the Tomcat 7 Locally documentation. Once Installed, you can access it by clicking here.\n\nTomcat7-Example: This Package Installes A Web Application that Allows to Access the Tomcat 7 Servlet and JSP Examples. Once Installed, you can access it by clicking here.\n\nTomcat7-Admin: This Package Installes Two Web Applications that can help managing this tomcat instance. Once Installed, you can access the Manager Webapp and the Host-Manager Webapp.\n\nNote: For Security Reasons, Using the Manager Webapp is restricted to users with Role \"Manager-Gui\". The Host-Manager Webapp is restricted to users with role \"admin-guui\". USERS are defined in /etc/tomcat7/tomcat-users.xml.",
|
||||
"translation_en_back_fr": "Apache Tomcat x +\n\nCG a ‘3 zkl.brg-lab.com\n\n@ Andre 7 Demo 7% Devmat @ Model Base\n\nÇa marche!\n\nSi vous voyez cette page via un navigateur Web, cela signifie que vous avez configuré Tomcat avec succès. Félicitations!\n\nIl s'agit de la page d'accueil par défaut de Tomcat. Il peut être trouvé sur le système de fichiers local à: /var/lib/tomcat7/webapps/root/index.html\n\nMatou}? Les vétérans pourraient être heureux de lire cette instance système de Tomcat est installé avec Catalina_Home dans / USR / Tomcat7 et Catalina Base dans / var / lib / tomcat7, en suivant les règles de / usr / share / doc / tomcat7-Common / Running. SMS. GZ.\nVous pourriez envisager d'installer les packages suivants, si vous avez fait Alreni:\n\nTomcat7-Docs: Ce package installe une application Web qui permet de parcourir la documentation Tomcat 7 localement. Une fois installé, vous pouvez y accéder en cliquant ici.\n\nTomcat7-Exemple: Ce package installe une application Web qui permet d'accéder aux exemples de servlet Tomcat 7 et JSP. Une fois installé, vous pouvez y accéder en cliquant ici.\n\nTomcat7-admin: Ce package installe deux applications Web qui peuvent aider à gérer cette instance Tomcat. Une fois installé, vous pouvez accéder au Manager WebApp et au Host-Manager WebApp.\n\nRemarque: Pour des raisons de sécurité, l'utilisation du gestionnaire WebApp est limitée aux utilisateurs avec le rôle \"Manager-Gui\". Le manager host-manager est limité aux utilisateurs avec un rôle \"Admin-GUUI\". Les utilisateurs sont définis dans /etc/tomcat7/tomcat-users.xml.",
|
||||
"metadata": {
|
||||
"ticket_id": "T11143",
|
||||
"timestamp": "20250423_141944",
|
||||
"source_module": "ocr_utils + translate_utils",
|
||||
"lang_detected": "fr"
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,12 @@
|
||||
{
|
||||
"image_name": "image_145435.png",
|
||||
"ocr_fr": "[6] 25 giraudbrg-lobcom/BRG-LAB/PAGE_ programmeEssai/xE4AAHDVNGOAA\n\n| BRGLAS CD Béton C9 foumétew bo 4 Masse\n\nEchantillon n°2500075 réceptionné le 02/04/2025 par BOLLEE Victor - prlevii Le 02/04/2025 por BOLLEE Victor n° prétèvement : 25-6007\nMatériau Sable 0/20 - CARRIERE ADCEG\n\nNREGISTRER\n\nLMPRMER\n\nle de trouver Fadeessé IP du serveur de zk1.brg-lab.com.",
|
||||
"translation_en": "[6] 25 GIRAUDBRG-LOBCOM/BRG-LAB/PAGE_ PROGRAMESSAI/XE4AAHDVNGOAAA\n\n| Brglas CD concrete C9 Foumetew Bo 4 Mass\n\nSample n ° 2500075 received on 02/04/2025 by Bollee Victor - PRLEVII on 02/04/2025 POR BOLLEE Victor N ° PRETREMENT: 25-6007\nSand material 0/20 - CARRIERE ADCEG\n\nRegister\n\nLmprmer\n\nThe to find Fadeessé IP of the ZK1.brg-lab.com server.",
|
||||
"translation_en_back_fr": "[6] 25 Giraudbrg-Lobcom / Brg-Lab / Page_ Programessai / Xe4aahdvngoaaa\n\n| Brglas cd béton c9 foumetew bo 4 masse\n\nÉchantillon N ° 2500075 Reçu le 02/04/2025 par Bollee Victor - Prllevii le 02/04/2025 Por Bollee Victor N ° Pretection: 25-6007\nMatériau de sable 0/20 - Carriere adceg\n\nRegistre\n\nLMPRMER\n\nLe pour trouver Fadeessé IP du serveur ZK1.brg-lab.com.",
|
||||
"metadata": {
|
||||
"ticket_id": "T11143",
|
||||
"timestamp": "20250423_142010",
|
||||
"source_module": "ocr_utils + translate_utils",
|
||||
"lang_detected": "fr"
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,116 @@
|
||||
image.png
|
||||
[FR] Apache Tomcat x +
|
||||
|
||||
CG A ‘3 zkl.brg-lab.com
|
||||
|
||||
@ Andre 7 Demo 7% Devmat @ Base modèle
|
||||
|
||||
It works !
|
||||
|
||||
If you're seeing this page via a web browser, it means you've setup Tomcat successfully. Congratulations!
|
||||
|
||||
This is the default Tomcat home page. It can be found on the local filesystem at: /var/lib/tomcat7/webapps/ROOT/index.html
|
||||
|
||||
Tomcat}? veterans might be pleased to learn that this system instance of Tomcat is installed with CATALINA_HOME in /usr/share/tomcat7 and CATALINA BASE in /var/lib/tomcat7, following the rules from /usr/share/doc/tomcat7-common/RUNNING. txt. gz.
|
||||
You might consider installing the following packages, if you haven't already done so:
|
||||
|
||||
tomcat7-docs: This package installs a web application that allows to browse the Tomcat 7 documentation locally. Once installed, you can access it by clicking here.
|
||||
|
||||
tomcat7-examples: This package installs a web application that allows to access the Tomcat 7 Servlet and JSP examples. Once installed, you can access it by clicking here.
|
||||
|
||||
tomcat7-admin: This package installs two web applications that can help managing this Tomcat instance. Once installed, you can access the manager webapp and the host-manager webapp.
|
||||
|
||||
NOTE: For security reasons, using the manager webapp is restricted to users with role "“manager-gui". The host-manager webapp is restricted to users with role "admin-gui". Users are defined in /etc/tomcat7/tomcat-users.xml.
|
||||
[EN] Apache Tomcat x +
|
||||
|
||||
CG A ‘3 zkl.brg-lab.com
|
||||
|
||||
@ Andre 7 Demo 7% Devmat @ model base
|
||||
|
||||
It works!
|
||||
|
||||
If you are seeing this page via a web browser, it means you've setup tomcat successfully. Congratulations!
|
||||
|
||||
This is the Default Tomcat Home Page. It can be found on the local Filesystem at: /var/lib/tomcat7/webapps/root/index.html
|
||||
|
||||
Tomcat}? veterans might be pleased to read this system instance of tomcat is installed with catalina_home in/usr/tomcat7 and catalina base in/var/lib/tomcat7, following the rules from/usr/share/doc/tomcat7-common/Running. TXT. Gz.
|
||||
You might consider installing the following packages, if you have alreni done so:
|
||||
|
||||
Tomcat7-Docs: This Package Installes A Web Application that Allows to Browse the Tomcat 7 Locally documentation. Once Installed, you can access it by clicking here.
|
||||
|
||||
Tomcat7-Example: This Package Installes A Web Application that Allows to Access the Tomcat 7 Servlet and JSP Examples. Once Installed, you can access it by clicking here.
|
||||
|
||||
Tomcat7-Admin: This Package Installes Two Web Applications that can help managing this tomcat instance. Once Installed, you can access the Manager Webapp and the Host-Manager Webapp.
|
||||
|
||||
Note: For Security Reasons, Using the Manager Webapp is restricted to users with Role "Manager-Gui". The Host-Manager Webapp is restricted to users with role "admin-guui". USERS are defined in /etc/tomcat7/tomcat-users.xml.
|
||||
[EN→FR] Apache Tomcat x +
|
||||
|
||||
CG a ‘3 zkl.brg-lab.com
|
||||
|
||||
@ Andre 7 Demo 7% Devmat @ Model Base
|
||||
|
||||
Ça marche!
|
||||
|
||||
Si vous voyez cette page via un navigateur Web, cela signifie que vous avez configuré Tomcat avec succès. Félicitations!
|
||||
|
||||
Il s'agit de la page d'accueil par défaut de Tomcat. Il peut être trouvé sur le système de fichiers local à: /var/lib/tomcat7/webapps/root/index.html
|
||||
|
||||
Matou}? Les vétérans pourraient être heureux de lire cette instance système de Tomcat est installé avec Catalina_Home dans / USR / Tomcat7 et Catalina Base dans / var / lib / tomcat7, en suivant les règles de / usr / share / doc / tomcat7-Common / Running. SMS. GZ.
|
||||
Vous pourriez envisager d'installer les packages suivants, si vous avez fait Alreni:
|
||||
|
||||
Tomcat7-Docs: Ce package installe une application Web qui permet de parcourir la documentation Tomcat 7 localement. Une fois installé, vous pouvez y accéder en cliquant ici.
|
||||
|
||||
Tomcat7-Exemple: Ce package installe une application Web qui permet d'accéder aux exemples de servlet Tomcat 7 et JSP. Une fois installé, vous pouvez y accéder en cliquant ici.
|
||||
|
||||
Tomcat7-admin: Ce package installe deux applications Web qui peuvent aider à gérer cette instance Tomcat. Une fois installé, vous pouvez accéder au Manager WebApp et au Host-Manager WebApp.
|
||||
|
||||
Remarque: Pour des raisons de sécurité, l'utilisation du gestionnaire WebApp est limitée aux utilisateurs avec le rôle "Manager-Gui". Le manager host-manager est limité aux utilisateurs avec un rôle "Admin-GUUI". Les utilisateurs sont définis dans /etc/tomcat7/tomcat-users.xml.
|
||||
|
||||
image_145435.png
|
||||
[FR] [6] 25 giraudbrg-lobcom/BRG-LAB/PAGE_ programmeEssai/xE4AAHDVNGOAA
|
||||
|
||||
| BRGLAS CD Béton C9 foumétew bo 4 Masse
|
||||
|
||||
Echantillon n°2500075 réceptionné le 02/04/2025 par BOLLEE Victor - prlevii Le 02/04/2025 por BOLLEE Victor n° prétèvement : 25-6007
|
||||
Matériau Sable 0/20 - CARRIERE ADCEG
|
||||
|
||||
NREGISTRER
|
||||
|
||||
LMPRMER
|
||||
|
||||
le de trouver Fadeessé IP du serveur de zk1.brg-lab.com.
|
||||
[EN] [6] 25 GIRAUDBRG-LOBCOM/BRG-LAB/PAGE_ PROGRAMESSAI/XE4AAHDVNGOAAA
|
||||
|
||||
| Brglas CD concrete C9 Foumetew Bo 4 Mass
|
||||
|
||||
Sample n ° 2500075 received on 02/04/2025 by Bollee Victor - PRLEVII on 02/04/2025 POR BOLLEE Victor N ° PRETREMENT: 25-6007
|
||||
Sand material 0/20 - CARRIERE ADCEG
|
||||
|
||||
Register
|
||||
|
||||
Lmprmer
|
||||
|
||||
The to find Fadeessé IP of the ZK1.brg-lab.com server.
|
||||
[EN→FR] [6] 25 Giraudbrg-Lobcom / Brg-Lab / Page_ Programessai / Xe4aahdvngoaaa
|
||||
|
||||
| Brglas cd béton c9 foumetew bo 4 masse
|
||||
|
||||
Échantillon N ° 2500075 Reçu le 02/04/2025 par Bollee Victor - Prllevii le 02/04/2025 Por Bollee Victor N ° Pretection: 25-6007
|
||||
Matériau de sable 0/20 - Carriere adceg
|
||||
|
||||
Registre
|
||||
|
||||
LMPRMER
|
||||
|
||||
Le pour trouver Fadeessé IP du serveur ZK1.brg-lab.com.
|
||||
|
||||
543d7da1b54c29ff43ce5712d1a9aa4962ed21795c4e943fcb8cb84fd4d7465a.jpg
|
||||
[FR] _
|
||||
[EN] _
|
||||
[EN→FR] _
|
||||
|
||||
a20f7697fd5e1d1fca3296c6d01228220e0e112c46b4440cc938f74d10934e98.gif
|
||||
[FR] _
|
||||
[EN] _
|
||||
[EN→FR] _
|
||||
|
||||
@ -0,0 +1,26 @@
|
||||
[
|
||||
{
|
||||
"image_path": "output/ticket_T11143/T11143_20250422_084617/attachments/image.png",
|
||||
"status": "unique"
|
||||
},
|
||||
{
|
||||
"image_path": "output/ticket_T11143/T11143_20250422_084617/attachments/image_145435.png",
|
||||
"status": "unique"
|
||||
},
|
||||
{
|
||||
"image_path": "output/ticket_T11143/T11143_20250422_084617/attachments/image_145453.png",
|
||||
"status": "duplicate"
|
||||
},
|
||||
{
|
||||
"image_path": "output/ticket_T11143/T11143_20250422_084617/attachments/543d7da1b54c29ff43ce5712d1a9aa4962ed21795c4e943fcb8cb84fd4d7465a.jpg",
|
||||
"status": "unique"
|
||||
},
|
||||
{
|
||||
"image_path": "output/ticket_T11143/T11143_20250422_084617/attachments/5ad281b63492e31c9e66bf27518b816cdd3766cab9812bd4ff16b736e9e98265.jpg",
|
||||
"status": "duplicate"
|
||||
},
|
||||
{
|
||||
"image_path": "output/ticket_T11143/T11143_20250422_084617/attachments/a20f7697fd5e1d1fca3296c6d01228220e0e112c46b4440cc938f74d10934e98.gif",
|
||||
"status": "unique"
|
||||
}
|
||||
]
|
||||
@ -0,0 +1,98 @@
|
||||
[
|
||||
{
|
||||
"is_relevant": true,
|
||||
"reason": "Yes.\n\nThis image appears to be a screenshot of the default Apache Tomcat homepage, indicating that the server is running successfully. However, this does not necessarily indicate a technical support issue. In fact, it suggests that the setup process has been completed correctly. \n\nA technical support issue would typically involve an error message or unexpected behavior, which is not present in this image. Therefore, while the image may be relevant to a technical support conversation (e.g., as proof of successful installation), it does not in itself indicate a problem requiring support.",
|
||||
"raw_response": "Yes.\n\nThis image appears to be a screenshot of the default Apache Tomcat homepage, indicating that the server is running successfully. However, this does not necessarily indicate a technical support issue. In fact, it suggests that the setup process has been completed correctly. \n\nA technical support issue would typically involve an error message or unexpected behavior, which is not present in this image. Therefore, while the image may be relevant to a technical support conversation (e.g., as proof of successful installation), it does not in itself indicate a problem requiring support.",
|
||||
"ocr_fr": "Apache Tomcat x +\n\nCG A ‘3 zkl.brg-lab.com\n\n@ Andre 7 Demo 7% Devmat @ Base modèle\n\nIt works !\n\nIf you're seeing this page via a web browser, it means you've setup Tomcat successfully. Congratulations!\n\nThis is the default Tomcat home page. It can be found on the local filesystem at: /var/lib/tomcat7/webapps/ROOT/index.html\n\nTomcat}? veterans might be pleased to learn that this system instance of Tomcat is installed with CATALINA_HOME in /usr/share/tomcat7 and CATALINA BASE in /var/lib/tomcat7, following the rules from /usr/share/doc/tomcat7-common/RUNNING. txt. gz.\nYou might consider installing the following packages, if you haven't already done so:\n\ntomcat7-docs: This package installs a web application that allows to browse the Tomcat 7 documentation locally. Once installed, you can access it by clicking here.\n\ntomcat7-examples: This package installs a web application that allows to access the Tomcat 7 Servlet and JSP examples. Once installed, you can access it by clicking here.\n\ntomcat7-admin: This package installs two web applications that can help managing this Tomcat instance. Once installed, you can access the manager webapp and the host-manager webapp.\n\nNOTE: For security reasons, using the manager webapp is restricted to users with role \"“manager-gui\". The host-manager webapp is restricted to users with role \"admin-gui\". Users are defined in /etc/tomcat7/tomcat-users.xml.",
|
||||
"ocr_en": "Apache Tomcat x +\n\nCG A ‘3 zkl.brg-lab.com\n\n@ Andre 7 Demo 7% Devmat @ model base\n\nIt works!\n\nIf you are seeing this page via a web browser, it means you've setup tomcat successfully. Congratulations!\n\nThis is the Default Tomcat Home Page. It can be found on the local Filesystem at: /var/lib/tomcat7/webapps/root/index.html\n\nTomcat}? veterans might be pleased to read this system instance of tomcat is installed with catalina_home in/usr/tomcat7 and catalina base in/var/lib/tomcat7, following the rules from/usr/share/doc/tomcat7-common/Running. TXT. Gz.\nYou might consider installing the following packages, if you have alreni done so:\n\nTomcat7-Docs: This Package Installes A Web Application that Allows to Browse the Tomcat 7 Locally documentation. Once Installed, you can access it by clicking here.\n\nTomcat7-Example: This Package Installes A Web Application that Allows to Access the Tomcat 7 Servlet and JSP Examples. Once Installed, you can access it by clicking here.\n\nTomcat7-Admin: This Package Installes Two Web Applications that can help managing this tomcat instance. Once Installed, you can access the Manager Webapp and the Host-Manager Webapp.\n\nNote: For Security Reasons, Using the Manager Webapp is restricted to users with Role \"Manager-Gui\". The Host-Manager Webapp is restricted to users with role \"admin-guui\". USERS are defined in /etc/tomcat7/tomcat-users.xml.",
|
||||
"metadata": {
|
||||
"image_path": "output/ticket_T11143/T11143_20250422_084617/attachments/image.png",
|
||||
"image_name": "image.png",
|
||||
"timestamp": "20250423_142009",
|
||||
"model_info": {
|
||||
"model": "llama3.2-vision:90b-instruct-q8_0",
|
||||
"temperature": 0.2,
|
||||
"top_p": 0.8,
|
||||
"max_tokens": 300,
|
||||
"presence_penalty": 0,
|
||||
"frequency_penalty": 0,
|
||||
"stop": [],
|
||||
"stream": false,
|
||||
"n": 1
|
||||
},
|
||||
"source_agent": "AgentImageSorter"
|
||||
}
|
||||
},
|
||||
{
|
||||
"is_relevant": true,
|
||||
"reason": "Yes.\n\nThis image appears to be a screenshot from the BRG_Lab software system, showing a specific page related to concrete testing (Béton C9 Foumétew Bo 4 Masse). The text includes details about a sample received on a particular date, material information, and an error message or prompt (\"le de trouver Fadeessé IP du serveur de zk1.brg-lab.com\") that suggests a technical issue with connecting to the server. This information is relevant for a technical support issue as it provides context about the problem being experienced by the user.",
|
||||
"raw_response": "Yes.\n\nThis image appears to be a screenshot from the BRG_Lab software system, showing a specific page related to concrete testing (Béton C9 Foumétew Bo 4 Masse). The text includes details about a sample received on a particular date, material information, and an error message or prompt (\"le de trouver Fadeessé IP du serveur de zk1.brg-lab.com\") that suggests a technical issue with connecting to the server. This information is relevant for a technical support issue as it provides context about the problem being experienced by the user.",
|
||||
"ocr_fr": "[6] 25 giraudbrg-lobcom/BRG-LAB/PAGE_ programmeEssai/xE4AAHDVNGOAA\n\n| BRGLAS CD Béton C9 foumétew bo 4 Masse\n\nEchantillon n°2500075 réceptionné le 02/04/2025 par BOLLEE Victor - prlevii Le 02/04/2025 por BOLLEE Victor n° prétèvement : 25-6007\nMatériau Sable 0/20 - CARRIERE ADCEG\n\nNREGISTRER\n\nLMPRMER\n\nle de trouver Fadeessé IP du serveur de zk1.brg-lab.com.",
|
||||
"ocr_en": "[6] 25 GIRAUDBRG-LOBCOM/BRG-LAB/PAGE_ PROGRAMESSAI/XE4AAHDVNGOAAA\n\n| Brglas CD concrete C9 Foumetew Bo 4 Mass\n\nSample n ° 2500075 received on 02/04/2025 by Bollee Victor - PRLEVII on 02/04/2025 POR BOLLEE Victor N ° PRETREMENT: 25-6007\nSand material 0/20 - CARRIERE ADCEG\n\nRegister\n\nLmprmer\n\nThe to find Fadeessé IP of the ZK1.brg-lab.com server.",
|
||||
"metadata": {
|
||||
"image_path": "output/ticket_T11143/T11143_20250422_084617/attachments/image_145435.png",
|
||||
"image_name": "image_145435.png",
|
||||
"timestamp": "20250423_142039",
|
||||
"model_info": {
|
||||
"model": "llama3.2-vision:90b-instruct-q8_0",
|
||||
"temperature": 0.2,
|
||||
"top_p": 0.8,
|
||||
"max_tokens": 300,
|
||||
"presence_penalty": 0,
|
||||
"frequency_penalty": 0,
|
||||
"stop": [],
|
||||
"stream": false,
|
||||
"n": 1
|
||||
},
|
||||
"source_agent": "AgentImageSorter"
|
||||
}
|
||||
},
|
||||
{
|
||||
"is_relevant": false,
|
||||
"reason": "No.\n\nThe image appears to be a jumbled collection of characters, including French text, but it does not contain any meaningful information that would be relevant to a technical support issue. The text is likely the result of an OCR (Optical Character Recognition) error or a corrupted file, rather than an actual screenshot or log from the BRG_Lab software system.",
|
||||
"raw_response": "No.\n\nThe image appears to be a jumbled collection of characters, including French text, but it does not contain any meaningful information that would be relevant to a technical support issue. The text is likely the result of an OCR (Optical Character Recognition) error or a corrupted file, rather than an actual screenshot or log from the BRG_Lab software system.",
|
||||
"ocr_fr": "",
|
||||
"ocr_en": "",
|
||||
"metadata": {
|
||||
"image_path": "output/ticket_T11143/T11143_20250422_084617/attachments/543d7da1b54c29ff43ce5712d1a9aa4962ed21795c4e943fcb8cb84fd4d7465a.jpg",
|
||||
"image_name": "543d7da1b54c29ff43ce5712d1a9aa4962ed21795c4e943fcb8cb84fd4d7465a.jpg",
|
||||
"timestamp": "20250423_142058",
|
||||
"model_info": {
|
||||
"model": "llama3.2-vision:90b-instruct-q8_0",
|
||||
"temperature": 0.2,
|
||||
"top_p": 0.8,
|
||||
"max_tokens": 300,
|
||||
"presence_penalty": 0,
|
||||
"frequency_penalty": 0,
|
||||
"stop": [],
|
||||
"stream": false,
|
||||
"n": 1
|
||||
},
|
||||
"source_agent": "AgentImageSorter"
|
||||
}
|
||||
},
|
||||
{
|
||||
"is_relevant": true,
|
||||
"reason": "Yes.\n\nThe image appears to be a screenshot of an error message or a system log from the BRG_Lab software system, which is likely relevant to a technical support issue. The presence of French text and what seems to be a stack trace or debug information suggests that it may be related to a specific problem or bug that the user is experiencing with the software.",
|
||||
"raw_response": "Yes.\n\nThe image appears to be a screenshot of an error message or a system log from the BRG_Lab software system, which is likely relevant to a technical support issue. The presence of French text and what seems to be a stack trace or debug information suggests that it may be related to a specific problem or bug that the user is experiencing with the software.",
|
||||
"ocr_fr": "",
|
||||
"ocr_en": "",
|
||||
"metadata": {
|
||||
"image_path": "output/ticket_T11143/T11143_20250422_084617/attachments/a20f7697fd5e1d1fca3296c6d01228220e0e112c46b4440cc938f74d10934e98.gif",
|
||||
"image_name": "a20f7697fd5e1d1fca3296c6d01228220e0e112c46b4440cc938f74d10934e98.gif",
|
||||
"timestamp": "20250423_142115",
|
||||
"model_info": {
|
||||
"model": "llama3.2-vision:90b-instruct-q8_0",
|
||||
"temperature": 0.2,
|
||||
"top_p": 0.8,
|
||||
"max_tokens": 300,
|
||||
"presence_penalty": 0,
|
||||
"frequency_penalty": 0,
|
||||
"stop": [],
|
||||
"stream": false,
|
||||
"n": 1
|
||||
},
|
||||
"source_agent": "AgentImageSorter"
|
||||
}
|
||||
}
|
||||
]
|
||||
@ -0,0 +1,40 @@
|
||||
RÉSULTATS DE L'ANALYSE TRI_IMAGE - TICKET T11143
|
||||
================================================================================
|
||||
|
||||
--- ÉLÉMENT 1 ---
|
||||
|
||||
Yes.
|
||||
|
||||
This image appears to be a screenshot of the default Apache Tomcat homepage, indicating that the server is running successfully. However, this does not necessarily indicate a technical support issue. In fact, it suggests that the setup process has been completed correctly.
|
||||
|
||||
A technical support issue would typically involve an error message or unexpected behavior, which is not present in this image. Therefore, while the image may be relevant to a technical support conversation (e.g., as proof of successful installation), it does not in itself indicate a problem requiring support.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
--- ÉLÉMENT 2 ---
|
||||
|
||||
Yes.
|
||||
|
||||
This image appears to be a screenshot from the BRG_Lab software system, showing a specific page related to concrete testing (Béton C9 Foumétew Bo 4 Masse). The text includes details about a sample received on a particular date, material information, and an error message or prompt ("le de trouver Fadeessé IP du serveur de zk1.brg-lab.com") that suggests a technical issue with connecting to the server. This information is relevant for a technical support issue as it provides context about the problem being experienced by the user.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
--- ÉLÉMENT 3 ---
|
||||
|
||||
No.
|
||||
|
||||
The image appears to be a jumbled collection of characters, including French text, but it does not contain any meaningful information that would be relevant to a technical support issue. The text is likely the result of an OCR (Optical Character Recognition) error or a corrupted file, rather than an actual screenshot or log from the BRG_Lab software system.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
--- ÉLÉMENT 4 ---
|
||||
|
||||
Yes.
|
||||
|
||||
The image appears to be a screenshot of an error message or a system log from the BRG_Lab software system, which is likely relevant to a technical support issue. The presence of French text and what seems to be a stack trace or debug information suggests that it may be related to a specific problem or bug that the user is experiencing with the software.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
|
||||
================================================================================
|
||||
Fichier original: tri_image_llama3.2-vision:90b-instruct-q8_0_results.json
|
||||
@ -1,36 +0,0 @@
|
||||
RAPPORT D'ANALYSE DU TICKET T11143
|
||||
==================================================
|
||||
|
||||
### Rapport Final
|
||||
|
||||
#### 1. Contexte général
|
||||
Le client ne parvient pas à accéder à l'essai au bleu, mais le problème s'est résolu de lui-même par la suite.
|
||||
|
||||
#### 2. Problèmes ou questions identifiés
|
||||
- Pourquoi l'essai au bleu est-il inaccessible ?
|
||||
- Comment résoudre le problème d'accès à l'essai au bleu ?
|
||||
|
||||
#### 3. Résumé croisé image/texte pour chaque question
|
||||
|
||||
**Pourquoi l'essai au bleu est-il inaccessible ?**
|
||||
- **Texte du ticket** : Le client mentionne qu'il ne parvient pas à accéder à l'essai au bleu.
|
||||
- **Image** : La capture d'écran montre un message indiquant "Impossible de trouver l'adresse IP du serveur de zk1.brg-lab.com".
|
||||
|
||||
**Comment résoudre le problème d'accès à l'essai au bleu ?**
|
||||
- **Texte du ticket** : Le client indique que le problème s'est résolu de lui-même par la suite.
|
||||
- **Image** : La capture d'écran de la page https://zk1.brg-lab.com/ montre que le serveur Tomcat fonctionne correctement, ce qui est confirmé par le client.
|
||||
|
||||
#### 4. Liste d'observations supplémentaires pertinentes
|
||||
- Le client a confirmé que l'adresse https://zk1.brg-lab.com/ fonctionne correctement.
|
||||
- Le message "Impossible de trouver l'adresse IP du serveur de zk1.brg-lab.com" est visible dans la capture d'écran de l'essai au bleu.
|
||||
- Le serveur Tomcat est correctement installé et configuré, comme indiqué par la page "It works !".
|
||||
|
||||
#### 5. Tableau chronologique d'échanges
|
||||
|
||||
| ÉMETTEUR | TYPE | DATE | CONTENU | ÉLÉMENTS VISUELS |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| CLIENT | question | 03/04/2025 08:34 | Bonjour, Je ne parviens pas à accéder au l’essai au bleu. Merci par avance pour votre. Cordialement | Essai au bleu de méthylène de méthylène (MB) - NF EN 933-9 (02-2022), Message : "Impossible de trouver l'adresse IP du serveur de zk1.brg-lab.com" |
|
||||
| SUPPORT | réponse | 03/04/2025 12:17 | Bonjour, Pouvez-vous vérifier si vous avez bien accès à la page suivante en l'ouvrant dans votre navigateur : https://zk1.brg-lab.com/ Voici ce que vous devriez voir affiché : Si ce n'est pas le cas, pouvez-vous me faire une capture d'écran de ce qui est affiché? Je reste à votre entière disposition pour toute information complémentaire. Cordialement, --- Support technique | Page "It works !" de Tomcat, Message : "If you're seeing this page via a web browser, it means you've setup Tomcat successfully. Congratulations!" |
|
||||
| CLIENT | information | 03/04/2025 12:21 | Bonjour, Le problème s’est résolu seul par la suite. Je vous remercie pour votre retour. Bonne journée PS : l’adresse fonctionne | Confirmation que l'adresse https://zk1.brg-lab.com/ fonctionne |
|
||||
|
||||
### Fin du rapport
|
||||
152
test_image_processing.py
Normal file
152
test_image_processing.py
Normal file
@ -0,0 +1,152 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import json
|
||||
import base64
|
||||
from PIL import Image
|
||||
import io
|
||||
|
||||
def parse_args(argv=None):
    """Parse the command-line options of the image pre-processing test script.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) lets argparse read ``sys.argv[1:]``, which is what the
            script did before this parameter existed.

    Returns:
        argparse.Namespace with ``image_path`` (str), ``save`` (bool) and
        ``verbose`` (bool).
    """
    parser = argparse.ArgumentParser(description="Test de prétraitement d'images")
    parser.add_argument("image_path", help="Chemin vers l'image à analyser")
    parser.add_argument("--save", "-s", action="store_true", help="Sauvegarder les images traitées")
    parser.add_argument("--verbose", "-v", action="store_true", help="Mode verbeux")
    return parser.parse_args(argv)
|
||||
|
||||
def encoder_image_base64(image_path: str, save=None) -> str:
    """Encode an image file as a base64 JPEG string, shrinking it if needed.

    The image is downscaled so that neither side exceeds 800 px, flattened
    onto a white background when it carries transparency, re-encoded as JPEG
    and returned base64-encoded. Two fallbacks keep the function best-effort:
    a plain ``convert("RGB")`` re-encode, then the raw file bytes.

    The original docstring stated that this mirrors
    ``LlamaVision._encoder_image_base64``; that class is not visible here.
    This version additionally composites ``LA`` images over white and never
    resizes a side down to 0 px.

    Args:
        image_path: Path of the image file to encode.
        save: When true, also write the processed JPEG into the current
            directory. ``None`` (default) keeps the script behaviour of
            reading the module-level ``args.save`` set by the ``__main__``
            guard, and means "do not save" when no such global exists.

    Returns:
        The base64 text of the encoded image.

    Raises:
        OSError: If every strategy fails because the file cannot be read.
    """
    if save is None:
        # The script entry point stores the parsed CLI options in a module
        # global named `args`. Looking it up defensively avoids a NameError
        # (which the broad handlers below would silently turn into the raw
        # fallback) when this function is imported and called directly.
        save = bool(getattr(globals().get("args"), "save", False))

    # Strategy 1: resize, flatten transparency, encode as JPEG (quality 85).
    try:
        with Image.open(image_path) as img:
            print(f"Image originale: {image_path}")
            print(f"Format: {img.format}, Mode: {img.mode}, Taille: {img.size}")

            max_dim = 800  # longest side allowed, in pixels
            width, height = img.size

            if width > max_dim or height > max_dim:
                # Same ratio on both axes keeps the proportions; clamp to 1 px
                # so extreme aspect ratios cannot produce a zero-sized side.
                ratio = min(max_dim / width, max_dim / height)
                new_width = max(1, int(width * ratio))
                new_height = max(1, int(height * ratio))

                img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
                print(f"Image redimensionnée: {new_width}x{new_height}")

            original_mode = img.mode
            if original_mode in ("RGBA", "LA", "P"):
                # Normalise to RGBA first so that every transparent mode has
                # an alpha band to use as paste mask (LA was previously pasted
                # without a mask, which dropped its transparency).
                rgba = img if original_mode == "RGBA" else img.convert("RGBA")
                background = Image.new("RGB", rgba.size, (255, 255, 255))
                background.paste(rgba, mask=rgba.split()[3])
                img = background
                print(f"Mode converti: {original_mode} -> RGB (avec fond blanc)")
            elif original_mode != "RGB":
                img = img.convert("RGB")
                print(f"Mode converti: {original_mode} -> RGB")

            buffer = io.BytesIO()
            img.save(buffer, format="JPEG", quality=85)

            if save:
                output_path = f"processed_{os.path.basename(image_path)}.jpg"
                img.save(output_path, format="JPEG", quality=85)
                print(f"Image traitée sauvegardée: {output_path}")

            encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
            print(f"Taille du base64: {len(encoded)} caractères")
            return encoded

    except Exception as e:  # deliberate best-effort: fall through to strategy 2
        print(f"Erreur lors de l'optimisation de l'image: {str(e)}")

    # Strategy 2: direct RGB conversion, no resize (quality 75).
    try:
        print("Tentative de secours...")
        with Image.open(image_path) as img:
            img = img.convert("RGB")
            buffer = io.BytesIO()
            img.save(buffer, format="JPEG", quality=75)

            if save:
                output_path = f"fallback_{os.path.basename(image_path)}.jpg"
                img.save(output_path, format="JPEG", quality=75)
                print(f"Image de secours sauvegardée: {output_path}")

            encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
            print(f"Taille du base64 (secours): {len(encoded)} caractères")
            return encoded

    except Exception as e2:  # deliberate best-effort: fall through to strategy 3
        print(f"Deuxième erreur lors de l'optimisation de l'image: {str(e2)}")

    # Strategy 3: encode the untouched file bytes.
    print("Dernier recours: encodage sans optimisation...")
    with open(image_path, "rb") as image_file:
        encoded = base64.b64encode(image_file.read()).decode("utf-8")
    print(f"Taille du base64 (brut): {len(encoded)} caractères")
    return encoded
|
||||
|
||||
def test_image_processing(image_path, verbose=False):
    """Exercise the image pre-processing on one file and record metadata.

    Prints basic Pillow information about the image, runs
    ``encoder_image_base64`` on it, then writes a small JSON file
    (``image_info_<basename>.json`` in the current directory) describing the
    encoded result.

    Args:
        image_path: Path of the image to analyse.
        verbose: When true, also print the first 100 base64 characters.

    Returns:
        None. Returns early (after printing an error) when the file does not
        exist or cannot be opened as an image.
    """
    # Local import: only the timestamp below needs it.
    from datetime import datetime

    if not os.path.exists(image_path):
        print(f"Erreur: L'image {image_path} n'existe pas")
        return

    # Inspect the image with Pillow.
    try:
        with Image.open(image_path) as img:
            print("\n=== INFORMATIONS SUR L'IMAGE ===")
            print(f"Format: {img.format}")
            print(f"Mode: {img.mode}")
            print(f"Taille: {img.size}")
            # Pillow images always expose a `palette` attribute (None when the
            # image has no palette), so test its value rather than its presence.
            print(f"Palette: {getattr(img, 'palette', None) is not None}")
            if hasattr(img, 'info'):
                print(f"Info supplémentaires: {img.info.keys()}")
    except Exception as e:
        print(f"Erreur lors de l'analyse de l'image: {e}")
        return

    # Encode the image.
    print("\n=== TRAITEMENT DE L'IMAGE ===")
    encoded = encoder_image_base64(image_path)

    # Persist a few facts about the encoded payload.
    metadata = {
        "filename": os.path.basename(image_path),
        "path": image_path,
        "base64_length": len(encoded),
        "first_20_chars": encoded[:20],
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }

    output_file = f"image_info_{os.path.basename(image_path)}.json"
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(metadata, f, ensure_ascii=False, indent=2)

    print(f"\nMétadonnées enregistrées dans {output_file}")

    # Show an excerpt of the base64 payload.
    if verbose:
        print("\n=== EXTRAIT DU BASE64 ===")
        print(encoded[:100] + "...")


if __name__ == "__main__":
    # Must stay a module-level global named `args`: encoder_image_base64
    # looks up `args.save` in module scope.
    args = parse_args()
    test_image_processing(args.image_path, args.verbose)
|
||||
78
test_image_sorter.py
Normal file
78
test_image_sorter.py
Normal file
@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import json
|
||||
from llm_classes.llama_vision import LlamaVision
|
||||
from agents.llama_vision.agent_image_sorter import AgentImageSorter
|
||||
|
||||
def parse_args(argv=None):
    """Parse the command-line options of the image-sorter agent test script.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) lets argparse read ``sys.argv[1:]``, which is what the
            script did before this parameter existed.

    Returns:
        argparse.Namespace with ``image_path`` (str), ``debug`` (bool) and
        ``ticket_id`` (str, defaults to ``"T9999"``).
    """
    parser = argparse.ArgumentParser(description="Test de l'agent de tri d'images")
    parser.add_argument("image_path", help="Chemin vers l'image à analyser")
    parser.add_argument("--debug", "-d", action="store_true", help="Mode debug")
    parser.add_argument("--ticket_id", "-t", default="T9999", help="ID du ticket pour les tests")
    return parser.parse_args(argv)
|
||||
|
||||
def main():
    """Run ``AgentImageSorter`` on one image and dump its result as JSON.

    The steps are: check that the image exists, build the ``LlamaVision``
    model, build the agent, execute it on the image, print the verdict and
    write ``test_tri_<basename>.json`` in the current directory.

    Returns:
        int: 0 on success, 1 on any failure (used as process exit status).
    """
    args = parse_args()
    image_path = args.image_path

    if not os.path.exists(image_path):
        print(f"Erreur: L'image {image_path} n'existe pas")
        return 1

    print("=== TEST DE L'AGENT DE TRI D'IMAGES ===")
    print(f"Image: {image_path}")

    # Initialise the LlamaVision model (default model).
    try:
        print("Initialisation du modèle LlamaVision...")
        llm = LlamaVision()
        print(f"Modèle initialisé: {llm.modele}")
    except Exception as e:
        print(f"Erreur lors de l'initialisation du modèle: {e}")
        return 1

    # Initialise the sorting agent.
    try:
        print("Initialisation de l'agent de tri d'images...")
        agent = AgentImageSorter(llm)
        print("Agent initialisé")
    except Exception as e:
        print(f"Erreur lors de l'initialisation de l'agent: {e}")
        return 1

    # Run the agent on the image.
    try:
        print("\nExécution de l'agent sur l'image...")
        result = agent.executer(image_path)

        # Report an empty result explicitly instead of letting the subscript
        # accesses below fail with an unrelated-looking TypeError.
        if not result:
            print("Erreur: Aucun résultat retourné par l'agent")
            return 1

        print("\n=== RÉSULTAT ===")
        print(f"Image pertinente: {result['is_relevant']}")
        print(f"Raison: {result['reason']}")

        if args.debug:
            # .get(): a missing debug-only key must not abort the run before
            # the JSON file is written.
            print("\n=== RÉPONSE BRUTE ===")
            print(result.get('raw_response'))
            print("\n=== OCR ===")
            print(f"OCR FR: {result.get('ocr_fr') or 'Aucun texte détecté'}")
            print(f"OCR EN: {result.get('ocr_en') or 'Aucune traduction'}")

        # Save the result as JSON.
        output_file = f"test_tri_{os.path.basename(image_path)}.json"
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(result, f, ensure_ascii=False, indent=2)
        print(f"\nRésultat enregistré dans {output_file}")

        return 0
    except Exception as e:
        print(f"Erreur lors de l'exécution de l'agent: {e}")
        import traceback
        traceback.print_exc()
        return 1


if __name__ == "__main__":
    sys.exit(main())
|
||||
82
test_ocr.py
Normal file
82
test_ocr.py
Normal file
@ -0,0 +1,82 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import json
|
||||
from PIL import Image
|
||||
from utils.ocr_utils import extraire_texte_fr
|
||||
from utils.translate_utils import fr_to_en, en_to_fr
|
||||
|
||||
def parse_args(argv=None):
    """Parse the command-line options of the OCR/translation test script.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) lets argparse read ``sys.argv[1:]``, which is what the
            script did before this parameter existed.

    Returns:
        argparse.Namespace with ``image_path`` (str), ``verbose`` (bool) and
        ``info`` (bool).
    """
    parser = argparse.ArgumentParser(description="Test direct d'OCR et traduction")
    parser.add_argument("image_path", help="Chemin vers l'image à analyser")
    parser.add_argument("--verbose", "-v", action="store_true", help="Mode verbeux")
    parser.add_argument("--info", "-i", action="store_true", help="Afficher les infos de l'image")
    return parser.parse_args(argv)
|
||||
|
||||
def test_ocr_traduction(image_path, verbose=False, show_info=False):
    """Run French OCR on one image, translate it, and save the results.

    The French text is translated to English and then back to French as a
    round-trip check. Each translation is computed once and reused for both
    the console output and the JSON file
    (``ocr_test_<basename>.json`` in the current directory).

    Args:
        image_path: Path of the image to analyse.
        verbose: Accepted for CLI compatibility; not used by this function.
        show_info: When true, print Pillow format/mode/size information first.

    Returns:
        None. Returns early (after printing an error) when the file does not
        exist or, with ``show_info``, cannot be opened as an image.
    """
    # Make sure the file exists.
    if not os.path.exists(image_path):
        print(f"Erreur: Le fichier {image_path} n'existe pas")
        return

    # Optionally show image information.
    if show_info:
        try:
            with Image.open(image_path) as img:
                print(f"Format: {img.format}")
                print(f"Mode: {img.mode}")
                print(f"Taille: {img.size}")
                print(f"Palette: {hasattr(img, 'palette')}")
        except Exception as e:
            print(f"Erreur lors de l'analyse de l'image: {e}")
            return

    # Run the OCR.
    print(f"Exécution de l'OCR sur {image_path}...")
    ocr_fr = extraire_texte_fr(image_path)

    # Filled in only when the OCR produced text.
    ocr_en = ""
    ocr_en_back_fr = ""

    if ocr_fr:
        print("\n--- TEXTE DÉTECTÉ (FR) ---")
        print(ocr_fr)
        print("-------------------------")

        print("\nTraduction FR -> EN...")
        ocr_en = fr_to_en(ocr_fr)
        print("\n--- TRADUCTION (EN) ---")
        print(ocr_en)
        print("-------------------------")

        print("\nTraduction EN -> FR (vérification)...")
        ocr_en_back_fr = en_to_fr(ocr_en)
        print("\n--- TRADUCTION RETOUR (FR) ---")
        print(ocr_en_back_fr)
        print("-------------------------")
    else:
        print("Aucun texte détecté par l'OCR")

    # Save the results as JSON; falsy translations are stored as "".
    results = {
        "filename": os.path.basename(image_path),
        "ocr_fr": ocr_fr,
        "ocr_en": ocr_en or "",
        "ocr_en_back_fr": ocr_en_back_fr or "",
    }

    output_file = f"ocr_test_{os.path.basename(image_path)}.json"
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

    print(f"\nRésultats enregistrés dans {output_file}")


if __name__ == "__main__":
    args = parse_args()
    test_ocr_traduction(args.image_path, args.verbose, args.info)
|
||||
BIN
test_ocr_image.png
Normal file
BIN
test_ocr_image.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.0 KiB |
232
test_ocr_to_image_sorter.py
Normal file
232
test_ocr_to_image_sorter.py
Normal file
@ -0,0 +1,232 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import json
|
||||
import time
|
||||
from PIL import Image
|
||||
import pytesseract
|
||||
from utils.ocr_utils import extraire_texte_fr, pretraiter_image
|
||||
from utils.translate_utils import fr_to_en, en_to_fr
|
||||
from llm_classes.llama_vision import LlamaVision
|
||||
from agents.llama_vision.agent_image_sorter import AgentImageSorter
|
||||
|
||||
def parse_args(argv=None):
    """Parse the command-line options of the full OCR + sorting test script.

    ``image_path`` is optional so that ``--test-dir DIR`` can be used on its
    own; previously argparse rejected that invocation because the positional
    argument was mandatory. At least one of the two must be supplied.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) lets argparse read ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with ``image_path`` (str or None), ``verbose``,
        ``only_ocr``, ``skip_llm`` (bools) and ``test_dir`` (str or None).

    Raises:
        SystemExit: Via ``parser.error`` when neither ``image_path`` nor
            ``--test-dir`` is given (or on any other argparse error).
    """
    parser = argparse.ArgumentParser(description="Test complet OCR + Traduction + Tri d'images")
    parser.add_argument("image_path", nargs="?", help="Chemin vers l'image à analyser")
    parser.add_argument("--verbose", "-v", action="store_true", help="Mode verbeux")
    parser.add_argument("--only-ocr", "-o", action="store_true", help="Tester uniquement l'OCR")
    parser.add_argument("--skip-llm", "-s", action="store_true", help="Sauter l'appel au LLM (pour tester rapidement OCR)")
    parser.add_argument("--test-dir", "-d", help="Tester toutes les images d'un répertoire")
    args = parser.parse_args(argv)

    # Without a directory the script processes a single image, so a path is
    # required in that case.
    if not args.test_dir and not args.image_path:
        parser.error("image_path est requis sauf si --test-dir est fourni")
    return args
|
||||
|
||||
def test_image(image_path, verbose=False, only_ocr=False, skip_llm=False):
    """Run the OCR -> translation -> LLM sorting chain on a single image.

    Each stage is timed and reported on stdout. The complete result is also
    written to ``complete_test_<basename>.json`` in the current directory,
    but only when all three stages run (no file is written for the
    ``only_ocr`` / ``skip_llm`` early exits).

    Args:
        image_path: Path of the image to process.
        verbose: Accepted for CLI compatibility; not used by this function.
        only_ocr: Stop after the OCR stage.
        skip_llm: Stop after the translation stage.

    Returns:
        dict describing the stages that ran (texts and timings), or None when
        the file is missing or cannot be opened as an image.
    """
    t_begin = time.time()

    print(f"\n=== TEST DE L'IMAGE: {os.path.basename(image_path)} ===")

    if not os.path.exists(image_path):
        print(f"Erreur: Le fichier {image_path} n'existe pas")
        return None

    # Basic image information; an unreadable image ends the test here.
    try:
        with Image.open(image_path) as picture:
            print(f"Format: {picture.format}")
            print(f"Mode: {picture.mode}")
            print(f"Taille: {picture.size}")
    except Exception as err:
        print(f"Erreur lors de l'analyse de l'image: {err}")
        return None

    # ---- Stage 1: OCR -------------------------------------------------
    print("\n=== ÉTAPE 1: OCR ===")
    t_ocr = time.time()

    # Plain Tesseract on the untouched image, as a baseline.
    print("OCR sur image originale...")
    try:
        with Image.open(image_path) as picture:
            ocr_fr_original = pytesseract.image_to_string(picture, lang="fra").strip()
    except Exception as err:
        print(f"Erreur OCR image originale: {err}")
        ocr_fr_original = ""

    if not ocr_fr_original:
        print("Aucun texte détecté sur l'image originale")
    else:
        print(f"Texte détecté (original): {ocr_fr_original}")

    # Project helper (does its own pre-processing, per the original comment).
    print("\nOCR avec image prétraitée...")
    try:
        ocr_fr = extraire_texte_fr(image_path)
        if not ocr_fr:
            print("Aucun texte détecté après prétraitement")
        else:
            print(f"Texte détecté (prétraité): {ocr_fr}")
    except Exception as err:
        print(f"Erreur lors de l'OCR prétraité: {err}")
        ocr_fr = ""

    ocr_elapsed = time.time() - t_ocr
    print(f"Temps OCR: {ocr_elapsed:.2f} secondes")

    file_name = os.path.basename(image_path)

    if only_ocr:
        elapsed = time.time() - t_begin
        print(f"\nTemps total: {elapsed:.2f} secondes")
        return {
            "image": file_name,
            "ocr_fr_original": ocr_fr_original,
            "ocr_fr": ocr_fr,
            "processing_time": {"ocr": ocr_elapsed, "total": elapsed},
        }

    # ---- Stage 2: translation -----------------------------------------
    print("\n=== ÉTAPE 2: TRADUCTION ===")
    t_translate = time.time()

    ocr_en = ""
    if not ocr_fr:
        print("Aucun texte à traduire")
    else:
        try:
            ocr_en = fr_to_en(ocr_fr)
            print(f"Traduction EN: {ocr_en}")

            # Round-trip back to French as a sanity check (display only).
            round_trip = en_to_fr(ocr_en)
            print(f"Traduction retour FR: {round_trip}")
        except Exception as err:
            print(f"Erreur lors de la traduction: {err}")

    translate_elapsed = time.time() - t_translate
    print(f"Temps traduction: {translate_elapsed:.2f} secondes")

    if skip_llm:
        elapsed = time.time() - t_begin
        print(f"\nTemps total: {elapsed:.2f} secondes")
        return {
            "image": file_name,
            "ocr_fr": ocr_fr,
            "ocr_en": ocr_en,
            "processing_time": {
                "ocr": ocr_elapsed,
                "translation": translate_elapsed,
                "total": elapsed,
            },
        }

    # ---- Stage 3: LLM analysis ----------------------------------------
    print("\n=== ÉTAPE 3: ANALYSE LLM ===")
    t_llm = time.time()

    llm_result = None
    try:
        model = LlamaVision()
        sorter = AgentImageSorter(model)
        llm_result = sorter.executer(image_path)

        if not llm_result:
            print("Erreur: Aucun résultat retourné par l'agent")
        else:
            print(f"Image pertinente: {llm_result['is_relevant']}")
            print(f"Raison: {llm_result['reason']}")
    except Exception as err:
        print(f"Erreur lors de l'analyse LLM: {err}")

    llm_elapsed = time.time() - t_llm
    print(f"Temps LLM: {llm_elapsed:.2f} secondes")

    elapsed = time.time() - t_begin
    print(f"\nTemps total: {elapsed:.2f} secondes")

    # Full report, returned and persisted.
    timings = {
        "ocr": ocr_elapsed,
        "translation": translate_elapsed,
        "llm": llm_elapsed,
        "total": elapsed,
    }
    report = {
        "image": file_name,
        "ocr_fr": ocr_fr,
        "ocr_en": ocr_en,
        "llm_result": llm_result,
        "processing_time": timings,
    }

    output_file = f"complete_test_{file_name}.json"
    with open(output_file, "w", encoding="utf-8") as handle:
        json.dump(report, handle, ensure_ascii=False, indent=2)
    print(f"Résultat enregistré dans: {output_file}")

    return report
|
||||
|
||||
def test_directory(dir_path, verbose=False, only_ocr=False, skip_llm=False):
    """
    Run the image test on every image found under a directory.

    Walks ``dir_path`` recursively, calls ``test_image`` on each file whose
    name ends with a known image extension (case-insensitive) and keeps the
    truthy results. When at least one result was kept, the whole list is
    written to ``batch_test_results.json`` in the current working directory.

    Args:
        dir_path: Directory to scan.
        verbose: Forwarded unchanged to ``test_image``.
        only_ocr: Forwarded unchanged to ``test_image``.
        skip_llm: Forwarded unchanged to ``test_image``.

    Returns:
        The list of collected results, or ``None`` when ``dir_path`` is not
        an existing directory.
    """
    # isdir() is False for missing paths too, so one check covers both cases.
    if not os.path.isdir(dir_path):
        print(f"Le répertoire {dir_path} n'existe pas")
        return

    # File-name suffixes treated as images (str.endswith accepts a tuple).
    image_suffixes = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.tif', '.webp')

    # Collect every matching file before processing anything.
    image_files = [
        os.path.join(folder, name)
        for folder, _, names in os.walk(dir_path)
        for name in names
        if name.lower().endswith(image_suffixes)
    ]
    total = len(image_files)

    print(f"=== TEST DE {total} IMAGES DANS {dir_path} ===")

    results = []
    for position, image_path in enumerate(image_files, start=1):
        print(f"\nImage {position}/{total}: {os.path.basename(image_path)}")
        outcome = test_image(image_path, verbose, only_ocr, skip_llm)
        if outcome:
            results.append(outcome)

    # Write the combined report only when there is something to report.
    if results:
        output_file = "batch_test_results.json"
        with open(output_file, "w", encoding="utf-8") as handle:
            json.dump(results, handle, ensure_ascii=False, indent=2)
        print(f"\nRésultats combinés enregistrés dans: {output_file}")

    return results
|
||||
|
||||
def main():
    """
    Command-line entry point.

    Parses the arguments with ``parse_args`` and then tests either a whole
    directory (when ``args.test_dir`` is set) or the single image given by
    ``args.image_path``.

    Returns:
        Always 0; the caller uses it as the process exit status.
    """
    args = parse_args()

    if not args.test_dir:
        # Single-image mode
        test_image(args.image_path, args.verbose, args.only_ocr, args.skip_llm)
        return 0

    # Directory mode
    test_directory(args.test_dir, args.verbose, args.only_ocr, args.skip_llm)
    return 0
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    sys.exit(exit_status)
|
||||
@ -1,23 +1,101 @@
|
||||
# utils/ocr_utils.py
|
||||
|
||||
from PIL import Image
|
||||
from PIL import Image, ImageEnhance
|
||||
import pytesseract
|
||||
import logging
|
||||
import os
|
||||
import io
|
||||
|
||||
logger = logging.getLogger("OCR")
|
||||
|
||||
def pretraiter_image(image_path: str) -> Image.Image:
    """
    Preprocess an image to improve OCR quality.

    The image is converted to grayscale, its contrast and then its sharpness
    are enhanced by a factor of 1.5, and it is upscaled with LANCZOS
    resampling when either side is shorter than 1000 pixels.

    Args:
        image_path: Path of the image file.

    Returns:
        The preprocessed image. If any preprocessing step fails, the error is
        logged and an in-memory copy of the unmodified original image is
        returned instead.

    Raises:
        Any exception raised while re-opening the file for the fallback
        (for example when the file cannot be opened at all).
    """
    try:
        with Image.open(image_path) as source:
            # Convert to grayscale unless the image already is
            img = source if source.mode == 'L' else source.convert('L')

            # Boost contrast, then sharpness (factor 1.5 each)
            img = ImageEnhance.Contrast(img).enhance(1.5)
            img = ImageEnhance.Sharpness(img).enhance(1.5)

            # Upscale small images; using the larger of the two ratios keeps
            # the aspect ratio and brings both sides to at least 1000 px.
            width, height = img.size
            if width < 1000 or height < 1000:
                ratio = max(1000 / width, 1000 / height)
                new_size = (int(width * ratio), int(height * ratio))
                img = img.resize(new_size, Image.Resampling.LANCZOS)

            return img
    except Exception as e:
        logger.error(f"Erreur lors du prétraitement de l'image {image_path}: {e}")
        # Fall back to the original image. Return a loaded copy from inside a
        # `with` block so the underlying file handle is closed here instead
        # of being leaked to the caller as a lazily-loaded image.
        with Image.open(image_path) as original:
            return original.copy()
|
||||
|
||||
def extraire_texte_fr(image_path: str) -> str:
    """
    Run French-language OCR on an image.

    The file is first checked for existence and readability, then
    preprocessed with ``pretraiter_image`` and passed to Tesseract. If OCR on
    the preprocessed image raises, a second attempt is made on the original
    image. When text was found, the preprocessed image is also saved under
    ``debug_ocr/`` for debugging (best effort).

    Args:
        image_path: Path of the image file.

    Returns:
        The extracted text with surrounding whitespace stripped, or an empty
        string when no text was detected, the file is missing or unreadable,
        or any error occurred. Every outcome is logged.
    """
    try:
        if not os.path.exists(image_path) or not os.access(image_path, os.R_OK):
            logger.warning(f"Image inaccessible ou introuvable: {image_path}")
            return ""

        logger.info(f"Traitement OCR pour {image_path}")

        # Tesseract options: page segmentation mode 3 (auto), OCR engine mode 3 (default)
        config = '--psm 3 --oem 3'

        # Preprocess the image
        img = pretraiter_image(image_path)
        logger.info(f"Image prétraitée: dimensions={img.size}, mode={img.mode}")

        # OCR, falling back to the untouched original image on failure
        try:
            texte = pytesseract.image_to_string(img, lang="fra", config=config)
        except Exception as ocr_err:
            logger.warning(f"Première tentative OCR échouée: {ocr_err}, tentative avec image originale")
            with Image.open(image_path) as original_img:
                texte = pytesseract.image_to_string(original_img, lang="fra", config=config)

        texte = texte.strip()

        if texte:
            # Keep the preprocessed image for debugging; a failure here must
            # not discard the OCR result, so it is only logged.
            try:
                debug_dir = "debug_ocr"
                os.makedirs(debug_dir, exist_ok=True)
                img_name = os.path.basename(image_path)
                img.save(os.path.join(debug_dir, f"pretreated_{img_name}"), format="JPEG")
                logger.info(f"Image prétraitée sauvegardée dans {debug_dir}/pretreated_{img_name}")
            except Exception as e:
                logger.warning(f"Impossible de sauvegarder l'image prétraitée: {e}")

            logger.info(f"OCR réussi [{image_path}] — {len(texte)} caractères: {texte[:100]}...")
        else:
            logger.warning(f"OCR vide (aucun texte détecté) pour {image_path}")

        return texte
    except Exception as e:
        logger.error(f"Erreur lors de l'OCR de {image_path}: {e}")
        return ""
|
||||
|
||||
|
||||
@ -5,6 +5,7 @@ import json
|
||||
import os
|
||||
from datetime import datetime
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger("Translate")
|
||||
|
||||
@ -26,24 +27,81 @@ def en_to_fr(text: str) -> str:
|
||||
logger.error(f"Traduction EN->FR échouée: {e}")
|
||||
return ""
|
||||
|
||||
def determiner_repertoire_ticket(ticket_id: str) -> Optional[str]:
    """
    Locate the most recent extraction directory of a ticket.

    Looks inside ``output/ticket_<ticket_id>`` (relative to the current
    working directory) for sub-directories whose name starts with
    ``ticket_id`` and picks the one with the latest modification time.

    Args:
        ticket_id: The ticket code.

    Returns:
        The path of the most recently modified extraction directory, or
        ``None`` (after printing a message) when the ticket directory does
        not exist, is not a directory, or contains no matching extraction.
    """
    ticket_path = os.path.join("output", f"ticket_{ticket_id}")

    # isdir() rather than exists(): a stray regular file with that name must
    # yield None instead of making os.listdir() raise NotADirectoryError.
    if not os.path.isdir(ticket_path):
        print(f"Répertoire de ticket non trouvé: {ticket_path}")
        return None

    # Candidate extractions: sub-directories named "<ticket_id>..."
    extractions = [
        os.path.join(ticket_path, entry)
        for entry in os.listdir(ticket_path)
        if entry.startswith(ticket_id)
        and os.path.isdir(os.path.join(ticket_path, entry))
    ]

    if not extractions:
        print(f"Aucune extraction trouvée pour le ticket {ticket_id}")
        return None

    # Most recently modified extraction
    return max(extractions, key=os.path.getmtime)
|
||||
|
||||
def sauvegarder_ocr_traduction(
|
||||
image_path: str,
|
||||
ticket_id: str,
|
||||
ocr_fr: str,
|
||||
ocr_en: str,
|
||||
ocr_en_back_fr: str = "", # <- Ajout facultatif
|
||||
base_dir: str = "reports"
|
||||
ocr_en_back_fr: str = "",
|
||||
base_dir: Optional[str] = None # Utiliser Optional[str]
|
||||
) -> None:
|
||||
"""
|
||||
Sauvegarde les résultats OCR + TRAD en JSON + ajoute une ligne dans le fichier texte global.
|
||||
Inclut éventuellement une traduction EN → FR.
|
||||
Sauvegarde les résultats OCR + TRAD en JSON (par image) et
|
||||
ajoute une ligne dans un fichier texte global (append sécurisé).
|
||||
Utilise le répertoire de sortie output/ticket_X/X_YYYYMMDD_HHMMSS/X_rapports/pipeline
|
||||
pour la sauvegarde des données.
|
||||
"""
|
||||
try:
|
||||
image_name = os.path.basename(image_path)
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
rapport_dir = os.path.join(base_dir, ticket_id, "pipeline", "ocr_traduction")
|
||||
|
||||
# Déterminer le répertoire de sortie basé sur le ticket_id
|
||||
if not base_dir:
|
||||
# Utiliser le répertoire de sortie approprié dans output
|
||||
extraction_dir = determiner_repertoire_ticket(ticket_id)
|
||||
if not extraction_dir:
|
||||
# Fallback vers reports si impossible de trouver le répertoire
|
||||
base_dir = "reports"
|
||||
rapport_dir = os.path.join(base_dir, ticket_id, "pipeline", "ocr_traduction")
|
||||
else:
|
||||
# Utiliser le répertoire rapports du ticket
|
||||
rapports_dir = os.path.join(extraction_dir, f"{ticket_id}_rapports")
|
||||
rapport_dir = os.path.join(rapports_dir, "pipeline", "ocr_traduction")
|
||||
else:
|
||||
rapport_dir = os.path.join(base_dir, ticket_id, "pipeline", "ocr_traduction")
|
||||
|
||||
os.makedirs(rapport_dir, exist_ok=True)
|
||||
|
||||
# Sauvegarde JSON (1 par image, réécrit à chaque passage)
|
||||
json_path = os.path.join(rapport_dir, f"{image_name}.json")
|
||||
result = {
|
||||
"image_name": image_name,
|
||||
"ocr_fr": ocr_fr,
|
||||
@ -57,19 +115,22 @@ def sauvegarder_ocr_traduction(
|
||||
}
|
||||
}
|
||||
|
||||
# Fichier JSON par image
|
||||
with open(os.path.join(rapport_dir, f"{image_name}.json"), "w", encoding="utf-8") as f:
|
||||
with open(json_path, "w", encoding="utf-8") as f:
|
||||
json.dump(result, f, ensure_ascii=False, indent=2)
|
||||
logger.info(f"Sauvegarde JSON OCR/TRAD réussie pour {image_name}")
|
||||
|
||||
# Append texte global
|
||||
# Append TXT global sécurisé (évite l'écrasement)
|
||||
txt_path = os.path.join(rapport_dir, "ocr_traduction.txt")
|
||||
ligne = (
|
||||
f"{image_name}\n"
|
||||
f"[FR] {ocr_fr or '_'}\n"
|
||||
f"[EN] {ocr_en or '_'}\n"
|
||||
f"[EN→FR] {ocr_en_back_fr or '_'}\n\n"
|
||||
)
|
||||
with open(os.path.join(rapport_dir, "ocr_traduction.txt"), "a", encoding="utf-8") as f:
|
||||
with open(txt_path, "a", encoding="utf-8") as f:
|
||||
f.write(ligne)
|
||||
|
||||
logger.info(f"Ligne ajoutée dans ocr_traduction.txt pour {image_name}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Erreur sauvegarde OCR+TRAD pour {image_path}: {e}")
|
||||
logger.error(f"Erreur lors de la sauvegarde OCR+TRAD pour {image_path}: {e}")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user