From 1b8776552c9c1964bac79a1dc1a70c18f58695ea Mon Sep 17 00:00:00 2001 From: Ladebeze66 Date: Tue, 6 May 2025 17:09:05 +0200 Subject: [PATCH] 0605-17:09 --- CSV/T11123/T11123.csv | 5 + CSV/T11142/T11142.csv | 3 + CSV/T11143/T11143.csv | 4 + agents/RAG/agent_rag_responder.py | 39 +++ agents/llama_vision/agent_vision_ocr.bak2 | 223 ++++++++++++++++ prompts/test_prompt_ocr4.txt | 231 ++++++++++++++++ prompts/test_prompt_ocr5.txt | 306 ++++++++++++++++++++++ 7 files changed, 811 insertions(+) create mode 100644 CSV/T11123/T11123.csv create mode 100644 CSV/T11142/T11142.csv create mode 100644 CSV/T11143/T11143.csv create mode 100644 agents/RAG/agent_rag_responder.py create mode 100644 agents/llama_vision/agent_vision_ocr.bak2 create mode 100644 prompts/test_prompt_ocr4.txt create mode 100644 prompts/test_prompt_ocr5.txt diff --git a/CSV/T11123/T11123.csv b/CSV/T11123/T11123.csv new file mode 100644 index 0000000..f214e59 --- /dev/null +++ b/CSV/T11123/T11123.csv @@ -0,0 +1,5 @@ +ÉMETTEUR,TYPE,DATE,CONTENU,ÉLÉMENTS VISUELS +CLIENT,question,28/03/2025 15:00,Les parties d’ouvrage n’apparaissent plus.,"L'image montre une interface de gestion de chantier fonctionnelle, mais aucune mention explicite des ""parties d’ouvrage"" n'est visible." +SUPPORT,réponse,28/03/2025 15:59,"Nous venons d'appliquer un correctif sur votre site, les parties de chantier sont de nouveau accessibles.",L'image confirme que l'interface est accessible avec des champs modifiables et des boutons actifs. +CLIENT,question,28/03/2025 16:02,Je ne peux plus accéder à CBAO. +SUPPORT,réponse,28/03/2025 16:06,Votre site est bien accessible à l'adresse suivante : https://nob.brg-lab.com/.,"L'image confirme que le client ""NORD OUEST BETON"" est bien visible dans la liste des laboratoires." diff --git a/CSV/T11142/T11142.csv b/CSV/T11142/T11142.csv new file mode 100644 index 0000000..adb544d --- /dev/null +++ b/CSV/T11142/T11142.csv @@ -0,0 +1,3 @@ +ÉMETTEUR,TYPE,DATE,CONTENU,ÉLÉMENTS VISUELS +CLIENT,question,02/04/2025 15:52,Il est impossible de mettre des décimales sur la valeur de CO2/Tonne dans les caractéristiques des granulats de carrière.,Aucune image disponible +SUPPORT,réponse,03/04/2025 07:44,Nous constatons bien ce dysfonctionnement. Un ticket a été ouvert auprès de notre équipe de développement. Vous serez automatiquement informé de sa résolution.,Aucune image disponible diff --git a/CSV/T11143/T11143.csv b/CSV/T11143/T11143.csv new file mode 100644 index 0000000..ecaecb6 --- /dev/null +++ b/CSV/T11143/T11143.csv @@ -0,0 +1,4 @@ +ÉMETTEUR,TYPE,DATE,CONTENU,ÉLÉMENTS VISUELS +CLIENT,question,03/04/2025 08:34,"Bonjour, Je ne parviens pas à accéder au l’essai au bleu. Merci par avance pour votre. Cordialement",Essai au bleu de méthylène de méthylène (MB) - NF EN 933-9 (02-2022) +SUPPORT,réponse,03/04/2025 12:17,"Bonjour, Pouvez-vous vérifier si vous avez bien accès à la page suivante en l'ouvrant dans votre navigateur : https://zk1.brg-lab.com/ Voici ce que vous devriez voir affiché : Si ce n'est pas le cas, pouvez-vous me faire une capture d'écran de ce qui est affiché? Je reste à votre entière disposition pour toute information complémentaire. Cordialement,","Page d'accueil par défaut d'Apache Tomcat avec le message ""It works!""" +CLIENT,information,03/04/2025 12:21,"Bonjour, Le problème s’est résolu seul par la suite. Je vous remercie pour votre retour. Bonne journée PS : l’adresse fonctionne",Essai au bleu de méthylène de méthylène (MB) - NF EN 933-9 (02-2022) diff --git a/agents/RAG/agent_rag_responder.py b/agents/RAG/agent_rag_responder.py new file mode 100644 index 0000000..bd3523e --- /dev/null +++ b/agents/RAG/agent_rag_responder.py @@ -0,0 +1,39 @@ +# agents/RAG/agent_rag_responder.py + +from ..base_agent import BaseAgent + +class AgentRagResponder(BaseAgent): + def __init__(self, llm, ragflow): + """ + Initialise l'agent avec un LLM et un client Ragflow. + """ + super().__init__("AgentRagResponder", llm) + self.ragflow = ragflow + + def executer(self, question: str, top_k: int = 5) -> str: + """ + Effectue une recherche dans Ragflow et interroge le LLM avec le contexte. + """ + # 1. Recherche les documents similaires + documents = self.ragflow.rechercher(question, top_k=top_k) + + if not documents: + return "Aucun document pertinent trouvé dans la base de connaissance." + + # 2. Construit le contexte à envoyer au LLM + contexte = "\n---\n".join([doc.get("content", "") for doc in documents]) + + # 3. Construit le prompt final + prompt = ( + f"Voici des extraits de documents liés à la question suivante :\n\n" + f"{contexte}\n\n" + f"Réponds de manière précise et complète à la question suivante :\n{question}" + ) + + # 4. Interroge le LLM + reponse = self.llm.interroger(prompt) + + # 5. Ajoute à l'historique (facultatif) + self.ajouter_historique(prompt, reponse) + + return reponse diff --git a/agents/llama_vision/agent_vision_ocr.bak2 b/agents/llama_vision/agent_vision_ocr.bak2 new file mode 100644 index 0000000..a9557f8 --- /dev/null +++ b/agents/llama_vision/agent_vision_ocr.bak2 @@ -0,0 +1,223 @@ +import os +import json +import logging +from datetime import datetime +from typing import Optional + +from ..base_agent import BaseAgent +from ..utils.pipeline_logger import sauvegarder_donnees +from utils.ocr_cleaner import clean_text_with_profiles # AJOUT +from utils.ocr_utils import extraire_texte_fr + + +logger = logging.getLogger("AgentVisionOCR") + +class AgentVisionOCR(BaseAgent): + """ + Agent LlamaVision qui extrait du texte (OCR avancé) depuis une image. + """ + + def __init__(self, llm): + super().__init__("AgentVisionOCR", llm) + + # Configuration des paramètres du modèle + self.params = { + "stream": False, + "seed": 0, + #"stop_sequence": [], + "temperature": 1.3, + #"reasoning_effort": 0.5, + #"logit_bias": {}, + "mirostat": 0, + "mirostat_eta": 0.1, + "mirostat_tau": 5.0, + "top_k": 35, + "top_p": 0.85, + "min_p": 0.06, + "frequency_penalty": 0.15, + "presence_penalty": 0.1, + "repeat_penalty": 1.15, + "repeat_last_n": 128, + "tfs_z": 1.0, + "num_keep": 0, + "num_predict": 2048, + "num_ctx": 16384, + #"repeat_penalty": 1.1, + "num_batch": 2048, + #"mmap": True, + #"mlock": False, + #"num_thread": 4, + #"num_gpu": 1 + } + + # Prompt OCR optimisé + self.system_prompt = ("""You are tasked with performing an exhaustive OCR extraction on a technical or administrative web interface screenshot. + +GOAL: Extract **every legible piece of text**, even partially visible, faded, or cropped. Structure your output for clarity. Do not guess, but always report what is visible. + +FORMAT USING THESE CATEGORIES: + +1. PAGE STRUCTURE +- Page titles +- Interface headers or section labels +- Navigation bars or visible URLs + +2. IDENTIFIERS & DATA +- Operator or user names +- Sample IDs, test references +- Materials, dates, batch numbers + +3. INTERFACE ELEMENTS (MANDATORY SCAN) +- Button labels (e.g., RAZ, SAVE) +- Tabs (e.g., MATERIAL, OBSERVATIONS) +- Sidebars, form field labels + +4. SYSTEM MESSAGES +- Connection or server errors +- Domains, IP addresses, server notices + +5. METADATA +- Standard references (e.g., "NF EN ####-#") +- Version numbers, document codes, timestamps + +6. UNCLEAR / CROPPED TEXT +- Logos, partial lines (use “[...]” for truncated) +- Background/faded elements, labels not fully legible + +RULES: +- Preserve punctuation, case, accents exactly. +- Include duplicates if text appears more than once. +- Never skip faint or partial text; use “[...]” if incomplete. +- Even if cropped, report as much as possible from any UI region. + +This prompt is designed to generalize across all web portals, technical forms, or reports. Prioritize completeness over certainty. Do not ignore UI components or system messages. +""") + + self._configurer_llm() + self.resultats = [] + self.images_traitees = set() + logger.info("AgentVisionOCR initialisé avec prompt amélioré.") + + def _configurer_llm(self): + if hasattr(self.llm, "prompt_system"): + self.llm.prompt_system = self.system_prompt + if hasattr(self.llm, "configurer"): + self.llm.configurer(**self.params) + + def _extraire_ticket_id(self, image_path): + if not image_path: + return "UNKNOWN" + segments = image_path.replace('\\', '/').split('/') + for segment in segments: + if segment.startswith('T') and segment[1:].isdigit(): + return segment + if segment.startswith('ticket_T') and segment[8:].isdigit(): + return 'T' + segment[8:] + return "UNKNOWN" + + def executer(self, image_path: str, ocr_baseline: str = "", ticket_id: Optional[str] = None) -> dict: + image_path_abs = os.path.abspath(image_path) + image_name = os.path.basename(image_path) + + if image_path_abs in self.images_traitees: + logger.warning(f"[OCR-LLM] Image déjà traitée, ignorée: {image_name}") + print(f" AgentVisionOCR: Image déjà traitée, ignorée: {image_name}") + return { + "extracted_text": "DUPLICATE - Already processed", + "image_name": image_name, + "image_path": image_path_abs, + "ticket_id": ticket_id or self._extraire_ticket_id(image_path), + "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "source_agent": self.nom, + "is_duplicate": True + } + + self.images_traitees.add(image_path_abs) + logger.info(f"[OCR-LLM] Extraction OCR sur {image_name}") + print(f" AgentVisionOCR: Extraction OCR sur {image_name}") + + ticket_id = ticket_id or self._extraire_ticket_id(image_path) + + try: + if not os.path.exists(image_path): + raise FileNotFoundError(f"Image introuvable: {image_path}") + + if not hasattr(self.llm, "interroger_avec_image"): + raise RuntimeError("Le modèle ne supporte pas l'analyse d'images.") + # Etape 1: OCR br + # Interroger le modèle + response = self.llm.interroger_avec_image(image_path, self.system_prompt) + + if not response or "i cannot" in response.lower(): + raise ValueError("Réponse vide ou invalide du modèle") + + cleaned_text = clean_text_with_profiles(response.strip(), active_profiles=("ocr",)) + + model_name = getattr(self.llm, "pipeline_normalized_name", + getattr(self.llm, "modele", "llama3-vision-90b-instruct")) + model_name = model_name.replace(".", "-").replace(":", "-").replace("_", "-") + + result = { + "extracted_text": cleaned_text, + "image_name": image_name, + "image_path": image_path_abs, + "ticket_id": ticket_id, + "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "source_agent": self.nom, + "model_info": { + "model": model_name, + **self.params + } + } + + self.resultats.append(result) + logger.info(f"[OCR-LLM] OCR réussi ({len(cleaned_text)} caractères) pour {image_name}") + return result + + except Exception as e: + error_result = { + "extracted_text": "", + "image_name": image_name, + "image_path": image_path_abs, + "ticket_id": ticket_id or "UNKNOWN", + "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "source_agent": self.nom, + "error": str(e), + "model_info": { + "model": getattr(self.llm, "pipeline_normalized_name", "llama3-vision-90b-instruct"), + **self.params + } + } + self.resultats.append(error_result) + logger.error(f"[OCR-LLM] Erreur OCR pour {image_name}: {e}") + return error_result + + def sauvegarder_resultats(self, ticket_id: str = "T11143") -> None: + if not self.resultats: + logger.warning("[OCR-LLM] Aucun résultat à sauvegarder") + return + + resultats_dedupliques = {} + for resultat in self.resultats: + image_path = resultat.get("image_path") + if not image_path: + continue + if image_path not in resultats_dedupliques or \ + resultat.get("timestamp", "") > resultats_dedupliques[image_path].get("timestamp", ""): + resultats_dedupliques[image_path] = resultat + + resultats_finaux = list(resultats_dedupliques.values()) + + try: + logger.info(f"[OCR-LLM] Sauvegarde de {len(resultats_finaux)} résultats") + sauvegarder_donnees( + ticket_id=ticket_id, + step_name="ocr_llm", + data=resultats_finaux, + base_dir=None, + is_resultat=True + ) + self.resultats = [] + self.images_traitees = set() + except Exception as e: + logger.error(f"[OCR-LLM] Erreur sauvegarde résultats: {e}") diff --git a/prompts/test_prompt_ocr4.txt b/prompts/test_prompt_ocr4.txt new file mode 100644 index 0000000..34e359e --- /dev/null +++ b/prompts/test_prompt_ocr4.txt @@ -0,0 +1,231 @@ +Prompt de base: +You are tasked with performing a high-precision OCR extraction on a partially cropped screenshot of a technical web interface. + +GOAL: Extract **all visible and partially visible text** from the image, no matter how small, faint, or cropped. Structure the output clearly to maximize usability. + +FORMAT THE OUTPUT AS FOLLOWS: + +--- + +1. PAGE STRUCTURE +* Page title or headers (e.g., test names, document references) +* Section labels or grouping titles +* URL or navigation path (if visible) + +2. LABORATORY DATA +* Sample references or IDs (e.g., 25-00075) +* Material descriptions (e.g., Sable 0/2 C) +* Dates, operators (e.g., 02/04/2025 – BOLLÉE Victor) + +3. UI ELEMENTS +* Sidebar text, tabs, and form labels +* Buttons (e.g., RAZ, IMPRIMER, ENREGISTRER) +* Navigation or functional labels (e.g., ESSAI, SMQ) + +4. SYSTEM MESSAGES & ERRORS +* Any status or error message (e.g., "Impossible de trouver l'adresse IP du serveur de zkt1.brg-lab.com") +* Domain names, IPs, server paths + +5. METADATA +* Document standards (e.g., NF EN 933-9) +* Version/date (e.g., 02-2022) +* Any hidden identifiers, codes, or repeated numbers + +6. PERIPHERAL / UNCERTAIN TEXT +* Logos (e.g., BRG-LAB), watermarks, faded/blurred text +* Partially cropped lines: use “[...]” to indicate incomplete text + +RULES: +- Do NOT interpret or translate. +- Preserve all punctuation, accents, and casing. +- Include duplicated text if present multiple times. +- Mark unclear/partial lines explicitly. +- Report even faint or background text. + +This is a high-precision extraction. Output clean bullet points grouped by section. Stay fully factual. + +Résultats: +🔵 Résultat pour l'image output/ticket_T11143/T11143_20250422_084617/attachments/image_145435.png: +Texte extrait: +### 1\. Page Structure + +\- **Page title**: Essai au bleu de méthylène \(MB\) - NF EN 933-9 \((02\)-2022) + +\- **Header/Section labels**: + +\* BRG-LAB logo (top left) + +\* "Fournisseur Labo:" label with dropdown menu (right side) + +\- URL/path: zkt1.brg-lab.com (from the error message at the bottom) [](https://zk1.brg-lab.com) + + + +### 2\. Laboratory Data + +\- Sample reference: 25-00075 + +\- Material description: Sable 0/2 C + +\- Date and operator: 02/04/2025 - BOLLÉE Victor + + + +### 3\. UI Elements + +\- Sidebar text/tabs/form labels: + + + +\+ **Tab bar**: + +\# Essai | Matériel | Observations | SMQ + + + +\* Form fields: + + \+ "ESSAI" + + - N° de fiche \(=\) ... (partially visible) + + \+ "MATERIEL" / "Matériau": Sable 0/2 C + + \+ "OBSERVATIONS" with empty field + + \+ "SMQ" with an empty dropdown menu + + + +\* Buttons: RAZ, IMPRIMER, ENREGISTRER + + + +### 4\. System Messages & Errors + +\- Status/error message at the bottom of the page: + +Impossible de trouver l'adresse IP du serveur zk1.brg-lab.com. + + + +### 5\. Metadata + +\- Document standard/version/date: NF EN 933-9 \(02\)-2022 + + + +### 6. Peripheral/Uncertain Text + + + +\- BRG-LAB logo (top left) + +\- Faded/blurred text near buttons (right side) - partially readable words include "[...]tél", "dossier", and parts of what seems to be a form submission button or similar ("[...]" ) + + +Prompt corrigé sans éléments spécifiques: +"""You are tasked with performing a high-precision OCR extraction on a partially cropped screenshot of a technical web interface. + +GOAL: Extract **all visible and partially visible text** from the image, no matter how small, faint, or cropped. Structure the output clearly to maximize usability. + +FORMAT THE OUTPUT AS FOLLOWS: + +--- + +1. PAGE STRUCTURE +* Page title or headers (e.g., test names, document references) +* Section labels or grouping titles +* URL or navigation path (if visible) + +2. LABORATORY DATA +* Sample references or IDs +* Material descriptions +* Dates, operators + +3. UI ELEMENTS +* Sidebar text, tabs, and form labels +* Buttons +* Navigation or functional labels + +4. SYSTEM MESSAGES & ERRORS +* Any status or error message +* Domain names, IPs, server paths + +5. METADATA +* Document standards +* Version/date ) +* Any hidden identifiers, codes, or repeated numbers + +6. PERIPHERAL / UNCERTAIN TEXT +* Logos (e.g., BRG-LAB), watermarks, faded/blurred text +* Partially cropped lines: use “[...]” to indicate incomplete text + +RULES: +- Do NOT interpret or translate. +- Preserve all punctuation, accents, and casing. +- Include duplicated text if present multiple times. +- Mark unclear/partial lines explicitly. +- Report even faint or background text. + +This is a high-precision extraction. Output clean bullet points grouped by section. Stay fully factual. +""" +Résultats: +🔵 Résultat pour l'image output/ticket_T11143/T11143_20250422_084617/attachments/image_145435.png: +Texte extrait: +### **PAGE STRUCTURE** + +#### *Page title or headers* + +* Essai au bleu de méthylène (MB) - NF EN 933-9 (02-2022) +* BOLLEE Victor +* PRELEVEUR: 25/06/75 +* LABORATOIRE DE CONTROLE ET D'ANALYSE DES MATERIAUX + +#### *Section labels or grouping titles*: None visible. + + + +### **LABORATORY DATA** + +* Sample references or IDs: + * N/A, no samples were referenced in the image provided. +* Material descriptions: + * Echantillon n° 25-00073 réceptionné le 04/05/2025 par BOLLÉE Victo + (sample number partially obscured due to cropping.) + * Matériau Sable C7C - CARRETERE ADCEG +* Dates and operators: + + Date not legible; operator unclear. + + * Prélevée la 02/06/2025 p + +### **UI ELEMENTS** + + + +#### \_Sidebar text, tabs, and form labels: + +* ESSAI +* MATÉRIEL +* PORTFOLIO + +#### Buttons: None visible. + + + +### **SYSTEM MESSAGES & ERRORS** + +None apparent from the image provided. The partial nature of the screenshot precludes any conclusive assessment in this regard. + + + +### PERIPHERAL / UNCERTAIN TEXT: + + + +Logo or watermark: BRG-LAB (cropped due to formatting) + + + +Note: This extraction strictly adheres to visible content without interpretation. Some fields are left blank where data is partially obscured by cropping or not discernible from the given snapshot. \ No newline at end of file diff --git a/prompts/test_prompt_ocr5.txt b/prompts/test_prompt_ocr5.txt new file mode 100644 index 0000000..8c2cd84 --- /dev/null +++ b/prompts/test_prompt_ocr5.txt @@ -0,0 +1,306 @@ +You are tasked with performing a high-precision OCR extraction on a screenshot of a technical or administrative web interface. + +GOAL: Extract all visible and partially visible text — no matter how small, faint, or cropped. Remain strictly factual. Do not interpret, guess, or reword. + +📄 FORMAT THE OUTPUT USING THESE CATEGORIES: + +--- + +1. PAGE STRUCTURE +* Page title(s) +* Section or interface headers +* Visible URLs, tabs, or menu paths + +2. DATA & IDENTIFIERS +* Sample codes, test references, user/operator names +* Material or item descriptions +* Dates, times, unique identifiers + +3. INTERFACE ELEMENTS +* Button labels +* Tab names +* Sidebar/menu content +* Field or dropdown labels + +4. SYSTEM MESSAGES & ERRORS +* Status messages, warnings, or connection errors +* Domain names, IPs, server notices + +5. METADATA +* Version numbers, standard references, document codes +* Any duplicated text or footer content + +6. UNCLEAR OR CROPPED TEXT +* Logos, watermarks, truncated words or symbols +* Use “[...]” to mark incomplete or partially cropped text + +--- + +RULES: +- Do not translate or paraphrase. +- Preserve original casing, spelling, punctuation. +- Include repeated elements as they appear. +- Report faint or background text if legible. +- Leave blank sections if no relevant text is found. + +This prompt is designed to work across a wide range of web interfaces, dashboards, and structured forms. Output clearly grouped bullet points per section. + +Résultats: +🔵 Paramètres actifs LLM: +{ + "temperature": 1.3, + "top_p": 0.85, + "presence_penalty": 0.1, + "frequency_penalty": 0.15, + "stop": [], + "stream": false, + "n": 1, + "seed": 0, + "mirostat": 0, + "mirostat_eta": 0.1, + "mirostat_tau": 5.0, + "top_k": 35, + "min_p": 0.06, + "repeat_penalty": 1.15, + "repeat_last_n": 128, + "tfs_z": 1.0, + "num_keep": 0, + "num_predict": 2048, + "num_ctx": 16384, + "num_batch": 2048 +} + AgentVisionOCR: Extraction OCR sur image_145435.png + +🔵 Résultat pour l'image output/ticket_T11143/T11143_20250422_084617/attachments/image_145435.png: +Texte extrait: +### 1) PAGE STRUCTURE + +* **Page title:** Essai au bleu de méthylène (MB) +* **Section/Interface headers:** + * BRG-LAB +* **Visible URLs, tabs, menu paths:** + * No visible URLs are displayed in the image. + +### 2) DATA & IDENTIFIERS + +* **Sample codes/test references/user/operator names:** + * BOLLEE Victor + * Echantillon n° 25-00075 réceptionné le 02/04/2025 par BOLLEE Victor. +* **Material/item descriptions:** + * Blue Methylene MB Testing Report + +**Note:** There is no mention of dates or times within the given page snippet. Additionally, there seems to be no unique identifiers on this web page interface screenshot provided. + +### 3) INTERFACE ELEMENTS + +This section cannot be determined based on the information provided in the image. + +### 4) SYSTEM MESSAGES & ERRORS + +There are no system messages or errors present in this snapshot as it only provides a view of what appears to be an itemized report rather than any kind of error message. + +### 5) METADATA + +No metadata elements can be identified from the image alone such as version numbers standard references document codes etcetera without having access full context surrounding these items listed under "BRG-LAB". As far we know all text shown here pertains directly related subject matter topic discussed above thus falling outside scope defined categories listed above according guidelines outlined prompt. + +Nouveau prompt: +You are tasked with performing a high-precision OCR extraction on a screenshot of a technical or administrative web interface. + +GOAL: Extract all visible and partially visible text — no matter how small, faint, or cropped. Remain strictly factual. Do not interpret, guess, or reword. + +📄 FORMAT THE OUTPUT USING THESE CATEGORIES: + +--- + +1. PAGE STRUCTURE +* Page titles (e.g., test names or document references) +* Interface headers, menus or section titles +* Navigation breadcrumbs, web paths, or visible URLs + +2. DATA & IDENTIFIERS +* Sample numbers, user names, dates +* Material types or item descriptions +* Unique codes or test references + +3. INTERFACE ELEMENTS +* Button labels (e.g., RESET, PRINT, SAVE) +* Tabs or navigation elements (e.g., RESULTS, MATERIAL, OBSERVATIONS) +* Form field labels or sidebar sections + +4. SYSTEM MESSAGES & ERRORS +* Status or connection errors (e.g., “unable to connect”, IP/domain errors) +* Visible domain names, system paths + +5. METADATA +* Document codes, version numbers, dates (e.g., standard formats like “NF EN ####-#”) +* Any repeated or footer elements + +6. UNCLEAR / CROPPED TEXT +* Logos, watermarks, or faded UI elements +* Truncated words or text: use “[...]” to indicate partial content + +--- + +RULES: +- Do not interpret or translate. +- Preserve all accents, casing, punctuation. +- Include all duplicates or repeated labels. +- Capture system messages even if incomplete or faded. +- Leave a section empty only if no corresponding text is found. + +Output a clean, bullet-pointed list per section. Be exhaustive and literal. + +Résultats: +🔵 Paramètres actifs LLM: +{ + "temperature": 1.3, + "top_p": 0.85, + "presence_penalty": 0.1, + "frequency_penalty": 0.15, + "stop": [], + "stream": false, + "n": 1, + "seed": 0, + "mirostat": 0, + "mirostat_eta": 0.1, + "mirostat_tau": 5.0, + "top_k": 35, + "min_p": 0.06, + "repeat_penalty": 1.15, + "repeat_last_n": 128, + "tfs_z": 1.0, + "num_keep": 0, + "num_predict": 2048, + "num_ctx": 16384, + "num_batch": 2048 +} + AgentVisionOCR: Extraction OCR sur image_145435.png + +🔵 Résultat pour l'image output/ticket_T11143/T11143_20250422_084617/attachments/image_145435.png: +Texte extrait: +### Page Structure +Page title: Essai au bleu de méthylène (MB) - NF EN 933-9 (02-2022) +Navigation breadcrumbs/path: none visible + +### Data & Identifiers +Sample numbers/usernames/dates/material types/item descriptions/unique codes/test references: +none are clearly readable in the provided image snippet; however, it appears there might be data within tables or sections not fully captured in this view. + +### Interface Elements +Button labels/tabs/navigation elements/form field labels/sidebar sections: None explicitly identifiable from the given portion of the interface screenshot. + +### System Messages and Errors +Status/connection errors/domain names/system paths: None evident from the visible part of the page. + +### Metadata +Document codes/version numbers/dates/repeated/footer elements: +NF EN 933-9 is mentioned which suggests a European standard reference but lacks specifics like date without further context. No version number, repeated text, or footer content is visible due to cropping. + +### Unclear/Cropped Text +Logos/watermarks/faded UI elements/truncated words/text with “[...]” indicating partial content: +Due to significant portions being cropped out by the edges of this image snippet, several pieces of information are truncated including what seems to be a document title/standard reference and possibly interface labels or data entries. The full text for these items cannot be determined from the provided visual alone. + +Autre prompt: +You are tasked with performing an exhaustive OCR extraction on a technical or administrative web interface screenshot. + +GOAL: Extract **every legible piece of text**, even partially visible, faded, or cropped. Structure your output for clarity. Do not guess, but always report what is visible. + +📄 FORMAT USING THESE CATEGORIES: + +--- + +1. PAGE STRUCTURE +- Page titles +- Interface headers or section labels +- Navigation bars or visible URLs + +2. IDENTIFIERS & DATA +- Operator or user names +- Sample IDs, test references +- Materials, dates, batch numbers + +3. INTERFACE ELEMENTS (MANDATORY SCAN) +- Button labels (e.g., RAZ, SAVE) +- Tabs (e.g., MATERIAL, OBSERVATIONS) +- Sidebars, form field labels + +4. SYSTEM MESSAGES +- Connection or server errors +- Domains, IP addresses, server notices + +5. METADATA +- Standard references (e.g., "NF EN ####-#") +- Version numbers, document codes, timestamps + +6. UNCLEAR / CROPPED TEXT +- Logos, partial lines (use “[...]” for truncated) +- Background/faded elements, labels not fully legible + +--- + +RULES: +- Preserve punctuation, case, accents exactly. +- Include duplicates if text appears more than once. +- Never skip faint or partial text; use “[...]” if incomplete. +- Even if cropped, report as much as possible from any UI region. + +This prompt is designed to generalize across all web portals, technical forms, or reports. Prioritize completeness over certainty. Do not ignore UI components or system messages. + +Résultats: +🔵 Paramètres actifs LLM: +{ + "temperature": 1.3, + "top_p": 0.85, + "presence_penalty": 0.1, + "frequency_penalty": 0.15, + "stop": [], + "stream": false, + "n": 1, + "seed": 0, + "mirostat": 0, + "mirostat_eta": 0.1, + "mirostat_tau": 5.0, + "top_k": 35, + "min_p": 0.06, + "repeat_penalty": 1.15, + "repeat_last_n": 128, + "tfs_z": 1.0, + "num_keep": 0, + "num_predict": 2048, + "num_ctx": 16384, + "num_batch": 2048 +} + AgentVisionOCR: Extraction OCR sur image_145435.png + +🔵 Résultat pour l'image output/ticket_T11143/T11143_20250422_084617/attachments/image_145435.png: +Texte extrait: +### **Page Structure:** + +* Page title: "Essai au bleu de méthylène (MB) - NF EN 933-9 (02-2022)" +* Interface header: "RG-LAB" +* Navigation bar/visible URL: Not visible +* Sidebars/form field labels: + * "MATERIEL" + * "OBSERVATIONS" + +### **Identifiers & Data:** + +No legible identifiers/data present in the image. + +### **Interface Elements:** + +* Button labels: None fully visible. One partially cropped button appears to start with an ellipsis "...". + +### **System Messages** + +None are apparent from the interface elements shown, although partial text could suggest a server message or error code ("[...]", "[...]"). + +### **Metadata** + +* Standard references: NF EN 933-9 (02-2022) + +### **Unclear/Cropped Text**: + +The lower section contains a faded URL and some metadata fields that appear not to be filled out or have been intentionally hidden for privacy/security reasons ("[...]"). The top left corner shows part of what might be another standard reference or version number ("RG-LAB") but is too cropped to interpret clearly. + +Nouveau prompt: \ No newline at end of file