"""
Module for Llama Vision support.

Optimized for English-only mode, eliminating intermediate translations.
"""

import base64
import io
import json
import logging
import os
import time
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional, Union

import requests
from PIL import Image

from .base_llm import BaseLLM

logger = logging.getLogger("LlamaVision")


class LlamaVision(BaseLLM):
    """
    Interface class with Llama Vision model via its API.

    Optimized to work exclusively in English. Talks to an Ollama server:
    text prompts and image prompts both go through the ``api/generate``
    endpoint as JSON.
    """

    def __init__(self, modele: str = "llama3.2-vision:90b-instruct-q8_0"):
        """
        Create the client and apply the default generation settings.

        Args:
            modele: Name of the model, forwarded to ``BaseLLM.__init__``.
        """
        super().__init__(modele)

        # Default sampling configuration (low temperature for stable output).
        # `configurer` is not defined in this class — presumably inherited
        # from BaseLLM and stored in `self.params`; confirm against BaseLLM.
        self.configurer(
            temperature=0.2,
            top_p=0.8,
            max_tokens=4000,
        )

        # Request timeout in seconds (large models can take minutes to answer).
        self.request_timeout = 600

        # NOTE(review): the message says "multilingual mode" while the module
        # and class docstrings say English-only — confirm which one is right.
        logger.info(f"Initializing LlamaVision with model {modele} (multilingual mode)")

    def urlBase(self) -> str:
        """
        Returns the base URL of the Ollama API (with a trailing slash).
        """
        return "http://217.182.105.173:11434/"

    def cleAPI(self) -> str:
        """
        Returns the API key: empty, Ollama doesn't require one by default.
        """
        return ""

    def urlFonction(self) -> str:
        """
        Returns the specific Ollama URL path for generating a response.
        """
        return "api/generate"

    def _encoder_image_base64(self, chemin_image: str) -> str:
        """
        Read an image file and return its content as a Base64 string.

        Args:
            chemin_image: Path of the image file to encode.

        Returns:
            The Base64 text (UTF-8 decoded), or "" if the file could not be
            read or encoded; the error is logged.
        """
        try:
            with open(chemin_image, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode("utf-8")
        except Exception as e:
            logger.error(f"Erreur d'encodage Base64 sur {chemin_image}: {e}")
            return ""

    def _preparer_contenu(self, question: str) -> Dict[str, Any]:
        """
        Build the JSON payload for a non-streaming generation request.

        Args:
            question: The prompt text.

        Returns:
            A dict with the model name, the prompt, the current parameters
            under ``options`` and ``stream`` disabled.
        """
        return {
            "model": self.modele,
            "prompt": question,
            "options": self.params,
            "stream": False,
        }

    def _traiter_reponse(self, reponse: requests.Response) -> str:
        """
        Processes and returns the response provided by Ollama.

        Args:
            reponse: HTTP response whose body is JSON.

        Returns:
            The value of the ``response`` field, or "" when it is absent.
        """
        data = reponse.json()
        return data.get("response", "")

    def interroger_avec_image(self, chemin_image: str, prompt: str) -> str:
        """
        Query the Llama Vision model with a prompt and an image file.

        The image is sent Base64-encoded in the ``images`` field of the JSON
        body posted to the generate endpoint, which is how the Ollama API
        accepts images; it has no multipart upload route.

        Args:
            chemin_image: Path of the image file to send.
            prompt: The question or instruction about the image.

        Returns:
            The text generated by the model, or "" on any failure (missing
            or unreadable image, HTTP error, network or decoding error).
            Failures are logged, never raised.
        """
        if not os.path.exists(chemin_image):
            logger.error(f"Image introuvable: {chemin_image}")
            return ""

        image_b64 = self._encoder_image_base64(chemin_image)
        if not image_b64:
            # Unreadable (already logged by the encoder) or empty file:
            # there is nothing to send.
            return ""

        # Same payload as a text request, plus the image list.
        contenu = self._preparer_contenu(prompt)
        contenu["images"] = [image_b64]

        try:
            logger.info(f"[LlamaVision] Envoi image {chemin_image} au modèle {self.modele}")
            response = requests.post(
                self.urlBase() + self.urlFonction(),
                json=contenu,
                timeout=self.request_timeout,
            )

            if response.status_code not in (200, 201):
                logger.error(f"Erreur API {response.status_code}: {response.text}")
                return ""

            return self._traiter_reponse(response)
        except Exception as e:
            # Best-effort boundary: callers expect "" rather than an exception.
            logger.error(f"Erreur lors de l'interrogation multimodale: {e}")
            return ""