# llm_ticket3/llm_classes/llama_vision.py
# Snapshot metadata: 2025-04-30 17:45:17 +02:00 — 133 lines, 3.9 KiB, Python
"""
Module for Llama Vision support.
Optimized for English-only mode, eliminating intermediate translations.
"""
import os
import json
import logging
import base64
import requests
from typing import Dict, Any
from .base_llm import BaseLLM
logger = logging.getLogger("LlamaVision")
class LlamaVision(BaseLLM):
    """
    Interface class for the Llama Vision model via its HTTP API (Ollama-style
    ``/api/generate`` endpoint). Optimized to work exclusively in English.

    All public methods return the model's text response, or an empty string
    on any failure (network error, non-2xx status, JSON parsing error).
    """

    def __init__(self, modele: str = "llama3.2-vision:90b-instruct-q8_0"):
        """
        Initialize the client for the given model tag.

        :param modele: Ollama model identifier to query.
        """
        super().__init__(modele)
        self.configurer()
        # Generous timeout (seconds): large vision models can take minutes.
        self.request_timeout = 600
        logger.info(f"Initializing LlamaVision with model {modele}")

    def urlBase(self) -> str:
        """Base URL of the Ollama server (trailing slash expected by urlFonction)."""
        return "http://217.182.105.173:11434/"

    def cleAPI(self) -> str:
        """API key — empty, as this Ollama endpoint requires none."""
        return ""

    def urlFonction(self) -> str:
        """Path of the generation endpoint, appended to urlBase()."""
        return "api/generate"

    def _encoder_image_base64(self, chemin_image: str) -> str:
        """
        Read an image file and return its base64-encoded content as UTF-8 text.

        :param chemin_image: Path to the image file.
        :return: Base64 string, or "" if the file could not be read/encoded.
        """
        try:
            with open(chemin_image, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode("utf-8")
        except Exception as e:
            logger.error(f"Erreur d'encodage Base64 sur {chemin_image}: {e}")
            return ""

    def _traiter_reponse(self, reponse: requests.Response) -> str:
        """
        Extract the model's text from an API response.

        :param reponse: HTTP response whose JSON body holds a "response" key.
        :return: The "response" field, or "" on missing key / parse failure.
        """
        try:
            data = reponse.json()
            return data.get("response", "")
        except Exception as e:
            logger.error(f"Erreur de parsing réponse JSON: {e}")
            return ""

    def _envoyer_requete(self, donnees: Dict[str, Any], contexte: str) -> str:
        """
        POST a prepared payload to the generation endpoint and parse the reply.

        Shared by interroger() and interroger_avec_image() — both previously
        duplicated this request/status-check/parse sequence.

        :param donnees: JSON payload for the /api/generate endpoint.
        :param contexte: Label used in the error log ("textuelle"/"multimodale").
        :return: Model response text, or "" on any failure.
        """
        try:
            url = self.urlBase() + self.urlFonction()
            response = requests.post(
                url,
                json=donnees,
                timeout=self.request_timeout
            )
            if response.status_code not in (200, 201):
                logger.error(f"Erreur API {response.status_code}: {response.text}")
                return ""
            return self._traiter_reponse(response)
        except Exception as e:
            logger.error(f"Erreur lors de l'interrogation {contexte} : {e}")
            return ""

    def interroger(self, prompt: str) -> str:
        """
        Query Llama Vision with a text-only prompt (no image).

        :param prompt: Prompt text to send.
        :return: Model response text, or "" on failure.
        """
        # Reuse the payload builder instead of rebuilding the dict inline.
        return self._envoyer_requete(self._preparer_contenu(prompt), "textuelle")

    def interroger_avec_image(self, chemin_image: str, prompt: str) -> str:
        """
        Query Llama Vision with an image via the `images` field.
        Plain text prompt; the image is sent alongside it (no <image> tag needed).

        :param chemin_image: Path to the image file on disk.
        :param prompt: Prompt text to send.
        :return: Model response text, or "" on failure.
        """
        if not os.path.exists(chemin_image):
            logger.error(f"Image introuvable: {chemin_image}")
            return ""
        base64_image = self._encoder_image_base64(chemin_image)
        if not base64_image:
            # Fix: previously an empty string was still sent as the image
            # payload when encoding failed; abort instead.
            return ""
        donnees = self._preparer_contenu(prompt)
        donnees["images"] = [base64_image]
        return self._envoyer_requete(donnees, "multimodale")

    def _preparer_contenu(self, prompt: str) -> Dict[str, Any]:
        """
        Build the base request payload for a text prompt (no image).

        :param prompt: Prompt text to send.
        :return: JSON-serializable payload for the /api/generate endpoint.
        """
        return {
            "model": self.modele,
            "prompt": prompt,
            "options": self.params,
            "stream": False
        }