llm_ticket3/llm_classes/llama_vision.py
2025-04-29 10:04:52 +02:00

125 lines
3.7 KiB
Python

"""
Module for Llama Vision support.
Optimized for English-only mode, eliminating intermediate translations.
"""
import os
import json
import logging
import time
import base64
import requests
from typing import Dict, Any, Optional, List, Union
from PIL import Image
import io
from datetime import datetime, timedelta
from .base_llm import BaseLLM
logger = logging.getLogger("LlamaVision")
class LlamaVision(BaseLLM):
"""
Interface class with Llama Vision model via its API.
Optimized to work exclusively in English.
"""
def __init__(self, modele: str = "llama3.2-vision:90b-instruct-q8_0"):
    """
    Initialize the Llama Vision client.

    Args:
        modele: Name of the Ollama model to query.
    """
    super().__init__(modele)
    # Low temperature / top_p favour deterministic, focused answers,
    # in line with the module's English-only optimization.
    self.configurer(
        temperature=0.2,
        top_p=0.8,
        max_tokens=4000
    )
    # Request timeout in seconds — large vision models can be slow to reply.
    self.request_timeout = 600
    # Fixed: previous message said "(multilingual mode)", contradicting the
    # documented English-only mode of this class; also use lazy %-style args.
    logger.info("Initializing LlamaVision with model %s (English-only mode)", modele)
def urlBase(self) -> str:
    """Base URL of the remote Ollama server (trailing slash included)."""
    host = "217.182.105.173"
    port = 11434
    return f"http://{host}:{port}/"
def cleAPI(self) -> str:
    """API key for the backend; Ollama requires none, so this stays empty."""
    return ""
def urlFonction(self) -> str:
    """Path of the Ollama text-generation endpoint, relative to urlBase()."""
    endpoint = "api/generate"
    return endpoint
def _encoder_image_base64(self, chemin_image: str) -> str:
try:
with open(chemin_image, "rb") as image_file:
encoded = base64.b64encode(image_file.read()).decode("utf-8")
return encoded
except Exception as e:
logger.error(f"Erreur d'encodage Base64 sur {chemin_image}: {e}")
return ""
def _preparer_contenu(self, question: str) -> Dict[str, Any]:
return {
"model": self.modele,
"prompt": question,
"options": self.params,
"stream": False
}
def _traiter_reponse(self, reponse: requests.Response) -> str:
"""
Processes and returns the response provided by Ollama.
"""
data = reponse.json()
return data.get("response", "")
def interroger_avec_image(self, chemin_image: str, prompt: str) -> str:
    """
    Query the Llama Vision model with a real image via multipart/form-data.

    Args:
        chemin_image: Path of the image file to upload.
        prompt: Textual instruction sent alongside the image.

    Returns:
        The "text" field of the JSON reply, or "" on any failure
        (missing file, non-2xx HTTP status, network/parsing exception).
    """
    if not os.path.exists(chemin_image):
        logger.error(f"Image introuvable: {chemin_image}")
        return ""
    try:
        # The file handle must stay open for the duration of the POST,
        # since requests streams it as the multipart "image" part.
        with open(chemin_image, "rb") as f:
            fichiers = {
                "image": (os.path.basename(chemin_image), f, "application/octet-stream")
            }
            donnees = {
                "prompt": prompt,
                "model": self.modele,
                # NOTE(review): generation params are flattened into the form
                # fields here, while _preparer_contenu nests them under
                # "options" — confirm which shape the endpoint expects.
                **self.params
            }
            logger.info(f"[LlamaVision] Envoi image {chemin_image} au modèle {self.modele}")
            # NOTE(review): "v1/vision" is hard-coded instead of reusing
            # urlFonction() ("api/generate"); stock Ollama does not document a
            # "v1/vision" route — verify against the actual server deployment.
            url = self.urlBase() + "v1/vision"
            response = requests.post(
                url,
                files=fichiers,
                data=donnees,
                timeout=self.request_timeout
            )
            if response.status_code not in [200, 201]:
                logger.error(f"Erreur API {response.status_code}: {response.text}")
                return ""
            resultats = response.json()
            # A missing "text" key degrades to "" rather than raising.
            return resultats.get("text", "")
    except Exception as e:
        # Broad catch keeps this a best-effort call: any failure is logged
        # and reported as "" instead of propagating to the caller.
        logger.error(f"Erreur lors de l'interrogation multimodale: {e}")
        return ""