# llm_ticket3/llm_classes/llama_vision.py

import base64
import os
from datetime import datetime, timedelta
from typing import Any, Dict

import requests

from .base_llm import BaseLLM


class LlamaVision(BaseLLM):
    """
    Optimized class for interacting with the Llama Vision API.
    """
def __init__(self, modele: str = "llama3.2-vision:90b-instruct-q8_0"):
super().__init__(modele)
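        # Conservative sampling defaults: low temperature for near-deterministic
        # answers, no nucleus truncation (top_p=1), and room for long analyses.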
self.params: Dict[str, Any] = {
"temperature": 0.2,
"top_p": 1,
"max_tokens": 4000,
"presence_penalty": 0,
"frequency_penalty": 0,
"stop": []
}

    def urlBase(self) -> str:
        """
        Return the base URL of the Llama API.
        """
        return "https://api.llama3.ai/v1/"

    def cleAPI(self) -> str:
        """
        Return the Llama API key, read from the LLAMA_API_KEY environment
        variable (empty string if unset).
        """
        return os.getenv("LLAMA_API_KEY", "")

    def urlFonction(self) -> str:
        """
        Return the endpoint path, appended to the base URL.
        """
        return "chat/completions"

    def _preparer_contenu(self, question: str) -> Dict[str, Any]:
        """
        Build the request payload for a text-only Llama query.
        """
        return {
            "model": self.modele,
            "messages": [
                {"role": "system", "content": self.prompt_system},
                {"role": "user", "content": question}
            ],
            # Splice in the sampling parameters (temperature, top_p, etc.).
            **self.params
        }
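    # For illustration: with the defaults above, _preparer_contenu("Hello")
    # yields (prompt_system is inherited from BaseLLM):
    #   {"model": "llama3.2-vision:90b-instruct-q8_0",
    #    "messages": [{"role": "system", "content": <prompt_system>},
    #                 {"role": "user", "content": "Hello"}],
    #    "temperature": 0.2, "top_p": 1, "max_tokens": 4000,
    #    "presence_penalty": 0, "frequency_penalty": 0, "stop": []}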

    def _traiter_reponse(self, reponse: requests.Response) -> str:
        """
        Extract and return the model's answer from the Llama response.
        """
        data = reponse.json()
        # The answer sits in the first entry of "choices", following the
        # chat-completions response shape this class parses.
        return data["choices"][0]["message"]["content"]

    def _encoder_image_base64(self, image_path: str) -> str:
        """
        Encode an image as a base64 data URL for the Llama Vision API.
        """
        with open(image_path, "rb") as image_file:
            encoded = base64.b64encode(image_file.read()).decode("utf-8")
        ext = os.path.splitext(image_path)[1].lower().lstrip(".")
        if ext == "jpg":
            ext = "jpeg"  # "image/jpg" is not a registered MIME type
        mime = f"image/{ext}" if ext in ("png", "jpeg", "webp") else "image/jpeg"
        return f"data:{mime};base64,{encoded}"

    def interroger_avec_image(self, image_path: str, question: str) -> str:
        """
        Query the Llama Vision model with an image and a question.

        Args:
            image_path: Path to the image to analyze
            question: Question or instructions for the analysis

        Returns:
            The model's answer to the question
        """
        url = self.urlBase() + self.urlFonction()
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.cleAPI()}"
        }
        try:
            encoded_image = self._encoder_image_base64(image_path)
            contenu = {
                "model": self.modele,
                "messages": [
                    {"role": "system", "content": self.prompt_system},
                    {
                        "role": "user",
                        # Multimodal message: the question as text plus the
                        # image as a base64 data URL.
                        "content": [
                            {"type": "text", "text": question},
                            {"type": "image_url", "image_url": {"url": encoded_image}}
                        ]
                    }
                ],
                **self.params
            }
            # Time the round trip to the API.
            self.heureDepart = datetime.now()
            response = requests.post(url=url, headers=headers, json=contenu, timeout=180)
            self.heureFin = datetime.now()
            self.dureeTraitement = self.heureFin - self.heureDepart
            if response.status_code in (200, 201):
                self.reponseErreur = False
                return self._traiter_reponse(response)
            else:
                self.reponseErreur = True
                return f"API error ({response.status_code}): {response.text}"
        except Exception as e:
            # The failure may happen before heureDepart is set (e.g. while
            # reading or encoding the image), so guard the duration computation.
            self.heureFin = datetime.now()
            if self.heureDepart is not None:
                self.dureeTraitement = self.heureFin - self.heureDepart
            else:
                self.dureeTraitement = timedelta(0)
            self.reponseErreur = True
            return f"Error while analyzing the image: {str(e)}"