llm_lab/core/llama_vision90b.py
2025-03-26 15:05:21 +01:00

58 lines
2.1 KiB
Python

from core.base_llm import BaseLLM
import requests
import json
import os
from deep_translator import GoogleTranslator
class LlamaVision90B(BaseLLM):
    """Client for the ``llama3.2-vision:90b`` model served by a remote Ollama instance.

    Sends chat requests (optionally with base64-encoded images) to the
    hard-coded Ollama ``/api/chat`` endpoint and returns the model's reply,
    optionally translated to French via Google Translate.
    """

    # Generous cap for a 90B vision model on a remote box; prevents the caller
    # from hanging forever if the host is unreachable or the model stalls.
    REQUEST_TIMEOUT = 300

    def __init__(self):
        model_name = "llama3.2-vision:90b"
        engine = "Ollama"
        # Remote Ollama chat endpoint (hard-coded host).
        self.api_url = "http://217.182.105.173:11434/api/chat"
        default_params = {
            "temperature": 0.3,     # low creativity for technical analysis
            "top_p": 1.0,           # keep the full probability distribution
            "top_k": 40,            # vocabulary cap
            "repeat_penalty": 1.1,  # discourage repetitions
            "num_predict": 512,     # max output length (tokens)
            # BUGFIX: the Ollama option is "num_ctx"; the previous "num-ctx"
            # key was silently ignored, so the extended 4096-token context
            # was never actually applied.
            "num_ctx": 4096,        # extended context window
            # NOTE(review): in the Ollama API, "format", "stream" and
            # "keep_alive" are top-level request fields, not "options" —
            # placed here they are likely ignored by the server (stream is
            # separately copied to the top level in generate()). Left in
            # place to preserve current behavior; confirm against the
            # Ollama API docs before moving them.
            "format": "json",       # structured JSON response (optional)
            "stream": False,        # single-block response
            "raw": False,           # let Ollama apply its system formatting
            "keep_alive": "5m",     # how long the model stays loaded
        }
        super().__init__(model_name=model_name, engine=engine, base_params=default_params)

    def generate(self, user_prompt: str, images: list = None, translate: bool = False):
        """Send *user_prompt* (and optional *images*) to the model and return its reply.

        Args:
            user_prompt: Raw user prompt; passed through ``self._format_prompt``.
            images: Optional list of images for the vision model
                (presumably base64-encoded strings, per the Ollama chat API —
                TODO confirm with callers).
            translate: When True, also return a French translation of the reply.

        Returns:
            The reply text, or a ``(text, french_text)`` tuple when
            ``translate`` is True.

        Raises:
            Exception: If the Ollama API responds with a non-2xx status.
        """
        prompt = self._format_prompt(user_prompt)
        payload = {
            "model": self.model,
            "messages": [
                {
                    "role": "user",
                    "content": prompt,
                    "images": images if images else [],
                }
            ],
            "options": self.params,
            "stream": self.params.get("stream", False),
        }
        # BUGFIX: a timeout is mandatory — without it, an unreachable remote
        # host blocks this call indefinitely.
        response = requests.post(self.api_url, json=payload, timeout=self.REQUEST_TIMEOUT)
        if not response.ok:
            raise Exception(f"Erreur API Ollama : {response.status_code} - {response.text}")
        result_data = response.json()
        result_text = result_data.get("message", {}).get("content", "")
        self._log_result(user_prompt, result_text)
        if translate:
            result_fr = GoogleTranslator(source="auto", target="fr").translate(result_text)
            return result_text, result_fr
        return result_text