#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
API interface for Ollama (ragflow_preprocess/utils/api_ollama.py)
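Example usage (a minimal sketch; the model name is illustrative and must already
be pulled on the Ollama server this client points to):
    api = OllamaAPI()
    result = api.generate("llama3", "Say hello")
    print(result.get("response", result.get("error")))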
"""
import requests
import json
import base64
import time
import os
import threading
from typing import List, Dict, Any, Optional, Union, Callable
# Global lock for Ollama calls
_ollama_lock = threading.Lock()
_model_in_use = None
_last_call_time = 0.0  # Float timestamp in seconds
_min_delay_between_calls = 3.0  # Minimum delay in seconds between calls to Ollama
class OllamaAPI:
"""
Simplified interface for Ollama API
"""
def __init__(self, base_url: str = "http://217.182.105.173:11434"):
"""
Initialize the API with the server's base URL
Args:
base_url (str): Base URL of the Ollama server
"""
self.base_url = base_url
self.generate_endpoint = f"{self.base_url}/api/generate"
self.chat_endpoint = f"{self.base_url}/api/chat"
self.models_endpoint = f"{self.base_url}/api/tags"
self.timeout = 120 # Increase timeout to 2 minutes
self.max_retries = 2  # Maximum number of attempts per request
self.retry_delay = 2  # Delay between attempts, in seconds
# Check connection on startup
self._check_connection()
@staticmethod
def wait_for_ollama(model_name: str, timeout: int = 120) -> bool:
"""
Waits for the Ollama server to become available for the specified model
Args:
model_name (str): Name of the model to wait for
timeout (int): Maximum wait time in seconds
Returns:
bool: True if the server is available, False on timeout
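Example (illustrative model name; pair every successful wait with release_ollama()):
    if OllamaAPI.wait_for_ollama("llama3", timeout=60):
        try:
            ...  # call the model here
        finally:
            OllamaAPI.release_ollama()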
"""
global _ollama_lock, _model_in_use, _last_call_time, _min_delay_between_calls
# Compute how long to wait since the last call
time_since_last_call = time.time() - _last_call_time
if time_since_last_call < _min_delay_between_calls:
delay = _min_delay_between_calls - time_since_last_call
print(f"Waiting {delay:.1f}s to honor the minimum delay between calls...")
time.sleep(delay)
start_time = time.time()
while True:
with _ollama_lock:
# If no model is currently in use, claim it for this model
if _model_in_use is None:
_model_in_use = model_name
_last_call_time = time.time()
return True
# If the wait time has been exceeded
if time.time() - start_time > timeout:
print(f"Timed out waiting for Ollama for model {model_name}")
return False
# Wait and retry
wait_time = min(5, (timeout - (time.time() - start_time)))
if wait_time <= 0:
return False
print(f"Waiting for Ollama ({_model_in_use} is currently in use)... retrying in {wait_time:.1f}s")
time.sleep(wait_time)
@staticmethod
def release_ollama():
"""Releases the lock on Ollama"""
global _ollama_lock, _model_in_use, _last_call_time
with _ollama_lock:
_model_in_use = None
_last_call_time = time.time()
print("Ollama released and available for new calls")
def _check_connection(self) -> bool:
"""
Checks if the Ollama server is accessible
Returns:
bool: True if server is accessible, False otherwise
"""
try:
response = requests.get(f"{self.base_url}/api/version", timeout=10)
if response.status_code == 200:
version_info = response.json()
print(f"Connection to Ollama established. Version: {version_info.get('version', 'unknown')}")
return True
else:
print(f"Error connecting to Ollama: status {response.status_code}")
return False
except requests.exceptions.RequestException as e:
print(f"Unable to connect to Ollama server: {str(e)}")
print(f"URL: {self.base_url}")
print("Check that the server is running and accessible.")
return False
def list_models(self) -> List[Dict[str, Any]]:
"""
Lists available models on Ollama server
Returns:
List[Dict[str, Any]]: List of available models
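Example:
    for m in api.list_models():
        print(m.get("name"), m.get("size"))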
"""
try:
response = requests.get(self.models_endpoint, timeout=self.timeout)
if response.status_code == 200:
return response.json().get("models", [])
else:
print(f"Error retrieving models: status {response.status_code}")
return []
except requests.exceptions.RequestException as e:
print(f"Connection error while retrieving models: {str(e)}")
return []
def _is_model_available(self, model_name: str) -> bool:
"""
Checks whether a specific model is available on the server
Args:
model_name (str): Name of the model to check
Returns:
bool: True if the model is available, False otherwise
"""
models = self.list_models()
available_models = [model["name"] for model in models]
# Exact match
if model_name in available_models:
return True
# Partial match (to handle version tags)
for available_model in available_models:
# If the requested name is contained in an available model, or vice versa
if model_name in available_model or available_model in model_name:
print(f"Note: model '{model_name}' partially matches '{available_model}'")
return True
return False
def _make_request_with_retry(self, method: str, url: str, json_data: Dict[str, Any],
timeout: Optional[int] = None) -> requests.Response:
"""
Performs an HTTP request with a retry mechanism
Args:
method (str): HTTP method (POST, GET, etc.)
url (str): Request URL
json_data (Dict): JSON payload to send
timeout (int, optional): Timeout in seconds
Returns:
requests.Response: HTTP response
Raises:
requests.exceptions.RequestException: If all attempts fail
"""
# Use the instance default if no timeout is specified
request_timeout = self.timeout if timeout is None else timeout
attempt = 0
last_error = None
while attempt < self.max_retries:
try:
if method.upper() == "POST":
return requests.post(url, json=json_data, timeout=request_timeout)
elif method.upper() == "GET":
return requests.get(url, json=json_data, timeout=request_timeout)
else:
raise ValueError(f"Unsupported HTTP method: {method}")
except requests.exceptions.RequestException as e:
last_error = e
attempt += 1
if attempt < self.max_retries:
print(f"Attempt {attempt} failed. Retrying in {self.retry_delay}s...")
time.sleep(self.retry_delay)
# If we get here, all attempts failed
raise last_error or requests.exceptions.RequestException("All attempts failed")
def generate(self, model: str, prompt: str, images: Optional[List[bytes]] = None,
options: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
"""
Generates a response from an Ollama model
Args:
model (str): Model name to use
prompt (str): Prompt text
images (List[bytes], optional): Images to send to model (for multimodal models)
options (Dict, optional): Generation options
Returns:
Dict[str, Any]: Model response
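Example (a sketch; the model name is illustrative and "page.png" is a hypothetical file):
    with open("page.png", "rb") as f:
        result = api.generate("llava", "Describe this image", images=[f.read()])
    text = result.get("response", "")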
"""
# Default response in case of errors
result: Dict[str, Any] = {"error": "Unknown error", "response": "Error during generation"}
# Input validation
if not model:
return {"error": "Model parameter is required", "response": "Error: no model specified"}
if not prompt and not images:
return {"error": "Either prompt or images must be provided", "response": "Error: no content to generate from"}
if options is None:
options = {}
# Check whether the model is available
if not self._is_model_available(model):
model_error = f"Model '{model}' is not available on the Ollama server. Pull it first with: ollama pull {model}"
print(model_error)
return {"error": model_error, "response": f"Error: model '{model}' not found, try pulling it first"}
# Wait for the Ollama server to be available
if not self.wait_for_ollama(model, timeout=180):
return {"error": "Timeout waiting for Ollama", "response": "Timeout waiting for Ollama server to be available"}
try:
# Prepare payload
payload = {
"model": model,
"prompt": prompt,
"options": options,
"stream": False # Important: disable streaming to avoid JSON parsing errors
}
# Add images if provided (for multimodal models)
if images:
base64_images = []
for img in images:
if isinstance(img, bytes):
base64_img = base64.b64encode(img).decode("utf-8")
base64_images.append(base64_img)
payload["images"] = base64_images
# Make request
print(f"Sending request to {self.generate_endpoint} for model {model}...")
start_time = time.time()
try:
response = self._make_request_with_retry("POST", self.generate_endpoint, payload)
except requests.exceptions.RequestException as e:
self.release_ollama()  # Release Ollama on connection error
return {"error": f"Connection error: {str(e)}", "response": "Error connecting to model server"}
elapsed_time = time.time() - start_time
# Handle response
if response.status_code == 200:
print(f"Response received in {elapsed_time:.2f} seconds")
try:
result = response.json()
except Exception as e:
# In case of JSON parsing error, try to process line by line
print(f"JSON parsing error: {e}")
print("Trying to process line by line...")
# If the response contains multiple JSON lines, take the first valid line
lines = response.text.strip().split("\n")
if len(lines) > 0:
try:
result = json.loads(lines[0])
except Exception:
# If that still doesn't work, return the raw text
result = {"response": response.text[:1000], "model": model}
elif response.status_code == 404:
# Model specifically not found
error_msg = f"Model '{model}' not found on the server. Try running: ollama pull {model}"
print(error_msg)
result = {"error": error_msg, "response": f"Error: model '{model}' not found, try pulling it first"}
else:
error_msg = f"Error during generation: status {response.status_code}"
try:
error_json = response.json()
if "error" in error_json:
error_msg += f", message: {error_json['error']}"
except Exception:
error_msg += f", body: {response.text[:100]}"
print(error_msg)
result = {"error": error_msg, "response": "Error communicating with model"}
except Exception as e:
# Catch any other unexpected errors
error_msg = f"Unexpected error: {str(e)}"
print(error_msg)
result = {"error": error_msg, "response": "An unexpected error occurred"}
finally:
# Always release Ollama when done
self.release_ollama()
# Ensure we always return a dictionary
return result
def chat(self, model: str, messages: List[Dict[str, Any]],
images: Optional[List[bytes]] = None,
options: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
"""
Generates a response from a chat history
Args:
model (str): Model name to use
messages (List[Dict]): List of chat messages (format [{"role": "user", "content": "..."}])
images (List[bytes], optional): Images to send to model (for multimodal models)
options (Dict, optional): Generation options
Returns:
Dict[str, Any]: Model response
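Example (a sketch; the model name is illustrative):
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Summarize the document in one sentence."},
    ]
    result = api.chat("llama3", messages)
    answer = result.get("message", {}).get("content", "")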
"""
# Default response in case of errors
result: Dict[str, Any] = {"error": "Unknown error", "response": "Error during chat generation"}
# Input validation
if not model:
return {"error": "Model parameter is required", "response": "Error: no model specified"}
if not messages:
return {"error": "Messages parameter is required", "response": "Error: no chat messages provided"}
if options is None:
options = {}
# Check whether the model is available
if not self._is_model_available(model):
model_error = f"Model '{model}' is not available on the Ollama server. Pull it first with: ollama pull {model}"
print(model_error)
return {"error": model_error, "response": f"Error: model '{model}' not found, try pulling it first"}
# Wait for the Ollama server to be available
if not self.wait_for_ollama(model, timeout=180):
return {"error": "Timeout waiting for Ollama", "response": "Timeout waiting for Ollama server to be available"}
try:
# Prepare payload
payload = {
"model": model,
"messages": messages,
"options": options,
"stream": False # Important: disable streaming to avoid JSON parsing errors
}
# Add images to the last user message if provided
if images and messages and messages[-1]["role"] == "user":
base64_images = []
for img in images:
if isinstance(img, bytes):
base64_img = base64.b64encode(img).decode("utf-8")
base64_images.append(base64_img)
# Modify the last message to include images
last_message = messages[-1].copy()
last_message["images"] = base64_images
# Replace the last message
payload["messages"] = messages[:-1] + [last_message]
# Make request
print(f"Sending chat request to {self.chat_endpoint} for model {model}...")
start_time = time.time()
try:
response = self._make_request_with_retry("POST", self.chat_endpoint, payload)
except requests.exceptions.RequestException as e:
self.release_ollama()  # Release Ollama on connection error
return {"error": f"Connection error: {str(e)}", "response": "Error connecting to model server"}
elapsed_time = time.time() - start_time
# Handle response
if response.status_code == 200:
print(f"Chat response received in {elapsed_time:.2f} seconds")
try:
result = response.json()
except Exception as e:
# In case of JSON parsing error, try to process line by line
print(f"JSON parsing error: {e}")
lines = response.text.strip().split("\n")
if len(lines) > 0:
try:
result = json.loads(lines[0])
except Exception:
result = {"message": {"content": response.text[:1000]}, "model": model}
elif response.status_code == 404:
# Model specifically not found
error_msg = f"Model '{model}' not found on the server. Try running: ollama pull {model}"
print(error_msg)
result = {"error": error_msg, "response": f"Error: model '{model}' not found, try pulling it first"}
else:
error_msg = f"Error during chat generation: status {response.status_code}"
try:
error_json = response.json()
if "error" in error_json:
error_msg += f", message: {error_json['error']}"
except Exception:
error_msg += f", body: {response.text[:100]}"
print(error_msg)
result = {"error": error_msg, "response": "Error communicating with model"}
except Exception as e:
# Catch any other unexpected errors
error_msg = f"Unexpected error: {str(e)}"
print(error_msg)
result = {"error": error_msg, "response": "An unexpected error occurred"}
finally:
# Always release Ollama when done
self.release_ollama()
# Ensure we always return a dictionary
return result
def stream_generate(self, model: str, prompt: str,
callback: Callable[[str], None],
images: Optional[List[bytes]] = None,
options: Optional[Dict[str, Any]] = None) -> str:
"""
Generate a response in streaming mode with a callback function
Args:
model (str): Model name
prompt (str): Prompt to send
callback (Callable): Function called for each received chunk
images (List[bytes], optional): Images to send
options (Dict, optional): Generation options
Returns:
str: Complete generated text
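Example (a sketch; the model name is illustrative, chunks are printed as they arrive):
    text = api.stream_generate(
        "llama3",
        "Write a haiku about documentation",
        callback=lambda chunk: print(chunk, end="", flush=True),
    )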
"""
if options is None:
options = {}
payload = {
"model": model,
"prompt": prompt,
"options": options,
"stream": True # Enable streaming
}
# Add images if provided
if images:
base64_images = []
for img in images:
if isinstance(img, bytes):
base64_img = base64.b64encode(img).decode("utf-8")
base64_images.append(base64_img)
payload["images"] = base64_images
full_response = ""
try:
with requests.post(
self.generate_endpoint,
json=payload,
stream=True,
timeout=self.timeout
) as response:
if response.status_code != 200:
error_msg = f"Error during streaming: status {response.status_code}"
callback(error_msg)
return error_msg
for line in response.iter_lines():
if line:
try:
chunk = json.loads(line)
if "response" in chunk:
text_chunk = chunk["response"]
full_response += text_chunk
callback(text_chunk)
except json.JSONDecodeError:
# Ignore lines that are not valid JSON
pass
return full_response
except Exception as e:
error_msg = f"Error during streaming: {str(e)}"
callback(error_msg)
return error_msg
# Test the API if executed directly
if __name__ == "__main__":
api = OllamaAPI()
print("Testing connection to Ollama...")
if api._check_connection():
print("Connection successful!")
print("\nList of available models:")
models = api.list_models()
for model in models:
print(f"- {model.get('name', 'Unknown')} ({model.get('size', 'Unknown size')})")
print("\nTesting a model (if available):")
if models and "name" in models[0]:
model_name = models[0]["name"]
print(f"Testing model {model_name} with a simple prompt...")
response = api.generate(model_name, "Say hello in English")
if "response" in response:
print(f"Response: {response['response']}")
else:
print(f"Error: {response.get('error', 'Unknown error')}")
else:
print("Failed to connect to Ollama.")
print("Check that the server is running at the specified address.")