#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
API Interface for Ollama
"""

import requests
import json
import base64
import time
import threading
from typing import List, Dict, Any, Optional, Callable

# Global lock for Ollama calls
_ollama_lock = threading.Lock()
_model_in_use = None
_last_call_time = 0.0  # Float timestamp (seconds)
_min_delay_between_calls = 3.0  # Minimum delay in seconds between calls to Ollama

class OllamaAPI:
    """
    Simplified interface for the Ollama API
    """

    def __init__(self, base_url: str = "http://217.182.105.173:11434"):
        """
        Initialize the API with the server's base URL

        Args:
            base_url (str): Base URL of the Ollama server
        """
        self.base_url = base_url
        self.generate_endpoint = f"{self.base_url}/api/generate"
        self.chat_endpoint = f"{self.base_url}/api/chat"
        self.models_endpoint = f"{self.base_url}/api/tags"
        self.timeout = 120  # Increase timeout to 2 minutes
        self.max_retries = 2  # Maximum number of request attempts
        self.retry_delay = 2  # Delay between attempts, in seconds

        # Check connection on startup
        self._check_connection()

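    # Illustrative construction sketch (not part of the original module). The host
    # below is an assumption; point base_url at your own Ollama server:
    #
    #     api = OllamaAPI(base_url="http://localhost:11434")
    #     print(api.list_models())
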
    @staticmethod
    def wait_for_ollama(model_name: str, timeout: int = 120) -> bool:
        """
        Waits until the Ollama server is available for the specified model

        Args:
            model_name (str): Name of the model to wait for
            timeout (int): Maximum wait time in seconds

        Returns:
            bool: True if the server is available, False on timeout
        """
        global _ollama_lock, _model_in_use, _last_call_time, _min_delay_between_calls

        # Compute how long to wait since the last call
        time_since_last_call = time.time() - _last_call_time
        if time_since_last_call < _min_delay_between_calls:
            delay = _min_delay_between_calls - time_since_last_call
            print(f"Waiting {delay:.1f}s to respect the minimum delay between calls...")
            time.sleep(delay)

        start_time = time.time()

        while True:
            with _ollama_lock:
                # If no model is currently in use, claim it for this call
                if _model_in_use is None:
                    _model_in_use = model_name
                    _last_call_time = time.time()
                    return True

            # If the maximum wait time has been exceeded
            if time.time() - start_time > timeout:
                print(f"Timeout while waiting for Ollama for model {model_name}")
                return False

            # Wait and retry (outside the lock so release_ollama() can acquire it)
            wait_time = min(5, (timeout - (time.time() - start_time)))
            if wait_time <= 0:
                return False

            print(f"Waiting for Ollama ({_model_in_use} is currently in use)... Retrying in {wait_time:.1f}s")
            time.sleep(wait_time)

    @staticmethod
    def release_ollama():
        """Releases the lock on Ollama"""
        global _ollama_lock, _model_in_use, _last_call_time

        with _ollama_lock:
            _model_in_use = None
            _last_call_time = time.time()
            print("Ollama released and available for new calls")

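    # Illustrative sketch of the intended locking protocol (not part of the original
    # module): wait_for_ollama() and release_ollama() are meant to be paired, with the
    # release in a finally block so the lock is freed even on errors. The model name
    # below is a hypothetical example:
    #
    #     if OllamaAPI.wait_for_ollama("llama3", timeout=60):
    #         try:
    #             ...  # call the Ollama server here
    #         finally:
    #             OllamaAPI.release_ollama()
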
    def _check_connection(self) -> bool:
        """
        Checks if the Ollama server is accessible

        Returns:
            bool: True if server is accessible, False otherwise
        """
        try:
            response = requests.get(f"{self.base_url}/api/version", timeout=10)
            if response.status_code == 200:
                version_info = response.json()
                print(f"Connection to Ollama established. Version: {version_info.get('version', 'unknown')}")
                return True
            else:
                print(f"Error connecting to Ollama: status {response.status_code}")
                return False
        except requests.exceptions.RequestException as e:
            print(f"Unable to connect to Ollama server: {str(e)}")
            print(f"URL: {self.base_url}")
            print("Check that the server is running and accessible.")
            return False

    def list_models(self) -> List[Dict[str, Any]]:
        """
        Lists available models on the Ollama server

        Returns:
            List[Dict[str, Any]]: List of available models
        """
        try:
            response = requests.get(self.models_endpoint, timeout=self.timeout)
            if response.status_code == 200:
                return response.json().get("models", [])
            else:
                print(f"Error retrieving models: status {response.status_code}")
                return []
        except requests.exceptions.RequestException as e:
            print(f"Connection error while retrieving models: {str(e)}")
            return []

    def _is_model_available(self, model_name: str) -> bool:
        """
        Checks whether a specific model is available on the server

        Args:
            model_name (str): Name of the model to check

        Returns:
            bool: True if the model is available, False otherwise
        """
        models = self.list_models()
        available_models = [model["name"] for model in models]

        # Exact match
        if model_name in available_models:
            return True

        # Partial match (to handle version tags)
        for available_model in available_models:
            # If the requested model is part of an available model, or vice versa
            if model_name in available_model or available_model in model_name:
                print(f"Note: model '{model_name}' partially matches '{available_model}'")
                return True

        return False

    def _make_request_with_retry(self, method: str, url: str, json_data: Dict[str, Any],
                                 timeout: Optional[int] = None) -> requests.Response:
        """
        Performs an HTTP request with a retry mechanism

        Args:
            method (str): HTTP method (POST, GET, etc.)
            url (str): Request URL
            json_data (Dict): JSON data to send
            timeout (int, optional): Timeout in seconds

        Returns:
            requests.Response: HTTP response

        Raises:
            requests.exceptions.RequestException: If all attempts fail
        """
        # Use the instance default if no timeout is specified
        request_timeout = self.timeout if timeout is None else timeout

        attempt = 0
        last_error = None

        while attempt < self.max_retries:
            try:
                if method.upper() == "POST":
                    return requests.post(url, json=json_data, timeout=request_timeout)
                elif method.upper() == "GET":
                    return requests.get(url, json=json_data, timeout=request_timeout)
                else:
                    raise ValueError(f"Unsupported HTTP method: {method}")
            except requests.exceptions.RequestException as e:
                last_error = e
                attempt += 1
                if attempt < self.max_retries:
                    print(f"Attempt {attempt} failed. Retrying in {self.retry_delay}s...")
                    time.sleep(self.retry_delay)

        # If we get here, all attempts have failed
        raise last_error or requests.exceptions.RequestException("All attempts failed")

    def generate(self, model: str, prompt: str, images: Optional[List[bytes]] = None,
                 options: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Generates a response from an Ollama model

        Args:
            model (str): Model name to use
            prompt (str): Prompt text
            images (List[bytes], optional): Images to send to model (for multimodal models)
            options (Dict, optional): Generation options

        Returns:
            Dict[str, Any]: Model response
        """
        # Default response in case of errors
        result: Dict[str, Any] = {"error": "Unknown error", "response": "Error during generation"}

        # Input validation
        if not model:
            return {"error": "Model parameter is required", "response": "Error: no model specified"}

        if not prompt and not images:
            return {"error": "Either prompt or images must be provided", "response": "Error: no content to generate from"}

        if options is None:
            options = {}

        # Check whether the model is available
        if not self._is_model_available(model):
            model_error = f"Model '{model}' is not available on the Ollama server. Use the command: ollama pull {model}"
            print(model_error)
            return {"error": model_error, "response": f"Error: model '{model}' not found, try pulling it first"}

        # Wait until the Ollama server is available
        if not self.wait_for_ollama(model, timeout=180):
            return {"error": "Timeout waiting for Ollama", "response": "Timeout waiting for Ollama server to be available"}

        try:
            # Prepare payload
            payload = {
                "model": model,
                "prompt": prompt,
                "options": options,
                "stream": False  # Important: disable streaming to avoid JSON parsing errors
            }

            # Add images if provided (for multimodal models)
            if images:
                base64_images = []
                for img in images:
                    if isinstance(img, bytes):
                        base64_img = base64.b64encode(img).decode("utf-8")
                        base64_images.append(base64_img)

                payload["images"] = base64_images

            # Make request
            print(f"Sending request to {self.generate_endpoint} for model {model}...")
            start_time = time.time()

            try:
                response = self._make_request_with_retry("POST", self.generate_endpoint, payload)
            except requests.exceptions.RequestException as e:
                self.release_ollama()  # Release Ollama on error
                return {"error": f"Connection error: {str(e)}", "response": "Error connecting to model server"}

            elapsed_time = time.time() - start_time

            # Handle response
            if response.status_code == 200:
                print(f"Response received in {elapsed_time:.2f} seconds")
                try:
                    result = response.json()
                except Exception as e:
                    # In case of JSON parsing error, try to process line by line
                    print(f"JSON parsing error: {e}")
                    print("Trying to process line by line...")

                    # If the response contains multiple JSON lines, take the first valid line
                    lines = response.text.strip().split("\n")
                    if len(lines) > 0:
                        try:
                            result = json.loads(lines[0])
                        except Exception:
                            # If that still doesn't work, return the raw text
                            result = {"response": response.text[:1000], "model": model}
            elif response.status_code == 404:
                # Model specifically not found
                error_msg = f"Model '{model}' not found on the server. Try running: ollama pull {model}"
                print(error_msg)
                result = {"error": error_msg, "response": f"Error: model '{model}' not found, try pulling it first"}
            else:
                error_msg = f"Error during generation: status {response.status_code}"
                try:
                    error_json = response.json()
                    if "error" in error_json:
                        error_msg += f", message: {error_json['error']}"
                except Exception:
                    error_msg += f", body: {response.text[:100]}"

                print(error_msg)
                result = {"error": error_msg, "response": "Error communicating with model"}

        except Exception as e:
            # Catch any other unexpected errors
            error_msg = f"Unexpected error: {str(e)}"
            print(error_msg)
            result = {"error": error_msg, "response": "An unexpected error occurred"}

        finally:
            # Always release Ollama at the end
            self.release_ollama()

        # Ensure we always return a dictionary
        return result

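    # Illustrative usage sketch for generate() (not part of the original module;
    # the model names and image path are hypothetical):
    #
    #     api = OllamaAPI()
    #     result = api.generate("llama3", "Summarize this page.",
    #                           options={"temperature": 0.2})
    #     print(result.get("response", result.get("error")))
    #
    #     # Multimodal variant: pass raw image bytes, which generate() base64-encodes.
    #     with open("page.png", "rb") as f:
    #         result = api.generate("llava", "Describe this image.", images=[f.read()])
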
    def chat(self, model: str, messages: List[Dict[str, Any]],
             images: Optional[List[bytes]] = None,
             options: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Generates a response from a chat history

        Args:
            model (str): Model name to use
            messages (List[Dict]): List of chat messages (format [{"role": "user", "content": "..."}])
            images (List[bytes], optional): Images to send to model (for multimodal models)
            options (Dict, optional): Generation options

        Returns:
            Dict[str, Any]: Model response
        """
        # Default response in case of errors
        result: Dict[str, Any] = {"error": "Unknown error", "response": "Error during chat generation"}

        # Input validation
        if not model:
            return {"error": "Model parameter is required", "response": "Error: no model specified"}

        if not messages:
            return {"error": "Messages parameter is required", "response": "Error: no chat messages provided"}

        if options is None:
            options = {}

        # Check whether the model is available
        if not self._is_model_available(model):
            model_error = f"Model '{model}' is not available on the Ollama server. Use the command: ollama pull {model}"
            print(model_error)
            return {"error": model_error, "response": f"Error: model '{model}' not found, try pulling it first"}

        # Wait until the Ollama server is available
        if not self.wait_for_ollama(model, timeout=180):
            return {"error": "Timeout waiting for Ollama", "response": "Timeout waiting for Ollama server to be available"}

        try:
            # Prepare payload
            payload = {
                "model": model,
                "messages": messages,
                "options": options,
                "stream": False  # Important: disable streaming to avoid JSON parsing errors
            }

            # Add images to the last user message if provided
            if images and messages and messages[-1]["role"] == "user":
                base64_images = []
                for img in images:
                    if isinstance(img, bytes):
                        base64_img = base64.b64encode(img).decode("utf-8")
                        base64_images.append(base64_img)

                # Modify the last message to include images
                last_message = messages[-1].copy()
                last_message["images"] = base64_images

                # Replace the last message
                payload["messages"] = messages[:-1] + [last_message]

            # Make request
            print(f"Sending chat request to {self.chat_endpoint} for model {model}...")
            start_time = time.time()

            try:
                response = self._make_request_with_retry("POST", self.chat_endpoint, payload)
            except requests.exceptions.RequestException as e:
                self.release_ollama()  # Release Ollama on error
                return {"error": f"Connection error: {str(e)}", "response": "Error connecting to model server"}

            elapsed_time = time.time() - start_time

            # Handle response
            if response.status_code == 200:
                print(f"Chat response received in {elapsed_time:.2f} seconds")
                try:
                    result = response.json()
                except Exception as e:
                    # In case of JSON parsing error, try to process line by line
                    print(f"JSON parsing error: {e}")
                    lines = response.text.strip().split("\n")
                    if len(lines) > 0:
                        try:
                            result = json.loads(lines[0])
                        except Exception:
                            result = {"message": {"content": response.text[:1000]}, "model": model}
            elif response.status_code == 404:
                # Model specifically not found
                error_msg = f"Model '{model}' not found on the server. Try running: ollama pull {model}"
                print(error_msg)
                result = {"error": error_msg, "response": f"Error: model '{model}' not found, try pulling it first"}
            else:
                error_msg = f"Error during chat generation: status {response.status_code}"
                try:
                    error_json = response.json()
                    if "error" in error_json:
                        error_msg += f", message: {error_json['error']}"
                except Exception:
                    error_msg += f", body: {response.text[:100]}"

                print(error_msg)
                result = {"error": error_msg, "response": "Error communicating with model"}

        except Exception as e:
            # Catch any other unexpected errors
            error_msg = f"Unexpected error: {str(e)}"
            print(error_msg)
            result = {"error": error_msg, "response": "An unexpected error occurred"}

        finally:
            # Always release Ollama at the end
            self.release_ollama()

        # Ensure we always return a dictionary
        return result

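    # Illustrative usage sketch for chat() (not part of the original module; the
    # model name is hypothetical). The Ollama chat endpoint returns the reply under
    # result["message"]["content"]:
    #
    #     api = OllamaAPI()
    #     messages = [
    #         {"role": "system", "content": "You are a concise assistant."},
    #         {"role": "user", "content": "List three uses of OCR."},
    #     ]
    #     result = api.chat("llama3", messages)
    #     print(result.get("message", {}).get("content", result.get("error")))
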
    def stream_generate(self, model: str, prompt: str,
                        callback: Callable[[str], None],
                        images: Optional[List[bytes]] = None,
                        options: Optional[Dict[str, Any]] = None) -> str:
        """
        Generate a response in streaming mode with a callback function

        Args:
            model (str): Model name
            prompt (str): Prompt to send
            callback (Callable): Function called for each received chunk
            images (List[bytes], optional): Images to send
            options (Dict, optional): Generation options

        Returns:
            str: Complete generated text
        """
        if options is None:
            options = {}

        payload = {
            "model": model,
            "prompt": prompt,
            "options": options,
            "stream": True  # Enable streaming
        }

        # Add images if provided
        if images:
            base64_images = []
            for img in images:
                if isinstance(img, bytes):
                    base64_img = base64.b64encode(img).decode("utf-8")
                    base64_images.append(base64_img)

            payload["images"] = base64_images

        full_response = ""

        try:
            with requests.post(
                self.generate_endpoint,
                json=payload,
                stream=True,
                timeout=self.timeout
            ) as response:
                if response.status_code != 200:
                    error_msg = f"Error during streaming: status {response.status_code}"
                    callback(error_msg)
                    return error_msg

                for line in response.iter_lines():
                    if line:
                        try:
                            chunk = json.loads(line)
                            if "response" in chunk:
                                text_chunk = chunk["response"]
                                full_response += text_chunk
                                callback(text_chunk)
                        except json.JSONDecodeError:
                            # Ignore lines that are not valid JSON
                            pass

                return full_response

        except Exception as e:
            error_msg = f"Error during streaming: {str(e)}"
            callback(error_msg)
            return error_msg

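    # Illustrative usage sketch for stream_generate() (not part of the original
    # module; the model name is hypothetical). Each chunk is passed to the callback
    # as it arrives and the full text is returned at the end. Note that this method
    # bypasses the wait_for_ollama()/release_ollama() lock:
    #
    #     api = OllamaAPI()
    #     text = api.stream_generate("llama3", "Write a haiku about OCR.",
    #                                callback=lambda chunk: print(chunk, end="", flush=True))
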
# Test the API if executed directly
if __name__ == "__main__":
    api = OllamaAPI()
    print("Testing connection to Ollama...")

    if api._check_connection():
        print("Connection successful!")

        print("\nList of available models:")
        models = api.list_models()
        for model in models:
            print(f"- {model.get('name', 'Unknown')} ({model.get('size', 'Unknown size')})")

        print("\nTesting a model (if available):")
        if models and "name" in models[0]:
            model_name = models[0]["name"]
            print(f"Testing model {model_name} with a simple prompt...")
            response = api.generate(model_name, "Say hello in English")
            if "response" in response:
                print(f"Response: {response['response']}")
            else:
                print(f"Error: {response.get('error', 'Unknown error')}")
    else:
        print("Failed to connect to Ollama.")
        print("Check that the server is running at the specified address.")