#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
API Interface for Ollama
"""
import requests
import json
import base64
import time
import os
import threading
from typing import List, Dict, Any, Optional, Union, Callable

# Global lock for Ollama calls
_ollama_lock = threading.Lock()
_model_in_use = None
_last_call_time = 0.0  # Timestamp of the last call (seconds)
_min_delay_between_calls = 3.0  # Minimum delay in seconds between calls to Ollama


class OllamaAPI:
    """
    Simplified interface for Ollama API
    """

    def __init__(self, base_url: str = "http://217.182.105.173:11434"):
        """
        Initialize the API with the server's base URL

        Args:
            base_url (str): Base URL of the Ollama server
        """
        self.base_url = base_url
        self.generate_endpoint = f"{self.base_url}/api/generate"
        self.chat_endpoint = f"{self.base_url}/api/chat"
        self.models_endpoint = f"{self.base_url}/api/tags"
        self.timeout = 120  # Increase timeout to 2 minutes
        self.max_retries = 2  # Maximum number of attempts per request
        self.retry_delay = 2  # Delay between attempts in seconds

        # Check connection on startup
        self._check_connection()

    @staticmethod
    def wait_for_ollama(model_name: str, timeout: int = 120) -> bool:
        """
        Waits until the Ollama server is available for the given model

        Args:
            model_name (str): Name of the model to wait for
            timeout (int): Maximum wait time in seconds

        Returns:
            bool: True if the server is available, False on timeout
        """
        global _ollama_lock, _model_in_use, _last_call_time, _min_delay_between_calls

        # Enforce the minimum delay since the last call
        time_since_last_call = time.time() - _last_call_time
        if time_since_last_call < _min_delay_between_calls:
            delay = _min_delay_between_calls - time_since_last_call
            print(f"Waiting {delay:.1f}s to respect the minimum delay between calls...")
            time.sleep(delay)

        start_time = time.time()
        while True:
            with _ollama_lock:
                # If no model is currently in use, claim the slot for the requested model
                if _model_in_use is None:
                    _model_in_use = model_name
                    _last_call_time = time.time()
                    return True

            # Give up once the timeout is exceeded
            if time.time() - start_time > timeout:
                print(f"Timeout while waiting for Ollama for model {model_name}")
                return False

            # Wait and retry
            wait_time = min(5, (timeout - (time.time() - start_time)))
            if wait_time <= 0:
                return False
            print(f"Waiting for Ollama ({_model_in_use} is in use)... Retrying in {wait_time:.1f}s")
            time.sleep(wait_time)

    @staticmethod
    def release_ollama():
        """Releases the lock on Ollama"""
        global _ollama_lock, _model_in_use, _last_call_time

        with _ollama_lock:
            _model_in_use = None
            _last_call_time = time.time()
            print("Ollama released and available for new calls")
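    # Usage sketch for the two locking helpers above (a minimal illustration,
    # mirroring how generate() and chat() below pair them; the model name
    # "llama3" is only an example, not something this module requires):
    #
    #   if OllamaAPI.wait_for_ollama("llama3", timeout=60):
    #       try:
    #           ...  # perform the HTTP call against the server
    #       finally:
    #           OllamaAPI.release_ollama()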
    def _check_connection(self) -> bool:
        """
        Checks if the Ollama server is accessible

        Returns:
            bool: True if server is accessible, False otherwise
        """
        try:
            response = requests.get(f"{self.base_url}/api/version", timeout=10)
            if response.status_code == 200:
                version_info = response.json()
                print(f"Connection to Ollama established. Version: {version_info.get('version', 'unknown')}")
                return True
            else:
                print(f"Error connecting to Ollama: status {response.status_code}")
                return False
        except requests.exceptions.RequestException as e:
            print(f"Unable to connect to Ollama server: {str(e)}")
            print(f"URL: {self.base_url}")
            print("Check that the server is running and accessible.")
            return False

    def list_models(self) -> List[Dict[str, Any]]:
        """
        Lists available models on Ollama server

        Returns:
            List[Dict[str, Any]]: List of available models
        """
        try:
            response = requests.get(self.models_endpoint, timeout=self.timeout)
            if response.status_code == 200:
                return response.json().get("models", [])
            else:
                print(f"Error retrieving models: status {response.status_code}")
                return []
        except requests.exceptions.RequestException as e:
            print(f"Connection error while retrieving models: {str(e)}")
            return []

    def _is_model_available(self, model_name: str) -> bool:
        """
        Checks whether a specific model is available on the server

        Args:
            model_name (str): Name of the model to check

        Returns:
            bool: True if the model is available, False otherwise
        """
        models = self.list_models()
        available_models = [model["name"] for model in models]

        # Exact match
        if model_name in available_models:
            return True

        # Partial match (to handle version tags)
        for available_model in available_models:
            # The requested model is a substring of an available one, or vice versa
            if model_name in available_model or available_model in model_name:
                print(f"Note: model '{model_name}' partially matches '{available_model}'")
                return True

        return False
    def _make_request_with_retry(self, method: str, url: str, json_data: Dict[str, Any],
                                 timeout: Optional[int] = None) -> requests.Response:
        """
        Performs an HTTP request with a retry mechanism

        Args:
            method (str): HTTP method (POST, GET, etc.)
            url (str): Request URL
            json_data (Dict): JSON data to send
            timeout (int, optional): Timeout in seconds

        Returns:
            requests.Response: HTTP response

        Raises:
            requests.exceptions.RequestException: If all attempts fail
        """
        # Fall back to the instance default if no timeout is given
        request_timeout = self.timeout if timeout is None else timeout

        attempt = 0
        last_error = None
        while attempt < self.max_retries:
            try:
                if method.upper() == "POST":
                    return requests.post(url, json=json_data, timeout=request_timeout)
                elif method.upper() == "GET":
                    return requests.get(url, json=json_data, timeout=request_timeout)
                else:
                    raise ValueError(f"Unsupported HTTP method: {method}")
            except requests.exceptions.RequestException as e:
                last_error = e
                attempt += 1
                if attempt < self.max_retries:
                    print(f"Attempt {attempt} failed. Retrying in {self.retry_delay}s...")
                    time.sleep(self.retry_delay)

        # If we get here, all attempts have failed
        raise last_error or requests.exceptions.RequestException("All attempts failed")

    def generate(self, model: str, prompt: str, images: Optional[List[bytes]] = None,
                 options: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Generates a response from an Ollama model

        Args:
            model (str): Model name to use
            prompt (str): Prompt text
            images (List[bytes], optional): Images to send to model (for multimodal models)
            options (Dict, optional): Generation options

        Returns:
            Dict[str, Any]: Model response
        """
        # Default response in case of errors
        result: Dict[str, Any] = {"error": "Unknown error", "response": "Error during generation"}

        # Input validation
        if not model:
            return {"error": "Model parameter is required", "response": "Error: no model specified"}
        if not prompt and not images:
            return {"error": "Either prompt or images must be provided",
                    "response": "Error: no content to generate from"}

        if options is None:
            options = {}

        # Check that the model is available
        if not self._is_model_available(model):
            model_error = (f"Model '{model}' is not available on the Ollama server. "
                           f"Use the command: ollama pull {model}")
            print(model_error)
            return {"error": model_error,
                    "response": f"Error: model '{model}' not found, try pulling it first"}

        # Wait until the Ollama server is available
        if not self.wait_for_ollama(model, timeout=180):
            return {"error": "Timeout waiting for Ollama",
                    "response": "Timeout waiting for Ollama server to be available"}

        try:
            # Prepare payload
            payload = {
                "model": model,
                "prompt": prompt,
                "options": options,
                "stream": False  # Important: disable streaming to avoid JSON parsing errors
            }

            # Add images if provided (for multimodal models)
            if images:
                base64_images = []
                for img in images:
                    if isinstance(img, bytes):
                        base64_img = base64.b64encode(img).decode("utf-8")
                        base64_images.append(base64_img)
                payload["images"] = base64_images

            # Make request
            print(f"Sending request to {self.generate_endpoint} for model {model}...")
            start_time = time.time()

            try:
                response = self._make_request_with_retry("POST", self.generate_endpoint, payload)
            except requests.exceptions.RequestException as e:
                self.release_ollama()  # Release Ollama on error
                return {"error": f"Connection error: {str(e)}",
                        "response": "Error connecting to model server"}

            elapsed_time = time.time() - start_time

            # Handle response
            if response.status_code == 200:
                print(f"Response received in {elapsed_time:.2f} seconds")
                try:
                    result = response.json()
                except Exception as e:
                    # In case of JSON parsing error, try to process line by line
                    print(f"JSON parsing error: {e}")
                    print("Trying to process line by line...")

                    # If the response contains multiple JSON lines, take the first valid line
                    lines = response.text.strip().split("\n")
                    if len(lines) > 0:
                        try:
                            result = json.loads(lines[0])
                        except json.JSONDecodeError:
                            # If that still doesn't work, return the raw text
                            result = {"response": response.text[:1000], "model": model}
            elif response.status_code == 404:
                # Model specifically not found
                error_msg = f"Model '{model}' not found on the server. Try running: ollama pull {model}"
                print(error_msg)
                result = {"error": error_msg,
                          "response": f"Error: model '{model}' not found, try pulling it first"}
            else:
                error_msg = f"Error during generation: status {response.status_code}"
                try:
                    error_json = response.json()
                    if "error" in error_json:
                        error_msg += f", message: {error_json['error']}"
                except Exception:
                    error_msg += f", body: {response.text[:100]}"
                print(error_msg)
                result = {"error": error_msg, "response": "Error communicating with model"}

        except Exception as e:
            # Catch any other unexpected errors
            error_msg = f"Unexpected error: {str(e)}"
            print(error_msg)
            result = {"error": error_msg, "response": "An unexpected error occurred"}
        finally:
            # Always release Ollama at the end
            self.release_ollama()

        # Ensure we always return a dictionary
        return result
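    # Sketch of a generate() call with explicit generation options. The option
    # names "temperature" and "num_predict" are standard Ollama options passed
    # through unchanged by this class; the model name is only an example.
    #
    #   api = OllamaAPI()
    #   result = api.generate("llama3", "Summarize Ollama in one sentence",
    #                         options={"temperature": 0.2, "num_predict": 128})
    #   print(result.get("response", result.get("error")))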
    def chat(self, model: str, messages: List[Dict[str, Any]], images: Optional[List[bytes]] = None,
             options: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Generates a response from a chat history

        Args:
            model (str): Model name to use
            messages (List[Dict]): List of chat messages (format [{"role": "user", "content": "..."}])
            images (List[bytes], optional): Images to send to model (for multimodal models)
            options (Dict, optional): Generation options

        Returns:
            Dict[str, Any]: Model response
        """
        # Default response in case of errors
        result: Dict[str, Any] = {"error": "Unknown error", "response": "Error during chat generation"}

        # Input validation
        if not model:
            return {"error": "Model parameter is required", "response": "Error: no model specified"}
        if not messages:
            return {"error": "Messages parameter is required", "response": "Error: no chat messages provided"}

        if options is None:
            options = {}

        # Check that the model is available
        if not self._is_model_available(model):
            model_error = (f"Model '{model}' is not available on the Ollama server. "
                           f"Use the command: ollama pull {model}")
            print(model_error)
            return {"error": model_error,
                    "response": f"Error: model '{model}' not found, try pulling it first"}
        # Wait until the Ollama server is available
        if not self.wait_for_ollama(model, timeout=180):
            return {"error": "Timeout waiting for Ollama",
                    "response": "Timeout waiting for Ollama server to be available"}

        try:
            # Prepare payload
            payload = {
                "model": model,
                "messages": messages,
                "options": options,
                "stream": False  # Important: disable streaming to avoid JSON parsing errors
            }

            # Add images to the last user message if provided
            if images and messages and messages[-1]["role"] == "user":
                base64_images = []
                for img in images:
                    if isinstance(img, bytes):
                        base64_img = base64.b64encode(img).decode("utf-8")
                        base64_images.append(base64_img)

                # Modify the last message to include images
                last_message = messages[-1].copy()
                last_message["images"] = base64_images

                # Replace the last message
                payload["messages"] = messages[:-1] + [last_message]

            # Make request
            print(f"Sending chat request to {self.chat_endpoint} for model {model}...")
            start_time = time.time()

            try:
                response = self._make_request_with_retry("POST", self.chat_endpoint, payload)
            except requests.exceptions.RequestException as e:
                self.release_ollama()  # Release Ollama on error
                return {"error": f"Connection error: {str(e)}",
                        "response": "Error connecting to model server"}

            elapsed_time = time.time() - start_time

            # Handle response
            if response.status_code == 200:
                print(f"Chat response received in {elapsed_time:.2f} seconds")
                try:
                    result = response.json()
                except Exception as e:
                    # In case of JSON parsing error, try to process line by line
                    print(f"JSON parsing error: {e}")
                    lines = response.text.strip().split("\n")
                    if len(lines) > 0:
                        try:
                            result = json.loads(lines[0])
                        except json.JSONDecodeError:
                            result = {"message": {"content": response.text[:1000]}, "model": model}
            elif response.status_code == 404:
                # Model specifically not found
                error_msg = f"Model '{model}' not found on the server. Try running: ollama pull {model}"
                print(error_msg)
                result = {"error": error_msg,
                          "response": f"Error: model '{model}' not found, try pulling it first"}
            else:
                error_msg = f"Error during chat generation: status {response.status_code}"
                try:
                    error_json = response.json()
                    if "error" in error_json:
                        error_msg += f", message: {error_json['error']}"
                except Exception:
                    error_msg += f", body: {response.text[:100]}"
                print(error_msg)
                result = {"error": error_msg, "response": "Error communicating with model"}

        except Exception as e:
            # Catch any other unexpected errors
            error_msg = f"Unexpected error: {str(e)}"
            print(error_msg)
            result = {"error": error_msg, "response": "An unexpected error occurred"}
        finally:
            # Always release Ollama at the end
            self.release_ollama()

        # Ensure we always return a dictionary
        return result
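    # Sketch of a multi-turn chat() call; image bytes are attached to the last
    # user message as handled above. The model name "llava" and the file name
    # are only examples, and api is assumed to be an OllamaAPI instance.
    #
    #   history = [
    #       {"role": "system", "content": "You are a concise assistant."},
    #       {"role": "user", "content": "What is in this picture?"},
    #   ]
    #   with open("photo.jpg", "rb") as f:
    #       reply = api.chat("llava", history, images=[f.read()])
    #   print(reply.get("message", {}).get("content", reply.get("error")))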
    def stream_generate(self, model: str, prompt: str, callback: Callable[[str], None],
                        images: Optional[List[bytes]] = None,
                        options: Optional[Dict[str, Any]] = None) -> str:
        """
        Generate a response in streaming mode with a callback function

        Args:
            model (str): Model name
            prompt (str): Prompt to send
            callback (Callable): Function called for each received chunk
            images (List[bytes], optional): Images to send
            options (Dict, optional): Generation options

        Returns:
            str: Complete generated text
        """
        if options is None:
            options = {}

        payload = {
            "model": model,
            "prompt": prompt,
            "options": options,
            "stream": True  # Enable streaming
        }

        # Add images if provided
        if images:
            base64_images = []
            for img in images:
                if isinstance(img, bytes):
                    base64_img = base64.b64encode(img).decode("utf-8")
                    base64_images.append(base64_img)
            payload["images"] = base64_images

        full_response = ""
        try:
            with requests.post(
                self.generate_endpoint,
                json=payload,
                stream=True,
                timeout=self.timeout
            ) as response:
                if response.status_code != 200:
                    error_msg = f"Error during streaming: status {response.status_code}"
                    callback(error_msg)
                    return error_msg

                for line in response.iter_lines():
                    if line:
                        try:
                            chunk = json.loads(line)
                            if "response" in chunk:
                                text_chunk = chunk["response"]
                                full_response += text_chunk
                                callback(text_chunk)
                        except json.JSONDecodeError:
                            # Ignore lines that are not valid JSON
                            pass

            return full_response
        except Exception as e:
            error_msg = f"Error during streaming: {str(e)}"
            callback(error_msg)
            return error_msg


# Test the API if executed directly
if __name__ == "__main__":
    api = OllamaAPI()

    print("Testing connection to Ollama...")
    if api._check_connection():
        print("Connection successful!")

        print("\nList of available models:")
        models = api.list_models()
        for model in models:
            print(f"- {model.get('name', 'Unknown')} ({model.get('size', 'Unknown size')})")

        print("\nTesting a model (if available):")
        if models and "name" in models[0]:
            model_name = models[0]["name"]
            print(f"Testing model {model_name} with a simple prompt...")
            response = api.generate(model_name, "Say hello in English")
            if "response" in response:
                print(f"Response: {response['response']}")
            else:
                print(f"Error: {response.get('error', 'Unknown error')}")
    else:
        print("Failed to connect to Ollama.")
        print("Check that the server is running at the specified address.")
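# Usage sketch for streaming (kept as a comment so nothing runs on import;
# the model name is only an example, substitute any model returned by
# list_models()):
#
#   api = OllamaAPI()
#   text = api.stream_generate(
#       "llama3",
#       "Tell a very short story",
#       callback=lambda chunk: print(chunk, end="", flush=True),
#   )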