#!/usr/bin/env python3 """ API Server to integrate LLM Lab with Cursor and Obsidian """ from flask import Flask, request, jsonify, Response from flask_cors import CORS import json import os import logging import time import sys import subprocess import psutil import requests import argparse # Add current directory to Python search path sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) # Import LLM Lab modules from utils.agent_manager import AgentManager from utils.ollama_manager import ollama_manager # Logging configuration os.makedirs("logs", exist_ok=True) logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', handlers=[ logging.FileHandler("logs/api_server.log"), logging.StreamHandler() ] ) logger = logging.getLogger("api_server") # Parse command line arguments parser = argparse.ArgumentParser(description="LLM Lab API Server") parser.add_argument("--port", type=int, default=8000, help="Port to run the server on") args = parser.parse_args() # Flask app initialization app = Flask(__name__) CORS(app) # Allow cross-origin requests # Custom model override based on environment variables cursor_model = os.environ.get("CURSOR_MODEL") obsidian_model = os.environ.get("OBSIDIAN_MODEL") # Log which models are being used for this instance if cursor_model: logger.info(f"Using custom model for Cursor: {cursor_model}") if obsidian_model: logger.info(f"Using custom model for Obsidian: {obsidian_model}") # Initialize all required agents # Extract default models from environment or use defaults default_cursor_model = cursor_model or "codellama:13b-python" default_obsidian_model = obsidian_model or "llama2:13b" # Préparation au démarrage - précharger les modèles appropriés logger.info("Initialisation du serveur API unifié...") # Précharger les modèles si Ollama est disponible if ollama_manager.is_ollama_available(): # Déterminer les modèles à précharger models_to_preload = [] # Toujours inclure les modèles spécifiés dans les variables d'environnement if cursor_model: models_to_preload.append(cursor_model) logger.info(f"Modèle Cursor (depuis variable d'env): {cursor_model}") else: models_to_preload.append("codellama:13b-python") logger.info("Modèle Cursor (défaut): codellama:13b-python") if obsidian_model: models_to_preload.append(obsidian_model) logger.info(f"Modèle Obsidian (depuis variable d'env): {obsidian_model}") else: models_to_preload.append("llama2:13b") logger.info("Modèle Obsidian (défaut): llama2:13b") # Précharger les modèles logger.info(f"Préchargement des modèles: {', '.join(models_to_preload)}") ollama_manager.preload_models(models_to_preload) # Attendre quelques secondes pour laisser le temps au premier modèle de commencer à charger logger.info("Attente de 10 secondes pour l'initialisation des modèles...") time.sleep(10) else: logger.warning("Ollama n'est pas disponible. Le préchargement des modèles est ignoré.") # Détecter le type de requête et choisir le modèle approprié def detect_request_type(prompt, endpoint_type=None): """ Détermine le type de requête (code ou texte) et le modèle approprié Args: prompt: Le texte de la requête endpoint_type: Le type d'endpoint appelé ('cursor', 'obsidian', ou None pour auto-détection) Returns: tuple: (type_requete, modele_recommandé) """ # Si l'endpoint est explicitement défini, utiliser le modèle correspondant if endpoint_type == "cursor": return "code", cursor_model or "codellama:13b-python" elif endpoint_type == "obsidian": return "text", obsidian_model or "llama2:13b" # Indicateurs pour du code code_indicators = [ "```", "function", "class", "def ", "import ", "sudo ", "npm ", "pip ", "python", "javascript", "typescript", "html", "css", "ruby", "php", "java", "json", "xml", "yaml", "bash", "shell", "powershell", "sql", "for(", "if(", "while(", "switch(", "{", "}", "==", "=>", "!=", "||", "&&" ] # Indicateurs pour du texte text_indicators = [ "résumé", "résume", "explique", "explique-moi", "summarize", "explain", "rédige", "écris", "write", "create a", "crée", "génère", "generate", "markdown", "obsidian", "note", "article", "blog", "histoire", "story", "essai", "dissertation", "rapport", "report", "livre", "book" ] # Compter les occurrences code_score = sum(1 for indicator in code_indicators if indicator.lower() in prompt.lower()) text_score = sum(1 for indicator in text_indicators if indicator.lower() in prompt.lower()) # Normaliser les scores en fonction du nombre d'indicateurs code_score = code_score / len(code_indicators) text_score = text_score / len(text_indicators) # Décision basée sur les scores if code_score > text_score: return "code", cursor_model or "codellama:13b-python" else: return "text", obsidian_model or "llama2:13b" # Fonction pour basculer le modèle en fonction du type de requête def ensure_appropriate_model(prompt, endpoint_type=None): """ Assure que le modèle approprié est chargé en fonction de la requête Args: prompt: Le texte de la requête endpoint_type: Le type d'endpoint appelé ('cursor', 'obsidian', ou None) Returns: str: Le modèle qui sera utilisé """ request_type, recommended_model = detect_request_type(prompt, endpoint_type) # Vérifier si un changement de modèle est nécessaire if ollama_manager.is_model_switch_needed(recommended_model): logger.info(f"Détecté demande de type '{request_type}', basculement vers {recommended_model}") ollama_manager.switch_model(recommended_model, max_wait=120) else: current_model = ollama_manager.get_running_model() or "inconnu" logger.info(f"Requête de type '{request_type}', utilisation du modèle actuel: {current_model}") return recommended_model @app.route('/v1/chat/completions', methods=['POST']) def chat_completion(): """ OpenAI-compatible Chat API endpoint for Cursor """ try: # Check for valid JSON request if not request.is_json: return jsonify({"error": "Request must contain valid JSON"}), 400 data = request.json or {} # Use empty dict as default if None logger.info(f"Request received: {json.dumps(data)}") # Extract messages and parameters messages = data.get('messages', []) model = data.get('model', 'codellama:13b-python') temperature = data.get('temperature', 0.7) # Build prompt from messages system_message = next((msg['content'] for msg in messages if msg['role'] == 'system'), None) user_messages = [msg['content'] for msg in messages if msg['role'] == 'user'] # Use last user message as prompt prompt = user_messages[-1] if user_messages else "" # Detect request type and ensure appropriate model is loaded # This is the Cursor endpoint, so we force 'cursor' as endpoint type ensure_appropriate_model(prompt, endpoint_type="cursor") # Detect task type to choose appropriate agent agent_name = "cursor" # Default # Agent selection logic based on content if "obsidian" in prompt.lower() or "markdown" in prompt.lower() or "note" in prompt.lower(): agent_name = "obsidian" elif "javascript" in prompt.lower() or "js" in prompt.lower() or "html" in prompt.lower() or "css" in prompt.lower(): agent_name = "webdev" elif "python" in prompt.lower(): agent_name = "python" logger.info(f"Selected agent: {agent_name}") # Create and configure agent agent = AgentManager.create(agent_name) # Apply model override from environment if available # This allows specific instances to use specific models if agent_name == "cursor" and cursor_model: from core.factory import LLMFactory from agents.roles import AGENTS # Importation pour éviter les erreurs logger.info(f"Overriding model for cursor agent: {cursor_model}") agent = LLMFactory.create(cursor_model) agent.set_role(agent_name, AGENTS[agent_name]) elif agent_name == "obsidian" and obsidian_model: from core.factory import LLMFactory from agents.roles import AGENTS # Importation pour éviter les erreurs logger.info(f"Overriding model for obsidian agent: {obsidian_model}") agent = LLMFactory.create(obsidian_model) agent.set_role(agent_name, AGENTS[agent_name]) # Replace system prompt if provided if system_message: agent.system_prompt = system_message # Adjust parameters agent.params["temperature"] = temperature # Generate response start_time = time.time() response = agent.generate(prompt) end_time = time.time() generation_time = end_time - start_time logger.info(f"Response generated for agent {agent_name} in {generation_time:.2f} seconds") # OpenAI API compatible formatting return jsonify({ "id": f"llmlab-{agent_name}-{hash(prompt) % 10000}", "object": "chat.completion", "created": int(time.time()), "model": agent.model, "choices": [ { "index": 0, "message": { "role": "assistant", "content": response }, "finish_reason": "stop" } ], "usage": { "prompt_tokens": len(prompt.split()), "completion_tokens": len(response.split()), "total_tokens": len(prompt.split()) + len(response.split()) } }) except Exception as e: logger.error(f"Error: {str(e)}", exc_info=True) return jsonify({ "error": { "message": str(e), "type": "server_error", "code": 500 } }), 500 @app.route('/v1/models', methods=['GET']) def list_models(): """ List available models (OpenAI compatible) """ agents = AgentManager.list_agents() models = [] for agent_name, info in agents.items(): # Apply model overrides from environment variables model_name = info['model'] if agent_name == "cursor" and cursor_model: model_name = cursor_model elif agent_name == "obsidian" and obsidian_model: model_name = obsidian_model models.append({ "id": model_name, "object": "model", "created": int(time.time()), "owned_by": "llmlab", "permission": [{"id": agent_name, "object": "model_permission"}], "root": model_name, "parent": None }) return jsonify({ "object": "list", "data": models }) @app.route('/health', methods=['GET']) def health_check(): """ Server health check endpoint """ # Get current Ollama state current_model = "none" ollama_status = "unavailable" if ollama_manager.is_ollama_available(): ollama_status = "online" current_model = ollama_manager.get_running_model() or "unknown" return jsonify({ "status": "healthy", "version": "1.0.0", "timestamp": int(time.time()), "port": args.port, "cursor_model": cursor_model, "obsidian_model": obsidian_model, "ollama_status": ollama_status, "current_model": current_model }) @app.route('/agents', methods=['GET']) def list_agents(): """ List available agents (custom endpoint) """ agents = AgentManager.list_agents() return jsonify({ "agents": [ { "name": name, "model": cursor_model if name == "cursor" and cursor_model else obsidian_model if name == "obsidian" and obsidian_model else info['model'], "description": info['description'] } for name, info in agents.items() ] }) @app.route('/running', methods=['GET']) def running_models(): """ Endpoint to check currently running models """ try: # Try to get list of available models via Ollama API ollama_available = ollama_manager.is_ollama_available() available_models = ollama_manager.available_models running_model = ollama_manager.get_running_model() # Compatibility with previous implementation running_models = [] if running_model: running_models.append({ "name": running_model, "status": "active", "memory": "unknown" }) return jsonify({ "ollama_available": ollama_available, "available_models": available_models, "running_models": running_models, "current_model": running_model, "cursor_model": cursor_model, "obsidian_model": obsidian_model, "timestamp": int(time.time()) }) except Exception as e: logger.error(f"Error: {str(e)}", exc_info=True) return jsonify({ "error": str(e) }), 500 @app.route('/generate', methods=['POST']) def generate(): """ Simplified endpoint for custom applications """ try: # Check for valid JSON request if not request.is_json: return jsonify({"error": "Request must contain valid JSON"}), 400 data = request.json or {} # Use empty dict as default if None prompt = data.get('prompt', '') agent_name = data.get('agent', 'auto') # Par défaut, auto-détection # Optional parameters system_prompt = data.get('system_prompt', None) temperature = data.get('temperature', None) # Détection d'application - si l'URL contient un port spécifique endpoint_type = None if request.host.endswith(':8001'): endpoint_type = "cursor" elif request.host.endswith(':5001'): endpoint_type = "obsidian" # Si l'agent est spécifié explicitement if agent_name == "cursor": endpoint_type = "cursor" elif agent_name == "obsidian": endpoint_type = "obsidian" elif agent_name == "auto": # Auto-détection basée sur le contenu endpoint_type = None # Détecter le type et s'assurer que le bon modèle est chargé logger.info(f"Analyse de la requête... Agent: {agent_name}, Endpoint: {endpoint_type}") ensure_appropriate_model(prompt, endpoint_type) # Déterminer l'agent optimal si 'auto' est spécifié if agent_name == "auto": request_type, _ = detect_request_type(prompt) if request_type == "code": agent_name = "cursor" else: agent_name = "obsidian" logger.info(f"Agent auto-sélectionné en fonction du contenu: {agent_name}") # Create agent agent = AgentManager.create(agent_name) # Apply model override from environment if available if agent_name == "cursor" and cursor_model: from core.factory import LLMFactory from agents.roles import AGENTS logger.info(f"Overriding model for cursor agent: {cursor_model}") agent = LLMFactory.create(cursor_model) agent.set_role(agent_name, AGENTS[agent_name]) elif agent_name == "obsidian" and obsidian_model: from core.factory import LLMFactory from agents.roles import AGENTS logger.info(f"Overriding model for obsidian agent: {obsidian_model}") agent = LLMFactory.create(obsidian_model) agent.set_role(agent_name, AGENTS[agent_name]) # Apply custom parameters if provided if system_prompt: agent.system_prompt = system_prompt if temperature is not None: agent.params["temperature"] = temperature # Generate response start_time = time.time() response = agent.generate(prompt) generation_time = time.time() - start_time return jsonify({ "response": response, "agent": agent_name, "model": agent.model, "generation_time": generation_time }) except Exception as e: logger.error(f"Error: {str(e)}", exc_info=True) return jsonify({ "error": str(e) }), 500 @app.route('/switch-model', methods=['POST']) def switch_model(): """ Endpoint to manually switch Ollama to a specific model """ try: if not request.is_json: return jsonify({"error": "Request must contain valid JSON"}), 400 data = request.json or {} # Utiliser un dictionnaire vide si json est None model_name = data.get('model') if not model_name: return jsonify({"error": "Model name is required"}), 400 success = ollama_manager.switch_model(model_name) if success: return jsonify({ "status": "switching", "model": model_name, "message": f"Switching to model {model_name} in background" }) else: return jsonify({ "status": "error", "message": f"Failed to switch to model {model_name}" }), 400 except Exception as e: logger.error(f"Error switching model: {str(e)}", exc_info=True) return jsonify({ "error": str(e) }), 500 if __name__ == '__main__': port = args.port # Log which models are being used model_info = "" if cursor_model: model_info += f"\n - Cursor override model: {cursor_model}" if obsidian_model: model_info += f"\n - Obsidian override model: {obsidian_model}" print(f"=== LLM Lab API Server for Cursor and Obsidian ===") print(f"Server started on http://localhost:{port}") if model_info: print(f"\nUsing custom models:{model_info}") print() # Show Ollama status if ollama_manager.is_ollama_available(): print("Ollama status: Online") current_model = ollama_manager.get_running_model() if current_model: print(f"Currently loaded model: {current_model}") # Print list of available models if ollama_manager.available_models: print("\nAvailable models:") for model in ollama_manager.available_models: print(f" - {model}") else: print("Ollama status: Offline") print("\nAvailable endpoints:") print(f" - http://localhost:{port}/v1/chat/completions (OpenAI compatible)") print(f" - http://localhost:{port}/v1/models (OpenAI compatible)") print(f" - http://localhost:{port}/generate (Simplified API)") print(f" - http://localhost:{port}/agents (agent list)") print(f" - http://localhost:{port}/running (running models)") print(f" - http://localhost:{port}/switch-model (manual model control)") print(f" - http://localhost:{port}/health (status)") print() # Show specific usage based on port for clearer user guidance if port == 8001: print("For Cursor:") print(" 1. Open Cursor") print(" 2. Go to Settings > AI") print(" 3. Select 'Custom endpoint'") print(f" 4. Enter URL: http://localhost:{port}/v1") elif port == 5001: print("For Obsidian Text Generator plugin:") print(" 1. In Obsidian, install the 'Text Generator' plugin") print(" 2. Go to Text Generator settings") print(" 3. Select 'Custom' endpoint") print(f" 4. Enter URL: http://localhost:{port}/generate") print(" 5. Set request method to POST") print(" 6. Set completion endpoint to /generate") else: print("For Cursor:") print(" 1. Open Cursor") print(" 2. Go to Settings > AI") print(" 3. Select 'Custom endpoint'") print(f" 4. Enter URL: http://localhost:{port}/v1") print() print("For Obsidian Text Generator plugin:") print(" 1. In Obsidian, install the 'Text Generator' plugin") print(" 2. Go to Text Generator settings") print(" 3. Select 'Custom' endpoint") print(f" 4. Enter URL: http://localhost:{port}/generate") print(" 5. Set request method to POST") print(" 6. Set completion endpoint to /generate") print() print("Available agents:") try: for agent_name, info in AgentManager.list_agents().items(): # Show customized model for agents with override model_display = cursor_model if agent_name == "cursor" and cursor_model else \ obsidian_model if agent_name == "obsidian" and obsidian_model else \ info['model'] print(f" - {agent_name}: {info['description']} ({model_display})") except Exception as e: print(f"Error listing agents: {str(e)}") print("Make sure LLM Lab modules are correctly installed.") print() print("Logs: logs/api_server.log") print("Press Ctrl+C to stop the server") try: # Import agents here to avoid circular imports from agents.roles import AGENTS except Exception as e: logger.error(f"Error importing AGENTS: {str(e)}") # Start server app.run(host='0.0.0.0', port=port, debug=False)