Mirror of https://github.com/Ladebeze66/llm_lab_perso.git (synced 2025-12-13 09:06:50 +01:00)

Commit fb809045ff ("2104-17:36"), parent f6aaf15a33.

.specstory/.what-is-this.md (new file, 65 lines)
@@ -0,0 +1,65 @@
# SpecStory Artifacts Directory

This directory is automatically created and maintained by the SpecStory extension to preserve your Cursor composer and chat history.

## What's Here?

- `.specstory/history`: Contains markdown files of your AI coding sessions
  - Each file represents a separate chat or composer session
  - Files are automatically updated as you work
- `.specstory/cursor_rules_backups`: Contains backups of the `.cursor/rules/derived-cursor-rules.mdc` file
  - Backups are automatically created each time the `.cursor/rules/derived-cursor-rules.mdc` file is updated
  - You can enable/disable the Cursor Rules feature in the SpecStory settings, it is disabled by default

## Valuable Uses

- Capture: Keep your context window up-to-date when starting new Chat/Composer sessions via @ references
- Search: For previous prompts and code snippets
- Learn: Meta-analyze your patterns and learn from your past experiences
- Derive: Keep Cursor on course with your past decisions by automatically deriving Cursor rules from your AI interactions

## Version Control

We recommend keeping this directory under version control to maintain a history of your AI interactions. However, if you prefer not to version these files, you can exclude them by adding this to your `.gitignore`:

```
.specstory
```

We recommend not keeping the `.specstory/cursor_rules_backups` directory under version control if you are already using git to version the `.cursor/rules` directory, and committing regularly. You can exclude it by adding this to your `.gitignore`:

```
.specstory/cursor_rules_backups
```

## Searching Your Codebase

When searching your codebase in Cursor, search results may include your previous AI coding interactions. To focus solely on your actual code files, you can exclude the AI interaction history from search results.

To exclude AI interaction history:

1. Open the "Find in Files" search in Cursor (Cmd/Ctrl + Shift + F)
2. Navigate to the "files to exclude" section
3. Add the following pattern:

```
.specstory/*
```

This will ensure your searches only return results from your working codebase files.

## Notes

- Auto-save only works when Cursor/sqlite flushes data to disk. This results in a small delay after the AI response is complete before SpecStory can save the history.
- Auto-save does not yet work on remote WSL workspaces.

## Settings

You can control auto-saving behavior in Cursor:

1. Open Cursor → Settings → VS Code Settings (Cmd/Ctrl + ,)
2. Search for "SpecStory"
3. Find "Auto Save" setting to enable/disable

Auto-save occurs when changes are detected in Cursor's sqlite database, or every 2 minutes as a safety net.
(One file diff suppressed because it is too large; one binary file not shown.)
@@ -1,7 +1,27 @@
@echo off
setlocal

echo === Lancement du serveur API LLM Lab ===
echo === Lancement automatique du serveur API LLM Lab ===

REM Vérifier si Ollama est en cours d'exécution
echo Vérification d'Ollama...
tasklist /FI "IMAGENAME eq ollama.exe" 2>NUL | find /I /N "ollama.exe" >NUL
if %ERRORLEVEL% NEQ 0 (
    echo Ollama n'est pas en cours d'exécution, tentative de démarrage...

    REM Vérifier si Ollama est installé
    if exist "C:\Program Files\Ollama\ollama.exe" (
        echo Démarrage d'Ollama...
        start "" /min "C:\Program Files\Ollama\ollama.exe"
        echo Attente de l'initialisation d'Ollama...
        timeout /t 5 /nobreak >NUL
    ) else (
        echo Impossible de trouver Ollama. Veuillez l'installer depuis https://ollama.com/download/windows
        echo Le serveur API va démarrer, mais les modèles pourraient ne pas être disponibles.
    )
) else (
    echo Ollama est déjà en cours d'exécution.
)

REM Vérification si l'environnement virtuel existe
if not exist "llmlab" (
@@ -23,16 +43,47 @@ REM Affichage des informations
echo.
echo Serveur API en cours de démarrage sur http://localhost:8000
echo.
echo Utilisez ce serveur pour:
echo - Intégration avec Cursor (http://localhost:8000/v1)
echo - Intégration avec Obsidian (via l'endpoint /generate)
echo Endpoints disponibles:
echo - http://localhost:8000/v1/chat/completions (compatible OpenAI/Cursor)
echo - http://localhost:8000/v1/models (liste des modèles)
echo - http://localhost:8000/generate (API simplifiée pour Obsidian)
echo - http://localhost:8000/health (statut du serveur)
echo.

REM Vérifier si Ollama est prêt en testant l'API
echo Vérification des modèles Ollama disponibles...
curl -s http://localhost:11434/api/tags > models_temp.json 2>NUL
if %ERRORLEVEL% EQU 0 (
    echo Ollama est prêt. Modèles disponibles:

    REM Utiliser findstr pour extraire les noms des modèles (version simplifiée)
    findstr /C:"\"name\":" models_temp.json

    REM Supprimer le fichier temporaire
    del models_temp.json
) else (
    echo Ollama semble ne pas répondre. Les modèles pourraient ne pas être disponibles.
)

echo.
echo Appuyez sur Ctrl+C pour arrêter le serveur
echo.

REM Lancement du serveur API
python api_server.py
REM Option pour exécuter en arrière-plan
if "%1"=="-b" goto background
if "%1"=="--background" goto background

REM Lancement normal du serveur API
python api_server.py
goto end

:background
REM Lancement en arrière-plan (minimisé)
start /min python api_server.py
echo Serveur API démarré en arrière-plan.
goto end

:end
REM Ce code ne sera exécuté qu'après l'arrêt du serveur
echo.
echo Serveur API arrêté.
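The launch scripts in this commit (and the run-api.ps1 variant further down) all begin by checking that `ollama.exe` is running and that its API answers on http://localhost:11434/api/tags. A minimal Python sketch of that same check, using the `psutil` and `requests` packages that api_server.py already imports; the function name `ollama_is_ready` is illustrative and not part of the repository:

```python
import psutil
import requests

def ollama_is_ready(timeout: float = 5.0) -> bool:
    """Return True if an ollama process exists and its HTTP API answers."""
    # Equivalent of: tasklist /FI "IMAGENAME eq ollama.exe"
    process_found = any(
        (p.info.get("name") or "").lower().startswith("ollama")
        for p in psutil.process_iter(["name"])
    )
    if not process_found:
        return False
    try:
        # Equivalent of: curl -s http://localhost:11434/api/tags
        resp = requests.get("http://localhost:11434/api/tags", timeout=timeout)
        return resp.status_code == 200
    except requests.RequestException:
        return False

if __name__ == "__main__":
    print("Ollama ready:", ollama_is_ready())
```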
api_server.py (476 changed lines)
@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Serveur API pour intégrer LLM Lab avec Cursor et Obsidian
|
||||
API Server to integrate LLM Lab with Cursor and Obsidian
|
||||
"""
|
||||
from flask import Flask, request, jsonify, Response
|
||||
from flask_cors import CORS
|
||||
@ -9,14 +9,19 @@ import os
|
||||
import logging
|
||||
import time
|
||||
import sys
|
||||
import subprocess
|
||||
import psutil
|
||||
import requests
|
||||
import argparse
|
||||
|
||||
# Ajouter le répertoire courant au chemin de recherche Python
|
||||
# Add current directory to Python search path
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
# Importer les modules LLM Lab
|
||||
# Import LLM Lab modules
|
||||
from utils.agent_manager import AgentManager
|
||||
from utils.ollama_manager import ollama_manager
|
||||
|
||||
# Configuration du logging
|
||||
# Logging configuration
|
||||
os.makedirs("logs", exist_ok=True)
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
@ -28,39 +33,168 @@ logging.basicConfig(
|
||||
)
|
||||
logger = logging.getLogger("api_server")
|
||||
|
||||
# Initialisation de l'application Flask
|
||||
# Parse command line arguments
|
||||
parser = argparse.ArgumentParser(description="LLM Lab API Server")
|
||||
parser.add_argument("--port", type=int, default=8000, help="Port to run the server on")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Flask app initialization
|
||||
app = Flask(__name__)
|
||||
CORS(app) # Permet les requêtes cross-origin
|
||||
CORS(app) # Allow cross-origin requests
|
||||
|
||||
# Custom model override based on environment variables
|
||||
cursor_model = os.environ.get("CURSOR_MODEL")
|
||||
obsidian_model = os.environ.get("OBSIDIAN_MODEL")
|
||||
|
||||
# Log which models are being used for this instance
|
||||
if cursor_model:
|
||||
logger.info(f"Using custom model for Cursor: {cursor_model}")
|
||||
if obsidian_model:
|
||||
logger.info(f"Using custom model for Obsidian: {obsidian_model}")
|
||||
|
||||
# Initialize all required agents
|
||||
# Extract default models from environment or use defaults
|
||||
default_cursor_model = cursor_model or "codellama:13b-python"
|
||||
default_obsidian_model = obsidian_model or "llama2:13b"
|
||||
|
||||
# Préparation au démarrage - précharger les modèles appropriés
|
||||
logger.info("Initialisation du serveur API unifié...")
|
||||
|
||||
# Précharger les modèles si Ollama est disponible
|
||||
if ollama_manager.is_ollama_available():
|
||||
# Déterminer les modèles à précharger
|
||||
models_to_preload = []
|
||||
|
||||
# Toujours inclure les modèles spécifiés dans les variables d'environnement
|
||||
if cursor_model:
|
||||
models_to_preload.append(cursor_model)
|
||||
logger.info(f"Modèle Cursor (depuis variable d'env): {cursor_model}")
|
||||
else:
|
||||
models_to_preload.append("codellama:13b-python")
|
||||
logger.info("Modèle Cursor (défaut): codellama:13b-python")
|
||||
|
||||
if obsidian_model:
|
||||
models_to_preload.append(obsidian_model)
|
||||
logger.info(f"Modèle Obsidian (depuis variable d'env): {obsidian_model}")
|
||||
else:
|
||||
models_to_preload.append("llama2:13b")
|
||||
logger.info("Modèle Obsidian (défaut): llama2:13b")
|
||||
|
||||
# Précharger les modèles
|
||||
logger.info(f"Préchargement des modèles: {', '.join(models_to_preload)}")
|
||||
ollama_manager.preload_models(models_to_preload)
|
||||
|
||||
# Attendre quelques secondes pour laisser le temps au premier modèle de commencer à charger
|
||||
logger.info("Attente de 10 secondes pour l'initialisation des modèles...")
|
||||
time.sleep(10)
|
||||
else:
|
||||
logger.warning("Ollama n'est pas disponible. Le préchargement des modèles est ignoré.")
|
||||
|
||||
# Détecter le type de requête et choisir le modèle approprié
|
||||
def detect_request_type(prompt, endpoint_type=None):
|
||||
"""
|
||||
Détermine le type de requête (code ou texte) et le modèle approprié
|
||||
|
||||
Args:
|
||||
prompt: Le texte de la requête
|
||||
endpoint_type: Le type d'endpoint appelé ('cursor', 'obsidian', ou None pour auto-détection)
|
||||
|
||||
Returns:
|
||||
tuple: (type_requete, modele_recommandé)
|
||||
"""
|
||||
# Si l'endpoint est explicitement défini, utiliser le modèle correspondant
|
||||
if endpoint_type == "cursor":
|
||||
return "code", cursor_model or "codellama:13b-python"
|
||||
elif endpoint_type == "obsidian":
|
||||
return "text", obsidian_model or "llama2:13b"
|
||||
|
||||
# Indicateurs pour du code
|
||||
code_indicators = [
|
||||
"```", "function", "class", "def ", "import ", "sudo ", "npm ", "pip ",
|
||||
"python", "javascript", "typescript", "html", "css", "ruby", "php", "java",
|
||||
"json", "xml", "yaml", "bash", "shell", "powershell", "sql",
|
||||
"for(", "if(", "while(", "switch(", "{", "}", "==", "=>", "!=", "||", "&&"
|
||||
]
|
||||
|
||||
# Indicateurs pour du texte
|
||||
text_indicators = [
|
||||
"résumé", "résume", "explique", "explique-moi", "summarize", "explain",
|
||||
"rédige", "écris", "write", "create a", "crée", "génère", "generate",
|
||||
"markdown", "obsidian", "note", "article", "blog", "histoire", "story",
|
||||
"essai", "dissertation", "rapport", "report", "livre", "book"
|
||||
]
|
||||
|
||||
# Compter les occurrences
|
||||
code_score = sum(1 for indicator in code_indicators if indicator.lower() in prompt.lower())
|
||||
text_score = sum(1 for indicator in text_indicators if indicator.lower() in prompt.lower())
|
||||
|
||||
# Normaliser les scores en fonction du nombre d'indicateurs
|
||||
code_score = code_score / len(code_indicators)
|
||||
text_score = text_score / len(text_indicators)
|
||||
|
||||
# Décision basée sur les scores
|
||||
if code_score > text_score:
|
||||
return "code", cursor_model or "codellama:13b-python"
|
||||
else:
|
||||
return "text", obsidian_model or "llama2:13b"
|
||||
|
||||
# Fonction pour basculer le modèle en fonction du type de requête
|
||||
def ensure_appropriate_model(prompt, endpoint_type=None):
|
||||
"""
|
||||
Assure que le modèle approprié est chargé en fonction de la requête
|
||||
|
||||
Args:
|
||||
prompt: Le texte de la requête
|
||||
endpoint_type: Le type d'endpoint appelé ('cursor', 'obsidian', ou None)
|
||||
|
||||
Returns:
|
||||
str: Le modèle qui sera utilisé
|
||||
"""
|
||||
request_type, recommended_model = detect_request_type(prompt, endpoint_type)
|
||||
|
||||
# Vérifier si un changement de modèle est nécessaire
|
||||
if ollama_manager.is_model_switch_needed(recommended_model):
|
||||
logger.info(f"Détecté demande de type '{request_type}', basculement vers {recommended_model}")
|
||||
ollama_manager.switch_model(recommended_model, max_wait=120)
|
||||
else:
|
||||
current_model = ollama_manager.get_running_model() or "inconnu"
|
||||
logger.info(f"Requête de type '{request_type}', utilisation du modèle actuel: {current_model}")
|
||||
|
||||
return recommended_model
|
||||
|
||||
@app.route('/v1/chat/completions', methods=['POST'])
|
||||
def chat_completion():
|
||||
"""
|
||||
Endpoint compatible avec l'API OpenAI Chat pour Cursor
|
||||
OpenAI-compatible Chat API endpoint for Cursor
|
||||
"""
|
||||
try:
|
||||
# Vérifier que la requête contient du JSON valide
|
||||
# Check for valid JSON request
|
||||
if not request.is_json:
|
||||
return jsonify({"error": "La requête doit contenir du JSON valide"}), 400
|
||||
return jsonify({"error": "Request must contain valid JSON"}), 400
|
||||
|
||||
data = request.json or {} # Utiliser un dictionnaire vide par défaut si None
|
||||
logger.info(f"Requête reçue: {json.dumps(data)}")
|
||||
data = request.json or {} # Use empty dict as default if None
|
||||
logger.info(f"Request received: {json.dumps(data)}")
|
||||
|
||||
# Extraire les messages et les paramètres
|
||||
# Extract messages and parameters
|
||||
messages = data.get('messages', [])
|
||||
model = data.get('model', 'codellama:13b-python')
|
||||
temperature = data.get('temperature', 0.7)
|
||||
|
||||
# Construire le prompt à partir des messages
|
||||
# Build prompt from messages
|
||||
system_message = next((msg['content'] for msg in messages if msg['role'] == 'system'), None)
|
||||
user_messages = [msg['content'] for msg in messages if msg['role'] == 'user']
|
||||
|
||||
# Utiliser le dernier message utilisateur comme prompt
|
||||
# Use last user message as prompt
|
||||
prompt = user_messages[-1] if user_messages else ""
|
||||
|
||||
# Détecter le type de tâche pour choisir l'agent approprié
|
||||
agent_name = "cursor" # Par défaut
|
||||
# Detect request type and ensure appropriate model is loaded
|
||||
# This is the Cursor endpoint, so we force 'cursor' as endpoint type
|
||||
ensure_appropriate_model(prompt, endpoint_type="cursor")
|
||||
|
||||
# Logique de sélection d'agent en fonction du contenu
|
||||
# Detect task type to choose appropriate agent
|
||||
agent_name = "cursor" # Default
|
||||
|
||||
# Agent selection logic based on content
|
||||
if "obsidian" in prompt.lower() or "markdown" in prompt.lower() or "note" in prompt.lower():
|
||||
agent_name = "obsidian"
|
||||
elif "javascript" in prompt.lower() or "js" in prompt.lower() or "html" in prompt.lower() or "css" in prompt.lower():
|
||||
@ -68,27 +202,44 @@ def chat_completion():
|
||||
elif "python" in prompt.lower():
|
||||
agent_name = "python"
|
||||
|
||||
logger.info(f"Agent sélectionné: {agent_name}")
|
||||
logger.info(f"Selected agent: {agent_name}")
|
||||
|
||||
# Créer et configurer l'agent
|
||||
# Create and configure agent
|
||||
agent = AgentManager.create(agent_name)
|
||||
|
||||
# Remplacer le system prompt si fourni
|
||||
# Apply model override from environment if available
|
||||
# This allows specific instances to use specific models
|
||||
if agent_name == "cursor" and cursor_model:
|
||||
from core.factory import LLMFactory
|
||||
from agents.roles import AGENTS # Importation pour éviter les erreurs
|
||||
logger.info(f"Overriding model for cursor agent: {cursor_model}")
|
||||
|
||||
agent = LLMFactory.create(cursor_model)
|
||||
agent.set_role(agent_name, AGENTS[agent_name])
|
||||
elif agent_name == "obsidian" and obsidian_model:
|
||||
from core.factory import LLMFactory
|
||||
from agents.roles import AGENTS # Importation pour éviter les erreurs
|
||||
logger.info(f"Overriding model for obsidian agent: {obsidian_model}")
|
||||
|
||||
agent = LLMFactory.create(obsidian_model)
|
||||
agent.set_role(agent_name, AGENTS[agent_name])
|
||||
|
||||
# Replace system prompt if provided
|
||||
if system_message:
|
||||
agent.system_prompt = system_message
|
||||
|
||||
# Ajuster les paramètres
|
||||
# Adjust parameters
|
||||
agent.params["temperature"] = temperature
|
||||
|
||||
# Générer la réponse
|
||||
# Generate response
|
||||
start_time = time.time()
|
||||
response = agent.generate(prompt)
|
||||
end_time = time.time()
|
||||
|
||||
generation_time = end_time - start_time
|
||||
logger.info(f"Réponse générée pour l'agent {agent_name} en {generation_time:.2f} secondes")
|
||||
logger.info(f"Response generated for agent {agent_name} in {generation_time:.2f} seconds")
|
||||
|
||||
# Formatage compatible avec l'API OpenAI
|
||||
# OpenAI API compatible formatting
|
||||
return jsonify({
|
||||
"id": f"llmlab-{agent_name}-{hash(prompt) % 10000}",
|
||||
"object": "chat.completion",
|
||||
@ -112,7 +263,7 @@ def chat_completion():
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Erreur: {str(e)}", exc_info=True)
|
||||
logger.error(f"Error: {str(e)}", exc_info=True)
|
||||
return jsonify({
|
||||
"error": {
|
||||
"message": str(e),
|
||||
@ -124,19 +275,26 @@ def chat_completion():
|
||||
@app.route('/v1/models', methods=['GET'])
|
||||
def list_models():
|
||||
"""
|
||||
Liste les modèles disponibles (compatible OpenAI)
|
||||
List available models (OpenAI compatible)
|
||||
"""
|
||||
agents = AgentManager.list_agents()
|
||||
models = []
|
||||
|
||||
for agent_name, info in agents.items():
|
||||
# Apply model overrides from environment variables
|
||||
model_name = info['model']
|
||||
if agent_name == "cursor" and cursor_model:
|
||||
model_name = cursor_model
|
||||
elif agent_name == "obsidian" and obsidian_model:
|
||||
model_name = obsidian_model
|
||||
|
||||
models.append({
|
||||
"id": info['model'],
|
||||
"id": model_name,
|
||||
"object": "model",
|
||||
"created": int(time.time()),
|
||||
"owned_by": "llmlab",
|
||||
"permission": [{"id": agent_name, "object": "model_permission"}],
|
||||
"root": info['model'],
|
||||
"root": model_name,
|
||||
"parent": None
|
||||
})
|
||||
|
||||
@ -148,60 +306,154 @@ def list_models():
|
||||
@app.route('/health', methods=['GET'])
|
||||
def health_check():
|
||||
"""
|
||||
Endpoint de vérification de l'état du serveur
|
||||
Server health check endpoint
|
||||
"""
|
||||
# Get current Ollama state
|
||||
current_model = "none"
|
||||
ollama_status = "unavailable"
|
||||
|
||||
if ollama_manager.is_ollama_available():
|
||||
ollama_status = "online"
|
||||
current_model = ollama_manager.get_running_model() or "unknown"
|
||||
|
||||
return jsonify({
|
||||
"status": "healthy",
|
||||
"version": "1.0.0",
|
||||
"timestamp": int(time.time())
|
||||
"timestamp": int(time.time()),
|
||||
"port": args.port,
|
||||
"cursor_model": cursor_model,
|
||||
"obsidian_model": obsidian_model,
|
||||
"ollama_status": ollama_status,
|
||||
"current_model": current_model
|
||||
})
|
||||
|
||||
@app.route('/agents', methods=['GET'])
|
||||
def list_agents():
|
||||
"""
|
||||
Liste les agents disponibles (endpoint personnalisé)
|
||||
List available agents (custom endpoint)
|
||||
"""
|
||||
agents = AgentManager.list_agents()
|
||||
return jsonify({
|
||||
"agents": [
|
||||
{
|
||||
"name": name,
|
||||
"model": info['model'],
|
||||
"model": cursor_model if name == "cursor" and cursor_model else
|
||||
obsidian_model if name == "obsidian" and obsidian_model else
|
||||
info['model'],
|
||||
"description": info['description']
|
||||
}
|
||||
for name, info in agents.items()
|
||||
]
|
||||
})
|
||||
|
||||
@app.route('/running', methods=['GET'])
|
||||
def running_models():
|
||||
"""
|
||||
Endpoint to check currently running models
|
||||
"""
|
||||
try:
|
||||
# Try to get list of available models via Ollama API
|
||||
ollama_available = ollama_manager.is_ollama_available()
|
||||
available_models = ollama_manager.available_models
|
||||
running_model = ollama_manager.get_running_model()
|
||||
|
||||
# Compatibility with previous implementation
|
||||
running_models = []
|
||||
if running_model:
|
||||
running_models.append({
|
||||
"name": running_model,
|
||||
"status": "active",
|
||||
"memory": "unknown"
|
||||
})
|
||||
|
||||
return jsonify({
|
||||
"ollama_available": ollama_available,
|
||||
"available_models": available_models,
|
||||
"running_models": running_models,
|
||||
"current_model": running_model,
|
||||
"cursor_model": cursor_model,
|
||||
"obsidian_model": obsidian_model,
|
||||
"timestamp": int(time.time())
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {str(e)}", exc_info=True)
|
||||
return jsonify({
|
||||
"error": str(e)
|
||||
}), 500
|
||||
|
||||
@app.route('/generate', methods=['POST'])
|
||||
def generate():
|
||||
"""
|
||||
Endpoint simplifié pour les applications personnalisées
|
||||
Simplified endpoint for custom applications
|
||||
"""
|
||||
try:
|
||||
# Vérifier que la requête contient du JSON valide
|
||||
# Check for valid JSON request
|
||||
if not request.is_json:
|
||||
return jsonify({"error": "La requête doit contenir du JSON valide"}), 400
|
||||
return jsonify({"error": "Request must contain valid JSON"}), 400
|
||||
|
||||
data = request.json or {} # Utiliser un dictionnaire vide par défaut si None
|
||||
data = request.json or {} # Use empty dict as default if None
|
||||
prompt = data.get('prompt', '')
|
||||
agent_name = data.get('agent', 'cursor')
|
||||
agent_name = data.get('agent', 'auto') # Par défaut, auto-détection
|
||||
|
||||
# Paramètres optionnels
|
||||
# Optional parameters
|
||||
system_prompt = data.get('system_prompt', None)
|
||||
temperature = data.get('temperature', None)
|
||||
|
||||
# Créer l'agent
|
||||
# Détection d'application - si l'URL contient un port spécifique
|
||||
endpoint_type = None
|
||||
if request.host.endswith(':8001'):
|
||||
endpoint_type = "cursor"
|
||||
elif request.host.endswith(':5001'):
|
||||
endpoint_type = "obsidian"
|
||||
|
||||
# Si l'agent est spécifié explicitement
|
||||
if agent_name == "cursor":
|
||||
endpoint_type = "cursor"
|
||||
elif agent_name == "obsidian":
|
||||
endpoint_type = "obsidian"
|
||||
elif agent_name == "auto":
|
||||
# Auto-détection basée sur le contenu
|
||||
endpoint_type = None
|
||||
|
||||
# Détecter le type et s'assurer que le bon modèle est chargé
|
||||
logger.info(f"Analyse de la requête... Agent: {agent_name}, Endpoint: {endpoint_type}")
|
||||
ensure_appropriate_model(prompt, endpoint_type)
|
||||
|
||||
# Déterminer l'agent optimal si 'auto' est spécifié
|
||||
if agent_name == "auto":
|
||||
request_type, _ = detect_request_type(prompt)
|
||||
if request_type == "code":
|
||||
agent_name = "cursor"
|
||||
else:
|
||||
agent_name = "obsidian"
|
||||
logger.info(f"Agent auto-sélectionné en fonction du contenu: {agent_name}")
|
||||
|
||||
# Create agent
|
||||
agent = AgentManager.create(agent_name)
|
||||
|
||||
# Appliquer les paramètres personnalisés si fournis
|
||||
# Apply model override from environment if available
|
||||
if agent_name == "cursor" and cursor_model:
|
||||
from core.factory import LLMFactory
|
||||
from agents.roles import AGENTS
|
||||
logger.info(f"Overriding model for cursor agent: {cursor_model}")
|
||||
agent = LLMFactory.create(cursor_model)
|
||||
agent.set_role(agent_name, AGENTS[agent_name])
|
||||
elif agent_name == "obsidian" and obsidian_model:
|
||||
from core.factory import LLMFactory
|
||||
from agents.roles import AGENTS
|
||||
logger.info(f"Overriding model for obsidian agent: {obsidian_model}")
|
||||
agent = LLMFactory.create(obsidian_model)
|
||||
agent.set_role(agent_name, AGENTS[agent_name])
|
||||
|
||||
# Apply custom parameters if provided
|
||||
if system_prompt:
|
||||
agent.system_prompt = system_prompt
|
||||
|
||||
if temperature is not None:
|
||||
agent.params["temperature"] = temperature
|
||||
|
||||
# Générer la réponse
|
||||
# Generate response
|
||||
start_time = time.time()
|
||||
response = agent.generate(prompt)
|
||||
generation_time = time.time() - start_time
|
||||
@ -214,38 +466,138 @@ def generate():
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Erreur: {str(e)}", exc_info=True)
|
||||
logger.error(f"Error: {str(e)}", exc_info=True)
|
||||
return jsonify({
|
||||
"error": str(e)
|
||||
}), 500
|
||||
|
||||
@app.route('/switch-model', methods=['POST'])
|
||||
def switch_model():
|
||||
"""
|
||||
Endpoint to manually switch Ollama to a specific model
|
||||
"""
|
||||
try:
|
||||
if not request.is_json:
|
||||
return jsonify({"error": "Request must contain valid JSON"}), 400
|
||||
|
||||
data = request.json or {} # Utiliser un dictionnaire vide si json est None
|
||||
model_name = data.get('model')
|
||||
|
||||
if not model_name:
|
||||
return jsonify({"error": "Model name is required"}), 400
|
||||
|
||||
success = ollama_manager.switch_model(model_name)
|
||||
|
||||
if success:
|
||||
return jsonify({
|
||||
"status": "switching",
|
||||
"model": model_name,
|
||||
"message": f"Switching to model {model_name} in background"
|
||||
})
|
||||
else:
|
||||
return jsonify({
|
||||
"status": "error",
|
||||
"message": f"Failed to switch to model {model_name}"
|
||||
}), 400
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error switching model: {str(e)}", exc_info=True)
|
||||
return jsonify({
|
||||
"error": str(e)
|
||||
}), 500
|
||||
|
||||
if __name__ == '__main__':
|
||||
print("=== Serveur API LLM Lab pour Cursor et Obsidian ===")
|
||||
print("Serveur démarré sur http://localhost:8000")
|
||||
port = args.port
|
||||
|
||||
# Log which models are being used
|
||||
model_info = ""
|
||||
if cursor_model:
|
||||
model_info += f"\n - Cursor override model: {cursor_model}"
|
||||
if obsidian_model:
|
||||
model_info += f"\n - Obsidian override model: {obsidian_model}"
|
||||
|
||||
print(f"=== LLM Lab API Server for Cursor and Obsidian ===")
|
||||
print(f"Server started on http://localhost:{port}")
|
||||
if model_info:
|
||||
print(f"\nUsing custom models:{model_info}")
|
||||
print()
|
||||
print("Endpoints disponibles:")
|
||||
print(" - http://localhost:8000/v1/chat/completions (compatible OpenAI)")
|
||||
print(" - http://localhost:8000/v1/models (compatible OpenAI)")
|
||||
print(" - http://localhost:8000/generate (API simplifiée)")
|
||||
print(" - http://localhost:8000/agents (liste d'agents)")
|
||||
print(" - http://localhost:8000/health (statut)")
|
||||
|
||||
# Show Ollama status
|
||||
if ollama_manager.is_ollama_available():
|
||||
print("Ollama status: Online")
|
||||
current_model = ollama_manager.get_running_model()
|
||||
if current_model:
|
||||
print(f"Currently loaded model: {current_model}")
|
||||
|
||||
# Print list of available models
|
||||
if ollama_manager.available_models:
|
||||
print("\nAvailable models:")
|
||||
for model in ollama_manager.available_models:
|
||||
print(f" - {model}")
|
||||
else:
|
||||
print("Ollama status: Offline")
|
||||
|
||||
print("\nAvailable endpoints:")
|
||||
print(f" - http://localhost:{port}/v1/chat/completions (OpenAI compatible)")
|
||||
print(f" - http://localhost:{port}/v1/models (OpenAI compatible)")
|
||||
print(f" - http://localhost:{port}/generate (Simplified API)")
|
||||
print(f" - http://localhost:{port}/agents (agent list)")
|
||||
print(f" - http://localhost:{port}/running (running models)")
|
||||
print(f" - http://localhost:{port}/switch-model (manual model control)")
|
||||
print(f" - http://localhost:{port}/health (status)")
|
||||
print()
|
||||
print("Pour Cursor:")
|
||||
print(" 1. Ouvrez Cursor")
|
||||
print(" 2. Allez dans Settings > AI")
|
||||
print(" 3. Sélectionnez 'Custom endpoint'")
|
||||
print(" 4. Entrez l'URL: http://localhost:8000/v1")
|
||||
|
||||
# Show specific usage based on port for clearer user guidance
|
||||
if port == 8001:
|
||||
print("For Cursor:")
|
||||
print(" 1. Open Cursor")
|
||||
print(" 2. Go to Settings > AI")
|
||||
print(" 3. Select 'Custom endpoint'")
|
||||
print(f" 4. Enter URL: http://localhost:{port}/v1")
|
||||
elif port == 5001:
|
||||
print("For Obsidian Text Generator plugin:")
|
||||
print(" 1. In Obsidian, install the 'Text Generator' plugin")
|
||||
print(" 2. Go to Text Generator settings")
|
||||
print(" 3. Select 'Custom' endpoint")
|
||||
print(f" 4. Enter URL: http://localhost:{port}/generate")
|
||||
print(" 5. Set request method to POST")
|
||||
print(" 6. Set completion endpoint to /generate")
|
||||
else:
|
||||
print("For Cursor:")
|
||||
print(" 1. Open Cursor")
|
||||
print(" 2. Go to Settings > AI")
|
||||
print(" 3. Select 'Custom endpoint'")
|
||||
print(f" 4. Enter URL: http://localhost:{port}/v1")
|
||||
print()
|
||||
print("For Obsidian Text Generator plugin:")
|
||||
print(" 1. In Obsidian, install the 'Text Generator' plugin")
|
||||
print(" 2. Go to Text Generator settings")
|
||||
print(" 3. Select 'Custom' endpoint")
|
||||
print(f" 4. Enter URL: http://localhost:{port}/generate")
|
||||
print(" 5. Set request method to POST")
|
||||
print(" 6. Set completion endpoint to /generate")
|
||||
|
||||
print()
|
||||
print("Agents disponibles:")
|
||||
print("Available agents:")
|
||||
try:
|
||||
for agent_name, info in AgentManager.list_agents().items():
|
||||
print(f" - {agent_name}: {info['description']} ({info['model']})")
|
||||
# Show customized model for agents with override
|
||||
model_display = cursor_model if agent_name == "cursor" and cursor_model else \
|
||||
obsidian_model if agent_name == "obsidian" and obsidian_model else \
|
||||
info['model']
|
||||
print(f" - {agent_name}: {info['description']} ({model_display})")
|
||||
except Exception as e:
|
||||
print(f"Erreur lors de la liste des agents: {str(e)}")
|
||||
print("Assurez-vous que les modules LLM Lab sont correctement installés.")
|
||||
print(f"Error listing agents: {str(e)}")
|
||||
print("Make sure LLM Lab modules are correctly installed.")
|
||||
print()
|
||||
print("Logs: logs/api_server.log")
|
||||
print("Appuyez sur Ctrl+C pour arrêter le serveur")
|
||||
print("Press Ctrl+C to stop the server")
|
||||
|
||||
# Démarrer le serveur
|
||||
app.run(host='0.0.0.0', port=8000, debug=False)
|
||||
try:
|
||||
# Import agents here to avoid circular imports
|
||||
from agents.roles import AGENTS
|
||||
except Exception as e:
|
||||
logger.error(f"Error importing AGENTS: {str(e)}")
|
||||
|
||||
# Start server
|
||||
app.run(host='0.0.0.0', port=port, debug=False)
|
||||
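For reference, a minimal client-side sketch of the two main routes defined in api_server.py above: the OpenAI-style /v1/chat/completions endpoint (fields: model, messages, temperature) and the simplified /generate endpoint (fields: prompt, agent, system_prompt, temperature, with agent defaulting to 'auto'). It assumes a server instance listening on port 8000; this is an illustration, not a file from the commit:

```python
import requests

BASE = "http://localhost:8000"  # adjust to 8001, 5001 or 7000 depending on the launch script used

# OpenAI-compatible endpoint used by Cursor
chat_payload = {
    "model": "codellama:13b-python",
    "temperature": 0.7,
    "messages": [
        {"role": "system", "content": "You are a helpful coding assistant."},
        {"role": "user", "content": "Write a Python function that reverses a string."},
    ],
}
print(requests.post(f"{BASE}/v1/chat/completions", json=chat_payload, timeout=300).json())

# Simplified endpoint used by the Obsidian Text Generator plugin
generate_payload = {
    "prompt": "Résume cette note en trois points.",
    "agent": "auto",  # 'cursor', 'obsidian' or 'auto' (content-based detection)
    "temperature": 0.5,
}
print(requests.post(f"{BASE}/generate", json=generate_payload, timeout=300).json())
```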
lancer-api.bat (new file, 13 lines)
@@ -0,0 +1,13 @@
@echo off
REM Starting API server in the background
powershell -Command "& {Start-Process powershell -ArgumentList '-ExecutionPolicy Bypass -File "".\run-api.ps1"" -background' -WindowStyle Minimized}"

echo API server started at http://localhost:8000/v1
echo.
echo Access points:
echo - Cursor: http://localhost:8000/v1
echo - Obsidian: http://localhost:8000/generate
echo - Models: http://localhost:8000/running
echo.
echo Press any key to close this window...
pause > nul
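lancer-api.bat points at the /running endpoint to inspect loaded models. A short sketch of polling that status (and /health) from Python; the field names follow the JSON built in api_server.py above, and port 8000 is assumed:

```python
import requests

status = requests.get("http://localhost:8000/running", timeout=10).json()
print("Ollama available:", status["ollama_available"])
print("Available models:", status["available_models"])
print("Currently loaded:", status["current_model"])

health = requests.get("http://localhost:8000/health", timeout=10).json()
print("Server:", health["status"], "on port", health["port"], "-", health["ollama_status"])
```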
lancer-cursor.bat (new file, 60 lines)
@@ -0,0 +1,60 @@
@echo off
echo === Lancement du serveur API pour Cursor ===

:: Configuration de l'environnement pour Windows
SET PYTHONIOENCODING=utf-8
SET CURSOR_MODEL=codellama:13b-python

:: Vérifier si Ollama est en cours d'exécution
echo Vérification d'Ollama...
tasklist /FI "IMAGENAME eq ollama.exe" 2>NUL | find /I /N "ollama.exe" >NUL
if %ERRORLEVEL% NEQ 0 (
    echo Ollama n'est pas en cours d'exécution, tentative de démarrage...

    :: Vérifier si Ollama est installé
    if exist "C:\Program Files\Ollama\ollama.exe" (
        echo Démarrage d'Ollama...
        start "" /min "C:\Program Files\Ollama\ollama.exe"
        echo Attente de l'initialisation d'Ollama...
        timeout /t 10 /nobreak >NUL
    ) else (
        echo Impossible de trouver Ollama. Veuillez l'installer depuis https://ollama.com/download/windows
        echo Le serveur API va démarrer, mais les modèles pourraient ne pas être disponibles.
    )
) else (
    echo Ollama est déjà en cours d'exécution.
)

:: Vérification si l'environnement virtuel existe
if not exist "llmlab" (
    echo L'environnement virtuel n'existe pas. Veuillez exécuter setup_env.bat pour le créer.
    exit /b 1
)

:: Activation de l'environnement virtuel
call llmlab\Scripts\activate.bat

:: Vérification des dépendances API
pip show flask flask-cors > nul 2>&1
if ERRORLEVEL 1 (
    echo Installation des dépendances manquantes...
    pip install flask flask-cors
)

:: Affichage des informations
echo.
echo Serveur API en cours de démarrage pour Cursor sur http://localhost:8001
echo.
echo Pour Cursor:
echo - Ouvrez Cursor
echo - Allez dans Settings > AI
echo - Sélectionnez 'Custom endpoint'
echo - Entrez l'URL: http://localhost:8001/v1
echo.

:: Démarrage du serveur API
echo Démarrage du serveur... (Ctrl+C pour arrêter)
python api_server.py --port 8001

:: Désactivation de l'environnement virtuel en cas d'arrêt
call deactivate
lancer-monitor.bat (new file, 32 lines)
@@ -0,0 +1,32 @@
@echo off
echo === LLM Lab System Monitor ===
echo.

REM Check if virtual environment exists
if not exist "llmlab" (
    echo Virtual environment not found. Please run setup_env.bat to create it.
    pause
    exit /b 1
)

REM Activate virtual environment
call llmlab\Scripts\activate.bat

REM Check required dependencies
pip show psutil tkinter > nul 2>&1
if %ERRORLEVEL% NEQ 0 (
    echo Installing missing dependencies...
    pip install psutil
    REM Tkinter is included with Python, not installable via pip
)

echo Starting system monitor...
echo The monitor will display running Ollama models in the "Ollama" tab.
echo.
echo Press Ctrl+C in the monitor window to stop.

REM Launch the monitor
python -m utils.system_monitor

REM Deactivate virtual environment when done
call llmlab\Scripts\deactivate.bat
lancer-obsidian.bat (new file, 62 lines)
@@ -0,0 +1,62 @@
@echo off
echo === Lancement du serveur API pour Obsidian ===

:: Configuration de l'environnement pour Windows
SET PYTHONIOENCODING=utf-8
SET OBSIDIAN_MODEL=llama2:13b

:: Vérifier si Ollama est en cours d'exécution
echo Vérification d'Ollama...
tasklist /FI "IMAGENAME eq ollama.exe" 2>NUL | find /I /N "ollama.exe" >NUL
if %ERRORLEVEL% NEQ 0 (
    echo Ollama n'est pas en cours d'exécution, tentative de démarrage...

    :: Vérifier si Ollama est installé
    if exist "C:\Program Files\Ollama\ollama.exe" (
        echo Démarrage d'Ollama...
        start "" /min "C:\Program Files\Ollama\ollama.exe"
        echo Attente de l'initialisation d'Ollama...
        timeout /t 10 /nobreak >NUL
    ) else (
        echo Impossible de trouver Ollama. Veuillez l'installer depuis https://ollama.com/download/windows
        echo Le serveur API va démarrer, mais les modèles pourraient ne pas être disponibles.
    )
) else (
    echo Ollama est déjà en cours d'exécution.
)

:: Vérification si l'environnement virtuel existe
if not exist "llmlab" (
    echo L'environnement virtuel n'existe pas. Veuillez exécuter setup_env.bat pour le créer.
    exit /b 1
)

:: Activation de l'environnement virtuel
call llmlab\Scripts\activate.bat

:: Vérification des dépendances API
pip show flask flask-cors > nul 2>&1
if ERRORLEVEL 1 (
    echo Installation des dépendances manquantes...
    pip install flask flask-cors
)

:: Affichage des informations
echo.
echo Serveur API en cours de démarrage pour Obsidian sur http://localhost:5001
echo.
echo Pour Obsidian:
echo - Installez le plugin Text Generator dans Obsidian
echo - Allez dans les paramètres du plugin
echo - Sélectionnez "Custom endpoint"
echo - Entrez l'URL: http://localhost:5001/generate
echo - Méthode: POST
echo - Endpoint: /generate
echo.

:: Démarrage du serveur API
echo Démarrage du serveur... (Ctrl+C pour arrêter)
python api_server.py --port 5001

:: Désactivation de l'environnement virtuel en cas d'arrêt
call deactivate
lancer-unifie.bat (new file, 80 lines)
@@ -0,0 +1,80 @@
@echo off
echo === Lancement du serveur API unifié pour Cursor et Obsidian ===

:: Configuration de l'environnement pour Windows
SET PYTHONIOENCODING=utf-8
SET PYTHONLEGACYWINDOWSSTDIO=1
SET CURSOR_MODEL=codellama:13b-python
SET OBSIDIAN_MODEL=llama2:13b
SET PORT=7000

:: Vérifier si Ollama est en cours d'exécution
echo Vérification d'Ollama...
tasklist /FI "IMAGENAME eq ollama.exe" 2>NUL | find /I /N "ollama.exe" >NUL
if %ERRORLEVEL% NEQ 0 (
    echo Ollama n'est pas en cours d'exécution, tentative de démarrage...

    :: Vérifier si Ollama est installé
    if exist "C:\Program Files\Ollama\ollama.exe" (
        echo Démarrage d'Ollama...
        start "" /min "C:\Program Files\Ollama\ollama.exe"
        echo Attente de l'initialisation d'Ollama...
        timeout /t 10 /nobreak >NUL
    ) else (
        echo Impossible de trouver Ollama. Veuillez l'installer depuis https://ollama.com/download/windows
        echo Le serveur API va démarrer, mais les modèles pourraient ne pas être disponibles.
    )
) else (
    echo Ollama est déjà en cours d'exécution.
)

:: Vérification si l'environnement virtuel existe
if not exist "llmlab" (
    echo L'environnement virtuel n'existe pas. Veuillez exécuter setup_env.bat pour le créer.
    exit /b 1
)

:: Activation de l'environnement virtuel
call llmlab\Scripts\activate.bat

:: Vérification des dépendances API
pip show flask flask-cors requests > nul 2>&1
if ERRORLEVEL 1 (
    echo Installation des dépendances manquantes...
    pip install flask flask-cors requests
)

:: Préchargement des modèles
echo Préchargement des modèles pour faciliter les basculements...
python -c "from utils.ollama_manager import ollama_manager; ollama_manager.preload_models(['%CURSOR_MODEL%', '%OBSIDIAN_MODEL%']); print('Préchargement lancé en arrière-plan.')"

:: Affichage des informations
echo.
echo Serveur API unifié en cours de démarrage sur http://localhost:%PORT%
echo.
echo [Configuration pour Cursor]
echo - Ouvrez Cursor
echo - Allez dans Settings > AI
echo - Sélectionnez 'Custom endpoint'
echo - Entrez l'URL: http://localhost:%PORT%/v1
echo.
echo [Configuration pour Obsidian]
echo - Installez le plugin Text Generator dans Obsidian
echo - Allez dans les paramètres du plugin
echo - Sélectionnez "Custom endpoint"
echo - Entrez l'URL: http://localhost:%PORT%/generate
echo - Méthode: POST
echo - Endpoint: /generate
echo.
echo [Mode unifié]
echo Le serveur détectera automatiquement le type de requête et basculera entre les modèles
echo - %CURSOR_MODEL% pour les requêtes de code (Cursor)
echo - %OBSIDIAN_MODEL% pour les requêtes de texte (Obsidian)
echo.

:: Démarrage du serveur API
echo Démarrage du serveur unifié... (Ctrl+C pour arrêter)
python api_server.py --port %PORT%

:: Désactivation de l'environnement virtuel en cas d'arrêt
call deactivate
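The unified launcher relies on the content-based detection in detect_request_type (api_server.py above) to pick a model per request. A short sketch of what that looks like from the client side, assuming the unified server is listening on port 7000 as configured above; this is an illustration, not part of the commit:

```python
import requests

UNIFIED = "http://localhost:7000/generate"

# A code-flavoured prompt: detection should route this to the Cursor model
code_req = {"prompt": "def fibonacci(n): write the body of this Python function", "agent": "auto"}
# A text-flavoured prompt: detection should route this to the Obsidian model
text_req = {"prompt": "Résume cet article en trois points pour une note Obsidian.", "agent": "auto"}

for payload in (code_req, text_req):
    answer = requests.post(UNIFIED, json=payload, timeout=600).json()
    print(payload["prompt"][:40], "->", str(answer)[:120])
```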
@ -15,3 +15,61 @@
|
||||
2025-03-26 21:57:22,642 - werkzeug - INFO - [33mPress CTRL+C to quit[0m
|
||||
2025-03-26 21:57:28,897 - werkzeug - INFO - 127.0.0.1 - - [26/Mar/2025 21:57:28] "GET /health HTTP/1.1" 200 -
|
||||
2025-03-26 21:57:29,503 - werkzeug - INFO - 127.0.0.1 - - [26/Mar/2025 21:57:29] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
|
||||
2025-03-27 20:03:57,655 - werkzeug - INFO - [31m[1mWARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.[0m
|
||||
* Running on all addresses (0.0.0.0)
|
||||
* Running on http://127.0.0.1:5001
|
||||
* Running on http://192.168.31.86:5001
|
||||
2025-03-27 20:03:57,656 - werkzeug - INFO - [33mPress CTRL+C to quit[0m
|
||||
2025-03-27 20:04:09,866 - werkzeug - INFO - 127.0.0.1 - - [27/Mar/2025 20:04:09] "[31m[1mGET /generate HTTP/1.1[0m" 405 -
|
||||
2025-03-27 20:04:10,293 - werkzeug - INFO - 127.0.0.1 - - [27/Mar/2025 20:04:10] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
|
||||
2025-03-27 20:06:23,436 - werkzeug - INFO - 127.0.0.1 - - [27/Mar/2025 20:06:23] "[31m[1mGET /generate HTTP/1.1[0m" 405 -
|
||||
2025-03-27 20:06:26,362 - werkzeug - INFO - 127.0.0.1 - - [27/Mar/2025 20:06:26] "[31m[1mGET /generate HTTP/1.1[0m" 405 -
|
||||
2025-03-27 20:06:46,320 - werkzeug - INFO - [31m[1mWARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.[0m
|
||||
* Running on all addresses (0.0.0.0)
|
||||
* Running on http://127.0.0.1:5001
|
||||
* Running on http://192.168.31.86:5001
|
||||
2025-03-27 20:06:46,320 - werkzeug - INFO - [33mPress CTRL+C to quit[0m
|
||||
2025-03-27 20:07:45,935 - werkzeug - INFO - [31m[1mWARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.[0m
|
||||
* Running on all addresses (0.0.0.0)
|
||||
* Running on http://127.0.0.1:8001
|
||||
* Running on http://192.168.31.86:8001
|
||||
2025-03-27 20:07:45,935 - werkzeug - INFO - [33mPress CTRL+C to quit[0m
|
||||
2025-03-27 20:07:55,316 - werkzeug - INFO - 127.0.0.1 - - [27/Mar/2025 20:07:55] "[33mGET /v1 HTTP/1.1[0m" 404 -
|
||||
2025-03-27 20:07:55,742 - werkzeug - INFO - 127.0.0.1 - - [27/Mar/2025 20:07:55] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
|
||||
2025-03-27 20:14:24,446 - api_server - INFO - Using custom model for Cursor: codellama:13b-python
|
||||
2025-03-27 20:14:24,454 - werkzeug - INFO - [31m[1mWARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.[0m
|
||||
* Running on all addresses (0.0.0.0)
|
||||
* Running on http://127.0.0.1:8001
|
||||
* Running on http://192.168.31.86:8001
|
||||
2025-03-27 20:14:24,454 - werkzeug - INFO - [33mPress CTRL+C to quit[0m
|
||||
2025-03-27 20:14:30,767 - werkzeug - INFO - 127.0.0.1 - - [27/Mar/2025 20:14:30] "[33mGET /v1 HTTP/1.1[0m" 404 -
|
||||
2025-03-27 20:14:44,414 - api_server - INFO - Using custom model for Obsidian: llama2:13b
|
||||
2025-03-27 20:14:44,421 - werkzeug - INFO - [31m[1mWARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.[0m
|
||||
* Running on all addresses (0.0.0.0)
|
||||
* Running on http://127.0.0.1:5001
|
||||
* Running on http://192.168.31.86:5001
|
||||
2025-03-27 20:14:44,422 - werkzeug - INFO - [33mPress CTRL+C to quit[0m
|
||||
2025-03-27 20:14:53,918 - api_server - INFO - Using custom model for Obsidian: llama2:13b
|
||||
2025-03-27 20:14:53,925 - werkzeug - INFO - [31m[1mWARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.[0m
|
||||
* Running on all addresses (0.0.0.0)
|
||||
* Running on http://127.0.0.1:5001
|
||||
* Running on http://192.168.31.86:5001
|
||||
2025-03-27 20:14:53,926 - werkzeug - INFO - [33mPress CTRL+C to quit[0m
|
||||
2025-03-27 20:14:56,416 - werkzeug - INFO - 127.0.0.1 - - [27/Mar/2025 20:14:56] "[31m[1mGET /generate HTTP/1.1[0m" 405 -
|
||||
2025-04-21 17:25:43,829 - api_server - INFO - Preloading model for Cursor: codellama:13b-python
|
||||
2025-04-21 17:25:47,724 - ollama_manager - INFO - Chargement du modèle codellama:13b-python...
|
||||
2025-04-21 17:25:50,086 - werkzeug - INFO - [31m[1mWARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.[0m
|
||||
* Running on all addresses (0.0.0.0)
|
||||
* Running on http://127.0.0.1:8001
|
||||
* Running on http://192.168.31.86:8001
|
||||
2025-04-21 17:25:50,087 - werkzeug - INFO - [33mPress CTRL+C to quit[0m
|
||||
2025-04-21 17:25:54,783 - ollama_manager - WARNING - Délai dépassé lors du chargement du modèle codellama:13b-python
|
||||
2025-04-21 17:30:36,589 - api_server - INFO - Using custom model for Cursor: codellama:13b-python
|
||||
2025-04-21 17:30:38,649 - api_server - INFO - Preloading model for Cursor: codellama:13b-python
|
||||
2025-04-21 17:30:46,872 - ollama_manager - INFO - Chargement du modèle codellama:13b-python... (délai max: 180s)
|
||||
2025-04-21 17:30:47,923 - werkzeug - INFO - [31m[1mWARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.[0m
|
||||
* Running on all addresses (0.0.0.0)
|
||||
* Running on http://127.0.0.1:8001
|
||||
* Running on http://192.168.31.86:8001
|
||||
2025-04-21 17:30:47,923 - werkzeug - INFO - [33mPress CTRL+C to quit[0m
|
||||
2025-04-21 17:30:55,196 - ollama_manager - INFO - Modèle codellama:13b-python chargé avec succès
|
||||
|
||||
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
"""
Lanceur pour le moniteur système LLM Lab
Launcher for LLM Lab system monitor
"""
from utils.system_monitor import main
run-api.ps1 (140 changed lines)
@ -1,25 +1,58 @@
|
||||
# Script PowerShell pour lancer le serveur API LLM Lab
|
||||
Write-Host "=== Lancement du serveur API LLM Lab pour Cursor et Obsidian ===" -ForegroundColor Green
|
||||
# PowerShell script to launch the LLM Lab API server autonomously
|
||||
Write-Host "=== LLM Lab API Server Automatic Launch for Cursor and Obsidian ===" -ForegroundColor Green
|
||||
|
||||
# Vérifier si l'environnement virtuel existe
|
||||
# Function to check if a process is running
|
||||
function Test-ProcessRunning {
|
||||
param (
|
||||
[string]$ProcessName
|
||||
)
|
||||
return (Get-Process -Name $ProcessName -ErrorAction SilentlyContinue)
|
||||
}
|
||||
|
||||
# Function to start Ollama if not already running
|
||||
function Start-OllamaIfNeeded {
|
||||
if (-not (Test-ProcessRunning -ProcessName "ollama")) {
|
||||
Write-Host "Starting Ollama..." -ForegroundColor Yellow
|
||||
|
||||
# Find Ollama location
|
||||
$ollamaPath = "C:\Program Files\Ollama\ollama.exe"
|
||||
|
||||
if (Test-Path -Path $ollamaPath) {
|
||||
# Start Ollama in background
|
||||
Start-Process -FilePath $ollamaPath -WindowStyle Minimized
|
||||
Write-Host "Ollama started. Waiting for server initialization..." -ForegroundColor Green
|
||||
Start-Sleep -Seconds 5 # Wait for Ollama server to start
|
||||
} else {
|
||||
Write-Host "Unable to find Ollama. Please ensure it is installed." -ForegroundColor Red
|
||||
Write-Host "You can download it from https://ollama.com/download/windows" -ForegroundColor Yellow
|
||||
}
|
||||
} else {
|
||||
Write-Host "Ollama is already running." -ForegroundColor Green
|
||||
}
|
||||
}
|
||||
|
||||
# Check if virtual environment exists
|
||||
if (-not (Test-Path -Path ".\llmlab")) {
|
||||
Write-Host "L'environnement virtuel n'existe pas. Veuillez exécuter setup_env.bat pour le créer." -ForegroundColor Red
|
||||
Write-Host "Virtual environment not found. Please run setup_env.bat to create it." -ForegroundColor Red
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Activer l'environnement virtuel
|
||||
Write-Host "Activation de l'environnement virtuel..." -ForegroundColor Cyan
|
||||
# Start Ollama automatically if needed
|
||||
Start-OllamaIfNeeded
|
||||
|
||||
# Activate virtual environment
|
||||
Write-Host "Activating virtual environment..." -ForegroundColor Cyan
|
||||
try {
|
||||
& .\llmlab\Scripts\Activate.ps1
|
||||
} catch {
|
||||
Write-Host "Erreur lors de l'activation de l'environnement virtuel: $_" -ForegroundColor Red
|
||||
Write-Host "Tentative alternative d'activation..." -ForegroundColor Yellow
|
||||
Write-Host "Error activating virtual environment: $_" -ForegroundColor Red
|
||||
Write-Host "Trying alternative activation..." -ForegroundColor Yellow
|
||||
& cmd /c ".\llmlab\Scripts\activate.bat && powershell -NoExit"
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Vérifier que Flask et Flask-CORS sont installés
|
||||
Write-Host "Vérification des dépendances..." -ForegroundColor Cyan
|
||||
# Check that Flask and Flask-CORS are installed
|
||||
Write-Host "Checking dependencies..." -ForegroundColor Cyan
|
||||
$flaskInstalled = $false
|
||||
$flaskCorsInstalled = $false
|
||||
|
||||
@ -28,42 +61,81 @@ try {
|
||||
$flaskInstalled = $modules -match "flask" -and $modules -notmatch "flask-cors"
|
||||
$flaskCorsInstalled = $modules -match "flask-cors"
|
||||
} catch {
|
||||
Write-Host "Erreur lors de la vérification des modules: $_" -ForegroundColor Red
|
||||
Write-Host "Error checking modules: $_" -ForegroundColor Red
|
||||
}
|
||||
|
||||
# Installer les dépendances manquantes
|
||||
# Install missing dependencies
|
||||
if (-not $flaskInstalled) {
|
||||
Write-Host "Installation de Flask..." -ForegroundColor Yellow
|
||||
Write-Host "Installing Flask..." -ForegroundColor Yellow
|
||||
pip install flask
|
||||
}
|
||||
|
||||
if (-not $flaskCorsInstalled) {
|
||||
Write-Host "Installation de Flask-CORS..." -ForegroundColor Yellow
|
||||
Write-Host "Installing Flask-CORS..." -ForegroundColor Yellow
|
||||
pip install flask-cors
|
||||
}
|
||||
|
||||
# Vérifier si Ollama est en cours d'exécution
|
||||
Write-Host "Vérification qu'Ollama est en cours d'exécution..." -ForegroundColor Cyan
|
||||
try {
|
||||
$ollamaResponse = Invoke-WebRequest -Uri "http://localhost:11434/api/tags" -UseBasicParsing -ErrorAction SilentlyContinue
|
||||
if ($ollamaResponse.StatusCode -eq 200) {
|
||||
Write-Host "Ollama est en cours d'exécution." -ForegroundColor Green
|
||||
} else {
|
||||
Write-Host "Ollama semble ne pas fonctionner correctement." -ForegroundColor Yellow
|
||||
# Check if Ollama is ready
|
||||
$ollamaReady = $false
|
||||
$retryCount = 0
|
||||
$maxRetries = 5
|
||||
|
||||
Write-Host "Checking if Ollama server is ready..." -ForegroundColor Cyan
|
||||
while (-not $ollamaReady -and $retryCount -lt $maxRetries) {
|
||||
try {
|
||||
$ollamaResponse = Invoke-WebRequest -Uri "http://localhost:11434/api/tags" -UseBasicParsing -ErrorAction SilentlyContinue
|
||||
if ($ollamaResponse.StatusCode -eq 200) {
|
||||
$ollamaReady = $true
|
||||
$models = ($ollamaResponse.Content | ConvertFrom-Json).models
|
||||
$modelCount = $models.Length
|
||||
|
||||
Write-Host "Ollama is ready with $modelCount available models." -ForegroundColor Green
|
||||
|
||||
# Display available models
|
||||
if ($modelCount -gt 0) {
|
||||
Write-Host "Available models:" -ForegroundColor Cyan
|
||||
foreach ($model in $models) {
|
||||
Write-Host " - $($model.name)" -ForegroundColor Cyan
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
$retryCount++
|
||||
if ($retryCount -lt $maxRetries) {
|
||||
Write-Host "Ollama not ready yet. Retrying in 3 seconds..." -ForegroundColor Yellow
|
||||
Start-Sleep -Seconds 3
|
||||
} else {
|
||||
Write-Host "Unable to connect to Ollama after several attempts." -ForegroundColor Red
|
||||
Write-Host "API server will start, but Ollama models might not be available." -ForegroundColor Yellow
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
Write-Host "Impossible de se connecter à Ollama. Assurez-vous qu'il est en cours d'exécution." -ForegroundColor Red
|
||||
Write-Host "Vous pouvez le télécharger depuis https://ollama.com/download/windows" -ForegroundColor Yellow
|
||||
}
|
||||
|
||||
# Lancer le serveur API
|
||||
Write-Host "`nLancement du serveur API..." -ForegroundColor Green
|
||||
Write-Host "Utilisez Ctrl+C pour arrêter le serveur`n" -ForegroundColor Yellow
|
||||
# Start the API server
|
||||
Write-Host "`nStarting API server on http://localhost:8000..." -ForegroundColor Green
|
||||
Write-Host "Available endpoints:" -ForegroundColor Cyan
|
||||
Write-Host " - http://localhost:8000/v1/chat/completions (OpenAI/Cursor compatible)" -ForegroundColor Cyan
|
||||
Write-Host " - http://localhost:8000/v1/models (model list)" -ForegroundColor Cyan
|
||||
Write-Host " - http://localhost:8000/generate (simplified API for Obsidian)" -ForegroundColor Cyan
|
||||
Write-Host " - http://localhost:8000/health (server status)" -ForegroundColor Cyan
|
||||
Write-Host "`nUse Ctrl+C to stop the server" -ForegroundColor Yellow
|
||||
|
||||
# Exécution du serveur
|
||||
python api_server.py
|
||||
# Run the server in background if -Background parameter is specified
|
||||
$runInBackground = $args -contains "-background" -or $args -contains "-b"
|
||||
|
||||
# Ce code ne sera exécuté qu'après l'arrêt du serveur
|
||||
Write-Host "`nServeur API arrêté." -ForegroundColor Cyan
|
||||
Write-Host "Désactivation de l'environnement virtuel..." -ForegroundColor Cyan
|
||||
deactivate
|
||||
if ($runInBackground) {
|
||||
Write-Host "Starting server in background..." -ForegroundColor Green
|
||||
Start-Process -FilePath "python" -ArgumentList "api_server.py" -WindowStyle Minimized
|
||||
Write-Host "API server started in background. Use 'Get-Process python' to check its status." -ForegroundColor Green
|
||||
|
||||
# Keep virtual environment active
|
||||
Write-Host "Virtual environment remains active. Type 'deactivate' to deactivate it." -ForegroundColor Yellow
|
||||
} else {
|
||||
# Normal server execution
|
||||
python api_server.py
|
||||
|
||||
# This code will only run after server shutdown
|
||||
Write-Host "`nAPI server stopped." -ForegroundColor Cyan
|
||||
Write-Host "Deactivating virtual environment..." -ForegroundColor Cyan
|
||||
deactivate
|
||||
}
|
||||
setup_env.sh (85 lines, file deleted)
@ -1,85 +0,0 @@
|
||||
#!/bin/bash

# LLM Lab virtual environment setup script
# This script creates a new virtual environment and installs the required dependencies

echo "=== LLM Lab environment setup ==="

# Check that Python 3 is installed
if ! command -v python3 &> /dev/null; then
    echo "Error: Python 3 is not installed. Please install it before continuing."
    exit 1
fi

# Check that pip is installed
if ! command -v pip3 &> /dev/null; then
    echo "Error: pip3 is not installed. Please install it before continuing."
    exit 1
fi

# Check that the venv module is available
python3 -c "import venv" &> /dev/null
if [ $? -ne 0 ]; then
    echo "The venv module is not available. Installing..."
    sudo apt-get update
    sudo apt-get install -y python3-venv
fi

# Check that tkinter is installed
python3 -c "import tkinter" &> /dev/null
if [ $? -ne 0 ]; then
    echo "Tkinter is not installed. Installing..."
    sudo apt-get update
    sudo apt-get install -y python3-tk
fi

# Remove the old environment if it exists
if [ -d "llmlab" ]; then
    echo "Removing the old virtual environment..."
    rm -rf llmlab
fi

# Create the new virtual environment
echo "Creating a new virtual environment..."
python3 -m venv llmlab

# Activate the virtual environment
echo "Activating the virtual environment..."
source llmlab/bin/activate

# Upgrade pip
echo "Upgrading pip..."
pip install --upgrade pip

# Install dependencies
echo "Installing required dependencies..."
pip install -r requirements.txt

# Optional installation of NVIDIA GPU dependencies
if command -v nvidia-smi &> /dev/null; then
    echo "NVIDIA GPU detected, installing NVIDIA dependencies..."
    pip install nvidia-ml-py
fi

# Make the scripts executable for easier use
echo "Making the scripts executable..."
chmod +x chat.py
chmod +x monitor.py

# Print a summary
echo ""
echo "=== Setup complete ==="
echo "To activate the virtual environment, run:"
echo "  source llmlab/bin/activate"
echo ""
echo "To launch the system monitor:"
echo "  ./monitor.py"
echo ""
echo "To use an LLM agent:"
echo "  ./chat.py [agent name]"
echo ""
echo "Available agents:"
echo "  cursor   - CodeLlama 13B Python (for code)"
echo "  obsidian - Llama2 13B (for knowledge management)"
echo "  test     - Mistral 7B (for quick tests)"
echo ""
BIN utils/__pycache__/ollama_manager.cpython-313.pyc (new file)
Binary file not shown.

utils/ollama_manager.py (239 lines, new file)
@@ -0,0 +1,239 @@
"""
Ollama model manager to optimize loading and switching between models
"""
import requests
import subprocess
import logging
import platform
import time
import os
import threading
import sys

logger = logging.getLogger("ollama_manager")


class OllamaModelManager:
    """
    Utility class for managing Ollama models efficiently:
    - Preloading models
    - Switching between models
    - Checking model status
    - Releasing resources
    """

    def __init__(self):
        self.base_url = "http://localhost:11434/api"
        self.available_models = []
        self.running_model = None
        self.lock = threading.Lock()
        self.last_model_switch_time = 0
        self.model_switching_in_progress = False
        self.refresh_available_models()

    def refresh_available_models(self):
        """Refresh the list of models available in Ollama"""
        try:
            response = requests.get(f"{self.base_url}/tags", timeout=5)
            if response.status_code == 200:
                data = response.json()
                self.available_models = [model.get("name", "") for model in data.get("models", [])]
                logger.info(f"Available Ollama models: {self.available_models}")
            else:
                logger.warning(f"Unable to retrieve models: {response.status_code}")
        except Exception as e:
            logger.error(f"Error retrieving models: {str(e)}")

    def get_running_model(self):
        """Detect the Ollama model currently loaded"""
        try:
            # Simpler and more reliable method - via a call to the Ollama API
            # The previous subprocess-based methods can cause encoding errors
            for model in self.available_models:
                try:
                    response = requests.post(
                        f"{self.base_url}/generate",
                        json={"model": model, "prompt": "Hi", "stream": False},
                        timeout=1.0
                    )
                    if response.status_code == 200:
                        # A model answers quickly if it is already loaded
                        self.running_model = model
                        return model
                except Exception:
                    # On timeout or error, continue with the next model
                    pass

        except Exception as e:
            logger.error(f"Error detecting the running model: {str(e)}")

        return None

    def ensure_model_loaded(self, model_name, max_wait=120):
        """
        Ensures a model is loaded in memory, ready to answer quickly.
        Returns True if the model is loaded successfully.
        """
        with self.lock:  # Avoid conflicts between threads
            # If this model is not available, it cannot be loaded
            if model_name not in self.available_models:
                logger.warning(f"Model {model_name} not available in Ollama")
                return False

            # If the model is already loaded, nothing to do
            current_model = self.get_running_model()
            if current_model == model_name:
                logger.info(f"Model {model_name} already loaded and ready")
                return True

            # Mark the start of loading
            self.model_switching_in_progress = True
            self.last_model_switch_time = time.time()

            # Load the model with a small prompt to force loading
            logger.info(f"Loading model {model_name}... (max wait: {max_wait}s)")
            try:
                # Minimal prompt to load the model
                response = requests.post(
                    f"{self.base_url}/generate",
                    json={"model": model_name, "prompt": "Hello", "stream": False},
                    timeout=max_wait
                )

                if response.status_code == 200:
                    self.running_model = model_name
                    logger.info(f"Model {model_name} loaded successfully")
                    self.model_switching_in_progress = False
                    return True
                else:
                    logger.error(f"Error loading model {model_name}: {response.status_code}")
                    self.model_switching_in_progress = False
                    return False

            except requests.exceptions.Timeout:
                logger.warning(f"Timed out while loading model {model_name} (timeout: {max_wait}s)")
                self.model_switching_in_progress = False
                return False
            except Exception as e:
                logger.error(f"Error loading model {model_name}: {str(e)}")
                self.model_switching_in_progress = False
                return False

    def switch_model(self, model_name, max_wait=60):
        """
        Switch to another model.
        To avoid delays, this method is non-blocking by default
        and returns True if the switching process has started.
        """
        # Check whether a switch is already in progress
        if self.model_switching_in_progress:
            logger.info(f"Switch to {model_name} ignored - another switch is already in progress")
            return False

        # Check whether the model is already loaded
        if self.running_model == model_name:
            logger.info(f"Model {model_name} already active, no need to switch")
            return True

        # Avoid switching too often (less than 10 seconds between switches)
        if time.time() - self.last_model_switch_time < 10:
            logger.info(f"Switch to {model_name} postponed - last switch too recent")
            return False

        # Start loading in a separate thread so the API is not blocked
        self.model_switching_in_progress = True
        threading.Thread(
            target=self.ensure_model_loaded,
            args=(model_name, max_wait),
            daemon=True
        ).start()

        logger.info(f"Switch to model {model_name} started in the background")
        return True

    def preload_models(self, models_list):
        """
        Preload a list of models to speed up their availability.
        Useful at server startup.
        """
        logger.info(f"Starting model preloading: {', '.join(models_list)}")
        # Check which models are available
        self.refresh_available_models()

        # Filter to include only available models
        available_models = [model for model in models_list if model in self.available_models]
        if len(available_models) != len(models_list):
            missing = set(models_list) - set(available_models)
            logger.warning(f"Some requested models are not available: {', '.join(missing)}")

        for model in available_models:
            threading.Thread(
                target=self.ensure_model_loaded,
                args=(model, 180),  # Longer wait time for preloading (3 minutes)
                daemon=True
            ).start()
            # Pause to avoid overloading Ollama with several simultaneous loads
            time.sleep(5)

    def is_model_switch_needed(self, requested_model):
        """
        Determine whether a model switch is necessary and appropriate
        """
        # If the requested model is not available, no need to switch
        if requested_model not in self.available_models:
            return False

        # If it is the current model, no need to switch
        if self.running_model == requested_model:
            return False

        # If a switch is already in progress, do not start another one
        if self.model_switching_in_progress:
            return False

        # If the last switch was recent, avoid thrashing
        if time.time() - self.last_model_switch_time < 10:
            return False

        # In all other cases, the switch is necessary
        return True

    def _is_server_available(self):
        """Check if Ollama server is available"""
        try:
            response = requests.get(f"{self.base_url}/tags", timeout=5)  # Increased timeout
            return response.status_code == 200
        except requests.exceptions.RequestException:
            return False

    def is_ollama_available(self):
        """Check whether the Ollama server is available and responding to requests"""
        try:
            response = requests.get(f"{self.base_url}/tags", timeout=5)  # Increased timeout
            return response.status_code == 200
        except Exception:
            return False


# Singleton for use throughout the application
ollama_manager = OllamaModelManager()

if __name__ == "__main__":
    # Logging configuration for tests
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s'
    )

    # Class test
    manager = OllamaModelManager()
    print(f"Available models: {manager.available_models}")

    current_model = manager.get_running_model()
    print(f"Currently loaded model: {current_model or 'None'}")

    if manager.available_models:
        test_model = manager.available_models[0]
        print(f"Testing loading of model {test_model}...")
        if manager.ensure_model_loaded(test_model):
            print(f"Model {test_model} loaded successfully!")
        else:
            print(f"Failed to load model {test_model}")
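For orientation, here is a minimal usage sketch of the manager committed above, assuming the module is importable as utils.ollama_manager and that an Ollama server is listening on http://localhost:11434 (the URL hard-coded in the class). The model name used here is only an example and may not be installed.

```python
# Hypothetical usage sketch for OllamaModelManager; everything except the
# manager's own attributes and methods is an illustrative assumption.
from utils.ollama_manager import ollama_manager

requested = "mistral:7b"  # example model name, not guaranteed to be present

if not ollama_manager.is_ollama_available():
    raise SystemExit("Ollama server is not reachable on localhost:11434")

# Refresh the model list, then switch only when the manager says it is worthwhile.
ollama_manager.refresh_available_models()
if ollama_manager.is_model_switch_needed(requested):
    started = ollama_manager.switch_model(requested)  # non-blocking
    print(f"Switch started in background: {started}")
else:
    print(f"No switch needed, current model: {ollama_manager.running_model}")
```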
@@ -1,8 +1,9 @@
from itertools import product
from agents.roles import AGENTS
from datetime import datetime
from typing import Optional, Dict, List, Any

def test_agents_on_prompt(model_class, prompt: str, agents: list, param_grid: dict, fixed_params: dict = None):
def test_agents_on_prompt(model_class, prompt: str, agents: List[str], param_grid: Dict[str, List[Any]], fixed_params: Optional[Dict[str, Any]] = None):
    """
    Teste les agents sur un prompt donné avec différentes combinaisons de paramètres
    model_class: Classe du modèle LLM à utiliser
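The signature change above suggests that test_agents_on_prompt expands a parameter grid over a set of agents. A minimal sketch of that expansion with itertools.product is shown below; the grid values are made up, and only the agent names come from elsewhere in this commit.

```python
# Illustrative expansion of a parameter grid, as implied by the import of
# itertools.product in the hunk above; the values here are invented.
from itertools import product

param_grid = {"temperature": [0.2, 0.7], "top_p": [0.9, 1.0]}
agents = ["cursor", "test"]  # agent names listed in setup_env.sh

keys = list(param_grid)
for agent, combo in product(agents, product(*param_grid.values())):
    params = dict(zip(keys, combo))
    print(agent, params)
```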
@@ -1,5 +1,5 @@
"""
Moniteur de ressources système pour Ollama et les ressources NVIDIA GPU
System resource monitor for Ollama and NVIDIA GPU resources
"""
import tkinter as tk
from tkinter import ttk
@@ -16,7 +16,7 @@ from datetime import datetime
class SystemMonitor:
    def __init__(self, root):
        self.root = root
        self.root.title("Moniteur LLM Lab")
        self.root.title("LLM Lab Monitor")
        self.root.geometry("800x600")
        self.root.minsize(700, 500)

@@ -25,75 +25,75 @@ class SystemMonitor:
        self.style.theme_use('alt')  # 'clam', 'alt', 'default', 'classic'

        # Variables
        self.update_interval = 2  # secondes
        self.update_interval = 2  # seconds
        self.running = True
        self.ollama_models = []
        self.active_model = None
        self.gpu_available = self._check_gpu_available()

        # Création de l'UI
        # Create UI
        self._create_widgets()

        # Démarrer le thread de mise à jour
        # Start update thread
        self.update_thread = threading.Thread(target=self._update_loop)
        self.update_thread.daemon = True
        self.update_thread.start()

        # Intercepter la fermeture de la fenêtre
        # Intercept window close
        self.root.protocol("WM_DELETE_WINDOW", self._on_close)

    def _create_widgets(self):
        # Créer le notebook (onglets)
        # Create notebook (tabs)
        self.notebook = ttk.Notebook(self.root)
        self.notebook.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

        # Onglet 1: Surveillance système
        # Tab 1: System monitoring
        self.system_frame = ttk.Frame(self.notebook)
        self.notebook.add(self.system_frame, text="Système")
        self.notebook.add(self.system_frame, text="System")

        # Onglet 2: Ollama
        # Tab 2: Ollama
        self.ollama_frame = ttk.Frame(self.notebook)
        self.notebook.add(self.ollama_frame, text="Ollama")

        # Onglet 3: GPU
        # Tab 3: GPU
        self.gpu_frame = ttk.Frame(self.notebook)
        self.notebook.add(self.gpu_frame, text="GPU")

        # Onglet 4: Logs
        # Tab 4: Logs
        self.logs_frame = ttk.Frame(self.notebook)
        self.notebook.add(self.logs_frame, text="Logs")

        # === Configuration de l'onglet Système ===
        system_label = ttk.Label(self.system_frame, text="Ressources Système", font=("Arial", 14, "bold"))
        # === System tab configuration ===
        system_label = ttk.Label(self.system_frame, text="System Resources", font=("Arial", 14, "bold"))
        system_label.pack(pady=10)

        # Infos système
        system_info_frame = ttk.LabelFrame(self.system_frame, text="Informations Système")
        # System info
        system_info_frame = ttk.LabelFrame(self.system_frame, text="System Information")
        system_info_frame.pack(fill=tk.X, padx=10, pady=5)

        # OS
        os_frame = ttk.Frame(system_info_frame)
        os_frame.pack(fill=tk.X, padx=5, pady=2)
        ttk.Label(os_frame, text="Système d'exploitation:").pack(side=tk.LEFT, padx=5)
        ttk.Label(os_frame, text="Operating System:").pack(side=tk.LEFT, padx=5)
        self.os_label = ttk.Label(os_frame, text="")
        self.os_label.pack(side=tk.LEFT, padx=5)

        # CPU
        cpu_frame = ttk.Frame(system_info_frame)
        cpu_frame.pack(fill=tk.X, padx=5, pady=2)
        ttk.Label(cpu_frame, text="Processeur:").pack(side=tk.LEFT, padx=5)
        ttk.Label(cpu_frame, text="Processor:").pack(side=tk.LEFT, padx=5)
        self.cpu_label = ttk.Label(cpu_frame, text="")
        self.cpu_label.pack(side=tk.LEFT, padx=5)

        # RAM
        ram_frame = ttk.Frame(system_info_frame)
        ram_frame.pack(fill=tk.X, padx=5, pady=2)
        ttk.Label(ram_frame, text="Mémoire RAM:").pack(side=tk.LEFT, padx=5)
        ttk.Label(ram_frame, text="RAM Memory:").pack(side=tk.LEFT, padx=5)
        self.ram_label = ttk.Label(ram_frame, text="")
        self.ram_label.pack(side=tk.LEFT, padx=5)

        # Barres de progression
        progress_frame = ttk.LabelFrame(self.system_frame, text="Utilisation des ressources")
        # Progress bars
        progress_frame = ttk.LabelFrame(self.system_frame, text="Resource Usage")
        progress_frame.pack(fill=tk.X, padx=10, pady=5)

        # CPU Usage
@@ -117,25 +117,25 @@ class SystemMonitor:
        # Disk Usage
        disk_usage_frame = ttk.Frame(progress_frame)
        disk_usage_frame.pack(fill=tk.X, padx=5, pady=5)
        ttk.Label(disk_usage_frame, text="Disque:").pack(side=tk.LEFT, padx=5)
        ttk.Label(disk_usage_frame, text="Disk:").pack(side=tk.LEFT, padx=5)
        self.disk_progress = ttk.Progressbar(disk_usage_frame, orient=tk.HORIZONTAL, length=300, mode='determinate')
        self.disk_progress.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True)
        self.disk_percent = ttk.Label(disk_usage_frame, text="0%")
        self.disk_percent.pack(side=tk.LEFT, padx=5)

        # === Configuration de l'onglet Ollama ===
        ollama_label = ttk.Label(self.ollama_frame, text="Serveur Ollama", font=("Arial", 14, "bold"))
        # === Ollama tab configuration ===
        ollama_label = ttk.Label(self.ollama_frame, text="Ollama Server", font=("Arial", 14, "bold"))
        ollama_label.pack(pady=10)

        # État du serveur
        server_frame = ttk.LabelFrame(self.ollama_frame, text="État du serveur")
        # Server status
        server_frame = ttk.LabelFrame(self.ollama_frame, text="Server Status")
        server_frame.pack(fill=tk.X, padx=10, pady=5)

        # Status
        status_frame = ttk.Frame(server_frame)
        status_frame.pack(fill=tk.X, padx=5, pady=2)
        ttk.Label(status_frame, text="Statut:").pack(side=tk.LEFT, padx=5)
        self.status_label = ttk.Label(status_frame, text="Vérification...")
        ttk.Label(status_frame, text="Status:").pack(side=tk.LEFT, padx=5)
        self.status_label = ttk.Label(status_frame, text="Checking...")
        self.status_label.pack(side=tk.LEFT, padx=5)

        # URL
@@ -145,56 +145,58 @@ class SystemMonitor:
        self.url_label = ttk.Label(url_frame, text="http://localhost:11434")
        self.url_label.pack(side=tk.LEFT, padx=5)

        # Modèles disponibles
        models_frame = ttk.LabelFrame(self.ollama_frame, text="Modèles disponibles")
        # Available models
        models_frame = ttk.LabelFrame(self.ollama_frame, text="Available Models")
        models_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)

        # Liste des modèles
        self.tree = ttk.Treeview(models_frame, columns=("Taille", "Modifié"), show='headings')
        self.tree.heading("Taille", text="Taille")
        self.tree.heading("Modifié", text="Modifié")
        # Models list
        self.tree = ttk.Treeview(models_frame, columns=("Nom", "Taille", "Modifié"), show='headings')
        self.tree.heading("Nom", text="LLM Model")
        self.tree.heading("Taille", text="Size")
        self.tree.heading("Modifié", text="Modified")
        self.tree.column("Nom", width=150)
        self.tree.column("Taille", width=100)
        self.tree.column("Modifié", width=150)
        self.tree.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)

        # Boutons
        # Buttons
        button_frame = ttk.Frame(self.ollama_frame)
        button_frame.pack(fill=tk.X, padx=10, pady=5)

        refresh_button = ttk.Button(button_frame, text="Rafraîchir", command=self._refresh_ollama)
        refresh_button = ttk.Button(button_frame, text="Refresh", command=self._refresh_ollama)
        refresh_button.pack(side=tk.LEFT, padx=5)

        # === Configuration de l'onglet GPU ===
        gpu_label = ttk.Label(self.gpu_frame, text="Ressources GPU", font=("Arial", 14, "bold"))
        # === GPU tab configuration ===
        gpu_label = ttk.Label(self.gpu_frame, text="GPU Resources", font=("Arial", 14, "bold"))
        gpu_label.pack(pady=10)

        if self.gpu_available:
            # Infos GPU
            gpu_info_frame = ttk.LabelFrame(self.gpu_frame, text="Informations GPU")
            # GPU Info
            gpu_info_frame = ttk.LabelFrame(self.gpu_frame, text="GPU Information")
            gpu_info_frame.pack(fill=tk.X, padx=10, pady=5)

            # Modèle GPU
            # GPU Model
            gpu_model_frame = ttk.Frame(gpu_info_frame)
            gpu_model_frame.pack(fill=tk.X, padx=5, pady=2)
            ttk.Label(gpu_model_frame, text="Modèle:").pack(side=tk.LEFT, padx=5)
            ttk.Label(gpu_model_frame, text="Model:").pack(side=tk.LEFT, padx=5)
            self.gpu_model_label = ttk.Label(gpu_model_frame, text="")
            self.gpu_model_label.pack(side=tk.LEFT, padx=5)

            # Mémoire GPU
            # GPU Memory
            gpu_memory_frame = ttk.Frame(gpu_info_frame)
            gpu_memory_frame.pack(fill=tk.X, padx=5, pady=2)
            ttk.Label(gpu_memory_frame, text="Mémoire:").pack(side=tk.LEFT, padx=5)
            ttk.Label(gpu_memory_frame, text="Memory:").pack(side=tk.LEFT, padx=5)
            self.gpu_memory_label = ttk.Label(gpu_memory_frame, text="")
            self.gpu_memory_label.pack(side=tk.LEFT, padx=5)

            # Utilisation GPU
            gpu_usage_frame = ttk.LabelFrame(self.gpu_frame, text="Utilisation")
            # GPU Usage
            gpu_usage_frame = ttk.LabelFrame(self.gpu_frame, text="Utilization")
            gpu_usage_frame.pack(fill=tk.X, padx=10, pady=5)

            # GPU Compute
            gpu_compute_frame = ttk.Frame(gpu_usage_frame)
            gpu_compute_frame.pack(fill=tk.X, padx=5, pady=5)
            ttk.Label(gpu_compute_frame, text="Calcul:").pack(side=tk.LEFT, padx=5)
            ttk.Label(gpu_compute_frame, text="Compute:").pack(side=tk.LEFT, padx=5)
            self.gpu_compute_progress = ttk.Progressbar(gpu_compute_frame, orient=tk.HORIZONTAL, length=300, mode='determinate')
            self.gpu_compute_progress.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True)
            self.gpu_compute_percent = ttk.Label(gpu_compute_frame, text="0%")
@@ -203,52 +205,52 @@ class SystemMonitor:
            # GPU Memory
            gpu_mem_usage_frame = ttk.Frame(gpu_usage_frame)
            gpu_mem_usage_frame.pack(fill=tk.X, padx=5, pady=5)
            ttk.Label(gpu_mem_usage_frame, text="Mémoire:").pack(side=tk.LEFT, padx=5)
            ttk.Label(gpu_mem_usage_frame, text="Memory:").pack(side=tk.LEFT, padx=5)
            self.gpu_mem_progress = ttk.Progressbar(gpu_mem_usage_frame, orient=tk.HORIZONTAL, length=300, mode='determinate')
            self.gpu_mem_progress.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True)
            self.gpu_mem_percent = ttk.Label(gpu_mem_usage_frame, text="0%")
            self.gpu_mem_percent.pack(side=tk.LEFT, padx=5)

            # Température
            # Temperature
            gpu_temp_frame = ttk.Frame(gpu_usage_frame)
            gpu_temp_frame.pack(fill=tk.X, padx=5, pady=5)
            ttk.Label(gpu_temp_frame, text="Température:").pack(side=tk.LEFT, padx=5)
            ttk.Label(gpu_temp_frame, text="Temperature:").pack(side=tk.LEFT, padx=5)
            self.gpu_temp_progress = ttk.Progressbar(gpu_temp_frame, orient=tk.HORIZONTAL, length=300, mode='determinate')
            self.gpu_temp_progress.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True)
            self.gpu_temp_label = ttk.Label(gpu_temp_frame, text="0°C")
            self.gpu_temp_label.pack(side=tk.LEFT, padx=5)

            # Graphiques processe actifs
            gpu_processes_frame = ttk.LabelFrame(self.gpu_frame, text="Processus GPU")
            # Active processes graphs
            gpu_processes_frame = ttk.LabelFrame(self.gpu_frame, text="GPU Processes")
            gpu_processes_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)

            # Liste des processus
            # Process list
            self.gpu_process_tree = ttk.Treeview(gpu_processes_frame,
                                                 columns=("PID", "Nom", "Mémoire"),
                                                 show='headings')
            self.gpu_process_tree.heading("PID", text="PID")
            self.gpu_process_tree.heading("Nom", text="Processus")
            self.gpu_process_tree.heading("Mémoire", text="Mémoire")
            self.gpu_process_tree.heading("Nom", text="Process")
            self.gpu_process_tree.heading("Mémoire", text="Memory")
            self.gpu_process_tree.column("PID", width=50)
            self.gpu_process_tree.column("Nom", width=200)
            self.gpu_process_tree.column("Mémoire", width=100)
            self.gpu_process_tree.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)
        else:
            no_gpu_label = ttk.Label(self.gpu_frame,
                                     text="Aucun GPU NVIDIA détecté.",
                                     text="No NVIDIA GPU detected.",
                                     font=("Arial", 12))
            no_gpu_label.pack(pady=50)

            install_label = ttk.Label(self.gpu_frame,
                                      text="Pour surveiller un GPU NVIDIA, installez nvidia-smi et nvitop.",
                                      text="To monitor an NVIDIA GPU, install nvidia-smi and nvitop.",
                                      font=("Arial", 10))
            install_label.pack(pady=10)

        # === Configuration de l'onglet Logs ===
        logs_label = ttk.Label(self.logs_frame, text="Journaux d'activité", font=("Arial", 14, "bold"))
        # === Logs tab configuration ===
        logs_label = ttk.Label(self.logs_frame, text="Activity Logs", font=("Arial", 14, "bold"))
        logs_label.pack(pady=10)

        # Zone de logs
        # Log area
        log_area_frame = ttk.Frame(self.logs_frame)
        log_area_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)

@@ -261,114 +263,114 @@ class SystemMonitor:
        self.log_text.pack(fill=tk.BOTH, expand=True)
        scrollbar.config(command=self.log_text.yview)

        # Boutons
        # Buttons
        log_button_frame = ttk.Frame(self.logs_frame)
        log_button_frame.pack(fill=tk.X, padx=10, pady=5)

        clear_log_button = ttk.Button(log_button_frame, text="Effacer les logs",
        clear_log_button = ttk.Button(log_button_frame, text="Clear Logs",
                                      command=lambda: self.log_text.delete(1.0, tk.END))
        clear_log_button.pack(side=tk.LEFT, padx=5)

        # Barre d'état en bas
        self.status_bar = ttk.Label(self.root, text="Moniteur LLM Lab - Dernière mise à jour: Jamais",
        # Status bar at bottom
        self.status_bar = ttk.Label(self.root, text="LLM Lab Monitor - Last update: Never",
                                    relief=tk.SUNKEN, anchor=tk.W)
        self.status_bar.pack(side=tk.BOTTOM, fill=tk.X)

    def _update_loop(self):
        """Thread principal de mise à jour"""
        """Main update thread"""
        while self.running:
            try:
                # Mise à jour système
                # System update
                self._update_system_info()

                # Mise à jour Ollama
                if self.notebook.index(self.notebook.select()) == 1:  # Onglet Ollama
                # Ollama update
                if self.notebook.index(self.notebook.select()) == 1:  # Ollama tab
                    self._update_ollama_info()

                # Mise à jour GPU
                if self.gpu_available and self.notebook.index(self.notebook.select()) == 2:  # Onglet GPU
                # GPU update
                if self.gpu_available and self.notebook.index(self.notebook.select()) == 2:  # GPU tab
                    self._update_gpu_info()

                # Mise à jour de la barre d'état
                # Status bar update
                now = datetime.now().strftime("%H:%M:%S")
                self.status_bar.config(text=f"Moniteur LLM Lab - Dernière mise à jour: {now}")
                self.status_bar.config(text=f"LLM Lab Monitor - Last update: {now}")

            except Exception as e:
                self._log(f"Erreur de mise à jour: {str(e)}")
                self._log(f"Update error: {str(e)}")

            time.sleep(self.update_interval)

    def _update_system_info(self):
        """Met à jour les informations système"""
        # Informations système
        """Updates system information"""
        # System information
        self.os_label.config(text=f"{platform.system()} {platform.release()}")
        self.cpu_label.config(text=f"{psutil.cpu_count(logical=False)} cœurs ({psutil.cpu_count()} threads)")
        self.cpu_label.config(text=f"{psutil.cpu_count(logical=False)} cores ({psutil.cpu_count()} threads)")

        # Détection avancée de la RAM
        # Advanced RAM detection
        try:
            ram = psutil.virtual_memory()
            total_ram = ram.total / (1024 * 1024 * 1024)  # GB

            # Vérification supplémentaire pour Linux
            # Additional check for Linux
            if platform.system() == "Linux":
                try:
                    # Utiliser /proc/meminfo pour une détection plus précise
                    # Use /proc/meminfo for more accurate detection
                    with open('/proc/meminfo', 'r') as f:
                        for line in f:
                            if 'MemTotal' in line:
                                # MemTotal est en kB
                                # MemTotal is in kB
                                mem_kb = int(line.split()[1])
                                linux_ram = mem_kb / (1024 * 1024)  # GB
                                # Utiliser la valeur la plus élevée
                                # Use the higher value
                                total_ram = max(total_ram, linux_ram)
                                break
                except Exception as e:
                    self._log(f"Erreur lors de la lecture de /proc/meminfo: {str(e)}")
                    self._log(f"Error reading /proc/meminfo: {str(e)}")

            self.ram_label.config(text=f"{total_ram:.1f} GB")
        except Exception as e:
            self._log(f"Erreur lors de la détection de la RAM: {str(e)}")
            self.ram_label.config(text="Détection impossible")
            self._log(f"Error detecting RAM: {str(e)}")
            self.ram_label.config(text="Detection failed")

        # Utilisation CPU
        # CPU Usage
        cpu_percent = psutil.cpu_percent()
        self.cpu_progress["value"] = cpu_percent
        self.cpu_percent.config(text=f"{cpu_percent:.1f}%")

        # Utilisation RAM
        # RAM Usage
        ram_percent = ram.percent
        self.ram_progress["value"] = ram_percent
        self.ram_percent.config(text=f"{ram_percent:.1f}%")

        # Utilisation disque
        # Disk usage
        disk = psutil.disk_usage('/')
        disk_percent = disk.percent
        self.disk_progress["value"] = disk_percent
        self.disk_percent.config(text=f"{disk_percent:.1f}%")

    def _update_ollama_info(self):
        """Met à jour les informations Ollama"""
        """Updates Ollama information"""
        try:
            # Vérifier si le serveur est en cours d'exécution
            # Check if server is running
            response = requests.get("http://localhost:11434/api/tags", timeout=2)

            if response.status_code == 200:
                self.status_label.config(text="En ligne", foreground="green")
                self.status_label.config(text="Online", foreground="green")

                # Mettre à jour la liste des modèles
                # Update model list
                data = response.json()
                models = data.get("models", [])

                # Effacer la liste actuelle
                # Clear current list
                for item in self.tree.get_children():
                    self.tree.delete(item)

                # Ajouter les modèles
                # Add models
                for model in models:
                    model_name = model.get("name", "")
                    model_size = self._format_size(model.get("size", 0))
                    modified = model.get("modified_at", "")
                    # Convertir le format de date
                    # Convert date format
                    if modified:
                        try:
                            modified_dt = datetime.fromisoformat(modified.replace('Z', '+00:00'))
@@ -376,33 +378,87 @@ class SystemMonitor:
                        except:
                            pass

                    self.tree.insert("", tk.END, text=model_name, values=(model_size, modified), iid=model_name)
                    self.tree.insert("", tk.END, text=model_name, values=(model_name, model_size, modified), iid=model_name)

                # Mettre à jour la liste globale
                # Update global list and count active models
                self.ollama_models = [model.get("name", "") for model in models]
                model_count = len(models)

                # Check if there's an active model via nvidia-smi if GPU available
                active_models = []
                if self.gpu_available:
                    try:
                        # Check processes using GPU
                        result_processes = subprocess.run(
                            ['nvidia-smi', '--query-compute-apps=pid,name,used_memory', '--format=csv,noheader,nounits'],
                            capture_output=True,
                            text=True,
                            check=False
                        )

                        if result_processes.returncode == 0:
                            processes = result_processes.stdout.strip().split('\n')
                            for process in processes:
                                if process.strip():
                                    process_data = process.split(',')
                                    if len(process_data) >= 3:
                                        pid = process_data[0].strip()
                                        name = process_data[1].strip()
                                        memory = process_data[2].strip()

                                        # If it's Ollama, search which model is active
                                        if "ollama" in name.lower():
                                            try:
                                                process_info = psutil.Process(int(pid))
                                                cmd_line = " ".join(process_info.cmdline())
                                                for model in self.ollama_models:
                                                    if model in cmd_line:
                                                        active_models.append(model)
                                                        self.active_model = model
                                                        self._log(f"Active model detected: {model} (PID {pid}, using {memory} MiB)")
                                                        # Highlight in list
                                                        self.tree.selection_set(model)
                                                        self.tree.see(model)
                                                        # Add "ACTIVE" to the list
                                                        values = self.tree.item(model, "values")
                                                        self.tree.item(model, values=values, tags=("active",))
                                                        self.style.configure("Treeview", background="#FFFFFF")
                                                        self.style.map("Treeview",
                                                                       foreground=[("selected", "#000000")],
                                                                       background=[("selected", "#e1e1e1")])
                                                        self.tree.tag_configure("active", background="#e0f7fa")
                                            except Exception as e:
                                                self._log(f"Error analyzing process: {str(e)}")
                    except Exception as e:
                        self._log(f"Error checking GPU processes: {str(e)}")

                # Update models section title with count
                active_count = len(active_models)
                models_text = f"Available Models ({model_count})"
                if active_count > 0:
                    models_text += f" - Active: {', '.join(active_models)}"

                # Find the models LabelFrame and update its text
                for child in self.ollama_frame.winfo_children():
                    if isinstance(child, ttk.LabelFrame) and "Available Models" in child["text"]:
                        child["text"] = models_text
                        break

                # Vérifier s'il y a un modèle actif
                if self.active_model:
                    self._log(f"Modèle actif: {self.active_model}")
                    # Mettre en surbrillance le modèle actif
                    if self.active_model in self.ollama_models:
                        self.tree.selection_set(self.active_model)
                        self.tree.see(self.active_model)
            else:
                self.status_label.config(text="Erreur", foreground="red")
                self._log(f"Erreur de connexion au serveur Ollama: {response.status_code}")
                self.status_label.config(text="Error", foreground="red")
                self._log(f"Error connecting to Ollama server: {response.status_code}")

        except requests.exceptions.RequestException:
            self.status_label.config(text="Hors ligne", foreground="red")
            self._log("Serveur Ollama non disponible")
            self.status_label.config(text="Offline", foreground="red")
            self._log("Ollama server not available")

    def _update_gpu_info(self):
        """Met à jour les informations GPU"""
        """Updates GPU information"""
        if not self.gpu_available:
            return

        try:
            # Exécuter nvidia-smi pour obtenir les informations GPU
            # Run nvidia-smi to get GPU information
            result = subprocess.run(
                ['nvidia-smi', '--query-gpu=name,memory.total,memory.used,utilization.gpu,temperature.gpu',
                 '--format=csv,noheader,nounits'],
@@ -412,15 +468,15 @@ class SystemMonitor:
            )

            if result.returncode == 0:
                # Analyser les résultats
                # Parse results
                gpu_data = result.stdout.strip().split(',')

                if len(gpu_data) >= 5:
                    # Nom du modèle
                    # Model name
                    model_name = gpu_data[0].strip()
                    self.gpu_model_label.config(text=model_name)

                    # Mémoire totale et utilisée
                    # Total and used memory
                    total_memory = float(gpu_data[1].strip())
                    used_memory = float(gpu_data[2].strip())
                    memory_percent = (used_memory / total_memory) * 100 if total_memory > 0 else 0
@@ -429,18 +485,18 @@ class SystemMonitor:
                    self.gpu_mem_progress["value"] = memory_percent
                    self.gpu_mem_percent.config(text=f"{memory_percent:.1f}%")

                    # Utilisation GPU
                    # GPU utilization
                    gpu_util = float(gpu_data[3].strip())
                    self.gpu_compute_progress["value"] = gpu_util
                    self.gpu_compute_percent.config(text=f"{gpu_util:.1f}%")

                    # Température
                    # Temperature
                    temp = float(gpu_data[4].strip())
                    # Échelle de température: 0-100°C
                    # Temperature scale: 0-100°C
                    self.gpu_temp_progress["value"] = temp
                    self.gpu_temp_label.config(text=f"{temp:.1f}°C")

                    # Récupérer les processus GPU
                    # Get GPU processes
                    result_processes = subprocess.run(
                        ['nvidia-smi', '--query-compute-apps=pid,name,used_memory', '--format=csv,noheader,nounits'],
                        capture_output=True,
@@ -448,11 +504,11 @@ class SystemMonitor:
                    )

                    if result_processes.returncode == 0:
                        # Effacer la liste actuelle
                        # Clear current list
                        for item in self.gpu_process_tree.get_children():
                            self.gpu_process_tree.delete(item)

                        # Ajouter les processus
                        # Add processes
                        processes = result_processes.stdout.strip().split('\n')
                        for process in processes:
                            if process.strip():
@@ -462,36 +518,36 @@ class SystemMonitor:
                                name = process_data[1].strip()
                                memory = f"{process_data[2].strip()} MiB"

                                # Ajouter à la liste
                                # Add to list
                                self.gpu_process_tree.insert("", tk.END, text=pid, values=(pid, name, memory))

                                # Si c'est Ollama, marquer comme modèle actif
                                # If it's Ollama, mark as active model
                                if "ollama" in name.lower():
                                    self._log(f"Ollama détecté sur GPU: PID {pid}, utilisant {memory}")
                                    # Chercher quel modèle est actif
                                    self._log(f"Ollama detected on GPU: PID {pid}, using {memory}")
                                    # Find which model is active
                                    try:
                                        process_info = psutil.Process(int(pid))
                                        cmd_line = " ".join(process_info.cmdline())
                                        for model in self.ollama_models:
                                            if model in cmd_line:
                                                self.active_model = model
                                                self._log(f"Modèle actif détecté: {model}")
                                                self._log(f"Active model detected: {model}")
                                                break
                                    except:
                                        pass

        except subprocess.SubprocessError as e:
            self._log(f"Erreur lors de l'exécution de nvidia-smi: {str(e)}")
            self._log(f"Error executing nvidia-smi: {str(e)}")
        except Exception as e:
            self._log(f"Erreur de mise à jour GPU: {str(e)}")
            self._log(f"GPU update error: {str(e)}")

    def _refresh_ollama(self):
        """Force le rafraîchissement des informations Ollama"""
        """Force refresh of Ollama information"""
        self._update_ollama_info()
        self._log("Informations Ollama rafraîchies")
        self._log("Ollama information refreshed")

    def _check_gpu_available(self):
        """Vérifie si un GPU NVIDIA est disponible"""
        """Check if an NVIDIA GPU is available"""
        try:
            result = subprocess.run(['nvidia-smi'], capture_output=True, text=True)
            return result.returncode == 0
@@ -499,7 +555,7 @@ class SystemMonitor:
            return False

    def _format_size(self, size_bytes):
        """Formate la taille en unités lisibles"""
        """Format size in readable units"""
        if size_bytes < 1024:
            return f"{size_bytes} B"
        elif size_bytes < 1024 * 1024:
@@ -510,22 +566,22 @@ class SystemMonitor:
            return f"{size_bytes/(1024*1024*1024):.1f} GB"

    def _log(self, message):
        """Ajoute un message aux logs"""
        """Add a message to logs"""
        timestamp = datetime.now().strftime("%H:%M:%S")
        log_message = f"[{timestamp}] {message}\n"

        # Ajouter au texte
        # Add to text
        self.log_text.insert(tk.END, log_message)
        self.log_text.see(tk.END)  # Défiler vers le bas
        self.log_text.see(tk.END)  # Scroll down

    def _on_close(self):
        """Gère la fermeture de l'application"""
        """Handle application closure"""
        self.running = False
        time.sleep(0.5)  # Attendre que le thread se termine
        time.sleep(0.5)  # Wait for thread to terminate
        self.root.destroy()

def main():
    """Point d'entrée principal"""
    """Main entry point"""
    root = tk.Tk()
    app = SystemMonitor(root)
    root.mainloop()

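The GPU tab above relies on two nvidia-smi queries. A standalone sketch of the same calls is given below, assuming nvidia-smi is on the PATH and that a single GPU is present (on a machine without an NVIDIA GPU both calls simply fail, as the monitor's own error handling anticipates).

```python
# Standalone version of the nvidia-smi queries used by the monitor above;
# the single-GPU parsing mirrors how the monitor splits the CSV output.
import subprocess

gpu = subprocess.run(
    ["nvidia-smi",
     "--query-gpu=name,memory.total,memory.used,utilization.gpu,temperature.gpu",
     "--format=csv,noheader,nounits"],
    capture_output=True, text=True, check=False,
)
if gpu.returncode == 0:
    name, total, used, util, temp = [f.strip() for f in gpu.stdout.strip().split(",")]
    print(f"{name}: {used}/{total} MiB, {util}% util, {temp}°C")

procs = subprocess.run(
    ["nvidia-smi", "--query-compute-apps=pid,name,used_memory",
     "--format=csv,noheader,nounits"],
    capture_output=True, text=True, check=False,
)
for line in procs.stdout.strip().splitlines():
    if line.strip():
        pid, pname, mem = [f.strip() for f in line.split(",")]
        print(f"PID {pid} ({pname}) uses {mem} MiB")
```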