#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Script to compare several LLM models on the same ticket.
Allows a quick evaluation of how different models perform.
"""

import os
import sys
import argparse
import json
import time
import logging
from typing import Dict, Any, List, Optional

from tests.common.llm_factory import TEXT_MODELS, VISION_MODELS
from tests.workflows import execute_ticket_analysis, execute_image_analysis


def setup_logging(verbose: bool = False, log_file: Optional[str] = None):
    """
    Configure the logging system.

    Args:
        verbose: If True, enables verbose (DEBUG) mode
        log_file: Name of the log file (if None, no file logging)
    """
    log_level = logging.DEBUG if verbose else logging.INFO

    handlers: List[logging.Handler] = [logging.StreamHandler()]
    if log_file:
        handlers.append(logging.FileHandler(log_file))

    logging.basicConfig(
        level=log_level,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=handlers
    )
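
# Usage sketch (the log file name is hypothetical): log to stdout and to a file
# at DEBUG level.
#
#   setup_logging(verbose=True, log_file="compare_models_debug.log")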


def compare_text_models(ticket_id: str, models: List[str], output_dir: str = "output") -> Dict[str, Any]:
    """
    Compare several text models on the same ticket.

    Args:
        ticket_id: ID of the ticket to analyse
        models: List of models to compare
        output_dir: Directory containing the tickets

    Returns:
        Dictionary with the results for each model
    """
    results = {
        "ticket_id": ticket_id,
        "type": "text",
        "models": {},
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
    }

    print(f"\n=== Text model comparison for ticket {ticket_id} ===\n")

    for model in models:
        print(f"Testing model: {model}")
        start_time = time.time()

        try:
            workflow_results = execute_ticket_analysis(ticket_id, output_dir, model)
            execution_time = time.time() - start_time

            if "error" in workflow_results:
                print(f" ❌ Failure for {model}: {workflow_results['error']}")
                results["models"][model] = {
                    "success": False,
                    "error": workflow_results["error"],
                    "stage": workflow_results.get("stage", "unknown"),
                    "execution_time": execution_time
                }
            else:
                analysis = workflow_results["analysis"]
                print(f" ✅ Success for {model} in {execution_time:.2f} s: {len(analysis) if analysis else 0} characters")
                results["models"][model] = {
                    "success": True,
                    "execution_time": execution_time,
                    "analysis_length": len(analysis) if analysis else 0,
                    "analysis": analysis
                }
        except Exception as e:
            execution_time = time.time() - start_time
            print(f" ❌ Error for {model}: {e}")
            results["models"][model] = {
                "success": False,
                "error": str(e),
                "execution_time": execution_time
            }

    return results
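
# Usage sketch (the ticket ID is hypothetical and the model names must be
# entries of TEXT_MODELS from tests.common.llm_factory):
#
#   text_results = compare_text_models("TCK-0042", ["mistral_large"])
#   print_comparison_table(text_results)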


def compare_vision_models(ticket_id: str, models: List[str], text_model: str = "mistral_large", output_dir: str = "output") -> Dict[str, Any]:
    """
    Compare several vision models on the same ticket.

    Args:
        ticket_id: ID of the ticket to analyse
        models: List of vision models to compare
        text_model: Text model to use for the context analysis
        output_dir: Directory containing the tickets

    Returns:
        Dictionary with the results for each model
    """
    results = {
        "ticket_id": ticket_id,
        "type": "vision",
        "text_model": text_model,
        "models": {},
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
    }

    print(f"\n=== Vision model comparison for ticket {ticket_id} (text: {text_model}) ===\n")

    for model in models:
        print(f"Testing model: {model}")
        start_time = time.time()

        try:
            workflow_results = execute_image_analysis(ticket_id, output_dir, text_model, model)
            execution_time = time.time() - start_time

            if "error" in workflow_results:
                print(f" ❌ Failure for {model}: {workflow_results['error']}")
                results["models"][model] = {
                    "success": False,
                    "error": workflow_results["error"],
                    "stage": workflow_results.get("stage", "unknown"),
                    "execution_time": execution_time
                }
            else:
                images_analysed = len(workflow_results["analysis_results"])
                print(f" ✅ Success for {model} in {execution_time:.2f} s: {images_analysed} images analysed")
                results["models"][model] = {
                    "success": True,
                    "execution_time": execution_time,
                    "images_total": workflow_results["images_count"],
                    "images_relevant": len(workflow_results["relevant_images"]),
                    "images_analysed": images_analysed,
                    "analysis_results": workflow_results["analysis_results"]
                }
        except Exception as e:
            execution_time = time.time() - start_time
            print(f" ❌ Error for {model}: {e}")
            results["models"][model] = {
                "success": False,
                "error": str(e),
                "execution_time": execution_time
            }

    return results
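
# Usage sketch for the vision path (the ticket ID and vision model name are
# hypothetical; vision model names must be entries of VISION_MODELS from
# tests.common.llm_factory):
#
#   vision_results = compare_vision_models("TCK-0042", ["some_vision_model"], text_model="mistral_large")
#   print_comparison_table(vision_results)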


def print_comparison_table(results: Dict[str, Any]):
    """
    Print a comparison table of the models.

    Args:
        results: Comparison results
    """
    if results["type"] == "text":
        print("\n=== Text model comparison results ===")
        print(f"Ticket: {results['ticket_id']}")
        print(f"\n{'Model':<15} | {'Status':<10} | {'Time (s)':<10} | {'Analysis length':<15}")
        print("-" * 60)

        for model, data in results["models"].items():
            status = "✅ Success" if data.get("success", False) else "❌ Failure"
            time_str = f"{data.get('execution_time', 0):.2f}"
            length = data.get("analysis_length", "N/A")

            print(f"{model:<15} | {status:<10} | {time_str:<10} | {length:<15}")

    elif results["type"] == "vision":
        print("\n=== Vision model comparison results ===")
        print(f"Ticket: {results['ticket_id']}")
        print(f"Text model: {results['text_model']}")
        print(f"\n{'Model':<15} | {'Status':<10} | {'Time (s)':<10} | {'Analysed/Relevant/Total':<25}")
        print("-" * 70)

        for model, data in results["models"].items():
            status = "✅ Success" if data.get("success", False) else "❌ Failure"
            time_str = f"{data.get('execution_time', 0):.2f}"
            if data.get("success", False):
                images = f"{data.get('images_analysed', 0)}/{data.get('images_relevant', 0)}/{data.get('images_total', 0)}"
            else:
                images = "N/A"

            print(f"{model:<15} | {status:<10} | {time_str:<10} | {images:<25}")


def main():
    parser = argparse.ArgumentParser(description="Compare several LLM models on the same ticket")
    parser.add_argument("ticket_id", help="ID of the ticket to analyse")
    parser.add_argument("--type", choices=["text", "vision", "both"], default="text",
                        help="Type of models to compare")
    parser.add_argument("--text-models", nargs="+", choices=TEXT_MODELS,
                        help="Text models to compare (default: all)")
    parser.add_argument("--vision-models", nargs="+", choices=VISION_MODELS,
                        help="Vision models to compare (default: all)")
    parser.add_argument("--text-model", choices=TEXT_MODELS, default="mistral_large",
                        help="Text model to use for the vision tests")
    parser.add_argument("--output-dir", default="output", help="Directory containing the tickets")
    parser.add_argument("--save", action="store_true", help="Save the results")
    parser.add_argument("--verbose", "-v", action="store_true", help="Verbose mode")

    args = parser.parse_args()

    # Logging configuration
    setup_logging(args.verbose, f"compare_models_{args.ticket_id}.log")

    # Select the models to compare
    text_models = args.text_models if args.text_models else TEXT_MODELS
    vision_models = args.vision_models if args.vision_models else VISION_MODELS

    # Run the comparisons
    results = {}

    if args.type in ["text", "both"]:
        text_results = compare_text_models(args.ticket_id, text_models, args.output_dir)
        results["text"] = text_results
        print_comparison_table(text_results)

        if args.save:
            save_dir = os.path.join("results", "comparisons", args.ticket_id)
            os.makedirs(save_dir, exist_ok=True)

            save_file = os.path.join(save_dir, f"text_comparison_{int(time.time())}.json")
            with open(save_file, "w", encoding="utf-8") as f:
                json.dump(text_results, f, ensure_ascii=False, indent=2)
            print(f"Results saved to: {save_file}")

    if args.type in ["vision", "both"]:
        vision_results = compare_vision_models(args.ticket_id, vision_models, args.text_model, args.output_dir)
        results["vision"] = vision_results
        print_comparison_table(vision_results)

        if args.save:
            save_dir = os.path.join("results", "comparisons", args.ticket_id)
            os.makedirs(save_dir, exist_ok=True)

            save_file = os.path.join(save_dir, f"vision_comparison_{int(time.time())}.json")
            with open(save_file, "w", encoding="utf-8") as f:
                json.dump(vision_results, f, ensure_ascii=False, indent=2)
            print(f"Results saved to: {save_file}")

    return 0


if __name__ == "__main__":
    sys.exit(main())