# llm_ticket3/tests/common/ticket_utils.py

"""
Utilitaires pour la gestion des tickets dans les tests.
"""

import os
import json
import logging
from typing import Dict, Any, Optional, List, Tuple

# Module-level logger used by the helpers in this file; it is registered under
# the fixed name "TicketUtils" rather than the module's import path.
logger = logging.getLogger("TicketUtils")

def get_ticket_info(ticket_id: str, output_dir: str = "output") -> Dict[str, Any]:
    """
    Return basic information about a ticket stored on disk.

    The ticket is looked up at ``<output_dir>/ticket_<ticket_id>``. Every
    sub-directory of it whose name starts with ``ticket_id`` counts as an
    extraction, and the one whose name sorts last is the one described.

    Args:
        ticket_id: Ticket identifier (e.g. ``T1234``).
        output_dir: Base directory that contains the ticket directories.

    Returns:
        A dictionary with the keys ``ticket_id``, ``path``, ``extraction``,
        ``extraction_path``, ``has_attachments``, ``attachments_path``
        (``None`` when there is no attachments directory),
        ``attachments_count`` and ``images_count``.

    Raises:
        FileNotFoundError: If the ticket directory does not exist (or is not
            a directory), or if it contains no extraction sub-directory.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')

    ticket_path = os.path.join(output_dir, f"ticket_{ticket_id}")

    # isdir, not exists: a regular file at this path is not a ticket, and
    # os.listdir() on it would raise NotADirectoryError instead of the
    # documented FileNotFoundError.
    if not os.path.isdir(ticket_path):
        raise FileNotFoundError(f"Le ticket {ticket_id} n'existe pas dans {output_dir}")

    # Extractions are the sub-directories named after the ticket.
    extractions = [
        entry for entry in os.listdir(ticket_path)
        if os.path.isdir(os.path.join(ticket_path, entry)) and entry.startswith(ticket_id)
    ]

    if not extractions:
        raise FileNotFoundError(f"Aucune extraction trouvée pour le ticket {ticket_id}")

    # Descending name order; presumably the names embed a timestamp so the
    # first one is the most recent extraction.
    extractions.sort(reverse=True)
    latest_extraction = extractions[0]
    extraction_path = os.path.join(ticket_path, latest_extraction)

    # Attachments only count when "attachments" is really a directory; a
    # stray file with that name must not make os.listdir() fail.
    attachments_path = os.path.join(extraction_path, "attachments")
    has_attachments = os.path.isdir(attachments_path)

    if has_attachments:
        attachments = [
            name for name in os.listdir(attachments_path)
            if os.path.isfile(os.path.join(attachments_path, name))
        ]
        images = [name for name in attachments if name.lower().endswith(image_extensions)]
    else:
        attachments = []
        images = []

    return {
        "ticket_id": ticket_id,
        "path": ticket_path,
        "extraction": latest_extraction,
        "extraction_path": extraction_path,
        "has_attachments": has_attachments,
        "attachments_path": attachments_path if has_attachments else None,
        "attachments_count": len(attachments),
        "images_count": len(images)
    }

def _first_json_file(directory: str, name_fragment: str = "") -> Optional[str]:
    """Return the first ``.json`` file of ``directory`` (by sorted name) whose name contains ``name_fragment``, or None."""
    if not os.path.isdir(directory):
        return None

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # selected file the same on every run and every platform.
    for name in sorted(os.listdir(directory)):
        if not name.lower().endswith('.json') or name_fragment not in name:
            continue
        candidate = os.path.join(directory, name)
        # Skip directories that merely happen to be named "*.json".
        if os.path.isfile(candidate):
            return candidate
    return None


def get_ticket_json(ticket_id: str, output_dir: str = "output") -> Tuple[Optional[str], Optional[Dict[str, Any]]]:
    """
    Locate the JSON file of a ticket and load its content.

    The file is searched first in ``<extraction>/<ticket_id>_rapports`` (any
    ``.json`` file), then directly in the extraction directory (``.json``
    files whose name contains ``ticket_id``). Within a directory the
    candidates are examined in sorted name order.

    Args:
        ticket_id: Ticket identifier (e.g. ``T1234``).
        output_dir: Base directory that contains the ticket directories.

    Returns:
        ``(file_path, json_content)`` on success, with a ``"code"`` key set
        to ``ticket_id`` when the file does not provide one.
        ``(None, None)`` when the ticket or its JSON file cannot be found.
        ``(file_path, None)`` when the file exists but cannot be read or
        parsed, or when its top-level value is not a JSON object.
    """
    try:
        ticket_info = get_ticket_info(ticket_id, output_dir)
    except FileNotFoundError:
        logger.error(f"Ticket {ticket_id} non trouvé")
        return None, None

    extraction_path = ticket_info["extraction_path"]
    rapport_dir = os.path.join(extraction_path, f"{ticket_id}_rapports")

    # Prefer the reports directory; fall back to the extraction directory.
    json_file = _first_json_file(rapport_dir) or _first_json_file(extraction_path, ticket_id)

    if not json_file:
        logger.warning(f"Aucun fichier JSON trouvé pour le ticket {ticket_id}")
        return None, None

    # Only I/O and decoding/parsing failures are expected here
    # (json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses).
    try:
        with open(json_file, 'r', encoding='utf-8') as f:
            json_data = json.load(f)
    except (OSError, ValueError) as e:
        logger.error(f"Erreur lors du chargement du JSON: {e}")
        return json_file, None

    # The "code" key can only be added to a JSON object; report anything else
    # explicitly instead of relying on a swallowed TypeError.
    if not isinstance(json_data, dict):
        logger.error(f"Erreur lors du chargement du JSON: le contenu de {json_file} n'est pas un objet JSON")
        return json_file, None

    # Add the ticket code when the file does not carry one.
    json_data.setdefault("code", ticket_id)

    return json_file, json_data

def get_ticket_images(ticket_id: str, output_dir: str = "output", filter_duplicates: bool = True) -> List[str]:
    """
    List the image attachments of a ticket.

    Args:
        ticket_id: Ticket identifier (e.g. ``T1234``).
        output_dir: Base directory that contains the ticket directories.
        filter_duplicates: When True, the paths are passed through
            ``utils.image_dedup.filtrer_images_uniques`` (presumably to drop
            duplicate images) if that module can be imported.

    Returns:
        Paths of the image files found in the ticket's attachments
        directory. The list is empty when the ticket is not found or has no
        attachments.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')

    try:
        info = get_ticket_info(ticket_id, output_dir)
    except FileNotFoundError:
        logger.error(f"Ticket {ticket_id} non trouvé")
        return []

    if not info["has_attachments"]:
        return []

    folder = info["attachments_path"]

    # Keep regular files whose extension marks them as images.
    found: List[str] = []
    for entry in os.listdir(folder):
        full_path = os.path.join(folder, entry)
        if not os.path.isfile(full_path):
            continue
        if entry.lower().endswith(image_suffixes):
            found.append(full_path)

    # De-duplication is optional and best effort: when the helper module is
    # unavailable, the unfiltered list is returned.
    if filter_duplicates and found:
        try:
            from utils.image_dedup import filtrer_images_uniques
            found = filtrer_images_uniques(found)
        except ImportError:
            logger.warning("Module utils.image_dedup non disponible, pas de filtrage des doublons")

    return found