#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Test script to verify critical components.

Runs three independent diagnostics and prints a human-readable report:

1. the Python environment (interpreter, platform, required packages),
2. the Tesseract OCR installation (executable, version, languages),
3. the connection to an Ollama server (version, models, one generation).

Every check only prints to stdout; none of them raises on a failed check.
"""

import importlib
import json
import os
import platform
import shutil
import subprocess
import sys
import time
from typing import Any, Dict, List, Optional

# `requests` is one of the packages this script is supposed to diagnose, so a
# missing installation must not abort the script before it can be reported.
try:
    import requests
except ImportError:
    requests = None

# Seconds to wait for a `tesseract` subprocess before giving up.
_SUBPROCESS_TIMEOUT = 10

# Possible paths for Tesseract on Windows
_WINDOWS_TESSERACT_PATHS = [
    r"C:\Program Files\Tesseract-OCR\tesseract.exe",
    r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe",
    r"C:\Tesseract-OCR\tesseract.exe",
    r"C:\Users\PCDEV\AppData\Local\Programs\Tesseract-OCR\tesseract.exe",
    r"C:\Users\PCDEV\Tesseract-OCR\tesseract.exe",
]


def _windows_tesseract_candidates() -> List[str]:
    """Return the Windows install locations to probe, without duplicates.

    The fixed list is extended with the same per-user locations derived from
    the current user's environment instead of a hard-coded user name.
    """
    candidates = list(_WINDOWS_TESSERACT_PATHS)
    local_app_data = os.environ.get("LOCALAPPDATA")
    if local_app_data:
        candidates.append(
            os.path.join(local_app_data, "Programs", "Tesseract-OCR", "tesseract.exe")
        )
    candidates.append(
        os.path.join(os.path.expanduser("~"), "Tesseract-OCR", "tesseract.exe")
    )
    # dict.fromkeys keeps the first occurrence and preserves order.
    return list(dict.fromkeys(candidates))


def _parse_tesseract_version(output: str) -> Optional[str]:
    """Extract the version token from ``tesseract --version`` output.

    The first non-empty line normally looks like ``tesseract 5.3.0``; the
    token after the program name is returned. If the line has another shape
    it is returned whole. Returns None when the output has no content.
    """
    for line in output.splitlines():
        tokens = line.split()
        if not tokens:
            continue
        if tokens[0].lower() == "tesseract" and len(tokens) > 1:
            return tokens[1]
        return line.strip()
    return None


def _parse_tesseract_languages(output: str) -> List[str]:
    """Extract language codes from ``tesseract --list-langs`` output.

    Everything after the "List of available languages" header is returned.
    When no such header is present the first line is skipped, as the
    original implementation did. ``splitlines`` also removes the ``\\r``
    that Windows output carries.
    """
    lines = [line.strip() for line in output.splitlines() if line.strip()]
    for index, line in enumerate(lines):
        if line.lower().startswith("list of available languages"):
            return lines[index + 1:]
    return lines[1:]


def _run_tesseract(executable: str, option: str) -> Optional[str]:
    """Run ``executable option`` and return its textual output.

    Returns None when the process exits with a non-zero status. Falls back
    to stderr when stdout is empty. May raise OSError or
    subprocess.SubprocessError (including a timeout); callers handle both.
    """
    result = subprocess.run(
        [executable, option],
        capture_output=True,
        text=True,
        timeout=_SUBPROCESS_TIMEOUT,
    )
    if result.returncode != 0:
        return None
    return result.stdout if result.stdout.strip() else result.stderr


def _languages_from_tessdata(executable: str) -> List[str]:
    """List languages from the ``tessdata`` folder next to the executable."""
    tessdata_folder = os.path.join(os.path.dirname(executable), "tessdata")
    suffix = ".traineddata"
    try:
        entries = os.listdir(tessdata_folder)
    except OSError:
        return []
    return sorted(name[: -len(suffix)] for name in entries if name.endswith(suffix))


def _report_tesseract_version(executable: str) -> None:
    """Print the version reported by the given Tesseract executable."""
    try:
        output = _run_tesseract(executable, "--version")
    except (OSError, subprocess.SubprocessError) as e:
        print(f"Error checking Tesseract version: {e}")
        return
    version = _parse_tesseract_version(output) if output else None
    if version:
        print(f"Tesseract version: {version}")
    else:
        print("Error checking Tesseract version: no version information returned")


def _report_tesseract_languages(executable: str) -> None:
    """Print the languages available to the given Tesseract executable."""
    langs: List[str] = []
    try:
        output = _run_tesseract(executable, "--list-langs")
    except (OSError, subprocess.SubprocessError) as e:
        print(f"Error checking Tesseract languages: {e}")
        output = None
    if output:
        langs = _parse_tesseract_languages(output)
    if not langs:
        # Fall back to inspecting the data folder shipped with the binary.
        langs = _languages_from_tessdata(executable)
    if langs:
        print(f"Installed languages: {', '.join(langs)}")
    else:
        print("No installed Tesseract languages could be detected")


# Check Tesseract OCR installation
def check_tesseract():
    """Report whether Tesseract OCR and its Python bindings are usable.

    Imports ``pytesseract`` and ``PIL``, locates the ``tesseract`` executable
    (PATH first, then the known Windows install locations), and prints its
    version and installed languages followed by installation instructions.
    When the executable is found outside PATH,
    ``pytesseract.pytesseract.tesseract_cmd`` is pointed at it.

    Returns:
        None. All results are printed.
    """
    print("\n=== Checking Tesseract OCR ===")

    try:
        import pytesseract
        from PIL import Image  # noqa: F401  (imported only to verify Pillow)
    except ImportError as e:
        print(f"Error: {e}")
        print("Tesseract OCR or its Python dependencies are not properly installed")
        print("Install them with: pip install pytesseract Pillow")
        return

    # Check if Tesseract is in PATH
    executable = shutil.which("tesseract")
    if executable:
        print(f"Tesseract found in PATH: {executable}")
    elif platform.system() == "Windows":
        print("Tesseract is not in PATH. Searching in standard locations...")
        executable = next(
            (path for path in _windows_tesseract_candidates() if os.path.exists(path)),
            None,
        )
        if executable:
            pytesseract.pytesseract.tesseract_cmd = executable
            print(f"Tesseract found at: {executable}")

    if executable is None:
        print("Tesseract executable was not found")
    else:
        _report_tesseract_version(executable)
        _report_tesseract_languages(executable)

    print("\nINSTRUCTIONS FOR TESSERACT OCR:")
    print("1. If Tesseract is not installed, download it from:")
    print(" https://github.com/UB-Mannheim/tesseract/wiki")
    print("2. Make sure to install French (fra) and English (eng) languages")
    print("3. Check the 'Add to PATH' option during installation")


def _ollama_is_reachable(base_url: str) -> bool:
    """Query ``/api/version`` and print the outcome; True on HTTP 200."""
    try:
        response = requests.get(f"{base_url}/api/version", timeout=10)
    except requests.exceptions.RequestException as e:
        print(f"✗ Unable to connect to Ollama: {str(e)}")
        return False

    if response.status_code != 200:
        print(f"✗ Error connecting to Ollama: status {response.status_code}")
        print(f" Body: {response.text[:200]}")
        return False

    try:
        version = response.json().get("version", "unknown")
    except ValueError:
        # Every JSON decode error raised by requests derives from ValueError.
        version = "unknown"
    print(f"✓ Connection to Ollama successful - Version: {version}")
    return True


def _list_ollama_models(base_url: str) -> None:
    """Print the models returned by ``/api/tags``."""
    try:
        response = requests.get(f"{base_url}/api/tags", timeout=10)
    except requests.exceptions.RequestException as e:
        print(f"✗ Error retrieving models: {str(e)}")
        return

    if response.status_code != 200:
        print(f"✗ Error retrieving models: status {response.status_code}")
        return

    try:
        models = response.json().get("models", [])
    except ValueError as e:
        print(f"✗ Error retrieving models: {str(e)}")
        return

    if not models:
        print("No models found on Ollama server")
        return

    print(f"✓ Available models ({len(models)}):")
    for model in models:
        print(f" - {model.get('name', 'Unknown')} ({model.get('size', 'Unknown size')})")


def _test_ollama_model(base_url: str, model: str) -> None:
    """Send one short prompt to ``/api/generate`` and print the answer."""
    print(f"\nTesting a simple model ({model})...")
    payload: Dict[str, Any] = {
        "model": model,
        "prompt": "Say hello in English",
        # Ask for a single JSON object; without this flag the server streams
        # one JSON document per line and only a fragment would be shown.
        "stream": False,
        "options": {"temperature": 0.1},
    }

    start_time = time.time()
    try:
        response = requests.post(f"{base_url}/api/generate", json=payload, timeout=30)
    except requests.exceptions.RequestException as e:
        print(f"✗ Error testing model: {str(e)}")
        return
    elapsed_time = time.time() - start_time

    if response.status_code != 200:
        print(f"✗ Error testing model: status {response.status_code}")
        print(f" Body: {response.text[:200]}")
        return

    try:
        result = response.json()
    except ValueError as e:
        print(f"✗ JSON parsing error: {str(e)}")
        print(" Trying to process first line only...")
        # Kept for servers that stream line-delimited JSON regardless.
        lines = response.text.strip().split("\n")
        try:
            result = json.loads(lines[0])
        except ValueError:
            print("✗ Failed to parse first line as JSON")
            print(f" Raw response (first 200 chars): {response.text[:200]}")
            return
        print(f"✓ Test successful with first line parsing in {elapsed_time:.2f} seconds")
    else:
        print(f"✓ Test successful in {elapsed_time:.2f} seconds")

    print(f" Response: {result.get('response', 'No response')[:100]}...")


# Check connection to Ollama
def check_ollama(endpoint="http://217.182.105.173:11434", model="mistral"):
    """Report whether an Ollama server answers at ``endpoint``.

    Queries the server version, lists the available models and runs one
    short generation with ``model``, printing each outcome followed by
    troubleshooting instructions. The check is skipped with an explanatory
    message when the ``requests`` package is not installed.

    Args:
        endpoint: Base URL of the Ollama server; a trailing slash is ignored.
        model: Name of the model used for the generation test.

    Returns:
        None. All results are printed.
    """
    print("\n=== Checking Ollama connection ===")
    print(f"Endpoint: {endpoint}")

    if requests is None:
        print("✗ The 'requests' package is not installed; cannot contact Ollama")
        print(" Install it with: pip install requests")
    else:
        base_url = endpoint.rstrip("/")
        if _ollama_is_reachable(base_url):
            _list_ollama_models(base_url)
            _test_ollama_model(base_url, model)

    print("\nINSTRUCTIONS FOR OLLAMA:")
    print("1. Verify that the Ollama server is running at the specified address")
    print("2. Verify that port 11434 is open and accessible")
    print("3. Check Ollama server logs for potential issues")


# Check Python environment
def check_python_env():
    """Report the interpreter, the platform and the required packages.

    Each package is checked by importing it, so a package that is present
    but fails during import is reported instead of aborting the script.

    Returns:
        None. All results are printed.
    """
    print("\n=== Checking Python environment ===")
    print(f"Python {sys.version}")
    print(f"Platform: {platform.platform()}")

    # (import name, label shown in the report); Pillow is listed last because
    # its package name is Pillow but its import name is PIL.
    required_packages = [
        ("PyQt6", "PyQt6"),
        ("PyPDF2", "PyPDF2"),
        ("pytesseract", "pytesseract"),
        ("requests", "requests"),
        ("fitz", "fitz"),
        ("PIL", "PIL (Pillow)"),
    ]

    print("\nChecking required packages:")
    for import_name, label in required_packages:
        try:
            importlib.import_module(import_name)
        except ImportError:
            print(f"✗ {label} is NOT installed")
        except Exception as e:
            # A broken installation can raise something other than
            # ImportError; keep going so the remaining packages are checked.
            print(f"✗ {label} is installed but failed to import: {e}")
        else:
            print(f"✓ {label} is installed")

    print("\nINSTRUCTIONS FOR PYTHON ENVIRONMENT:")
    print("1. Make sure you're using the virtual environment if configured")
    print("2. Install missing packages with: pip install -r requirements.txt")


def _make_stdout_tolerant() -> None:
    """Make printing the check marks safe on consoles that cannot encode them."""
    reconfigure = getattr(sys.stdout, "reconfigure", None)
    if reconfigure is not None:
        reconfigure(errors="replace")


# Main function
def main():
    """Run every check in order and print the closing instructions."""
    _make_stdout_tolerant()
    print("=== Testing critical components ===")

    # Check Python environment
    check_python_env()

    # Check Tesseract OCR
    check_tesseract()

    # Check connection to Ollama
    check_ollama()

    print("\n=== Checks completed ===")
    print("If issues were detected, follow the displayed instructions")
    print("After fixing issues, run this script again to verify")


if __name__ == "__main__":
    main()