mirror of
https://github.com/Ladebeze66/ragflow_preprocess.git
synced 2026-02-04 05:50:26 +01:00
84 lines
3.2 KiB
PowerShell
84 lines
3.2 KiB
PowerShell
# PowerShell launch script for Ragflow PDF Preprocessing.
# Print the start-up banner in cyan, followed by one empty line.
$bannerRule = "======================================================"
foreach ($bannerLine in @($bannerRule, " Ragflow PDF Preprocessing Launch", $bannerRule)) {
    Write-Host $bannerLine -ForegroundColor Cyan
}
Write-Host ""
# Resolve the folder that contains this script and make it the working
# directory, so relative paths such as requirements.txt and main.py resolve
# against the project folder.
$scriptPath = Split-Path -Path $MyInvocation.MyCommand.Path -Parent
Set-Location -Path $scriptPath
# Stop early when the "venv" folder is missing next to the script: tell the
# user to run the installer, wait for Enter, then leave the script.
$venvDir = "$scriptPath\venv"
if (!(Test-Path -Path $venvDir)) {
    @(
        "L'environnement virtuel n'existe pas. Veuillez d'abord exécuter install_windows.ps1",
        "Vous pouvez double-cliquer sur 'install_windows.bat' pour lancer l'installation"
    ) | ForEach-Object { Write-Host $_ }
    Read-Host "Appuyez sur Entrée pour quitter"
    exit
}
# Activate the virtual environment by running its Activate.ps1 script.
Write-Host "Activation de l'environnement virtuel..."
$activateScript = "$scriptPath\venv\Scripts\Activate.ps1"
& $activateScript
# Check that the required Python packages can be imported; when the import
# probe exits with a non-zero code, install everything from requirements.txt.
# The Pillow distribution is imported under the module name "PIL":
# "import Pillow" always fails, which used to trigger a reinstall on every
# launch even when all dependencies were present.
Write-Host "Vérification des dépendances Python..."
# stderr is discarded on purpose: only the exit code of the probe matters.
& python -c "import PyPDF2, PyQt6, pytesseract, PIL" 2>$null
if ($LASTEXITCODE -ne 0) {
    Write-Host "Certaines dépendances ne sont pas installées. Installation en cours..."
    & pip install -r requirements.txt
}
# Create the data directory structure by calling
# utils.data_structure.initialize_data_directories() through the Python
# interpreter.
Write-Host "Initialisation de la structure de données..."
$initSnippet = "from utils.data_structure import initialize_data_directories; initialize_data_directories()"
& python -c $initSnippet
# Look for a Tesseract OCR executable in a fixed list of Windows install
# locations and report the first one that exists. A missing Tesseract is
# only reported; the script keeps going.
$tesseractPaths = @(
    "C:\Program Files\Tesseract-OCR\tesseract.exe",
    "C:\Program Files (x86)\Tesseract-OCR\tesseract.exe",
    "C:\Tesseract-OCR\tesseract.exe"
)

# First candidate present on disk, or $null when none of them exists.
$tesseractHit = $tesseractPaths | Where-Object { Test-Path $_ } | Select-Object -First 1
$tesseractInstalled = $null -ne $tesseractHit

if ($tesseractInstalled) {
    Write-Host "Tesseract OCR detected at: $tesseractHit" -ForegroundColor Green
} else {
    $tesseractNotices = @(
        "Information: Tesseract OCR was not detected.",
        "The application will try to find Tesseract in standard locations.",
        "If OCR doesn't work, please install Tesseract from: https://github.com/UB-Mannheim/tesseract/wiki"
    )
    foreach ($notice in $tesseractNotices) {
        Write-Host $notice -ForegroundColor Yellow
    }
}
# Probe the Ollama server's /api/version endpoint before launching. On any
# failure, warn the user and ask whether to continue; any answer other than
# "Y" (case-insensitive) leaves the script.
$ollamaEndpoint = "217.182.105.173:11434"
try {
    # -ErrorAction Stop makes every failure terminating so the catch block
    # below always runs; -TimeoutSec keeps an unreachable host from
    # blocking the launcher indefinitely.
    $response = Invoke-WebRequest -Uri "http://$ollamaEndpoint/api/version" -UseBasicParsing -TimeoutSec 10 -ErrorAction Stop
    if ($response.StatusCode -eq 200) {
        Write-Host "Ollama server is accessible at $ollamaEndpoint." -ForegroundColor Green
    }
} catch {
    Write-Host "Warning: Cannot connect to Ollama server at $ollamaEndpoint." -ForegroundColor Yellow
    Write-Host "Make sure you have network connectivity to the Ollama server." -ForegroundColor Yellow
    $continue = Read-Host "Do you want to continue anyway? (Y/N)"
    # -ne is case-insensitive for strings, so this accepts both "Y" and "y".
    if ($continue -ne "Y") {
        exit
    }
}
# Start the application, then report how it ended based on the exit code of
# the python process, and wait for Enter so the console window stays open.
Write-Host "Lancement de l'application de prétraitement PDF..."
& python main.py
$appExitCode = $LASTEXITCODE

Write-Host ""
if ($appExitCode -eq 0) {
    Write-Host "The application has terminated normally." -ForegroundColor Green
} else {
    Write-Host "The application has terminated with errors (code $appExitCode)." -ForegroundColor Red
}

Read-Host "Press ENTER to exit"