ragflow_preprocess/install_languages.ps1
2025-03-27 17:59:10 +01:00

86 lines
3.1 KiB
PowerShell

#!/usr/bin/env pwsh
# Script to install additional languages for Tesseract OCR
# Check if the script is running as administrator.
# The current Windows identity is wrapped in a WindowsPrincipal so its role
# membership can be queried.
$currentPrincipal = [Security.Principal.WindowsPrincipal][Security.Principal.WindowsIdentity]::GetCurrent()
if (-not $currentPrincipal.IsInRole([Security.Principal.WindowsBuiltInRole]::Administrator)) {
    Write-Warning "Please run this script as administrator!"
    Write-Host "The script will close in 5 seconds..."
    # Give the user time to read the message before the window closes.
    Start-Sleep -Seconds 5
    # Exit with a non-zero code so a calling process can tell nothing was installed.
    exit 1
}
# Locate the Tesseract OCR installation directory.
# Probes a fixed list of directories in order and returns the first one that
# contains tesseract.exe, or $null when none of them does.
function Find-TesseractPath {
    $candidateDirs = "C:\Program Files\Tesseract-OCR", "C:\Program Files (x86)\Tesseract-OCR", "C:\Tesseract-OCR"

    $foundDir = $null
    foreach ($dir in $candidateDirs) {
        $exePath = '{0}\tesseract.exe' -f $dir
        if (Test-Path $exePath) {
            # First hit wins; later candidates are not checked.
            $foundDir = $dir
            break
        }
    }
    return $foundDir
}
# Find Tesseract path; abort when no installation can be located.
$tesseractPath = Find-TesseractPath
if ($null -eq $tesseractPath) {
    Write-Host "Tesseract OCR is not installed or was not found in standard locations." -ForegroundColor Red
    Write-Host "Please install Tesseract OCR first from: https://github.com/UB-Mannheim/tesseract/wiki" -ForegroundColor Yellow
    Write-Host "The script will close in 5 seconds..."
    # Give the user time to read the message before the window closes.
    Start-Sleep -Seconds 5
    # Exit with a non-zero code so a calling process can tell nothing was installed.
    exit 1
}
Write-Host "Tesseract OCR found at: $tesseractPath" -ForegroundColor Green
# Make sure the "tessdata" directory exists inside the Tesseract installation;
# the language files are written into it further down.
$tessDataPath = Join-Path $tesseractPath "tessdata"
if (!(Test-Path $tessDataPath)) {
    Write-Host "The tessdata folder doesn't exist. Creating..." -ForegroundColor Yellow
    # Discard the DirectoryInfo object New-Item emits so it is not printed.
    $null = New-Item -Path $tessDataPath -ItemType Directory
}
# Language code -> download URL of the matching .traineddata file.
# Every entry points at the 4.00 ref of the tesseract-ocr/tessdata repository.
$languageUrls = @{
    'fra'      = 'https://github.com/tesseract-ocr/tessdata/raw/4.00/fra.traineddata'
    'fra_vert' = 'https://github.com/tesseract-ocr/tessdata/raw/4.00/fra_vert.traineddata'
    'frk'      = 'https://github.com/tesseract-ocr/tessdata/raw/4.00/frk.traineddata'
    'frm'      = 'https://github.com/tesseract-ocr/tessdata/raw/4.00/frm.traineddata'
}
# Download languages
# Each file is fetched to a temporary name first and only swapped in once the
# download has succeeded, so a failed download never destroys an existing,
# working language file.

# Windows PowerShell (< 6) can default to TLS versions older than 1.2, which
# breaks HTTPS downloads from hosts that require TLS 1.2; add it to the
# enabled protocols.
if ($PSVersionTable.PSVersion.Major -lt 6) {
    [Net.ServicePointManager]::SecurityProtocol = [Net.ServicePointManager]::SecurityProtocol -bor [Net.SecurityProtocolType]::Tls12
}

foreach ($lang in $languageUrls.Keys) {
    $url = $languageUrls[$lang]
    $outputFile = Join-Path -Path $tessDataPath -ChildPath "$lang.traineddata"
    # Temporary download target, kept next to the final file so the move is a rename.
    $tempFile = "$outputFile.download"

    Write-Host "Downloading $lang.traineddata..." -ForegroundColor Cyan
    try {
        # -ErrorAction Stop turns every failure into a terminating error so the
        # catch block runs; -UseBasicParsing avoids the Internet Explorer
        # dependency of Windows PowerShell 5.1 (ignored by newer versions).
        Invoke-WebRequest -Uri $url -OutFile $tempFile -UseBasicParsing -ErrorAction Stop

        if (Test-Path $outputFile) {
            Write-Host "Language file $lang already exists. Removing..." -ForegroundColor Yellow
            Remove-Item -Path $outputFile -Force -ErrorAction Stop
        }
        Move-Item -LiteralPath $tempFile -Destination $outputFile -Force -ErrorAction Stop
        Write-Host "Language $lang successfully installed." -ForegroundColor Green
    }
    catch {
        Write-Host "Error downloading $lang.traineddata: $_" -ForegroundColor Red
        # Best-effort cleanup of a partial download; the previous language file
        # (if any) is left in place.
        if (Test-Path -LiteralPath $tempFile) {
            Remove-Item -LiteralPath $tempFile -Force -ErrorAction SilentlyContinue
        }
    }
}
# Report every language present in the tessdata folder, then wait for a key
# press so the console window stays open.
Write-Host "`nVerifying installed languages:" -ForegroundColor Cyan
$installedLanguages = foreach ($dataFile in Get-ChildItem -Path $tessDataPath -Filter "*.traineddata") {
    # The language code is the file name without the ".traineddata" part.
    $dataFile.Name.Replace(".traineddata", "")
}
$languageSummary = $installedLanguages -join ', '
Write-Host "Installed languages: $languageSummary" -ForegroundColor Green
Write-Host "`nLanguage installation completed." -ForegroundColor Green
Write-Host "Press any key to close..."
# Block until a key goes down; the key itself is neither echoed nor used.
[void]$Host.UI.RawUI.ReadKey("NoEcho,IncludeKeyDown")