"""
|
|
System resource monitor for Ollama and NVIDIA GPU resources
|
|
"""
|
|
import platform
import subprocess
import threading
import time
import tkinter as tk
from datetime import datetime
from tkinter import ttk

import psutil
import requests
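
# NOTE: psutil and requests are third-party packages; install them with
# `pip install psutil requests`. tkinter ships with most CPython builds.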


class SystemMonitor:
    def __init__(self, root):
        self.root = root
        self.root.title("LLM Lab Monitor")
        self.root.geometry("800x600")
        self.root.minsize(700, 500)

        # Style
        self.style = ttk.Style()
        self.style.theme_use('alt')  # other options: 'clam', 'default', 'classic'

        # State
        self.update_interval = 2  # seconds between refreshes
        self.running = True
        self.ollama_models = []
        self.active_model = None
        self.gpu_available = self._check_gpu_available()

        # Build the UI
        self._create_widgets()

        # Start the background update thread
        self.update_thread = threading.Thread(target=self._update_loop)
        self.update_thread.daemon = True
        self.update_thread.start()

        # Intercept window close
        self.root.protocol("WM_DELETE_WINDOW", self._on_close)

    def _create_widgets(self):
        # Create the notebook (tabs)
        self.notebook = ttk.Notebook(self.root)
        self.notebook.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

        # Tab 1: System monitoring
        self.system_frame = ttk.Frame(self.notebook)
        self.notebook.add(self.system_frame, text="System")

        # Tab 2: Ollama
        self.ollama_frame = ttk.Frame(self.notebook)
        self.notebook.add(self.ollama_frame, text="Ollama")

        # Tab 3: GPU
        self.gpu_frame = ttk.Frame(self.notebook)
        self.notebook.add(self.gpu_frame, text="GPU")

        # Tab 4: Logs
        self.logs_frame = ttk.Frame(self.notebook)
        self.notebook.add(self.logs_frame, text="Logs")

        # === System tab configuration ===
        system_label = ttk.Label(self.system_frame, text="System Resources", font=("Arial", 14, "bold"))
        system_label.pack(pady=10)

        # System information
        system_info_frame = ttk.LabelFrame(self.system_frame, text="System Information")
        system_info_frame.pack(fill=tk.X, padx=10, pady=5)

        # OS
        os_frame = ttk.Frame(system_info_frame)
        os_frame.pack(fill=tk.X, padx=5, pady=2)
        ttk.Label(os_frame, text="Operating System:").pack(side=tk.LEFT, padx=5)
        self.os_label = ttk.Label(os_frame, text="")
        self.os_label.pack(side=tk.LEFT, padx=5)

        # CPU
        cpu_frame = ttk.Frame(system_info_frame)
        cpu_frame.pack(fill=tk.X, padx=5, pady=2)
        ttk.Label(cpu_frame, text="Processor:").pack(side=tk.LEFT, padx=5)
        self.cpu_label = ttk.Label(cpu_frame, text="")
        self.cpu_label.pack(side=tk.LEFT, padx=5)

        # RAM
        ram_frame = ttk.Frame(system_info_frame)
        ram_frame.pack(fill=tk.X, padx=5, pady=2)
        ttk.Label(ram_frame, text="RAM:").pack(side=tk.LEFT, padx=5)
        self.ram_label = ttk.Label(ram_frame, text="")
        self.ram_label.pack(side=tk.LEFT, padx=5)

        # Progress bars
        progress_frame = ttk.LabelFrame(self.system_frame, text="Resource Usage")
        progress_frame.pack(fill=tk.X, padx=10, pady=5)

        # CPU usage
        cpu_usage_frame = ttk.Frame(progress_frame)
        cpu_usage_frame.pack(fill=tk.X, padx=5, pady=5)
        ttk.Label(cpu_usage_frame, text="CPU:").pack(side=tk.LEFT, padx=5)
        self.cpu_progress = ttk.Progressbar(cpu_usage_frame, orient=tk.HORIZONTAL, length=300, mode='determinate')
        self.cpu_progress.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True)
        self.cpu_percent = ttk.Label(cpu_usage_frame, text="0%")
        self.cpu_percent.pack(side=tk.LEFT, padx=5)

        # RAM usage
        ram_usage_frame = ttk.Frame(progress_frame)
        ram_usage_frame.pack(fill=tk.X, padx=5, pady=5)
        ttk.Label(ram_usage_frame, text="RAM:").pack(side=tk.LEFT, padx=5)
        self.ram_progress = ttk.Progressbar(ram_usage_frame, orient=tk.HORIZONTAL, length=300, mode='determinate')
        self.ram_progress.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True)
        self.ram_percent = ttk.Label(ram_usage_frame, text="0%")
        self.ram_percent.pack(side=tk.LEFT, padx=5)

        # Disk usage
        disk_usage_frame = ttk.Frame(progress_frame)
        disk_usage_frame.pack(fill=tk.X, padx=5, pady=5)
        ttk.Label(disk_usage_frame, text="Disk:").pack(side=tk.LEFT, padx=5)
        self.disk_progress = ttk.Progressbar(disk_usage_frame, orient=tk.HORIZONTAL, length=300, mode='determinate')
        self.disk_progress.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True)
        self.disk_percent = ttk.Label(disk_usage_frame, text="0%")
        self.disk_percent.pack(side=tk.LEFT, padx=5)

        # === Ollama tab configuration ===
        ollama_label = ttk.Label(self.ollama_frame, text="Ollama Server", font=("Arial", 14, "bold"))
        ollama_label.pack(pady=10)

        # Server status
        server_frame = ttk.LabelFrame(self.ollama_frame, text="Server Status")
        server_frame.pack(fill=tk.X, padx=10, pady=5)

        # Status
        status_frame = ttk.Frame(server_frame)
        status_frame.pack(fill=tk.X, padx=5, pady=2)
        ttk.Label(status_frame, text="Status:").pack(side=tk.LEFT, padx=5)
        self.status_label = ttk.Label(status_frame, text="Checking...")
        self.status_label.pack(side=tk.LEFT, padx=5)

        # URL
        url_frame = ttk.Frame(server_frame)
        url_frame.pack(fill=tk.X, padx=5, pady=2)
        ttk.Label(url_frame, text="URL:").pack(side=tk.LEFT, padx=5)
        self.url_label = ttk.Label(url_frame, text="http://localhost:11434")
        self.url_label.pack(side=tk.LEFT, padx=5)

        # Available models
        models_frame = ttk.LabelFrame(self.ollama_frame, text="Available Models")
        models_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)

        # Model list
        self.tree = ttk.Treeview(models_frame, columns=("Nom", "Taille", "Modifié"), show='headings')
        self.tree.heading("Nom", text="LLM Model")
        self.tree.heading("Taille", text="Size")
        self.tree.heading("Modifié", text="Modified")
        self.tree.column("Nom", width=150)
        self.tree.column("Taille", width=100)
        self.tree.column("Modifié", width=150)
        self.tree.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)

        # Buttons
        button_frame = ttk.Frame(self.ollama_frame)
        button_frame.pack(fill=tk.X, padx=10, pady=5)

        refresh_button = ttk.Button(button_frame, text="Refresh", command=self._refresh_ollama)
        refresh_button.pack(side=tk.LEFT, padx=5)

        # === GPU tab configuration ===
        gpu_label = ttk.Label(self.gpu_frame, text="GPU Resources", font=("Arial", 14, "bold"))
        gpu_label.pack(pady=10)

        if self.gpu_available:
            # GPU information
            gpu_info_frame = ttk.LabelFrame(self.gpu_frame, text="GPU Information")
            gpu_info_frame.pack(fill=tk.X, padx=10, pady=5)

            # GPU model
            gpu_model_frame = ttk.Frame(gpu_info_frame)
            gpu_model_frame.pack(fill=tk.X, padx=5, pady=2)
            ttk.Label(gpu_model_frame, text="Model:").pack(side=tk.LEFT, padx=5)
            self.gpu_model_label = ttk.Label(gpu_model_frame, text="")
            self.gpu_model_label.pack(side=tk.LEFT, padx=5)

            # GPU memory
            gpu_memory_frame = ttk.Frame(gpu_info_frame)
            gpu_memory_frame.pack(fill=tk.X, padx=5, pady=2)
            ttk.Label(gpu_memory_frame, text="Memory:").pack(side=tk.LEFT, padx=5)
            self.gpu_memory_label = ttk.Label(gpu_memory_frame, text="")
            self.gpu_memory_label.pack(side=tk.LEFT, padx=5)

            # GPU utilization
            gpu_usage_frame = ttk.LabelFrame(self.gpu_frame, text="Utilization")
            gpu_usage_frame.pack(fill=tk.X, padx=10, pady=5)

            # Compute
            gpu_compute_frame = ttk.Frame(gpu_usage_frame)
            gpu_compute_frame.pack(fill=tk.X, padx=5, pady=5)
            ttk.Label(gpu_compute_frame, text="Compute:").pack(side=tk.LEFT, padx=5)
            self.gpu_compute_progress = ttk.Progressbar(gpu_compute_frame, orient=tk.HORIZONTAL, length=300, mode='determinate')
            self.gpu_compute_progress.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True)
            self.gpu_compute_percent = ttk.Label(gpu_compute_frame, text="0%")
            self.gpu_compute_percent.pack(side=tk.LEFT, padx=5)

            # Memory
            gpu_mem_usage_frame = ttk.Frame(gpu_usage_frame)
            gpu_mem_usage_frame.pack(fill=tk.X, padx=5, pady=5)
            ttk.Label(gpu_mem_usage_frame, text="Memory:").pack(side=tk.LEFT, padx=5)
            self.gpu_mem_progress = ttk.Progressbar(gpu_mem_usage_frame, orient=tk.HORIZONTAL, length=300, mode='determinate')
            self.gpu_mem_progress.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True)
            self.gpu_mem_percent = ttk.Label(gpu_mem_usage_frame, text="0%")
            self.gpu_mem_percent.pack(side=tk.LEFT, padx=5)

            # Temperature
            gpu_temp_frame = ttk.Frame(gpu_usage_frame)
            gpu_temp_frame.pack(fill=tk.X, padx=5, pady=5)
            ttk.Label(gpu_temp_frame, text="Temperature:").pack(side=tk.LEFT, padx=5)
            self.gpu_temp_progress = ttk.Progressbar(gpu_temp_frame, orient=tk.HORIZONTAL, length=300, mode='determinate')
            self.gpu_temp_progress.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True)
            self.gpu_temp_label = ttk.Label(gpu_temp_frame, text="0°C")
            self.gpu_temp_label.pack(side=tk.LEFT, padx=5)

            # Active GPU processes
            gpu_processes_frame = ttk.LabelFrame(self.gpu_frame, text="GPU Processes")
            gpu_processes_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)

            # Process list
            self.gpu_process_tree = ttk.Treeview(gpu_processes_frame,
                                                 columns=("PID", "Nom", "Mémoire"),
                                                 show='headings')
            self.gpu_process_tree.heading("PID", text="PID")
            self.gpu_process_tree.heading("Nom", text="Process")
            self.gpu_process_tree.heading("Mémoire", text="Memory")
            self.gpu_process_tree.column("PID", width=50)
            self.gpu_process_tree.column("Nom", width=200)
            self.gpu_process_tree.column("Mémoire", width=100)
            self.gpu_process_tree.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)
        else:
            no_gpu_label = ttk.Label(self.gpu_frame,
                                     text="No NVIDIA GPU detected.",
                                     font=("Arial", 12))
            no_gpu_label.pack(pady=50)

            install_label = ttk.Label(self.gpu_frame,
                                      text="To monitor an NVIDIA GPU, install nvidia-smi and nvitop.",
                                      font=("Arial", 10))
            install_label.pack(pady=10)

        # === Logs tab configuration ===
        logs_label = ttk.Label(self.logs_frame, text="Activity Logs", font=("Arial", 14, "bold"))
        logs_label.pack(pady=10)

        # Log area
        log_area_frame = ttk.Frame(self.logs_frame)
        log_area_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)

        # Scrollbar
        scrollbar = ttk.Scrollbar(log_area_frame)
        scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

        # Text area
        self.log_text = tk.Text(log_area_frame, yscrollcommand=scrollbar.set)
        self.log_text.pack(fill=tk.BOTH, expand=True)
        scrollbar.config(command=self.log_text.yview)

        # Buttons
        log_button_frame = ttk.Frame(self.logs_frame)
        log_button_frame.pack(fill=tk.X, padx=10, pady=5)

        clear_log_button = ttk.Button(log_button_frame, text="Clear Logs",
                                      command=lambda: self.log_text.delete(1.0, tk.END))
        clear_log_button.pack(side=tk.LEFT, padx=5)

        # Status bar at the bottom of the window
        self.status_bar = ttk.Label(self.root, text="LLM Lab Monitor - Last update: Never",
                                    relief=tk.SUNKEN, anchor=tk.W)
        self.status_bar.pack(side=tk.BOTTOM, fill=tk.X)

    def _update_loop(self):
        """Main update loop, run in a background thread."""
        while self.running:
            try:
                # System update
                self._update_system_info()

                # Ollama update (only when the Ollama tab is selected)
                if self.notebook.index(self.notebook.select()) == 1:
                    self._update_ollama_info()

                # GPU update (only when the GPU tab is selected)
                if self.gpu_available and self.notebook.index(self.notebook.select()) == 2:
                    self._update_gpu_info()

                # Status bar update
                now = datetime.now().strftime("%H:%M:%S")
                self.status_bar.config(text=f"LLM Lab Monitor - Last update: {now}")

            except Exception as e:
                self._log(f"Update error: {e}")

            time.sleep(self.update_interval)
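
    # Tkinter widgets are not guaranteed to be safe to update from a worker
    # thread; the direct .config() calls above work in practice on many
    # builds, but a more defensive pattern marshals UI work onto the Tk main
    # loop. A minimal sketch (hypothetical helper, not wired into the loop):
    def _post_to_ui(self, callback):
        """Schedule a zero-argument callable on the Tk event loop (sketch)."""
        self.root.after(0, callback)
        # Example usage from the worker thread:
        #   self._post_to_ui(lambda: self.status_bar.config(text="..."))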

    def _update_system_info(self):
        """Update system information."""
        # Static system information
        self.os_label.config(text=f"{platform.system()} {platform.release()}")
        self.cpu_label.config(text=f"{psutil.cpu_count(logical=False)} cores ({psutil.cpu_count()} threads)")

        # RAM detection (assigned outside the try so ram.percent below is
        # always defined)
        ram = psutil.virtual_memory()
        try:
            total_ram = ram.total / (1024 * 1024 * 1024)  # GB

            # Additional check on Linux
            if platform.system() == "Linux":
                try:
                    # Use /proc/meminfo for a more accurate total
                    with open('/proc/meminfo', 'r') as f:
                        for line in f:
                            if 'MemTotal' in line:
                                # MemTotal is reported in kB
                                mem_kb = int(line.split()[1])
                                linux_ram = mem_kb / (1024 * 1024)  # GB
                                # Keep the higher of the two values
                                total_ram = max(total_ram, linux_ram)
                                break
                except Exception as e:
                    self._log(f"Error reading /proc/meminfo: {e}")

            self.ram_label.config(text=f"{total_ram:.1f} GB")
        except Exception as e:
            self._log(f"Error detecting RAM: {e}")
            self.ram_label.config(text="Detection failed")

        # CPU usage
        cpu_percent = psutil.cpu_percent()
        self.cpu_progress["value"] = cpu_percent
        self.cpu_percent.config(text=f"{cpu_percent:.1f}%")

        # RAM usage
        ram_percent = ram.percent
        self.ram_progress["value"] = ram_percent
        self.ram_percent.config(text=f"{ram_percent:.1f}%")

        # Disk usage (root filesystem)
        disk = psutil.disk_usage('/')
        disk_percent = disk.percent
        self.disk_progress["value"] = disk_percent
        self.disk_percent.config(text=f"{disk_percent:.1f}%")
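
    # Note on sampling: psutil.cpu_percent() called with no interval compares
    # CPU times against the previous call, so the very first reading after
    # startup is 0.0; subsequent readings (every update_interval seconds) are
    # meaningful without blocking the update thread.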

    def _update_ollama_info(self):
        """Update Ollama information."""
        try:
            # Check whether the server is running
            response = requests.get("http://localhost:11434/api/tags", timeout=2)

            if response.status_code == 200:
                self.status_label.config(text="Online", foreground="green")

                # Update the model list
                data = response.json()
                models = data.get("models", [])

                # Clear the current list
                for item in self.tree.get_children():
                    self.tree.delete(item)

                # Add models
                for model in models:
                    model_name = model.get("name", "")
                    model_size = self._format_size(model.get("size", 0))
                    modified = model.get("modified_at", "")
                    # Convert the date format
                    if modified:
                        try:
                            modified_dt = datetime.fromisoformat(modified.replace('Z', '+00:00'))
                            modified = modified_dt.strftime("%d/%m/%Y %H:%M")
                        except ValueError:
                            pass

                    self.tree.insert("", tk.END, text=model_name, values=(model_name, model_size, modified), iid=model_name)

                # Update the global list and count the models
                self.ollama_models = [model.get("name", "") for model in models]
                model_count = len(models)

                # Check for an active model via nvidia-smi when a GPU is available
                active_models = []
                if self.gpu_available:
                    try:
                        # List processes using the GPU
                        result_processes = subprocess.run(
                            ['nvidia-smi', '--query-compute-apps=pid,name,used_memory', '--format=csv,noheader,nounits'],
                            capture_output=True,
                            text=True,
                            check=False
                        )

                        if result_processes.returncode == 0:
                            processes = result_processes.stdout.strip().split('\n')
                            for process in processes:
                                if process.strip():
                                    process_data = process.split(',')
                                    if len(process_data) >= 3:
                                        pid = process_data[0].strip()
                                        name = process_data[1].strip()
                                        memory = process_data[2].strip()

                                        # If it is Ollama, find which model is active
                                        if "ollama" in name.lower():
                                            try:
                                                process_info = psutil.Process(int(pid))
                                                cmd_line = " ".join(process_info.cmdline())
                                                for model in self.ollama_models:
                                                    if model in cmd_line:
                                                        active_models.append(model)
                                                        self.active_model = model
                                                        self._log(f"Active model detected: {model} (PID {pid}, using {memory} MiB)")
                                                        # Highlight it in the list
                                                        self.tree.selection_set(model)
                                                        self.tree.see(model)
                                                        # Tag the row as active
                                                        values = self.tree.item(model, "values")
                                                        self.tree.item(model, values=values, tags=("active",))
                                                        self.style.configure("Treeview", background="#FFFFFF")
                                                        self.style.map("Treeview",
                                                                       foreground=[("selected", "#000000")],
                                                                       background=[("selected", "#e1e1e1")])
                                                        self.tree.tag_configure("active", background="#e0f7fa")
                                            except Exception as e:
                                                self._log(f"Error analyzing process: {e}")
                    except Exception as e:
                        self._log(f"Error checking GPU processes: {e}")

                # Update the models section title with the counts
                active_count = len(active_models)
                models_text = f"Available Models ({model_count})"
                if active_count > 0:
                    models_text += f" - Active: {', '.join(active_models)}"

                # Find the models LabelFrame and update its text
                for child in self.ollama_frame.winfo_children():
                    if isinstance(child, ttk.LabelFrame) and "Available Models" in child["text"]:
                        child["text"] = models_text
                        break

            else:
                self.status_label.config(text="Error", foreground="red")
                self._log(f"Error connecting to Ollama server: {response.status_code}")

        except requests.exceptions.RequestException:
            self.status_label.config(text="Offline", foreground="red")
            self._log("Ollama server not available")
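
    # Recent Ollama builds also expose GET /api/ps, which reports the models
    # currently loaded into memory; that can be a more direct probe than
    # scanning nvidia-smi process names. A minimal sketch, assuming the
    # endpoint exists on the installed Ollama version (not used by the
    # update loop above):
    def _query_loaded_models(self):
        """Return the names of currently loaded models, or [] on failure (sketch)."""
        try:
            response = requests.get("http://localhost:11434/api/ps", timeout=2)
            if response.status_code == 200:
                return [m.get("name", "") for m in response.json().get("models", [])]
        except requests.exceptions.RequestException:
            pass
        return []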

    def _update_gpu_info(self):
        """Update GPU information."""
        if not self.gpu_available:
            return

        try:
            # Run nvidia-smi to get GPU information
            result = subprocess.run(
                ['nvidia-smi', '--query-gpu=name,memory.total,memory.used,utilization.gpu,temperature.gpu',
                 '--format=csv,noheader,nounits'],
                capture_output=True,
                text=True,
                check=True
            )

            if result.returncode == 0:
                # Parse the results
                gpu_data = result.stdout.strip().split(',')

                if len(gpu_data) >= 5:
                    # Model name
                    model_name = gpu_data[0].strip()
                    self.gpu_model_label.config(text=model_name)

                    # Total and used memory
                    total_memory = float(gpu_data[1].strip())
                    used_memory = float(gpu_data[2].strip())
                    memory_percent = (used_memory / total_memory) * 100 if total_memory > 0 else 0

                    self.gpu_memory_label.config(text=f"{used_memory:.0f} MiB / {total_memory:.0f} MiB")
                    self.gpu_mem_progress["value"] = memory_percent
                    self.gpu_mem_percent.config(text=f"{memory_percent:.1f}%")

                    # GPU utilization
                    gpu_util = float(gpu_data[3].strip())
                    self.gpu_compute_progress["value"] = gpu_util
                    self.gpu_compute_percent.config(text=f"{gpu_util:.1f}%")

                    # Temperature (progress bar scale: 0-100 °C)
                    temp = float(gpu_data[4].strip())
                    self.gpu_temp_progress["value"] = temp
                    self.gpu_temp_label.config(text=f"{temp:.1f}°C")

            # Get GPU processes
            result_processes = subprocess.run(
                ['nvidia-smi', '--query-compute-apps=pid,name,used_memory', '--format=csv,noheader,nounits'],
                capture_output=True,
                text=True
            )

            if result_processes.returncode == 0:
                # Clear the current list
                for item in self.gpu_process_tree.get_children():
                    self.gpu_process_tree.delete(item)

                # Add processes
                processes = result_processes.stdout.strip().split('\n')
                for process in processes:
                    if process.strip():
                        process_data = process.split(',')
                        if len(process_data) >= 3:
                            pid = process_data[0].strip()
                            name = process_data[1].strip()
                            memory = f"{process_data[2].strip()} MiB"

                            # Add to the list
                            self.gpu_process_tree.insert("", tk.END, text=pid, values=(pid, name, memory))

                            # If it is Ollama, record the active model
                            if "ollama" in name.lower():
                                self._log(f"Ollama detected on GPU: PID {pid}, using {memory}")
                                # Find which model is active
                                try:
                                    process_info = psutil.Process(int(pid))
                                    cmd_line = " ".join(process_info.cmdline())
                                    for model in self.ollama_models:
                                        if model in cmd_line:
                                            self.active_model = model
                                            self._log(f"Active model detected: {model}")
                                            break
                                except Exception:
                                    pass

        except subprocess.SubprocessError as e:
            self._log(f"Error executing nvidia-smi: {e}")
        except Exception as e:
            self._log(f"GPU update error: {e}")
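
    # The --query-gpu parsing above assumes a single GPU: nvidia-smi prints
    # one CSV line per device, and split(',') only handles the first record
    # cleanly. A hedged sketch of per-device parsing for multi-GPU hosts:
    #
    #     for line in result.stdout.strip().splitlines():
    #         name, total, used, util, temp = [f.strip() for f in line.split(',')]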

    def _refresh_ollama(self):
        """Force a refresh of Ollama information."""
        self._update_ollama_info()
        self._log("Ollama information refreshed")

    def _check_gpu_available(self):
        """Check whether an NVIDIA GPU is available."""
        try:
            result = subprocess.run(['nvidia-smi'], capture_output=True, text=True)
            return result.returncode == 0
        except (OSError, subprocess.SubprocessError):
            # nvidia-smi is missing or could not be executed
            return False
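
    # A lighter-weight pre-check that avoids spawning a process when the
    # binary is absent (note it only tests presence on PATH, not that the
    # driver actually responds):
    #
    #     import shutil
    #     if shutil.which('nvidia-smi') is None:
    #         return False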

    def _format_size(self, size_bytes):
        """Format a size in human-readable units."""
        if size_bytes < 1024:
            return f"{size_bytes} B"
        elif size_bytes < 1024 * 1024:
            return f"{size_bytes / 1024:.1f} KB"
        elif size_bytes < 1024 * 1024 * 1024:
            return f"{size_bytes / (1024 * 1024):.1f} MB"
        else:
            return f"{size_bytes / (1024 * 1024 * 1024):.1f} GB"
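
    # Example: _format_size(7_365_960_935) returns "6.9 GB". The divisors are
    # binary (1024-based), so strictly speaking the units are MiB/GiB; the
    # original display labels are kept as-is.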

    def _log(self, message):
        """Append a message to the logs."""
        timestamp = datetime.now().strftime("%H:%M:%S")
        log_message = f"[{timestamp}] {message}\n"

        # Append to the text widget and scroll to the bottom
        self.log_text.insert(tk.END, log_message)
        self.log_text.see(tk.END)
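
    # _log is called from the worker thread; inserting into a tk.Text widget
    # carries the same cross-thread caveat noted above for _update_loop, and
    # the same root.after() marshalling sketch would apply here.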

    def _on_close(self):
        """Handle application close."""
        self.running = False
        time.sleep(0.5)  # give the update thread a chance to finish its cycle
        self.root.destroy()
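
    # A stricter shutdown could join the worker instead of sleeping a fixed
    # half second, e.g. (sketch):
    #
    #     self.update_thread.join(timeout=self.update_interval + 1)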


def main():
    """Main entry point."""
    root = tk.Tk()
    app = SystemMonitor(root)  # keep a reference for the lifetime of the loop
    root.mainloop()


if __name__ == "__main__":
    main()