""" System resource monitor for Ollama and NVIDIA GPU resources """ import tkinter as tk from tkinter import ttk import psutil import threading import time import subprocess import json import os import platform import requests from datetime import datetime class SystemMonitor: def __init__(self, root): self.root = root self.root.title("LLM Lab Monitor") self.root.geometry("800x600") self.root.minsize(700, 500) # Style self.style = ttk.Style() self.style.theme_use('alt') # 'clam', 'alt', 'default', 'classic' # Variables self.update_interval = 2 # seconds self.running = True self.ollama_models = [] self.active_model = None self.gpu_available = self._check_gpu_available() # Create UI self._create_widgets() # Start update thread self.update_thread = threading.Thread(target=self._update_loop) self.update_thread.daemon = True self.update_thread.start() # Intercept window close self.root.protocol("WM_DELETE_WINDOW", self._on_close) def _create_widgets(self): # Create notebook (tabs) self.notebook = ttk.Notebook(self.root) self.notebook.pack(fill=tk.BOTH, expand=True, padx=10, pady=10) # Tab 1: System monitoring self.system_frame = ttk.Frame(self.notebook) self.notebook.add(self.system_frame, text="System") # Tab 2: Ollama self.ollama_frame = ttk.Frame(self.notebook) self.notebook.add(self.ollama_frame, text="Ollama") # Tab 3: GPU self.gpu_frame = ttk.Frame(self.notebook) self.notebook.add(self.gpu_frame, text="GPU") # Tab 4: Logs self.logs_frame = ttk.Frame(self.notebook) self.notebook.add(self.logs_frame, text="Logs") # === System tab configuration === system_label = ttk.Label(self.system_frame, text="System Resources", font=("Arial", 14, "bold")) system_label.pack(pady=10) # System info system_info_frame = ttk.LabelFrame(self.system_frame, text="System Information") system_info_frame.pack(fill=tk.X, padx=10, pady=5) # OS os_frame = ttk.Frame(system_info_frame) os_frame.pack(fill=tk.X, padx=5, pady=2) ttk.Label(os_frame, text="Operating System:").pack(side=tk.LEFT, padx=5) self.os_label = ttk.Label(os_frame, text="") self.os_label.pack(side=tk.LEFT, padx=5) # CPU cpu_frame = ttk.Frame(system_info_frame) cpu_frame.pack(fill=tk.X, padx=5, pady=2) ttk.Label(cpu_frame, text="Processor:").pack(side=tk.LEFT, padx=5) self.cpu_label = ttk.Label(cpu_frame, text="") self.cpu_label.pack(side=tk.LEFT, padx=5) # RAM ram_frame = ttk.Frame(system_info_frame) ram_frame.pack(fill=tk.X, padx=5, pady=2) ttk.Label(ram_frame, text="RAM Memory:").pack(side=tk.LEFT, padx=5) self.ram_label = ttk.Label(ram_frame, text="") self.ram_label.pack(side=tk.LEFT, padx=5) # Progress bars progress_frame = ttk.LabelFrame(self.system_frame, text="Resource Usage") progress_frame.pack(fill=tk.X, padx=10, pady=5) # CPU Usage cpu_usage_frame = ttk.Frame(progress_frame) cpu_usage_frame.pack(fill=tk.X, padx=5, pady=5) ttk.Label(cpu_usage_frame, text="CPU:").pack(side=tk.LEFT, padx=5) self.cpu_progress = ttk.Progressbar(cpu_usage_frame, orient=tk.HORIZONTAL, length=300, mode='determinate') self.cpu_progress.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True) self.cpu_percent = ttk.Label(cpu_usage_frame, text="0%") self.cpu_percent.pack(side=tk.LEFT, padx=5) # RAM Usage ram_usage_frame = ttk.Frame(progress_frame) ram_usage_frame.pack(fill=tk.X, padx=5, pady=5) ttk.Label(ram_usage_frame, text="RAM:").pack(side=tk.LEFT, padx=5) self.ram_progress = ttk.Progressbar(ram_usage_frame, orient=tk.HORIZONTAL, length=300, mode='determinate') self.ram_progress.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True) self.ram_percent = ttk.Label(ram_usage_frame, text="0%") self.ram_percent.pack(side=tk.LEFT, padx=5) # Disk Usage disk_usage_frame = ttk.Frame(progress_frame) disk_usage_frame.pack(fill=tk.X, padx=5, pady=5) ttk.Label(disk_usage_frame, text="Disk:").pack(side=tk.LEFT, padx=5) self.disk_progress = ttk.Progressbar(disk_usage_frame, orient=tk.HORIZONTAL, length=300, mode='determinate') self.disk_progress.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True) self.disk_percent = ttk.Label(disk_usage_frame, text="0%") self.disk_percent.pack(side=tk.LEFT, padx=5) # === Ollama tab configuration === ollama_label = ttk.Label(self.ollama_frame, text="Ollama Server", font=("Arial", 14, "bold")) ollama_label.pack(pady=10) # Server status server_frame = ttk.LabelFrame(self.ollama_frame, text="Server Status") server_frame.pack(fill=tk.X, padx=10, pady=5) # Status status_frame = ttk.Frame(server_frame) status_frame.pack(fill=tk.X, padx=5, pady=2) ttk.Label(status_frame, text="Status:").pack(side=tk.LEFT, padx=5) self.status_label = ttk.Label(status_frame, text="Checking...") self.status_label.pack(side=tk.LEFT, padx=5) # URL url_frame = ttk.Frame(server_frame) url_frame.pack(fill=tk.X, padx=5, pady=2) ttk.Label(url_frame, text="URL:").pack(side=tk.LEFT, padx=5) self.url_label = ttk.Label(url_frame, text="http://localhost:11434") self.url_label.pack(side=tk.LEFT, padx=5) # Available models models_frame = ttk.LabelFrame(self.ollama_frame, text="Available Models") models_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5) # Models list self.tree = ttk.Treeview(models_frame, columns=("Nom", "Taille", "Modifié"), show='headings') self.tree.heading("Nom", text="LLM Model") self.tree.heading("Taille", text="Size") self.tree.heading("Modifié", text="Modified") self.tree.column("Nom", width=150) self.tree.column("Taille", width=100) self.tree.column("Modifié", width=150) self.tree.pack(fill=tk.BOTH, expand=True, padx=5, pady=5) # Buttons button_frame = ttk.Frame(self.ollama_frame) button_frame.pack(fill=tk.X, padx=10, pady=5) refresh_button = ttk.Button(button_frame, text="Refresh", command=self._refresh_ollama) refresh_button.pack(side=tk.LEFT, padx=5) # === GPU tab configuration === gpu_label = ttk.Label(self.gpu_frame, text="GPU Resources", font=("Arial", 14, "bold")) gpu_label.pack(pady=10) if self.gpu_available: # GPU Info gpu_info_frame = ttk.LabelFrame(self.gpu_frame, text="GPU Information") gpu_info_frame.pack(fill=tk.X, padx=10, pady=5) # GPU Model gpu_model_frame = ttk.Frame(gpu_info_frame) gpu_model_frame.pack(fill=tk.X, padx=5, pady=2) ttk.Label(gpu_model_frame, text="Model:").pack(side=tk.LEFT, padx=5) self.gpu_model_label = ttk.Label(gpu_model_frame, text="") self.gpu_model_label.pack(side=tk.LEFT, padx=5) # GPU Memory gpu_memory_frame = ttk.Frame(gpu_info_frame) gpu_memory_frame.pack(fill=tk.X, padx=5, pady=2) ttk.Label(gpu_memory_frame, text="Memory:").pack(side=tk.LEFT, padx=5) self.gpu_memory_label = ttk.Label(gpu_memory_frame, text="") self.gpu_memory_label.pack(side=tk.LEFT, padx=5) # GPU Usage gpu_usage_frame = ttk.LabelFrame(self.gpu_frame, text="Utilization") gpu_usage_frame.pack(fill=tk.X, padx=10, pady=5) # GPU Compute gpu_compute_frame = ttk.Frame(gpu_usage_frame) gpu_compute_frame.pack(fill=tk.X, padx=5, pady=5) ttk.Label(gpu_compute_frame, text="Compute:").pack(side=tk.LEFT, padx=5) self.gpu_compute_progress = ttk.Progressbar(gpu_compute_frame, orient=tk.HORIZONTAL, length=300, mode='determinate') self.gpu_compute_progress.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True) self.gpu_compute_percent = ttk.Label(gpu_compute_frame, text="0%") self.gpu_compute_percent.pack(side=tk.LEFT, padx=5) # GPU Memory gpu_mem_usage_frame = ttk.Frame(gpu_usage_frame) gpu_mem_usage_frame.pack(fill=tk.X, padx=5, pady=5) ttk.Label(gpu_mem_usage_frame, text="Memory:").pack(side=tk.LEFT, padx=5) self.gpu_mem_progress = ttk.Progressbar(gpu_mem_usage_frame, orient=tk.HORIZONTAL, length=300, mode='determinate') self.gpu_mem_progress.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True) self.gpu_mem_percent = ttk.Label(gpu_mem_usage_frame, text="0%") self.gpu_mem_percent.pack(side=tk.LEFT, padx=5) # Temperature gpu_temp_frame = ttk.Frame(gpu_usage_frame) gpu_temp_frame.pack(fill=tk.X, padx=5, pady=5) ttk.Label(gpu_temp_frame, text="Temperature:").pack(side=tk.LEFT, padx=5) self.gpu_temp_progress = ttk.Progressbar(gpu_temp_frame, orient=tk.HORIZONTAL, length=300, mode='determinate') self.gpu_temp_progress.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True) self.gpu_temp_label = ttk.Label(gpu_temp_frame, text="0°C") self.gpu_temp_label.pack(side=tk.LEFT, padx=5) # Active processes graphs gpu_processes_frame = ttk.LabelFrame(self.gpu_frame, text="GPU Processes") gpu_processes_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5) # Process list self.gpu_process_tree = ttk.Treeview(gpu_processes_frame, columns=("PID", "Nom", "Mémoire"), show='headings') self.gpu_process_tree.heading("PID", text="PID") self.gpu_process_tree.heading("Nom", text="Process") self.gpu_process_tree.heading("Mémoire", text="Memory") self.gpu_process_tree.column("PID", width=50) self.gpu_process_tree.column("Nom", width=200) self.gpu_process_tree.column("Mémoire", width=100) self.gpu_process_tree.pack(fill=tk.BOTH, expand=True, padx=5, pady=5) else: no_gpu_label = ttk.Label(self.gpu_frame, text="No NVIDIA GPU detected.", font=("Arial", 12)) no_gpu_label.pack(pady=50) install_label = ttk.Label(self.gpu_frame, text="To monitor an NVIDIA GPU, install nvidia-smi and nvitop.", font=("Arial", 10)) install_label.pack(pady=10) # === Logs tab configuration === logs_label = ttk.Label(self.logs_frame, text="Activity Logs", font=("Arial", 14, "bold")) logs_label.pack(pady=10) # Log area log_area_frame = ttk.Frame(self.logs_frame) log_area_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5) # Scrollbar scrollbar = ttk.Scrollbar(log_area_frame) scrollbar.pack(side=tk.RIGHT, fill=tk.Y) # Text area self.log_text = tk.Text(log_area_frame, yscrollcommand=scrollbar.set) self.log_text.pack(fill=tk.BOTH, expand=True) scrollbar.config(command=self.log_text.yview) # Buttons log_button_frame = ttk.Frame(self.logs_frame) log_button_frame.pack(fill=tk.X, padx=10, pady=5) clear_log_button = ttk.Button(log_button_frame, text="Clear Logs", command=lambda: self.log_text.delete(1.0, tk.END)) clear_log_button.pack(side=tk.LEFT, padx=5) # Status bar at bottom self.status_bar = ttk.Label(self.root, text="LLM Lab Monitor - Last update: Never", relief=tk.SUNKEN, anchor=tk.W) self.status_bar.pack(side=tk.BOTTOM, fill=tk.X) def _update_loop(self): """Main update thread""" while self.running: try: # System update self._update_system_info() # Ollama update if self.notebook.index(self.notebook.select()) == 1: # Ollama tab self._update_ollama_info() # GPU update if self.gpu_available and self.notebook.index(self.notebook.select()) == 2: # GPU tab self._update_gpu_info() # Status bar update now = datetime.now().strftime("%H:%M:%S") self.status_bar.config(text=f"LLM Lab Monitor - Last update: {now}") except Exception as e: self._log(f"Update error: {str(e)}") time.sleep(self.update_interval) def _update_system_info(self): """Updates system information""" # System information self.os_label.config(text=f"{platform.system()} {platform.release()}") self.cpu_label.config(text=f"{psutil.cpu_count(logical=False)} cores ({psutil.cpu_count()} threads)") # Advanced RAM detection try: ram = psutil.virtual_memory() total_ram = ram.total / (1024 * 1024 * 1024) # GB # Additional check for Linux if platform.system() == "Linux": try: # Use /proc/meminfo for more accurate detection with open('/proc/meminfo', 'r') as f: for line in f: if 'MemTotal' in line: # MemTotal is in kB mem_kb = int(line.split()[1]) linux_ram = mem_kb / (1024 * 1024) # GB # Use the higher value total_ram = max(total_ram, linux_ram) break except Exception as e: self._log(f"Error reading /proc/meminfo: {str(e)}") self.ram_label.config(text=f"{total_ram:.1f} GB") except Exception as e: self._log(f"Error detecting RAM: {str(e)}") self.ram_label.config(text="Detection failed") # CPU Usage cpu_percent = psutil.cpu_percent() self.cpu_progress["value"] = cpu_percent self.cpu_percent.config(text=f"{cpu_percent:.1f}%") # RAM Usage ram_percent = ram.percent self.ram_progress["value"] = ram_percent self.ram_percent.config(text=f"{ram_percent:.1f}%") # Disk usage disk = psutil.disk_usage('/') disk_percent = disk.percent self.disk_progress["value"] = disk_percent self.disk_percent.config(text=f"{disk_percent:.1f}%") def _update_ollama_info(self): """Updates Ollama information""" try: # Check if server is running response = requests.get("http://localhost:11434/api/tags", timeout=2) if response.status_code == 200: self.status_label.config(text="Online", foreground="green") # Update model list data = response.json() models = data.get("models", []) # Clear current list for item in self.tree.get_children(): self.tree.delete(item) # Add models for model in models: model_name = model.get("name", "") model_size = self._format_size(model.get("size", 0)) modified = model.get("modified_at", "") # Convert date format if modified: try: modified_dt = datetime.fromisoformat(modified.replace('Z', '+00:00')) modified = modified_dt.strftime("%d/%m/%Y %H:%M") except: pass self.tree.insert("", tk.END, text=model_name, values=(model_name, model_size, modified), iid=model_name) # Update global list and count active models self.ollama_models = [model.get("name", "") for model in models] model_count = len(models) # Check if there's an active model via nvidia-smi if GPU available active_models = [] if self.gpu_available: try: # Check processes using GPU result_processes = subprocess.run( ['nvidia-smi', '--query-compute-apps=pid,name,used_memory', '--format=csv,noheader,nounits'], capture_output=True, text=True, check=False ) if result_processes.returncode == 0: processes = result_processes.stdout.strip().split('\n') for process in processes: if process.strip(): process_data = process.split(',') if len(process_data) >= 3: pid = process_data[0].strip() name = process_data[1].strip() memory = process_data[2].strip() # If it's Ollama, search which model is active if "ollama" in name.lower(): try: process_info = psutil.Process(int(pid)) cmd_line = " ".join(process_info.cmdline()) for model in self.ollama_models: if model in cmd_line: active_models.append(model) self.active_model = model self._log(f"Active model detected: {model} (PID {pid}, using {memory} MiB)") # Highlight in list self.tree.selection_set(model) self.tree.see(model) # Add "ACTIVE" to the list values = self.tree.item(model, "values") self.tree.item(model, values=values, tags=("active",)) self.style.configure("Treeview", background="#FFFFFF") self.style.map("Treeview", foreground=[("selected", "#000000")], background=[("selected", "#e1e1e1")]) self.tree.tag_configure("active", background="#e0f7fa") except Exception as e: self._log(f"Error analyzing process: {str(e)}") except Exception as e: self._log(f"Error checking GPU processes: {str(e)}") # Update models section title with count active_count = len(active_models) models_text = f"Available Models ({model_count})" if active_count > 0: models_text += f" - Active: {', '.join(active_models)}" # Find the models LabelFrame and update its text for child in self.ollama_frame.winfo_children(): if isinstance(child, ttk.LabelFrame) and "Available Models" in child["text"]: child["text"] = models_text break else: self.status_label.config(text="Error", foreground="red") self._log(f"Error connecting to Ollama server: {response.status_code}") except requests.exceptions.RequestException: self.status_label.config(text="Offline", foreground="red") self._log("Ollama server not available") def _update_gpu_info(self): """Updates GPU information""" if not self.gpu_available: return try: # Run nvidia-smi to get GPU information result = subprocess.run( ['nvidia-smi', '--query-gpu=name,memory.total,memory.used,utilization.gpu,temperature.gpu', '--format=csv,noheader,nounits'], capture_output=True, text=True, check=True ) if result.returncode == 0: # Parse results gpu_data = result.stdout.strip().split(',') if len(gpu_data) >= 5: # Model name model_name = gpu_data[0].strip() self.gpu_model_label.config(text=model_name) # Total and used memory total_memory = float(gpu_data[1].strip()) used_memory = float(gpu_data[2].strip()) memory_percent = (used_memory / total_memory) * 100 if total_memory > 0 else 0 self.gpu_memory_label.config(text=f"{used_memory:.0f} MiB / {total_memory:.0f} MiB") self.gpu_mem_progress["value"] = memory_percent self.gpu_mem_percent.config(text=f"{memory_percent:.1f}%") # GPU utilization gpu_util = float(gpu_data[3].strip()) self.gpu_compute_progress["value"] = gpu_util self.gpu_compute_percent.config(text=f"{gpu_util:.1f}%") # Temperature temp = float(gpu_data[4].strip()) # Temperature scale: 0-100°C self.gpu_temp_progress["value"] = temp self.gpu_temp_label.config(text=f"{temp:.1f}°C") # Get GPU processes result_processes = subprocess.run( ['nvidia-smi', '--query-compute-apps=pid,name,used_memory', '--format=csv,noheader,nounits'], capture_output=True, text=True ) if result_processes.returncode == 0: # Clear current list for item in self.gpu_process_tree.get_children(): self.gpu_process_tree.delete(item) # Add processes processes = result_processes.stdout.strip().split('\n') for process in processes: if process.strip(): process_data = process.split(',') if len(process_data) >= 3: pid = process_data[0].strip() name = process_data[1].strip() memory = f"{process_data[2].strip()} MiB" # Add to list self.gpu_process_tree.insert("", tk.END, text=pid, values=(pid, name, memory)) # If it's Ollama, mark as active model if "ollama" in name.lower(): self._log(f"Ollama detected on GPU: PID {pid}, using {memory}") # Find which model is active try: process_info = psutil.Process(int(pid)) cmd_line = " ".join(process_info.cmdline()) for model in self.ollama_models: if model in cmd_line: self.active_model = model self._log(f"Active model detected: {model}") break except: pass except subprocess.SubprocessError as e: self._log(f"Error executing nvidia-smi: {str(e)}") except Exception as e: self._log(f"GPU update error: {str(e)}") def _refresh_ollama(self): """Force refresh of Ollama information""" self._update_ollama_info() self._log("Ollama information refreshed") def _check_gpu_available(self): """Check if an NVIDIA GPU is available""" try: result = subprocess.run(['nvidia-smi'], capture_output=True, text=True) return result.returncode == 0 except: return False def _format_size(self, size_bytes): """Format size in readable units""" if size_bytes < 1024: return f"{size_bytes} B" elif size_bytes < 1024 * 1024: return f"{size_bytes/1024:.1f} KB" elif size_bytes < 1024 * 1024 * 1024: return f"{size_bytes/(1024*1024):.1f} MB" else: return f"{size_bytes/(1024*1024*1024):.1f} GB" def _log(self, message): """Add a message to logs""" timestamp = datetime.now().strftime("%H:%M:%S") log_message = f"[{timestamp}] {message}\n" # Add to text self.log_text.insert(tk.END, log_message) self.log_text.see(tk.END) # Scroll down def _on_close(self): """Handle application closure""" self.running = False time.sleep(0.5) # Wait for thread to terminate self.root.destroy() def main(): """Main entry point""" root = tk.Tk() app = SystemMonitor(root) root.mainloop() if __name__ == "__main__": main()