Files
smart-report/smart_report/core.py
kilyabin 19b79a4e13 feat: S.M.A.R.T. disk health monitoring with CLI and GUI
- Add core module with SMART data parsing and health calculation
- Add CLI with Rich-based terminal UI and health bar visualization
- Add GUI with PyQt6 tabs for summary and detailed views
- Support multiple health indicators (ID 231, 169, 233) for different SSD manufacturers
- Add bilingual support (Russian/English) with auto-detection
- Add GitHub Actions workflow for building binaries on Linux, Windows, macOS
- Calculate health based on reallocated sectors, pending sectors, SSD life, and more

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
2026-03-15 00:17:01 +04:00

545 lines
21 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Core SMART data collection logic with bilingual support."""
import locale
import os
import shutil
import subprocess
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple
@dataclass
class DiskSmartData:
    """Container for everything collected about one disk.

    Only ``disk`` is required; every other field starts at a neutral default
    and is filled in as the smartctl output is parsed and the health score
    is computed.
    """

    # --- Identity -----------------------------------------------------------
    disk: str
    model: str = "Unknown"
    size: str = "Unknown"

    # --- Headline readings (kept as display strings) ------------------------
    status: str = "UNKNOWN"
    temp: str = "N/A"
    power_hours: str = "N/A"
    power_cycles: str = "N/A"

    # --- Sector-level error counters ----------------------------------------
    reallocated: int = 0
    pending: int = 0
    uncorrectable: int = 0

    # --- Attribute table, keyed by attribute ID -----------------------------
    attrs: Dict[str, dict] = field(default_factory=dict)

    # --- Computed verdict ---------------------------------------------------
    health: int = 100
    warnings: List[str] = field(default_factory=list)

    # --- Collection outcome -------------------------------------------------
    error: Optional[str] = None
    smart_supported: bool = True

    # --- SSD wear indicators (100 means "no wear reported") -----------------
    ssd_life_left: int = 100
    remaining_lifetime: int = 100  # attribute ID 169
    media_wearout_indicator: int = 100  # attribute ID 233

    # --- Secondary error counters used by the health calculation ------------
    crc_errors: int = 0
    program_fail_count: int = 0
    erase_fail_count: int = 0
    command_timeout: int = 0
    spin_retry_count: int = 0
    reallocated_event_count: int = 0
    reported_uncorrect: int = 0

    # --- Write volume derived from the host-writes attribute ----------------
    host_writes_gb: float = 0
def get_locale() -> str:
    """Detect the UI language from the system locale.

    Returns:
        ``"ru"`` when the detected locale name starts with "ru"
        (case-insensitive, so ``ru_RU.UTF-8`` and ``Russian_Russia`` both
        match); ``"en"`` in every other case, including when nothing can be
        detected.
    """
    code = ""
    # Same variables, in the same order, that the deprecated
    # locale.getdefaultlocale() consulted.
    for variable in ("LC_ALL", "LC_CTYPE", "LANG", "LANGUAGE"):
        value = os.environ.get(variable)
        if value:
            # LANGUAGE may hold a colon-separated priority list; use the first.
            code = value.split(":")[0] if variable == "LANGUAGE" else value
            break

    if not code:
        # No environment hint (typical on Windows): ask the C library.
        try:
            code = locale.getlocale()[0] or ""
        except (ValueError, TypeError):
            code = ""

    return "ru" if code.lower().startswith("ru") else "en"
# Localized user-facing strings: language code ("en" / "ru") -> message key -> text.
# Both languages define the same set of keys. A "{0}" placeholder is filled by
# get_message() through str.format() with the positional argument it receives.
MESSAGES = {
    "en": {
        "smart_not_installed": "❌ smartmontools is not installed!",
        "install_command": "Install: sudo pacman -S smartmontools",
        "no_disks_found": "❌ No disks found",
        "disk_monitor": "DISK HEALTH MONITORING (S.M.A.R.T.)",
        "disk": "Disk",
        "model": "Model",
        "size": "Size",
        "health": "Health",
        "status": "Status",
        "temperature": "Temperature",
        "power_hours": "Power-On Hours",
        "power_cycles": "Cycles",
        "critical_attrs": "Critical Attributes",
        "reallocated": "Reallocated Sectors",
        "pending": "Pending Sectors",
        "uncorrectable": "Uncorrectable Errors",
        "smart_status_bad": "🔴 S.M.A.R.T. status: BAD",
        "critical_reallocated_500": "🔴 CRITICAL: {0} reallocated sectors! Disk may fail!",
        "warning_reallocated_100": "🟠 WARNING: {0} reallocated sectors. Start backup!",
        "warning_reallocated_10": "🟡 WARNING: {0} reallocated sectors",
        "critical_pending": "🔴 CRITICAL: {0} pending sectors!",
        "critical_uncorrectable": "🔴 CRITICAL: {0} uncorrectable errors!",
        "smart_not_supported": "S.M.A.R.T.: Not supported",
        "running_as_root": "✓ Running as root",
        "run_with_sudo": "⚠️ Run with sudo for full access",
        "collecting_data": "Collecting data...",
        "disks_found": "Found {0} disk(s)",
        "error": "Error",
        "refresh": "🔄 Refresh",
        "disk_health_report": "📊 S.M.A.R.T. Disk Health Report",
        # Warnings emitted by the extended health checks
        "warning_ssd_life": "🟠 SSD life remaining: {0}%",
        "warning_crc_errors": "🟡 CRC errors: {0} (check SATA cable)",
        "warning_program_fail": "🔴 Program failures: {0}",
        "warning_erase_fail": "🔴 Erase failures: {0}",
        "warning_command_timeout": "🟡 Command timeouts: {0}",
        "warning_spin_retry": "🟡 Spin retry count: {0}",
        "warning_reallocated_event": "🟡 Reallocation events: {0}",
        "warning_reported_uncorrect": "🔴 Reported uncorrect errors: {0}",
    },
    "ru": {
        "smart_not_installed": "❌ smartmontools не установлен!",
        "install_command": "Установите: sudo pacman -S smartmontools",
        "no_disks_found": "❌ Диски не найдены",
        "disk_monitor": "МОНИТОРИНГ ЗДОРОВЬЯ ДИСКОВ (S.M.A.R.T.)",
        "disk": "Диск",
        "model": "Модель",
        "size": "Размер",
        "health": "Здоровье",
        "status": "Статус",
        "temperature": "Температура",
        "power_hours": "Часов работы",
        "power_cycles": "Циклов",
        "critical_attrs": "Критические атрибуты",
        "reallocated": "Переназначенные сектора",
        "pending": "Ожидающие сектора",
        "uncorrectable": "Неисправимые ошибки",
        "smart_status_bad": "🔴 S.M.A.R.T. статус: BAD",
        "critical_reallocated_500": "🔴 КРИТИЧНО: {0} переназначенных секторов! Диск может отказать!",
        "warning_reallocated_100": "🟠 ВНИМАНИЕ: {0} переназначенных секторов. Начните резервное копирование!",
        "warning_reallocated_10": "🟡 ВНИМАНИЕ: {0} переназначенных секторов",
        "critical_pending": "🔴 КРИТИЧНО: {0} ожидающих секторов!",
        "critical_uncorrectable": "🔴 КРИТИЧНО: {0} неисправимых ошибок!",
        "smart_not_supported": "S.M.A.R.T.: Не поддерживается",
        "running_as_root": "✓ Запуск от root",
        "run_with_sudo": "⚠️ Запустите с sudo для полного доступа",
        "collecting_data": "Сбор данных...",
        "disks_found": "Найдено дисков: {0}",
        "error": "Ошибка",
        "refresh": "🔄 Обновить",
        "disk_health_report": "📊 Отчет о здоровье дисков (S.M.A.R.T.)",
        # Warnings emitted by the extended health checks
        "warning_ssd_life": "🟠 Остаток ресурса SSD: {0}%",
        "warning_crc_errors": "🟡 Ошибки CRC: {0} (проверьте SATA кабель)",
        "warning_program_fail": "🔴 Ошибки программирования: {0}",
        "warning_erase_fail": "🔴 Ошибки стирания: {0}",
        "warning_command_timeout": "🟡 Таймауты команд: {0}",
        "warning_spin_retry": "🟡 Повторы раскрутки: {0}",
        "warning_reallocated_event": "🟡 События переназначения: {0}",
        "warning_reported_uncorrect": "🔴 Сообщённые ошибки: {0}",
    },
}
def get_message(key: str, lang: str = None, *args) -> str:
    """Look up a UI string and optionally fill in its placeholders.

    Args:
        key: Message identifier in ``MESSAGES``.
        lang: Language code; when ``None`` it is detected via ``get_locale()``.
        *args: Positional values substituted into the template with
            ``str.format``. When none are given the template is returned
            as-is, placeholders included.

    Returns:
        The text for ``lang``; an unknown language uses the English table,
        a key missing there falls back to the English text, and a key
        missing everywhere yields ``key`` itself.
    """
    language = get_locale() if lang is None else lang
    english = MESSAGES["en"]
    catalog = MESSAGES.get(language, english)
    template = catalog.get(key, english.get(key, key))
    return template.format(*args) if args else template
def check_smartctl() -> bool:
    """Report whether a ``smartctl`` executable is available on ``PATH``.

    Uses ``shutil.which`` rather than spawning the external ``which``
    command, so the check also works (and cannot raise) on systems where
    ``which`` itself is not installed.

    Returns:
        ``True`` if ``smartctl`` was found, ``False`` otherwise.
    """
    return shutil.which("smartctl") is not None
def get_disk_list() -> List[str]:
    """List the whole-disk block devices known to ``lsblk``.

    Only entries whose lsblk ``TYPE`` is ``disk`` are returned, so loop
    devices, optical drives and software-RAID/LVM nodes are skipped.

    Returns:
        Device paths such as ``/dev/sda`` or ``/dev/nvme0n1``; an empty list
        when ``lsblk`` is unavailable or fails.
    """
    try:
        result = subprocess.run(
            ["lsblk", "-d", "-n", "-o", "NAME,TYPE"],
            capture_output=True,
            text=True,
            check=True,
        )
    except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
        # Best effort: no lsblk (or a failing one) simply means "no disks".
        return []

    disks = []
    for line in result.stdout.splitlines():
        columns = line.split()
        if len(columns) >= 2 and columns[1] == "disk":
            disks.append(f"/dev/{columns[0]}")
    return disks
def get_disk_info(disk: str) -> tuple:
    """Ask ``lsblk`` for the model name and size of ``disk``.

    Args:
        disk: Device path passed to ``lsblk`` unchanged.

    Returns:
        A ``(model, size)`` pair of strings. A column that comes back empty
        is reported as ``"Unknown"``; if running ``lsblk`` raises, both
        items are ``"Unknown"``.
    """
    fallback = "Unknown"

    def column(name):
        # One lsblk call per column; an empty answer becomes the fallback.
        completed = subprocess.run(
            ["lsblk", "-d", "-n", "-o", name, disk],
            capture_output=True,
            text=True,
        )
        return completed.stdout.strip() or fallback

    try:
        model = column("MODEL")
        size = column("SIZE")
    except Exception:
        return fallback, fallback
    return model, size
# Counters copied from the RAW_VALUE column of the ATA attribute table:
# (attribute IDs, attribute-name fragments, DiskSmartData field).
# A row matches when its ID is listed OR its name contains a fragment.
_RAW_COUNTERS = (
    (("5",), ("Reallocated_Sector_Ct",), "reallocated"),
    (("197",), ("Current_Pending_Sect",), "pending"),
    (("198",), ("Offline_Uncorrectable",), "uncorrectable"),
    (("231",), ("SSD_Life_Left",), "ssd_life_left"),
    (("199",), ("CRC_Error_Count", "UDMA_CRC_Error"), "crc_errors"),
    (("181",), ("Program_Fail_Count",), "program_fail_count"),
    (("172", "182"), ("Erase_Fail_Count",), "erase_fail_count"),
    (("188",), ("Command_Timeout",), "command_timeout"),
    (("10",), ("Spin_Retry_Count",), "spin_retry_count"),
    (("196",), ("Reallocated_Event_Count",), "reallocated_event_count"),
    (("187",), ("Reported_Uncorrect",), "reported_uncorrect"),
)


def _parse_count(text: str) -> int:
    """Convert a counter such as ``1,234`` to int, ignoring group separators.

    Raises ValueError when ``text`` contains no digits at all.
    """
    return int("".join(ch for ch in text if ch in "0123456789"))


def _detect_status(output: str) -> str:
    """Translate smartctl's overall health verdict line to GOOD/BAD/UNKNOWN."""
    for line in output.splitlines():
        if "self-assessment test result" in line or "SMART Health Status" in line:
            verdict = line.partition(":")[2]
            # ATA/NVMe print PASSED or FAILED!; SCSI/SAS drives print OK.
            if "PASSED" in verdict or verdict.strip() == "OK":
                return "GOOD"
            if "FAILED" in verdict:
                return "BAD"
    return "UNKNOWN"


def _apply_nvme_line(data: "DiskSmartData", line: str) -> None:
    """Pick up the ``Label: value`` readings of the NVMe health log."""
    if ":" not in line:
        return
    value = line.split(":")[1].strip()

    # "Temperature:                        35 Celsius"
    if "Temperature:" in line:
        tokens = value.split()
        if tokens:
            data.temp = f"{tokens[0]}°C"

    # "Power On Hours:                     1,234"
    if "Power On Hours:" in line:
        try:
            hours = _parse_count(value)
        except ValueError:
            pass
        else:
            data.power_hours = f"{hours}h ({hours // 24}d)"

    # smartctl labels this "Power Cycles:"; the longer spelling is accepted
    # as well for compatibility with the previous parser.
    if "Power Cycles:" in line or "Power Cycle Count:" in line:
        data.power_cycles = value

    # "Media and Data Integrity Errors:    0"
    if "Media and Data Integrity Errors:" in line:
        try:
            data.uncorrectable = _parse_count(value)
        except ValueError:
            pass


def _apply_ata_row(data: "DiskSmartData", parts: List[str]) -> None:
    """Store one row of the ATA attribute table on ``data``.

    ``parts`` is the whitespace-split row: ID, NAME, FLAG, VALUE, WORST,
    THRESH, TYPE, UPDATED, WHEN_FAILED, RAW_VALUE (first raw token only).
    """
    attr_id, name = parts[0], parts[1]
    value, worst, threshold, raw = parts[3], parts[4], parts[5], parts[9]

    # Temperature (ID 194)
    if attr_id == "194" or "Temperature_Celsius" in name:
        data.temp = f"{raw}°C"

    # Power-on hours (ID 9)
    if attr_id == "9" or "Power_On_Hours" in name:
        try:
            hours = int(raw)
        except ValueError:
            pass
        else:
            data.power_hours = f"{hours}h ({hours // 24}d)"

    # Power cycle count (ID 12)
    if attr_id == "12" or "Power_Cycle_Count" in name:
        data.power_cycles = raw

    # Plain integer counters taken from the raw column.
    for ids, fragments, field_name in _RAW_COUNTERS:
        if attr_id in ids or any(fragment in name for fragment in fragments):
            try:
                setattr(data, field_name, int(raw))
            except ValueError:
                pass

    # Wear indicators use the normalized VALUE column, not the raw one, and
    # require both the ID and the name to match.
    if attr_id == "169" and "Remaining_Lifetime" in name:
        try:
            data.remaining_lifetime = int(value)
        except ValueError:
            pass
    if attr_id == "233" and ("Media_Wearout" in name or "Wear_Leveling" in name):
        try:
            data.media_wearout_indicator = int(value)
        except ValueError:
            pass

    # Host writes (ID 241): raw value is treated as a count of 32 MiB blocks.
    if attr_id == "241" or "Host_Writes" in name or "Lifetime_Writes" in name:
        try:
            data.host_writes_gb = round(int(raw) * 32 / 1024, 1)
        except ValueError:
            pass

    # Keep the full row for the detailed views.
    data.attrs[attr_id] = {
        "name": name,
        "value": value,
        "worst": worst,
        "threshold": threshold,
        "raw": raw,
    }


def parse_smart_data(disk: str) -> Optional[DiskSmartData]:
    """Run ``smartctl -a`` on ``disk`` and parse the result (ATA and NVMe).

    Args:
        disk: Device path handed to smartctl.

    Returns:
        A ``DiskSmartData`` for the disk. When smartctl cannot be started
        the exception text is stored in ``error``; when it cannot be started
        or prints nothing, ``smart_supported`` is set to ``False``. The
        function never returns ``None``; the Optional annotation is kept for
        compatibility with existing callers.
    """
    data = DiskSmartData(disk=disk)

    command = ["smartctl", "-a", disk]
    # Elevate only when not already root, so a root session does not depend
    # on sudo being installed.
    if hasattr(os, "geteuid") and os.geteuid() != 0:
        command.insert(0, "sudo")

    try:
        output = subprocess.run(command, capture_output=True, text=True).stdout
    except (OSError, ValueError, subprocess.SubprocessError) as exc:
        data.error = str(exc)
        data.smart_supported = False
        return data

    if not output.strip():
        data.smart_supported = False
        return data

    # Read the verdict from its own line: a plain substring test for
    # "FAILED" also matches the attribute-table header "WHEN_FAILED".
    data.status = _detect_status(output)

    # Anything without the ATA-style verdict line is parsed as "Label: value".
    is_nvme = "NVMe" in output or "SMART overall-health" not in output

    for line in output.split("\n"):
        parts = line.split()
        if len(parts) < 10:
            if is_nvme:
                _apply_nvme_line(data, line)
            continue
        # Attribute rows start with a numeric ID and carry a hex FLAG column;
        # this keeps error-log rows (e.g. "40 51 00 ff ...") out of the table.
        if parts[0].isdigit() and parts[2].lower().startswith("0x"):
            _apply_ata_row(data, parts)

    return data
def calculate_health(data: "DiskSmartData", lang: Optional[str] = None) -> Tuple[int, List[str]]:
    """Score a disk from 5 to 100 and collect human-readable warnings.

    The score starts at 100, is capped by the SSD wear indicator and is then
    reduced by a penalty per error counter. The result is always written
    back to ``data.health`` and ``data.warnings`` (including on the early
    exits), so callers that ignore the return value see the same verdict.

    Args:
        data: Parsed S.M.A.R.T. readings for one disk; updated in place.
        lang: Language code for warning texts; detected via ``get_locale()``
            when ``None``.

    Returns:
        ``(health, warnings)``. Disks with a collection error or without
        S.M.A.R.T. support score 50 with no warnings; a BAD overall status
        scores 5.
    """
    if lang is None:
        lang = get_locale()

    def finish(score: int, notes: List[str]) -> Tuple[int, List[str]]:
        # Single exit point so the dataclass never keeps a stale default.
        data.health = score
        data.warnings = notes
        return score, notes

    if data.error or not data.smart_supported:
        return finish(50, [])
    if data.status == "BAD":
        return finish(5, [get_message("smart_status_bad", lang)])

    health = 100
    warnings: List[str] = []

    def penalize(count: int, cap: float, weight: float, message_key: str) -> None:
        # Linear penalty with an upper bound, plus a warning, for count > 0.
        nonlocal health
        if count > 0:
            health -= min(cap, count * weight)
            warnings.append(get_message(message_key, lang, count))

    # === SSD wear ===
    # An indicator counts only when strictly between 0 and 100 (100 is the
    # "nothing reported" default). ID 169 wins when usable; otherwise the
    # lower of ID 233 and ID 231 is taken.
    if 0 < data.remaining_lifetime < 100:
        wear = data.remaining_lifetime
    else:
        candidates = [
            level
            for level in (data.media_wearout_indicator, data.ssd_life_left)
            if 0 < level < 100
        ]
        wear = min(candidates) if candidates else None
    if wear is not None:
        health = min(health, wear)
        if wear < 50:
            warnings.append(get_message("warning_ssd_life", lang, wear))

    # === Reallocated sectors (ID 5): tiered penalty ===
    reallocated = data.reallocated
    if reallocated > 500:
        health -= min(80, reallocated * 0.5)
        warnings.append(get_message("critical_reallocated_500", lang, reallocated))
    elif reallocated > 100:
        health -= min(70, reallocated * 0.3)
        warnings.append(get_message("warning_reallocated_100", lang, reallocated))
    elif reallocated > 10:
        health -= reallocated * 0.2
        warnings.append(get_message("warning_reallocated_10", lang, reallocated))
    elif reallocated > 0:
        # Up to ten sectors: small penalty, no warning.
        health -= reallocated * 0.1

    # === Reallocation events (ID 196): warn only above 100 ===
    events = data.reallocated_event_count
    if events > 100:
        health -= min(40, events * 0.4)
        warnings.append(get_message("warning_reallocated_event", lang, events))
    elif events > 0:
        health -= min(20, events * 0.2)

    # === Media and flash error counters ===
    penalize(data.pending, 70, 2, "critical_pending")  # ID 197
    penalize(data.uncorrectable, 80, 5, "critical_uncorrectable")  # ID 198
    penalize(data.reported_uncorrect, 60, 5, "warning_reported_uncorrect")  # ID 187
    penalize(data.program_fail_count, 50, 10, "warning_program_fail")  # ID 181
    penalize(data.erase_fail_count, 50, 10, "warning_erase_fail")  # ID 172/182

    # === CRC errors (ID 199): usually a cabling problem, so a lighter penalty ===
    crc = data.crc_errors
    if crc > 0:
        health -= min(30, crc * 0.3) if crc > 100 else min(15, crc * 0.15)
        warnings.append(get_message("warning_crc_errors", lang, crc))

    penalize(data.command_timeout, 25, 2, "warning_command_timeout")  # ID 188
    penalize(data.spin_retry_count, 30, 5, "warning_spin_retry")  # ID 10

    # Never report less than 5 %, even when the penalties exceed the score.
    return finish(max(5, int(health)), warnings)
def collect_all_disks_data(lang: str = None) -> List[DiskSmartData]:
    """Gather S.M.A.R.T. records for every disk reported by ``get_disk_list()``.

    For each device the model and size are looked up, the smartctl output is
    parsed, and the health score is computed in the given language.

    Args:
        lang: Language code for warnings; detected via ``get_locale()`` when
            ``None``.

    Returns:
        One ``DiskSmartData`` per disk, in the order the disks were listed.
    """
    language = get_locale() if lang is None else lang

    collected = []
    for device in get_disk_list():
        model, size = get_disk_info(device)
        record = parse_smart_data(device)
        if not record:
            continue
        record.model, record.size = model, size
        calculate_health(record, language)
        collected.append(record)
    return collected
def is_root() -> bool:
    """Report whether the process runs with effective user ID 0.

    Asks the operating system directly through ``os.geteuid`` rather than
    spawning ``id -u``.

    Returns:
        ``True`` for root; ``False`` otherwise, including on platforms that
        have no ``os.geteuid``.
    """
    geteuid = getattr(os, "geteuid", None)
    return geteuid is not None and geteuid() == 0