#!/usr/bin/env python3
"""
Pi Dashboard - Service monitor, log viewer, and health reporter.
Runs on your Raspberry Pi alongside your other projects.
"""

import subprocess
import json
import os
import re
import socket
import time
from datetime import datetime
import threading
import smtplib
from email.mime.text import MIMEText
from flask import Flask, render_template, jsonify, request

app = Flask(__name__)

# Storage cache - updated every 5 minutes in background by _storage_loop.
_storage_cache = {"drives": [], "last_updated": None}  # last_updated is an ISO-8601 timestamp
_storage_lock = threading.Lock()  # guards all reads/writes of _storage_cache across threads

def _scan_storage():
    """Background storage scanner.

    Rebuilds _storage_cache with per-mount usage totals (via df) and the
    largest top-level folders of each monitored mount point (via du).
    """
    monitored = [("/", "SD Card"), ("/mnt/storage", "Storage"), ("/mnt/backup", "Backup")]
    results = []
    for mount, label in monitored:
        try:
            df_raw, _ = run_cmd(f"df --output=size,used,avail {mount} 2>/dev/null | tail -1")
            fields = df_raw.strip().split()
            if len(fields) != 3:
                continue
            size_gb, used_gb, free_gb = (round(int(v) / 1024 / 1024, 1) for v in fields)
            percent = round(used_gb / size_gb * 100, 1) if size_gb else 0

            du_raw, _ = run_cmd(f"du -h --max-depth=1 {mount}/ 2>/dev/null | sort -rh | head -15", timeout=300)
            folders = []
            for row in du_raw.strip().split("\n"):
                pieces = row.split(None, 1)
                if len(pieces) != 2:
                    continue
                size_str, path = pieces
                # Skip the mount point's own grand-total line.
                if path.rstrip("/") == mount.rstrip("/"):
                    continue
                name = path.replace(mount + "/", "").replace(mount, "").strip("/")
                # Ignore empty names and hidden directories.
                if name and not name.startswith("."):
                    folders.append({"name": name, "size": size_str, "path": path})

            results.append({
                "name": label,
                "mount": mount,
                "total_gb": size_gb,
                "used_gb": used_gb,
                "avail_gb": free_gb,
                "percent": percent,
                "folders": folders[:10],
            })
        except Exception as e:
            print(f"Error scanning {mount}: {e}")

    with _storage_lock:
        _storage_cache["drives"] = results
        _storage_cache["last_updated"] = datetime.now().isoformat()
    print(f"Storage scan complete: {len(results)} drives")

def _storage_loop():
    """Daemon loop: rescan storage usage every 5 minutes."""
    # `time` is imported at module level; the former local
    # `import time as _time` alias was redundant.
    while True:
        _scan_storage()
        time.sleep(300)


# ─── Service Definitions ───────────────────────────────────────────────
# Manually curated services shown on the dashboard. Schema per entry:
#   id                  - stable identifier used in the API routes
#   name / icon         - display label and emoji
#   type                - "docker" (uses "container") or "systemd" (uses "service")
# Optional keys:
#   port                - TCP port to health-check
#   url                 - link shown on the dashboard card
#   description         - short blurb for the card
#   related_containers  - dependency containers checked alongside the main one
#   extra_links         - additional {label, url} links for the card
SERVICES = [
    {
        "id": "homeassistant",
        "name": "Home Assistant",
        "icon": "🏠",
        "type": "docker",
        "container": "homeassistant",
        "port": 8123,
        "url": "https://sosopi.tailb27814.ts.net/ha/",
        "description": "Smart home automation",
    },
    {
        "id": "immich",
        "name": "Immich",
        "icon": "📸",
        "type": "docker",
        "container": "immich_server",
        "port": 2283,
        "url": "https://sosopi.tailb27814.ts.net/photos/",
        "description": "Photo & video backup",
        "related_containers": [
            "immich_postgres",
            "immich_machine_learning",
            "immich_redis",
        ],
    },
    {
        "id": "nextcloud",
        "name": "Nextcloud",
        "icon": "☁️",
        "type": "docker",
        "container": "nextcloud-nextcloud-1",
        "port": 8080,
        "url": "https://sosopi.tailb27814.ts.net/cloud/",
        "description": "File cloud storage",
        "related_containers": ["nextcloud-db-1"],
    },
    {
        "id": "adguard",
        "name": "AdGuard Home",
        "icon": "🛡️",
        "type": "systemd",
        "service": "AdGuardHome",
        "port": 3000,
        "url": "https://sosopi.tailb27814.ts.net/adguard/",
        "description": "DNS ad blocker",
    },
    {
        "id": "cafedash",
        "name": "Cafe Dash",
        "icon": "☕",
        "type": "systemd",
        "service": "cafedash",
        "port": 5000,
        "url": "https://sosopi.tailb27814.ts.net/cafe/",
        "description": "Cafe dashboard app",
        "extra_links": [
            {"label": "order", "url": "https://sosopi.tailb27814.ts.net/cafe/order"},
            {"label": "admin", "url": "https://sosopi.tailb27814.ts.net/cafe/admin"},
            {"label": "dashboard", "url": "https://sosopi.tailb27814.ts.net/cafe/dashboard"},
        ],
    },
    {
        "id": "samba",
        "name": "Samba",
        "icon": "📁",
        "type": "systemd",
        "service": "smbd",
        "port": 445,
        "description": "Network file sharing",
    },

    {
        "id": "tailscale",
        "name": "Tailscale",
        "icon": "🔗",
        "type": "systemd",
        "service": "tailscaled",
        "description": "Mesh VPN",
    },
]


def run_cmd(cmd, timeout=10):
    """Execute *cmd* through the shell.

    Returns a (stdout, returncode) tuple with stdout stripped. On timeout
    or any other failure the error text is returned with returncode 1.
    """
    try:
        proc = subprocess.run(
            cmd, shell=True, capture_output=True, text=True, timeout=timeout
        )
    except subprocess.TimeoutExpired:
        return "Command timed out", 1
    except Exception as e:
        return str(e), 1
    return proc.stdout.strip(), proc.returncode


def check_port(port, host="127.0.0.1", timeout=2):
    """Return True if TCP *port* on *host* accepts a connection.

    Any connection failure (refused, timed out, unreachable, bad address)
    is reported as False.
    """
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        # socket.timeout and ConnectionRefusedError are both OSError
        # subclasses, so a single catch covers every failure mode the
        # old (socket.timeout, ConnectionRefusedError, OSError) tuple did.
        return False


def get_docker_status(container_name):
    """Return run/health state of a Docker container.

    Produces a dict with keys running, status, healthy, health_status.
    Containers without a defined health check report "none" and count
    as healthy.
    """
    out, rc = run_cmd(
        f"docker inspect --format='{{{{.State.Status}}}}' {container_name} 2>/dev/null"
    )
    if rc != 0:
        # Container does not exist (or the docker daemon is unreachable).
        return {"running": False, "status": "not found", "healthy": False}

    status = out.replace("'", "").strip()

    # Health check is optional — not all containers define one
    h_out, h_rc = run_cmd(
        f"docker inspect --format='{{{{if .State.Health}}}}{{{{.State.Health.Status}}}}{{{{end}}}}' {container_name} 2>/dev/null"
    )
    health = h_out.replace("'", "").strip() if h_rc == 0 else ""
    if not health:
        health = "none"

    return {
        "running": status == "running",
        "status": status,
        "healthy": health in ("healthy", "none"),
        "health_status": health,
    }


def get_systemd_status(service_name):
    """Return active state, memory usage, and start time for a systemd unit."""
    state_out, _state_rc = run_cmd(f"systemctl is-active {service_name} 2>/dev/null")
    is_active = state_out.strip() == "active"

    mem_out, _ = run_cmd(
        f"systemctl show {service_name} --property=MemoryCurrent 2>/dev/null"
    )
    raw_mem = mem_out.replace("MemoryCurrent=", "").strip()
    # systemd reports "[not set]" when memory accounting is disabled;
    # isdigit() rejects that (and the empty string) in one check.
    mem_mb = round(int(raw_mem) / 1024 / 1024, 1) if raw_mem.isdigit() else None

    since_out, _ = run_cmd(
        f"systemctl show {service_name} --property=ActiveEnterTimestamp 2>/dev/null"
    )
    since = since_out.replace("ActiveEnterTimestamp=", "").strip()

    return {
        "running": is_active,
        "status": "active" if is_active else "inactive",
        "memory_mb": mem_mb,
        "since": since,
    }


def get_container_stats(container_name):
    """Return CPU and memory usage strings for a running container ({} on error)."""
    out, rc = run_cmd(
        f"docker stats --no-stream --format '{{{{.CPUPerc}}}} {{{{.MemUsage}}}}' {container_name} 2>/dev/null"
    )
    if rc != 0:
        return {}
    fields = out.strip().split()
    cpu = fields[0] if fields else "N/A"
    memory = " ".join(fields[1:]) if len(fields) > 1 else "N/A"
    return {"cpu": cpu, "memory": memory}


# ─── Auto-Discovery ────────────────────────────────────────────────────

# systemd units that belong to the OS / base stack. Auto-discovery skips
# these so only user-deployed services show up on the dashboard.
KNOWN_SYSTEM_SERVICES = {
    "ssh", "sshd", "systemd-journald", "systemd-logind", "systemd-udevd",
    "systemd-timesyncd", "dbus", "polkit", "getty@tty1", "ModemManager",
    "serial-getty@ttyAMA10", "user@1000", "wpa_supplicant", "avahi-daemon",
    "containerd", "cron", "NetworkManager", "bluetooth",
    "docker", "nmbd", "winbind", "smbd", "pidash", "smartmontools",
    "smartd", "guest-portal",
}

def pick_emoji(name, description="", image=""):
    """Pick an emoji based on service name/description.

    Performs case-insensitive substring matching of keyword groups against
    the concatenated name + description + image text; the first group with
    a hit wins, and unknown services fall back to 📦.
    """
    text = (name + " " + description + " " + image).lower()

    # NOTE: keywords are matched as substrings, so they must be specific
    # enough not to hit unrelated words. The former "ad" keyword in the
    # ad-blocker group matched "download", "dashboard", "upload", etc.,
    # shadowing the correct later categories — it was removed; AdGuard
    # still matches via "guard", "block", and "dns".
    mappings = [
        (["photo", "immich", "gallery", "image", "camera", "pic"], "📸"),
        (["cloud", "nextcloud", "drive", "file", "storage", "sync"], "☁️"),
        (["home", "assistant", "hass", "automation", "smart"], "🏠"),
        (["guard", "block", "dns", "pihole", "filter"], "🛡️"),
        (["cafe", "coffee", "restaurant", "food", "menu", "order"], "☕"),
        (["samba", "smb", "share", "nas", "cifs"], "📁"),
        (["vpn", "tailscale", "wireguard", "tunnel"], "🔗"),
        (["nginx", "caddy", "proxy", "reverse", "haproxy", "traefik"], "🌐"),
        (["monitor", "uptime", "watch", "alert", "grafana"], "📊"),
        (["database", "postgres", "mysql", "maria", "redis", "mongo", "sql"], "🗄️"),
        (["mail", "email", "smtp", "imap"], "📧"),
        (["media", "plex", "jellyfin", "emby", "stream", "video", "movie"], "🎬"),
        (["music", "audio", "spotify", "navidrome"], "🎵"),
        (["game", "minecraft"], "🎮"),
        (["git", "gitea", "gitlab", "code", "repo"], "💻"),
        (["download", "torrent", "sonarr", "radarr", "arr"], "📥"),
        (["backup", "borg", "restic", "duplicati"], "💾"),
        (["web", "site", "page", "html", "http", "www"], "🌍"),
        (["travel", "trip", "nyc", "vacation", "city", "map", "tour"], "✈️"),
        (["chat", "message", "matrix", "signal", "irc"], "💬"),
        (["wiki", "note", "doc", "book", "knowledge"], "📝"),
        (["dashboard", "panel", "admin", "portal"], "📱"),
        (["cron", "schedule", "task", "job"], "⏰"),
        (["print", "cups", "3d"], "🖨️"),
        (["weather", "climate", "temp"], "🌤️"),
        (["security", "auth", "vault", "password"], "🔒"),
        (["network", "ping", "speed"], "📡"),
        (["python", "flask", "django", "node", "app"], "⚡"),
    ]

    for keywords, emoji in mappings:
        for kw in keywords:
            if kw in text:
                return emoji

    return "📦"  # Default for unknown services


# JSON file persisting the names of services the user chose to hide from discovery.
HIDDEN_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "hidden_services.json")

def load_hidden():
    """Return the persisted hidden-services list ([] if absent or unreadable)."""
    try:
        with open(HIDDEN_FILE, "r") as f:
            return json.load(f)
    except (OSError, ValueError):
        # A missing file or corrupt JSON means "nothing hidden" — the old
        # exists-then-open check still crashed discovery on bad JSON.
        # (json.JSONDecodeError subclasses ValueError.)
        return []

def save_hidden(hidden):
    """Persist the hidden-services list to HIDDEN_FILE as JSON."""
    with open(HIDDEN_FILE, "w") as fh:
        json.dump(hidden, fh)

def discover_services():
    """Scan for Docker containers and custom systemd services.

    Returns a list of service dicts shaped like the SERVICES entries, each
    tagged with "discovered": True. Services already tracked in SERVICES
    (including related containers) or hidden by the user are skipped.
    """
    hidden = load_hidden()
    return _discover_docker(hidden) + _discover_systemd(hidden)


def _discover_docker(hidden):
    """Find running Docker containers that are not already tracked or hidden."""
    found = []
    out, rc = run_cmd("docker ps --format '{{.Names}}\t{{.Image}}\t{{.Ports}}\t{{.Status}}' 2>/dev/null")
    if rc != 0 or not out.strip():
        return found
    for line in out.strip().split("\n"):
        parts = line.split("\t")
        if len(parts) < 2:
            continue
        name = parts[0]
        image = parts[1] if len(parts) > 1 else ""
        ports = parts[2] if len(parts) > 2 else ""

        # Check if already in SERVICES (main or related) or hidden by the user.
        existing = next((s for s in SERVICES if s.get("container") == name), None)
        is_related = any(name in s.get("related_containers", []) for s in SERVICES)
        if existing or is_related or name in hidden:
            continue

        # Extract the first host-published port, if any.
        # `re` is imported at module level; the old local `import re as _re`
        # alias was redundant.
        port = None
        if ports:
            m = re.search(r'0\.0\.0\.0:(\d+)->', ports)
            if m:
                port = int(m.group(1))

        desc = image.split("/")[-1].split(":")[0]
        svc = {
            "id": "auto_docker_" + name.replace("-", "_").replace(".", "_"),
            "name": name,
            "icon": pick_emoji(name, desc, image),
            "type": "docker",
            "container": name,
            "description": desc,
            "discovered": True,
        }
        if port:
            svc["port"] = port
            svc["url"] = f"http://10.0.0.50:{port}"
        found.append(svc)
    return found


def _discover_systemd(hidden):
    """Find running systemd units that are not known system services."""
    found = []
    out, rc = run_cmd("systemctl list-units --type=service --state=running --no-pager --no-legend 2>/dev/null")
    if rc != 0 or not out.strip():
        return found
    for line in out.strip().split("\n"):
        parts = line.split()
        if not parts:
            continue
        unit = parts[0].replace(".service", "")

        # Skip known system services and already tracked ones
        if unit in KNOWN_SYSTEM_SERVICES or unit in hidden:
            continue
        if next((s for s in SERVICES if s.get("service") == unit or s.get("id") == unit), None):
            continue

        # Get description
        desc_out, _ = run_cmd(f"systemctl show {unit} --property=Description --value 2>/dev/null")

        # If the unit's main process is listening on a TCP port, record it.
        pid_out, _ = run_cmd(f"systemctl show {unit} --property=MainPID --value 2>/dev/null")
        port = None
        if pid_out.strip() and pid_out.strip() != "0":
            port_out, _ = run_cmd(f"ss -tlnp 2>/dev/null | grep 'pid={pid_out.strip()}' | head -1")
            if port_out:
                m = re.search(r':(\d+)\s', port_out)
                if m:
                    port = int(m.group(1))

        desc = desc_out.strip() if desc_out.strip() else unit
        svc = {
            "id": "auto_systemd_" + unit.replace("-", "_").replace(".", "_").replace("@", "_"),
            "name": unit,
            "icon": pick_emoji(unit, desc),
            "type": "systemd",
            "service": unit,
            "description": desc,
            "discovered": True,
        }
        if port:
            svc["port"] = port
            svc["url"] = f"http://10.0.0.50:{port}"
        found.append(svc)
    return found

# Latest auto-discovery results, refreshed by _discover_loop every 2 minutes.
_discovered_cache = []
_discover_lock = threading.Lock()  # guards _discovered_cache

def _discover_loop():
    """Daemon loop: refresh the auto-discovery cache every 2 minutes."""
    # Declared once at function top (the old code placed `global` inside
    # the with-block); `time` is imported at module level, so the local
    # `import time as _time` alias was redundant.
    global _discovered_cache
    while True:
        try:
            found = discover_services()
            with _discover_lock:
                _discovered_cache = found
        except Exception as e:
            print(f"Discovery error: {e}")
        time.sleep(120)


# ─── Alerting ──────────────────────────────────────────────────────────

def load_alert_config():
    """Load SMTP/alert settings from alert_config.json (None if unavailable).

    A missing or corrupt file simply disables alerting — the old code
    crashed the uptime thread on invalid JSON.
    """
    config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "alert_config.json")
    try:
        with open(config_path, "r") as f:
            return json.load(f)
    except (OSError, ValueError):
        # json.JSONDecodeError subclasses ValueError.
        return None

def send_sms(message):
    """Send *message* as an SMS via an email-to-SMS gateway.

    Returns True on success, False when no alert config exists or sending
    fails. Requires alert_config.json with email, password, phone_gateway,
    smtp_server, and smtp_port keys.
    """
    config = load_alert_config()
    if not config:
        print("No alert config found")
        return False
    try:
        msg = MIMEText(message)
        msg["From"] = config["email"]
        msg["To"] = config["phone_gateway"]
        msg["Subject"] = ""
        # The context manager closes the connection even when login or
        # sendmail raises — the old code only called quit() on success,
        # leaking the socket on failure.
        with smtplib.SMTP(config["smtp_server"], config["smtp_port"]) as server:
            server.starttls()
            server.login(config["email"], config["password"])
            server.sendmail(config["email"], config["phone_gateway"], msg.as_string())
        print(f"SMS sent: {message}")
        return True
    except Exception as e:
        print(f"SMS send error: {e}")
        return False


# ─── Uptime Monitor ───────────────────────────────────────────────────

# Uptime-monitor shared state; both structures are guarded by _uptime_lock.
_service_states = {}  # id -> {"up": bool, "since": timestamp, "last_alert": timestamp}
_uptime_history = []  # list of {"id", "name", "event", "timestamp"}, capped at 200 entries
_uptime_lock = threading.Lock()

def _check_service_up(svc):
    """Return True if the service dict's underlying container/unit is running."""
    if svc["type"] == "docker":
        raw, _ = run_cmd(f"docker inspect --format='{{{{.State.Status}}}}' {svc['container']} 2>/dev/null")
        return raw.replace("'", "").strip() == "running"
    raw, _ = run_cmd(f"systemctl is-active {svc['service']} 2>/dev/null")
    return raw.strip() == "active"

def _uptime_loop():
    """Monitor all services every 60 seconds.

    Runs an up/down state machine per service: records transitions in
    _uptime_history (capped at 200 events) and sends SMS alerts on DOWN,
    on recovery, and as periodic "still down" reminders throttled by the
    configured cooldown.
    """
    import time as _time
    _time.sleep(30)  # Wait for initial startup

    # Reminder interval for services that stay down (default 15 minutes).
    config = load_alert_config()
    cooldown = (config.get("alert_cooldown_minutes", 15) * 60) if config else 900

    while True:
        try:
            # Snapshot the merged service list under the discovery lock.
            with _discover_lock:
                all_services = SERVICES + _discovered_cache

            for svc in all_services:
                sid = svc["id"]
                name = svc["name"]
                is_up = _check_service_up(svc)
                now = _time.time()

                with _uptime_lock:
                    # Unseen services default to "up" so a down service
                    # immediately takes the DOWN-transition branch below.
                    prev = _service_states.get(sid, {"up": True, "since": now, "last_alert": 0})

                    if prev["up"] and not is_up:
                        # Service went DOWN
                        _service_states[sid] = {"up": False, "since": now, "last_alert": now}
                        _uptime_history.append({
                            "id": sid, "name": name,
                            "event": "down",
                            "timestamp": datetime.now().isoformat()
                        })
                        if len(_uptime_history) > 200:
                            _uptime_history[:] = _uptime_history[-200:]
                        send_sms(f"DOWN: {name} is offline")

                    elif not prev["up"] and is_up:
                        # Service came BACK UP
                        downtime = round((now - prev["since"]) / 60, 1)
                        _service_states[sid] = {"up": True, "since": now, "last_alert": 0}
                        _uptime_history.append({
                            "id": sid, "name": name,
                            "event": "up",
                            "timestamp": datetime.now().isoformat(),
                            "downtime_min": downtime
                        })
                        if len(_uptime_history) > 200:
                            _uptime_history[:] = _uptime_history[-200:]
                        send_sms(f"UP: {name} is back online (was down {downtime} min)")

                    elif not is_up and (now - prev["last_alert"]) > cooldown:
                        # Still down, send reminder (rate-limited by cooldown)
                        down_min = round((now - prev["since"]) / 60, 1)
                        prev["last_alert"] = now
                        _service_states[sid] = prev
                        send_sms(f"STILL DOWN: {name} ({down_min} min)")

                    elif prev["up"] and is_up:
                        # Still up — just record state for first-seen services
                        if sid not in _service_states:
                            _service_states[sid] = {"up": True, "since": now, "last_alert": 0}

        except Exception as e:
            print(f"Uptime check error: {e}")

        _time.sleep(60)


# ─── Drive Watchdog ────────────────────────────────────────────────────

# Physical drives to SMART-check; "mount" is used for df usage and I/O probes.
DRIVES_TO_MONITOR = [
    {"dev": "/dev/sda", "name": "Backup Drive", "mount": "/mnt/backup"},
    {"dev": "/dev/sdb", "name": "Storage Drive", "mount": "/mnt/storage"},
]

# Alert thresholds on SMART attribute *raw* values (warn / critical boundaries).
SMART_THRESHOLDS = {
    "Reallocated_Sector_Ct": {"warn": 5, "crit": 20},
    "Current_Pending_Sector": {"warn": 1, "crit": 10},
    "Offline_Uncorrectable": {"warn": 1, "crit": 5},
    "Reallocated_Event_Count": {"warn": 5, "crit": 20},
    "UDMA_CRC_Error_Count": {"warn": 50, "crit": 200},
}

# Latest drive-health snapshot (guarded by _drive_health_lock) and a map of
# alert keys -> epoch seconds of the last SMS, used to rate-limit drive alerts.
_drive_health_cache = {"drives": [], "last_checked": None}
_drive_health_lock = threading.Lock()
_drive_alert_sent = {}

def _check_drive_smart(dev, name):
    """Check SMART data for a drive.

    Runs smartctl against *dev* and returns a dict with the overall
    pass/fail verdict, raw values for attributes listed in
    SMART_THRESHOLDS (with warning/critical flags), temperature,
    power-on time, and filesystem usage for the drive's mount point.
    """
    result = {
        "dev": dev,
        "name": name,
        "healthy": True,
        "status": "ok",
        "warnings": [],
        "attributes": {},
    }

    # Overall health verdict from the drive itself
    out, rc = run_cmd(f"smartctl -H {dev} 2>/dev/null")
    if "PASSED" in out:
        result["smart_passed"] = True
    elif "FAILED" in out:
        result["smart_passed"] = False
        result["healthy"] = False
        result["status"] = "critical"
        result["warnings"].append("SMART overall health: FAILED")
    else:
        # smartctl missing, insufficient permissions, or the device is gone.
        result["smart_passed"] = None
        result["warnings"].append("SMART health check unavailable")

    # Get attributes (split the output once and reuse it below —
    # the old code re-split the same text three times)
    out, rc = run_cmd(f"smartctl -a {dev} 2>/dev/null")
    lines = out.split("\n")
    for line in lines:
        for attr_name, thresholds in SMART_THRESHOLDS.items():
            if attr_name in line:
                parts = line.split()
                # Raw value is the last column
                try:
                    raw_val = int(parts[-1])
                except (ValueError, IndexError):
                    continue
                result["attributes"][attr_name] = raw_val
                if raw_val >= thresholds["crit"]:
                    result["healthy"] = False
                    result["status"] = "critical"
                    result["warnings"].append(f"{attr_name}: {raw_val} (critical)")
                elif raw_val >= thresholds["warn"]:
                    # Never downgrade an earlier critical verdict to warning.
                    if result["status"] != "critical":
                        result["status"] = "warning"
                    result["warnings"].append(f"{attr_name}: {raw_val} (warning)")

    # Temperature, if the drive reports it
    temp_line = next((l for l in lines if "Temperature_Celsius" in l), None)
    if temp_line:
        try:
            result["temp_c"] = int(temp_line.split()[-1])
        except (ValueError, IndexError):
            pass

    # Power on hours, also expressed in years for readability
    hours_line = next((l for l in lines if "Power_On_Hours" in l), None)
    if hours_line:
        try:
            hours = int(hours_line.split()[-1])
            result["power_on_hours"] = hours
            result["power_on_years"] = round(hours / 8760, 1)
        except (ValueError, IndexError):
            pass

    # Disk usage - use mount point from DRIVES_TO_MONITOR
    mount = next((d["mount"] for d in DRIVES_TO_MONITOR if d["dev"] == dev), "")
    df_out, _ = run_cmd(f"df -h {mount} 2>/dev/null | tail -1") if mount else ("", 1)
    if df_out:
        parts = df_out.split()
        if len(parts) >= 5:
            result["disk_size"] = parts[1]
            result["disk_used"] = parts[2]
            result["disk_avail"] = parts[3]
            result["disk_pct"] = parts[4]

    return result


def _drive_watchdog_loop():
    """Check drives every 30 minutes.

    SMART-checks each entry in DRIVES_TO_MONITOR, probes the mount point
    for I/O errors, updates _drive_health_cache, and sends rate-limited
    SMS alerts (SMART issues once per day, I/O errors once per hour).
    """
    import time as _time
    _time.sleep(60)  # let the rest of the app start before the first scan

    while True:
        try:
            drives = []
            for d in DRIVES_TO_MONITOR:
                health = _check_drive_smart(d["dev"], d["name"])
                drives.append(health)

                # Send alerts for new issues (keyed by device + severity)
                alert_key = f"{d['dev']}_{health['status']}"
                if health["status"] in ("warning", "critical"):
                    last_alert = _drive_alert_sent.get(alert_key, 0)
                    if _time.time() - last_alert > 86400:  # Alert once per day
                        warnings = "; ".join(health["warnings"])
                        send_sms(f"DRIVE {health['status'].upper()}: {d['name']} ({d['dev']}) - {warnings}")
                        _drive_alert_sent[alert_key] = _time.time()

                # Check if mount is accessible (catches dead/unplugged disks
                # that SMART alone might miss); mutates the dict already
                # appended to `drives` above.
                mount = d.get("mount", "")
                if mount:
                    ls_out, ls_rc = run_cmd(f"ls {mount}/ 2>&1 | head -1", timeout=5)
                    if ls_rc != 0 or "Input/output error" in ls_out:
                        health["healthy"] = False
                        health["status"] = "critical"
                        health["warnings"].append(f"Mount {mount} not accessible")
                        io_key = f"{d['dev']}_io_error"
                        if _time.time() - _drive_alert_sent.get(io_key, 0) > 3600:
                            send_sms(f"DRIVE ERROR: {d['name']} mount {mount} has I/O errors!")
                            _drive_alert_sent[io_key] = _time.time()

            with _drive_health_lock:
                _drive_health_cache["drives"] = drives
                _drive_health_cache["last_checked"] = datetime.now().isoformat()

        except Exception as e:
            print(f"Drive watchdog error: {e}")

        _time.sleep(1800)  # Check every 30 minutes


# ─── Routes ────────────────────────────────────────────────────────────

@app.route("/")
def index():
    """Render the dashboard page, seeded with the curated service list."""
    return render_template("index.html", services=SERVICES)


@app.route("/api/status")
def api_status():
    """Get status of all services plus host-level system metrics.

    Returns JSON {"services": [...], "system": {...}} covering both the
    curated SERVICES list and the auto-discovery cache.
    """
    # Merge manual + discovered services
    with _discover_lock:
        all_services = SERVICES + _discovered_cache

    results = []
    for svc in all_services:
        info = {"id": svc["id"], "name": svc["name"], "icon": svc["icon"],
                "description": svc["description"], "type": svc["type"]}

        if svc["type"] == "docker":
            status = get_docker_status(svc["container"])
            info.update(status)
            # Check related containers (databases, caches, workers)
            if "related_containers" in svc:
                related = []
                for rc_name in svc["related_containers"]:
                    rc_status = get_docker_status(rc_name)
                    related.append({"name": rc_name, **rc_status})
                info["related"] = related
                # If any related container is down, flag it
                if any(not r["running"] for r in related):
                    info["warning"] = "One or more related containers are down"
        else:
            status = get_systemd_status(svc["service"])
            info.update(status)

        # Port check — a running service with a closed port is suspicious
        if "port" in svc:
            info["port"] = svc["port"]
            info["port_open"] = check_port(svc["port"])
            if info["running"] and not info["port_open"]:
                port_warning = f"Service running but port {svc['port']} not responding"
                # Append rather than overwrite: the old code silently
                # clobbered the related-containers warning set above.
                info["warning"] = f"{info['warning']}; {port_warning}" if "warning" in info else port_warning

        if "extra_links" in svc:
            info["extra_links"] = svc["extra_links"]
        if "url" in svc:
            info["url"] = svc["url"]

        results.append(info)

    # Host memory: total, used, available (bytes, from `free -b`)
    mem_out, _ = run_cmd("free -b | awk '/Mem:/ {print $2, $3, $7}'")
    parts = mem_out.split()
    mem_total = int(parts[0]) if len(parts) > 0 else 0
    mem_used = int(parts[1]) if len(parts) > 1 else 0
    mem_avail = int(parts[2]) if len(parts) > 2 else 0

    # 1-minute load average and SoC temperature (millidegrees -> °C)
    cpu_out, _ = run_cmd("awk '{print $1}' /proc/loadavg")
    temp_out, _ = run_cmd("cat /sys/class/thermal/thermal_zone0/temp 2>/dev/null")
    temp_c = round(int(temp_out) / 1000, 1) if temp_out.isdigit() else None

    # Per-mount disk usage (df reports KiB; /1024/1024 yields GiB)
    disks = []
    for mount, label in [("/", "SD Card"), ("/mnt/storage", "Storage"), ("/mnt/backup", "Backup")]:
        d_out, _ = run_cmd(f"df --output=size,used,avail {mount} 2>/dev/null | tail -1")
        p = d_out.strip().split()
        if len(p) == 3:
            disks.append({
                "name": label,
                "mount": mount,
                "total_gb": round(int(p[0]) / 1024 / 1024, 1),
                "used_gb": round(int(p[1]) / 1024 / 1024, 1),
                "avail_gb": round(int(p[2]) / 1024 / 1024, 1),
            })

    uptime_out, _ = run_cmd("uptime -p")

    system = {
        "mem_total_gb": round(mem_total / 1024**3, 1),
        "mem_used_gb": round(mem_used / 1024**3, 1),
        "mem_avail_gb": round(mem_avail / 1024**3, 1),
        "mem_percent": round(mem_used / mem_total * 100, 1) if mem_total else 0,
        "cpu_load": cpu_out,
        "temp_c": temp_c,
        "disks": disks,
        "uptime": uptime_out,
        "timestamp": datetime.now().isoformat(),
    }

    return jsonify({"services": results, "system": system})


@app.route("/api/logs/<service_id>")
def api_logs(service_id):
    """Get recent logs for a service.

    The "lines" query parameter selects how many log lines to return,
    clamped to 1..500.
    """
    lines = request.args.get("lines", 100, type=int)
    # Clamp from below too: the old code passed negative/zero values
    # straight into `docker logs --tail` / `journalctl -n`.
    lines = max(1, min(lines, 500))

    with _discover_lock:
        all_svcs = SERVICES + _discovered_cache
    svc = next((s for s in all_svcs if s["id"] == service_id), None)
    if not svc:
        return jsonify({"error": "Service not found"}), 404

    # Command arguments come from the trusted service table, never from
    # the request, so shell interpolation here is safe.
    if svc["type"] == "docker":
        cmd = f"docker logs --tail {lines} --timestamps {svc['container']} 2>&1"
    else:
        cmd = f"journalctl -u {svc['service']} -n {lines} --no-pager 2>&1"

    out, rc = run_cmd(cmd, timeout=15)
    return jsonify({"service": svc["name"], "logs": out, "lines": lines})


@app.route("/api/health/<service_id>")
def api_health(service_id):
    """Generate a health report for a service.

    Combines type-specific run-state checks with port, recent-log-error,
    and root-disk checks into a JSON report of pass/warn/fail/info items
    plus an overall healthy/warning/critical verdict.
    """
    with _discover_lock:
        all_svcs = SERVICES + _discovered_cache
    svc = next((s for s in all_svcs if s["id"] == service_id), None)
    if not svc:
        return jsonify({"error": "Service not found"}), 404

    report = {
        "service": svc["name"],
        "timestamp": datetime.now().isoformat(),
        "checks": [],
    }

    # 1. Service running / resource checks (type-specific)
    if svc["type"] == "docker":
        report["checks"].extend(_docker_health_checks(svc))
    else:
        report["checks"].extend(_systemd_health_checks(svc))

    # 2. Port check
    if "port" in svc:
        port_open = check_port(svc["port"])
        report["checks"].append({
            "name": f"Port {svc['port']} Responding",
            "status": "pass" if port_open else "fail",
            "detail": "Accepting connections" if port_open else "Connection refused"
        })

    # 3. Recent errors in logs
    if svc["type"] == "docker":
        err_cmd = f"docker logs --tail 200 {svc['container']} 2>&1 | grep -i -c 'error\\|fatal\\|exception\\|critical'"
    else:
        err_cmd = f"journalctl -u {svc['service']} -n 200 --no-pager 2>&1 | grep -i -c 'error\\|fatal\\|exception\\|critical'"
    err_out, _ = run_cmd(err_cmd)
    err_count = int(err_out) if err_out.isdigit() else 0
    report["checks"].append({
        "name": "Recent Errors (last 200 lines)",
        "status": "pass" if err_count < 20 else ("warn" if err_count < 50 else "fail"),
        "detail": f"{err_count} error-level log entries"
    })

    # 4. Disk check for the system
    disk_out, _ = run_cmd("df / --output=pcent | tail -1")
    disk_pct = int(disk_out.strip().replace("%", "")) if disk_out.strip().replace("%", "").isdigit() else 0
    report["checks"].append({
        "name": "Root Disk Usage",
        "status": "pass" if disk_pct < 80 else ("warn" if disk_pct < 90 else "fail"),
        "detail": f"{disk_pct}% used"
    })

    # Overall status: any fail => critical, else any warn => warning
    statuses = [c["status"] for c in report["checks"]]
    if "fail" in statuses:
        report["overall"] = "critical"
    elif "warn" in statuses:
        report["overall"] = "warning"
    else:
        report["overall"] = "healthy"

    return jsonify(report)


def _docker_health_checks(svc):
    """Build the Docker-specific check list for api_health."""
    checks = []
    status = get_docker_status(svc["container"])
    checks.append({
        "name": "Container Running",
        "status": "pass" if status["running"] else "fail",
        "detail": f"Status: {status['status']}"
    })
    # Only report a health check if the container defines one
    if status.get("health_status") and status["health_status"] != "none":
        checks.append({
            "name": "Container Health Check",
            "status": "pass" if status["healthy"] else "fail",
            "detail": f"Health: {status['health_status']}"
        })

    # Live CPU/memory usage
    stats = get_container_stats(svc["container"])
    if stats:
        checks.append({
            "name": "Resource Usage",
            "status": "info",
            "detail": f"CPU: {stats.get('cpu', 'N/A')}, Memory: {stats.get('memory', 'N/A')}"
        })

    # Frequent restarts usually indicate a crash loop
    restarts_out, _ = run_cmd(
        f"docker inspect --format='{{{{.RestartCount}}}}' {svc['container']} 2>/dev/null"
    )
    restarts = restarts_out.replace("'", "").strip()
    if restarts.isdigit():
        checks.append({
            "name": "Restart Count",
            "status": "warn" if int(restarts) > 5 else "pass",
            "detail": f"{restarts} restarts"
        })

    # When the container last (re)started
    started_out, _ = run_cmd(
        f"docker inspect --format='{{{{.State.StartedAt}}}}' {svc['container']} 2>/dev/null"
    )
    checks.append({
        "name": "Container Started",
        "status": "info",
        "detail": started_out.replace("'", "").strip()
    })

    # Dependency containers (databases, caches, workers)
    for rc_name in svc.get("related_containers", []):
        rc_status = get_docker_status(rc_name)
        checks.append({
            "name": f"Dependency: {rc_name}",
            "status": "pass" if rc_status["running"] else "fail",
            "detail": f"Status: {rc_status['status']}"
        })
    return checks


def _systemd_health_checks(svc):
    """Build the systemd-specific check list for api_health."""
    checks = []
    status = get_systemd_status(svc["service"])
    checks.append({
        "name": "Service Active",
        "status": "pass" if status["running"] else "fail",
        "detail": f"Status: {status['status']}"
    })
    if status["memory_mb"]:
        checks.append({
            "name": "Memory Usage",
            "status": "info",
            "detail": f"{status['memory_mb']} MB"
        })
    if status["since"]:
        checks.append({
            "name": "Running Since",
            "status": "info",
            "detail": status["since"]
        })
    return checks


@app.route("/api/system")
def api_system():
    """Detailed system stats: SoC temperature, load average, uptime, top processes.

    Returns JSON with:
      temp_c: CPU temperature in Celsius, or None if the thermal zone
              is unreadable (e.g. not running on a Pi),
      load: contents of /proc/loadavg,
      uptime: human-readable `uptime -p` string,
      top_processes: up to 7 processes sorted by descending memory usage,
                     each as {"mem_pct", "name"} (name truncated to 50 chars).
    """
    # Thermal zone reports millidegrees. Strip before isdigit() so a
    # trailing newline from the shell doesn't silently yield None
    # (other call sites in this file strip run_cmd output the same way).
    temp_out, _ = run_cmd("cat /sys/class/thermal/thermal_zone0/temp 2>/dev/null")
    temp_raw = temp_out.strip()
    temp_c = round(int(temp_raw) / 1000, 1) if temp_raw.isdigit() else None

    load_out, _ = run_cmd("cat /proc/loadavg")
    uptime_out, _ = run_cmd("uptime -p")

    # Top processes by memory; the first line of ps output is the
    # "%MEM COMMAND" header row, hence the [1:] skip below.
    top_out, _ = run_cmd("ps aux --sort=-%mem | head -8 | awk '{print $4, $11}'")
    top_procs = []
    for line in top_out.strip().split("\n")[1:]:
        parts = line.split(None, 1)
        if len(parts) == 2:
            top_procs.append({"mem_pct": parts[0], "name": parts[1][:50]})

    return jsonify({
        "temp_c": temp_c,
        "load": load_out.strip(),
        "uptime": uptime_out.strip(),
        "top_processes": top_procs,
    })




@app.route("/api/storage")
def api_storage():
    """Serve the most recent background storage-scan results as JSON."""
    # Serialize while holding the lock so the scanner thread can't
    # mutate the cache mid-read.
    with _storage_lock:
        payload = jsonify(_storage_cache)
    return payload


@app.route("/api/storage/rescan", methods=["POST"])
def api_storage_rescan():
    """Kick off an asynchronous storage rescan and return immediately."""
    worker = threading.Thread(target=_scan_storage, daemon=True)
    worker.start()
    return jsonify({"success": True, "message": "Rescan started"})


@app.route("/api/remove/<service_id>", methods=["POST"])
def api_remove(service_id):
    """Stop a service and remove it from the dashboard.

    Requires a JSON body with {"confirm": <display name>} matching the
    service's name exactly, as a guard against accidental removal.

    Docker services get `docker stop` (plus any related containers);
    systemd services get `systemctl stop` and `systemctl disable`.
    Removal is best-effort: command output is reported back in
    "details", but non-zero exit codes do not abort the removal.

    Returns JSON {"success": True, "details": [...]} on success,
    404 if the service id is unknown, 400 if confirmation mismatches.
    """
    with _discover_lock:
        all_svcs = SERVICES + _discovered_cache
    svc = next((s for s in all_svcs if s["id"] == service_id), None)
    if not svc:
        return jsonify({"error": "Service not found"}), 404

    data = request.get_json() or {}
    confirm_name = data.get("confirm", "")
    if confirm_name != svc["name"]:
        return jsonify({"error": "Confirmation name does not match"}), 400

    results = []

    # Stop the service. Exit codes are deliberately ignored (previously
    # captured into an unused `rc`); the raw output is surfaced instead.
    if svc["type"] == "docker":
        out, _ = run_cmd(f"docker stop {svc['container']} 2>&1", timeout=30)
        results.append(f"docker stop {svc['container']}: {out}")
        for related in svc.get("related_containers", []):
            out2, _ = run_cmd(f"docker stop {related} 2>&1", timeout=30)
            results.append(f"docker stop {related}: {out2}")
    else:
        out, _ = run_cmd(f"systemctl stop {svc['service']} 2>&1", timeout=15)
        results.append(f"systemctl stop {svc['service']}: {out}")
        out2, _ = run_cmd(f"systemctl disable {svc['service']} 2>&1", timeout=15)
        results.append(f"systemctl disable {svc['service']}: {out2}")

    # Remove from the static list and the discovered-service cache.
    SERVICES[:] = [s for s in SERVICES if s["id"] != service_id]
    with _discover_lock:
        _discovered_cache[:] = [s for s in _discovered_cache if s["id"] != service_id]

    # Persist to the hidden list so discovery doesn't resurrect it.
    # Deduplicate so repeated removals don't grow the file unboundedly.
    hidden = load_hidden()
    for key in (svc.get("container"), svc.get("service")):
        if key and key not in hidden:
            hidden.append(key)
    save_hidden(hidden)

    return jsonify({"success": True, "details": results})


@app.route("/api/uptime")
def api_uptime():
    """Report current up/down state per service plus recent check history."""
    with _uptime_lock:
        # Snapshot of per-service state, with epoch timestamps rendered
        # as ISO-8601 strings (None when the service was never seen up).
        snapshot = {
            sid: {
                "up": st["up"],
                "since": datetime.fromtimestamp(st["since"]).isoformat() if st["since"] else None,
            }
            for sid, st in _service_states.items()
        }
        # Last 50 history entries, newest first.
        recent = _uptime_history[-50:]
        recent.reverse()
        return jsonify({"states": snapshot, "history": recent})


@app.route("/api/uptime/test", methods=["POST"])
def api_uptime_test():
    """Fire a test alert through the configured SMS channel."""
    return jsonify({"success": send_sms("Test alert from sosopi dashboard")})


@app.route("/api/drives")
def api_drives():
    """Serve the cached drive-health report as JSON."""
    # Serialize under the lock so the watchdog thread can't mutate
    # the cache while it's being rendered.
    with _drive_health_lock:
        payload = jsonify(_drive_health_cache)
    return payload


@app.route("/api/drives/check", methods=["POST"])
def api_drives_check():
    """Run an immediate SMART check on all monitored drives.

    Refreshes the shared drive-health cache and returns the fresh
    result as JSON ({"drives": [...], "last_checked": <ISO timestamp>}).
    """
    # SMART queries can be slow, so do them before taking the lock.
    drives = [_check_drive_smart(d["dev"], d["name"]) for d in DRIVES_TO_MONITOR]
    with _drive_health_lock:
        _drive_health_cache["drives"] = drives
        _drive_health_cache["last_checked"] = datetime.now().isoformat()
        # Serialize while still holding the lock: the original returned
        # after releasing it, letting the background watchdog thread
        # mutate the cache mid-serialization.
        return jsonify(_drive_health_cache)


if __name__ == "__main__":
    # Launch all background workers as daemon threads before serving,
    # so they die with the main process.
    workers = (_storage_loop, _discover_loop, _uptime_loop, _drive_watchdog_loop)
    for target in workers:
        threading.Thread(target=target, daemon=True).start()
    app.run(host="0.0.0.0", port=9090, debug=False)
