basic metrics

disable auto update
opencode config
2026-07-05 11:11:11 +00:00 · 2026-04-25 12:32:18 +02:00 · 2026-04-25 11:31:16 +02:00 · 2026-04-25 10:48:43 +02:00 · 2026-04-23 23:38:46 +02:00
10 changed files with 307 additions and 0 deletions
--- a/.config/hypr/hyprland.conf
+++ b/.config/hypr/hyprland.conf
@ -104,6 +104,7 @@ exec-once = fcitx5
 exec-once = easyeffects --gapplication-service
 exec-once = dms run
 exec-once = voxtype daemon
+exec-once = openrgb --startminimized

 #$swaylock = swaylock --screenshots --clock --indicator --effect-blur 6x6 --fade-in 0.2 --ring-color 4e9dc2 --key-hl-color 71b0ce
 $swaylock = hyprlock
--- a/.config/opencode/opencode.jsonc
+++ b/.config/opencode/opencode.jsonc
@ -0,0 +1,72 @@
+// opencode configuration: MCP servers, per-agent overrides and a local
+// OpenAI-compatible provider.
+{
+  "$schema": "https://opencode.ai/config.json",
+  // Keep opencode from updating itself.
+  "autoupdate": false,
+  "mcp": {
+    "context7": {
+      "type": "remote",
+      "url": "https://mcp.context7.com/mcp",
+      "headers": {
+        // Read from the environment so the secret never has to be written
+        // into this tracked file; an unset variable expands to "".
+        "CONTEXT7_API_KEY": "{env:CONTEXT7_API_KEY}",
+      },
+      "enabled": true,
+    },
+    "firecrawl-mcp": {
+      "type": "local",
+      "command": ["bun", "x", "firecrawl-mcp"],
+      "environment": {
+        // Same as above: supplied through the environment, not committed.
+        "FIRECRAWL_API_KEY": "{env:FIRECRAWL_API_KEY}",
+      },
+    },
+  },
+  "agent": {
+    // NOTE(review): the "_model" keys look like deliberately disabled "model"
+    // overrides (underscore prefix) - confirm before renaming them.
+    "explore": {
+      "_model": "github-copilot/gpt-5-mini",
+    },
+    "title": {
+      "_model": "github-copilot/gpt-5-mini",
+    },
+    "summary": {
+      "_model": "github-copilot/gpt-5-mini",
+    },
+  },
+
+  "provider": {
+    "lmstudio": {
+      "npm": "@ai-sdk/openai-compatible",
+      "name": "LM Studio (local)",
+      "options": {
+        // Local OpenAI-compatible endpoint on port 8080.
+        "baseURL": "http://127.0.0.1:8080/v1",
+      },
+      // Every model below is declared with the same context/output limits.
+      "models": {
+        "gemma-4-26B-A4B": {
+          "name": "Gemma 4 26B A4B",
+          "limit": {
+            "context": 100000,
+            "output": 8192,
+          },
+        },
+        "qwen3.6-35B-A3B": {
+          "name": "Qwen 3.6 35B A3B",
+          "limit": {
+            "context": 100000,
+            "output": 8192,
+          },
+        },
+        "qwen3.5-9B": {
+          "name": "Qwen 3.5 9B",
+          "limit": {
+            "context": 100000,
+            "output": 8192,
+          },
+        },
+        "qwen3.5-9B-sushi": {
+          "name": "Qwen 3.5 9B Sushi coder",
+          "limit": {
+            "context": 100000,
+            "output": 8192,
+          },
+        },
+      },
+    },
+  },
+}
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
+result
--- a/modules/llama-swap-exporter.nix
+++ b/modules/llama-swap-exporter.nix
@ -0,0 +1,58 @@
+# NixOS module for the llama-swap Prometheus exporter.
+#
+# Declares the `services.llama-swap-exporter.*` options and, when enabled, a
+# systemd unit that runs the exporter script. Settings reach the script through
+# the PROMETHEUS_PORT, LLAMA_SWAP_URL and SCRAPE_INTERVAL environment variables.
+{
+  config,
+  lib,
+  pkgs,
+  ...
+}:
+
+let
+  cfg = config.services.llama-swap-exporter;
+  # Exporter package built from the sources kept next to the eisen host config.
+  exporter = pkgs.callPackage ../servers/eisen/llama-swap-exporter/default.nix { };
+in
+
+{
+  options.services.llama-swap-exporter = {
+    enable = lib.mkEnableOption "llama-swap Prometheus exporter";
+
+    port = lib.mkOption {
+      type = lib.types.port;
+      default = 9409;
+      description = "Port for the Prometheus metrics endpoint.";
+    };
+
+    url = lib.mkOption {
+      type = lib.types.str;
+      default = "http://localhost:8080/api/metrics";
+      description = "llama-swap metrics endpoint URL.";
+    };
+
+    interval = lib.mkOption {
+      type = lib.types.int;
+      default = 15;
+      description = "Scrape interval in seconds.";
+    };
+  };
+
+  config = lib.mkIf cfg.enable {
+    systemd.services.llama-swap-exporter = {
+      description = "llama-swap Prometheus exporter";
+      wantedBy = [ "multi-user.target" ];
+      after = [ "network.target" ];
+
+      serviceConfig = {
+        Type = "simple";
+        ExecStart = "${exporter}/bin/exporter.py";
+        Restart = "on-failure";
+        RestartSec = "5s";
+        # The exporter only listens on a TCP port and issues outbound HTTP
+        # requests, so it runs as a transient unprivileged user instead of
+        # root. DynamicUser also gives the unit a private /tmp and a read-only
+        # view of the system, which replaces the former ReadWritePaths entry.
+        DynamicUser = true;
+        NoNewPrivileges = true;
+        # Only needed if someone configures a privileged port (< 1024);
+        # an empty list emits no directive at all.
+        AmbientCapabilities = lib.optionals (cfg.port < 1024) [ "CAP_NET_BIND_SERVICE" ];
+        Environment = [
+          # Flush print() output immediately so messages show up in the journal.
+          "PYTHONUNBUFFERED=1"
+          "PROMETHEUS_PORT=${toString cfg.port}"
+          "LLAMA_SWAP_URL=${cfg.url}"
+          "SCRAPE_INTERVAL=${toString cfg.interval}"
+        ];
+      };
+    };
+  };
+}
--- a/servers/eisen/configuration.nix
+++ b/servers/eisen/configuration.nix
@ -21,6 +21,7 @@ let
    grafana = 3002;
    tolgee = 8200;
    # ntfy = 3003;
+    llama-swap = 8080;
  };
  internalPorts = {
    prometheus-node = 9000;
@ -28,6 +29,7 @@ let
    prometheus-sonarr = 9101;
    prometheus-radarr = 9102;
    prometheus-prowlarr = 9103;
+    prometheus-llama-swap = 9409;
    prometheus = 9090;
  };
 in
@ -42,6 +44,7 @@ in
  imports = [
    nix-index-database.nixosModules.nix-index
    ./hardware-configuration.nix
+    ../../modules/llama-swap-exporter.nix
  ];

  nix = {
@ -105,6 +108,12 @@ in
      environmentFile = "/etc/secrets/karakeep.env";
    };

+    llama-swap-exporter = {
+      enable = true;
+      url = "http://100.120.15.10:${toString ports.llama-swap}/api/metrics";
+      port = internalPorts.prometheus-llama-swap;
+    };
+
    dnsmasq = {
      enable = true;
    };
@ -185,6 +194,14 @@ in
            }
          ];
        }
+        {
+          job_name = "llama-swap";
+          static_configs = [
+            {
+              targets = [ "localhost:${toString internalPorts.prometheus-llama-swap}" ];
+            }
+          ];
+        }
      ];
    };

--- a/servers/eisen/llama-swap-exporter/default.nix
+++ b/servers/eisen/llama-swap-exporter/default.nix
@ -0,0 +1,23 @@
+# Package definition for the llama-swap Prometheus exporter.
+# Built from ./src with setuptools; the executable lands in bin/exporter.py.
+{
+  lib,
+  pkgs ? import <nixpkgs> { },
+  ...
+}:
+
+pkgs.python3Packages.buildPythonApplication {
+  pname = "llama-swap-exporter";
+  # Kept in sync with the version declared in src/setup.py.
+  version = "1.0";
+
+  src = ./src;
+  pyproject = true;
+  # setup.py declares a static version, so setuptools alone is sufficient.
+  build-system = with pkgs.python3Packages; [
+    setuptools
+  ];
+  meta = {
+    description = "Prometheus exporter for llama-swap metrics";
+    license = lib.licenses.mit;
+    maintainers = [ ];
+    platforms = lib.platforms.linux;
+    # Name of the script installed by setup.py's `scripts` entry.
+    mainProgram = "exporter.py";
+  };
+}
--- a/servers/eisen/llama-swap-exporter/src/exporter.py
+++ b/servers/eisen/llama-swap-exporter/src/exporter.py
@ -0,0 +1,114 @@
+#!/usr/bin/env python
+
+"""Prometheus exporter for llama-swap metrics endpoint."""
+
+import json
+import os
+import time
+import urllib.request
+from http.server import BaseHTTPRequestHandler, HTTPServer
+
+LLAMA_SWAP_URL = os.environ.get("LLAMA_SWAP_URL", "http://localhost:8080/metrics")
+PROMETHEUS_PORT = int(os.environ.get("PROMETHEUS_PORT", "9409"))
+SCRAPE_INTERVAL = int(os.environ.get("SCRAPE_INTERVAL", "15"))
+
+last_metrics = {}
+last_scrape_time = 0
+
+
+def scrape_llama_swap():
+    try:
+        req = urllib.request.Request(LLAMA_SWAP_URL)
+        with urllib.request.urlopen(req, timeout=10) as resp:
+            data = json.loads(resp.read().decode())
+            if not data:
+                print("No data found when scraping (request successful, empty data)")
+                return {}
+            entry = data[-1]
+            return {
+                "llama_cache_tokens": float(entry.get("cache_tokens", 0)),
+                "llama_input_tokens": float(entry.get("input_tokens", 0)),
+                "llama_output_tokens": float(entry.get("output_tokens", 0)),
+                "llama_prompt_per_second": entry.get("prompt_per_second", 0),
+                "llama_tokens_per_second": entry.get("tokens_per_second", 0),
+                "llama_duration_ms": float(entry.get("duration_ms", 0)),
+                "llama_model": entry.get("model", "unknown"),
+                "llama_has_capture": 1 if entry.get("has_capture") else 0,
+            }
+    except Exception as e:
+        print(f"Error scraping llama-swap: {e}")
+        return {}
+
+
+def format_metrics(metrics):
+    if not metrics:
+        metrics = {}
+
+    lines = []
+    model = metrics.get("llama_model", "unknown")
+
+    for name, value in metrics.items():
+        if name == "llama_model":
+            continue
+        if "second" in name:
+            lines.append(f"# HELP {name} Rate from llama-swap")
+            lines.append(f"# TYPE {name} gauge")
+            lines.append(f'{name}{{model="{model}"}} {value}')
+        elif "tokens" in name:
+            lines.append(f"# HELP {name} Total tokens from llama-swap")
+            lines.append(f"# TYPE {name} gauge")
+            lines.append(f'{name}{{model="{model}"}} {value}')
+        elif "duration" in name:
+            value_s = value / 1000.0
+            lines.append("# HELP llama_duration_seconds Inference duration")
+            lines.append("# TYPE llama_duration_seconds gauge")
+            lines.append(f'llama_duration_seconds{{model="{model}"}} {value_s}')
+        elif name == "llama_has_capture":
+            lines.append("# HELP llama_has_capture Whether capture is available")
+            lines.append("# TYPE llama_has_capture gauge")
+            lines.append(f'llama_has_capture{{model="{model}"}} {value}')
+
+    lines.append(
+        "# HELP llama_last_scrape_timestamp_seconds When metrics were last scraped"
+    )
+    lines.append("# TYPE llama_last_scrape_timestamp_seconds gauge")
+    lines.append(f"llama_last_scrape_timestamp_seconds {time.time()}")
+
+    return "\n".join(lines) + "\n"
+
+
+last_metrics = None
+
+
+class MetricsHandler(BaseHTTPRequestHandler):
+    def do_GET(self):
+        global last_scrape_time, last_metrics
+        if time.time() - last_scrape_time > SCRAPE_INTERVAL:
+            last_metrics = scrape_llama_swap()
+            last_scrape_time = time.time()
+
+        if self.path == "/metrics":
+            self.send_response(200)
+            self.send_header("Content-Type", "text/plain; charset=utf-8")
+            self.end_headers()
+            self.wfile.write(format_metrics(last_metrics).encode())
+        elif self.path == "/health":
+            self.send_response(200)
+            self.send_header("Content-Type", "text/plain")
+            self.end_headers()
+            self.wfile.write(b"OK")
+        else:
+            self.send_response(404)
+            self.end_headers()
+
+    def log_message(self, format, *args):
+        pass
+
+
+if __name__ == "__main__":
+    print(f"Starting llama-swap exporter on port {PROMETHEUS_PORT}")
+    print(f"Scraping llama-swap from: {LLAMA_SWAP_URL}")
+    server = HTTPServer(("0.0.0.0", PROMETHEUS_PORT), MetricsHandler)
+    server.serve_forever()
+else:
+    print("Exporter loaded")
--- a/servers/eisen/llama-swap-exporter/src/setup.py
+++ b/servers/eisen/llama-swap-exporter/src/setup.py
@ -0,0 +1,10 @@
+from setuptools import find_packages, setup
+
+setup(
+    name="llama-swap-exporter",
+    version="1.0",
+    # Modules to import from other scripts:
+    packages=find_packages(),
+    # Executables
+    scripts=["exporter.py"],
+)
--- a/servers/fern/configuration.nix
+++ b/servers/fern/configuration.nix
@ -1,5 +1,7 @@
 {
  pkgs,
+  lib,
+  config,
  ...
 }:

@ -113,7 +115,9 @@ in
  ];
  services.llama-swap = {
    enable = true;
+    openFirewall = true;
    settings = {
+      #      listen = "0.0.0.0:8080";
      macros = {
        llama = ''
          ${pkgs.lib.getExe' llama-cpp "llama-server"} \
@ -161,6 +165,11 @@ in
      DynamicUser = pkgs.lib.mkForce false;
      User = pkgs.lib.mkForce "dan";
      Group = pkgs.lib.mkForce "users"; # or dan's primary group
+      ExecStart = lib.mkForce ''
+        ${lib.getExe pkgs.llama-swap} --listen 0.0.0.0:${toString config.services.llama-swap.port} --config ${
+          (pkgs.formats.yaml { }).generate "config.yaml" config.services.llama-swap.settings
+        }
+      '';
    };
  };
  hardware.nvidia = {
--- a/servers/ui-mode/configuration.nix
+++ b/servers/ui-mode/configuration.nix
@ -196,6 +196,7 @@ in
      "https://cache.garnix.io"
      "https://rusic.cachix.org"
      "https://cuda-maintainers.cachix.org"
+      "https://cache.nixos-cuda.org"
    ];
    trusted-public-keys = [
      "hyprland.cachix.org-1:a7pgxzMz7+chwVL3/pzj6jIBMioiJM7ypFP8PwtkuGc="
@ -206,6 +207,7 @@ in
      "cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY="
      "rusic.cachix.org-1:WXMpGpamblLUiJtcoxBxGGGGwIcWxGPJBUxarLiqWmw="
      "cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E="
+      "cache.nixos-cuda.org:74DUi4Ye579gUqzH4ziL9IyiJBlDpMRn9MBN8oNan9M="
    ];
  };
Author	SHA1	Message	Date
Daniel Bulant	52a2ea6904	basic metrics	2026-04-25 12:32:18 +02:00
Daniel Bulant	1f5861fa72	disable auto update	2026-04-25 11:31:16 +02:00
Daniel Bulant	b038e2ff25	opencode config	2026-04-25 10:48:43 +02:00
Daniel Bulant	81a32d1f38	add cuda cache	2026-04-23 23:38:46 +02:00