From 52a2ea69043e5ee631d6ee5992018052e78e8014 Mon Sep 17 00:00:00 2001
From: Daniel Bulant
Date: Sat, 25 Apr 2026 12:32:18 +0200
Subject: [PATCH] basic metrics

---
Review notes (this section is not part of the commit message):
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch; verify the context lines of the configuration.nix hunks
  against the target file before applying.
- The blob ids on the "index" lines predate the edits below and are stale for
  the module, default.nix, exporter.py and setup.py.
- module: run the service with DynamicUser instead of root; the interval
  option must be positive.
- default.nix: `import { }` was presumably `import <nixpkgs> { }`.
- exporter.py: escape the model label, coerce every numeric field, report the
  cached scrape time, scrape only when /metrics is requested, drop the
  duplicate `last_metrics` definition, default URL aligned with the module.
- setup.py: version aligned with default.nix.

 .gitignore                                    |   1 +
 modules/llama-swap-exporter.nix               |  58 +++++++
 servers/eisen/configuration.nix               |  17 ++
 servers/eisen/llama-swap-exporter/default.nix |  23 +++
 .../eisen/llama-swap-exporter/src/exporter.py | 146 ++++++++++++++++++
 .../eisen/llama-swap-exporter/src/setup.py    |  10 ++
 6 files changed, 255 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 modules/llama-swap-exporter.nix
 create mode 100644 servers/eisen/llama-swap-exporter/default.nix
 create mode 100644 servers/eisen/llama-swap-exporter/src/exporter.py
 create mode 100644 servers/eisen/llama-swap-exporter/src/setup.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b2be92b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+result
diff --git a/modules/llama-swap-exporter.nix b/modules/llama-swap-exporter.nix
new file mode 100644
index 0000000..d13977a
--- /dev/null
+++ b/modules/llama-swap-exporter.nix
@@ -0,0 +1,58 @@
+{
+  config,
+  lib,
+  pkgs,
+  ...
+}:
+
+let
+  cfg = config.services.llama-swap-exporter;
+  exporter = pkgs.callPackage ../servers/eisen/llama-swap-exporter/default.nix { };
+in
+
+{
+  options.services.llama-swap-exporter = {
+    enable = lib.mkEnableOption "llama-swap Prometheus exporter";
+
+    port = lib.mkOption {
+      type = lib.types.port;
+      default = 9409;
+      description = "Port for the Prometheus metrics endpoint.";
+    };
+
+    url = lib.mkOption {
+      type = lib.types.str;
+      default = "http://localhost:8080/api/metrics";
+      description = "llama-swap metrics endpoint URL.";
+    };
+
+    interval = lib.mkOption {
+      type = lib.types.ints.positive;
+      default = 15;
+      description = "Scrape interval in seconds.";
+    };
+  };
+
+  config = lib.mkIf cfg.enable {
+    systemd.services.llama-swap-exporter = {
+      description = "llama-swap Prometheus exporter";
+      wantedBy = [ "multi-user.target" ];
+      after = [ "network.target" ];
+
+      serviceConfig = {
+        Type = "simple";
+        ExecStart = "${exporter}/bin/exporter.py";
+        Restart = "on-failure";
+        RestartSec = "5s";
+        # The exporter only needs network access; run it as an ephemeral
+        # unprivileged user instead of root.
+        DynamicUser = true;
+        Environment = [
+          "PROMETHEUS_PORT=${toString cfg.port}"
+          "LLAMA_SWAP_URL=${cfg.url}"
+          "SCRAPE_INTERVAL=${toString cfg.interval}"
+        ];
+      };
+    };
+  };
+}
diff --git a/servers/eisen/configuration.nix b/servers/eisen/configuration.nix
index 4110f68..c07f8cf 100644
--- a/servers/eisen/configuration.nix
+++ b/servers/eisen/configuration.nix
@@ -21,6 +21,7 @@ let
     grafana = 3002;
     tolgee = 8200;
     # ntfy = 3003;
+    llama-swap = 8080;
   };
   internalPorts = {
     prometheus-node = 9000;
@@ -28,6 +29,7 @@ let
     prometheus-sonarr = 9101;
     prometheus-radarr = 9102;
     prometheus-prowlarr = 9103;
+    prometheus-llama-swap = 9409;
     prometheus = 9090;
   };
 in
@@ -42,6 +44,7 @@ in
   imports = [
     nix-index-database.nixosModules.nix-index
     ./hardware-configuration.nix
+    ../../modules/llama-swap-exporter.nix
   ];
 
   nix = {
@@ -105,6 +108,12 @@ in
       environmentFile = "/etc/secrets/karakeep.env";
     };
 
+    llama-swap-exporter = {
+      enable = true;
+      url = "http://100.120.15.10:${toString ports.llama-swap}/api/metrics";
+      port = internalPorts.prometheus-llama-swap;
+    };
+
     dnsmasq = {
       enable = true;
     };
@@ -185,6 +194,14 @@ in
             }
           ];
         }
+        {
+          job_name = "llama-swap";
+          static_configs = [
+            {
+              targets = [ "localhost:${toString internalPorts.prometheus-llama-swap}" ];
+            }
+          ];
+        }
       ];
     };
 
diff --git a/servers/eisen/llama-swap-exporter/default.nix b/servers/eisen/llama-swap-exporter/default.nix
new file mode 100644
index 0000000..b9603f1
--- /dev/null
+++ b/servers/eisen/llama-swap-exporter/default.nix
@@ -0,0 +1,23 @@
+{
+  lib,
+  pkgs ? import <nixpkgs> { },
+  ...
+}:
+
+pkgs.python3Packages.buildPythonApplication {
+  pname = "llama-swap-exporter";
+  version = "0.1.0";
+
+  src = ./src;
+  build-system = with pkgs.python3Packages; [
+    setuptools
+    setuptools-scm
+  ];
+  pyproject = true;
+  meta = {
+    description = "Prometheus exporter for llama-swap metrics";
+    license = lib.licenses.mit;
+    maintainers = [ ];
+    platforms = lib.platforms.linux;
+  };
+}
diff --git a/servers/eisen/llama-swap-exporter/src/exporter.py b/servers/eisen/llama-swap-exporter/src/exporter.py
new file mode 100644
index 0000000..823ccb9
--- /dev/null
+++ b/servers/eisen/llama-swap-exporter/src/exporter.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python
+
+"""Prometheus exporter for llama-swap metrics endpoint.
+
+Fetches the llama-swap JSON metrics on demand (at most once every
+SCRAPE_INTERVAL seconds) and serves the newest entry on /metrics in the
+Prometheus text exposition format.
+"""
+
+import json
+import os
+import time
+import urllib.request
+from http.server import BaseHTTPRequestHandler, HTTPServer
+
+# Default path matches the default of the NixOS module option `url`.
+LLAMA_SWAP_URL = os.environ.get("LLAMA_SWAP_URL", "http://localhost:8080/api/metrics")
+PROMETHEUS_PORT = int(os.environ.get("PROMETHEUS_PORT", "9409"))
+SCRAPE_INTERVAL = int(os.environ.get("SCRAPE_INTERVAL", "15"))
+
+# Cache: result of the last scrape and the wall-clock time it was taken.
+last_metrics = {}
+last_scrape_time = 0
+
+
+def _as_float(value):
+    """Coerce a JSON value to float; null or non-numeric values become 0.0."""
+    try:
+        return float(value)
+    except (TypeError, ValueError):
+        return 0.0
+
+
+def _escape_label(value):
+    """Escape a label value for the Prometheus text exposition format."""
+    text = str(value)
+    return text.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")
+
+
+def scrape_llama_swap():
+    """Fetch llama-swap metrics and flatten the last entry of the response.
+
+    Returns a dict of metric name -> value (plus "llama_model"), or an empty
+    dict when the request fails or the response is empty.
+    """
+    try:
+        req = urllib.request.Request(LLAMA_SWAP_URL)
+        with urllib.request.urlopen(req, timeout=10) as resp:
+            data = json.loads(resp.read().decode())
+        if not data:
+            print("No data found when scraping (request successful, empty data)")
+            return {}
+        entry = data[-1]
+        return {
+            "llama_cache_tokens": _as_float(entry.get("cache_tokens", 0)),
+            "llama_input_tokens": _as_float(entry.get("input_tokens", 0)),
+            "llama_output_tokens": _as_float(entry.get("output_tokens", 0)),
+            "llama_prompt_per_second": _as_float(entry.get("prompt_per_second", 0)),
+            "llama_tokens_per_second": _as_float(entry.get("tokens_per_second", 0)),
+            "llama_duration_ms": _as_float(entry.get("duration_ms", 0)),
+            "llama_model": entry.get("model", "unknown"),
+            "llama_has_capture": 1 if entry.get("has_capture") else 0,
+        }
+    except Exception as e:
+        # Best effort: a failing upstream must not take the exporter down.
+        print(f"Error scraping llama-swap: {e}")
+        return {}
+
+
+def format_metrics(metrics):
+    """Render a scrape_llama_swap() result as Prometheus exposition text."""
+    if not metrics:
+        metrics = {}
+
+    lines = []
+    model = _escape_label(metrics.get("llama_model", "unknown"))
+
+    for name, value in metrics.items():
+        if name == "llama_model":
+            continue
+        if "second" in name:
+            lines.append(f"# HELP {name} Rate from llama-swap")
+            lines.append(f"# TYPE {name} gauge")
+            lines.append(f'{name}{{model="{model}"}} {value}')
+        elif "tokens" in name:
+            lines.append(f"# HELP {name} Total tokens from llama-swap")
+            lines.append(f"# TYPE {name} gauge")
+            lines.append(f'{name}{{model="{model}"}} {value}')
+        elif "duration" in name:
+            value_s = value / 1000.0
+            lines.append("# HELP llama_duration_seconds Inference duration")
+            lines.append("# TYPE llama_duration_seconds gauge")
+            lines.append(f'llama_duration_seconds{{model="{model}"}} {value_s}')
+        elif name == "llama_has_capture":
+            lines.append("# HELP llama_has_capture Whether capture is available")
+            lines.append("# TYPE llama_has_capture gauge")
+            lines.append(f'llama_has_capture{{model="{model}"}} {value}')
+
+    lines.append(
+        "# HELP llama_last_scrape_timestamp_seconds When metrics were last scraped"
+    )
+    lines.append("# TYPE llama_last_scrape_timestamp_seconds gauge")
+    # Report the cached scrape time, not "now": responses served from the
+    # cache would otherwise look fresher than they are.
+    lines.append(f"llama_last_scrape_timestamp_seconds {last_scrape_time}")
+
+    return "\n".join(lines) + "\n"
+
+
+class MetricsHandler(BaseHTTPRequestHandler):
+    """Serves /metrics (Prometheus text) and /health (liveness)."""
+
+    def do_GET(self):
+        global last_scrape_time, last_metrics
+        path = self.path.split("?", 1)[0]
+
+        if path == "/metrics":
+            # Only /metrics may trigger an upstream scrape, so /health stays
+            # fast even when llama-swap is slow or down.
+            if time.time() - last_scrape_time > SCRAPE_INTERVAL:
+                last_metrics = scrape_llama_swap()
+                last_scrape_time = time.time()
+            self.send_response(200)
+            self.send_header("Content-Type", "text/plain; charset=utf-8")
+            self.end_headers()
+            self.wfile.write(format_metrics(last_metrics).encode())
+        elif path == "/health":
+            self.send_response(200)
+            self.send_header("Content-Type", "text/plain")
+            self.end_headers()
+            self.wfile.write(b"OK")
+        else:
+            self.send_response(404)
+            self.end_headers()
+
+    def log_message(self, format, *args):
+        pass
+
+
+if __name__ == "__main__":
+    print(f"Starting llama-swap exporter on port {PROMETHEUS_PORT}")
+    print(f"Scraping llama-swap from: {LLAMA_SWAP_URL}")
+    server = HTTPServer(("0.0.0.0", PROMETHEUS_PORT), MetricsHandler)
+    server.serve_forever()
+else:
+    print("Exporter loaded")
diff --git a/servers/eisen/llama-swap-exporter/src/setup.py b/servers/eisen/llama-swap-exporter/src/setup.py
new file mode 100644
index 0000000..1528fd4
--- /dev/null
+++ b/servers/eisen/llama-swap-exporter/src/setup.py
@@ -0,0 +1,10 @@
+from setuptools import find_packages, setup
+
+setup(
+    name="llama-swap-exporter",
+    version="0.1.0",
+    # Modules to import from other scripts:
+    packages=find_packages(),
+    # Executables
+    scripts=["exporter.py"],
+)