basic metrics

This commit is contained in:
Daniel Bulant 2026-04-25 12:32:18 +02:00
parent 1f5861fa72
commit 52a2ea6904
No known key found for this signature in database
6 changed files with 223 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
result

View file

@ -0,0 +1,58 @@
# NixOS module that runs the llama-swap Prometheus exporter as a systemd
# service. The exporter is configured purely through environment variables
# (PROMETHEUS_PORT, LLAMA_SWAP_URL, SCRAPE_INTERVAL).
{
  config,
  lib,
  pkgs,
  ...
}:
let
  cfg = config.services.llama-swap-exporter;
  exporter = pkgs.callPackage ../servers/eisen/llama-swap-exporter/default.nix { };
in
{
  options.services.llama-swap-exporter = {
    enable = lib.mkEnableOption "llama-swap Prometheus exporter";
    port = lib.mkOption {
      type = lib.types.port;
      default = 9409;
      description = "Port for the Prometheus metrics endpoint.";
    };
    url = lib.mkOption {
      type = lib.types.str;
      default = "http://localhost:8080/api/metrics";
      description = "llama-swap metrics endpoint URL.";
    };
    interval = lib.mkOption {
      type = lib.types.int;
      default = 15;
      description = "Scrape interval in seconds.";
    };
  };
  config = lib.mkIf cfg.enable {
    systemd.services.llama-swap-exporter = {
      description = "llama-swap Prometheus exporter";
      wantedBy = [ "multi-user.target" ];
      after = [ "network.target" ];
      serviceConfig = {
        Type = "simple";
        ExecStart = "${exporter}/bin/exporter.py";
        Restart = "on-failure";
        RestartSec = "5s";
        # The exporter only listens on an unprivileged port and issues
        # outbound HTTP requests, so it does not need root. DynamicUser
        # allocates a throw-away unprivileged user for the unit, which also
        # makes an explicit writable /tmp grant unnecessary.
        DynamicUser = true;
        Environment = [
          "PROMETHEUS_PORT=${toString cfg.port}"
          "LLAMA_SWAP_URL=${cfg.url}"
          "SCRAPE_INTERVAL=${toString cfg.interval}"
          # Python block-buffers stdout when it is not a terminal; disable
          # that so the exporter's print() output reaches the journal promptly.
          "PYTHONUNBUFFERED=1"
        ];
      };
    };
  };
}

View file

@ -21,6 +21,7 @@ let
grafana = 3002;
tolgee = 8200;
# ntfy = 3003;
llama-swap = 8080;
};
internalPorts = {
prometheus-node = 9000;
@ -28,6 +29,7 @@ let
prometheus-sonarr = 9101;
prometheus-radarr = 9102;
prometheus-prowlarr = 9103;
prometheus-llama-swap = 9409;
prometheus = 9090;
};
in
@ -42,6 +44,7 @@ in
imports = [
nix-index-database.nixosModules.nix-index
./hardware-configuration.nix
../../modules/llama-swap-exporter.nix
];
nix = {
@ -105,6 +108,12 @@ in
environmentFile = "/etc/secrets/karakeep.env";
};
llama-swap-exporter = {
enable = true;
url = "http://100.120.15.10:${toString ports.llama-swap}/api/metrics";
port = internalPorts.prometheus-llama-swap;
};
dnsmasq = {
enable = true;
};
@ -185,6 +194,14 @@ in
}
];
}
{
job_name = "llama-swap";
static_configs = [
{
targets = [ "localhost:${toString internalPorts.prometheus-llama-swap}" ];
}
];
}
];
};

View file

@ -0,0 +1,23 @@
# Package definition for the llama-swap Prometheus exporter.
#
# Works both through `callPackage` (which supplies `lib` and `pkgs`) and when
# evaluated directly, in which case `pkgs` is taken from <nixpkgs> and `lib`
# from that package set.
{
  pkgs ? import <nixpkgs> { },
  # Defaulted so that a direct evaluation does not fail on a missing argument.
  lib ? pkgs.lib,
  ...
}:
pkgs.python3Packages.buildPythonApplication {
  pname = "llama-swap-exporter";
  version = "0.1.0";
  # ./src holds exporter.py and the setup.py that installs it as a script.
  src = ./src;
  build-system = with pkgs.python3Packages; [
    setuptools
    setuptools-scm
  ];
  pyproject = true;
  meta = {
    description = "Prometheus exporter for llama-swap metrics";
    license = lib.licenses.mit;
    maintainers = [ ];
    platforms = lib.platforms.linux;
  };
}

View file

@ -0,0 +1,114 @@
#!/usr/bin/env python
"""Prometheus exporter for llama-swap metrics endpoint."""
import json
import os
import time
import urllib.request
from http.server import BaseHTTPRequestHandler, HTTPServer
# Runtime configuration, read once from the environment at import time.

# llama-swap endpoint that returns the JSON metrics list. The default path is
# /api/metrics, the same path the NixOS module uses for its `url` option.
LLAMA_SWAP_URL = os.environ.get("LLAMA_SWAP_URL", "http://localhost:8080/api/metrics")
# TCP port on which this exporter serves /metrics and /health.
PROMETHEUS_PORT = int(os.environ.get("PROMETHEUS_PORT", "9409"))
# Minimum number of seconds between two requests to llama-swap.
SCRAPE_INTERVAL = int(os.environ.get("SCRAPE_INTERVAL", "15"))

# Cache shared with the request handler: the most recent scrape result and the
# time.time() value at which it was taken (0 means "never scraped").
last_metrics = {}
last_scrape_time = 0
def _as_float(raw, default=0.0):
    """Coerce a decoded JSON value to float; null or non-numeric input yields ``default``."""
    try:
        return float(raw)
    except (TypeError, ValueError):
        return default


def scrape_llama_swap():
    """Fetch the llama-swap metrics list and flatten its newest entry.

    Requests ``LLAMA_SWAP_URL`` (10 second timeout), decodes the JSON body and
    reads the last element of the result.

    Returns:
        A dict keyed by exporter metric name. Every numeric field is a float;
        ``llama_model`` is passed through as received and
        ``llama_has_capture`` is 1 or 0. An empty dict is returned when the
        response holds no data or when anything goes wrong, in which case the
        problem is printed instead of raised.
    """
    try:
        req = urllib.request.Request(LLAMA_SWAP_URL)
        with urllib.request.urlopen(req, timeout=10) as resp:
            data = json.loads(resp.read().decode())
        if not data:
            print("No data found when scraping (request successful, empty data)")
            return {}
        # Only the last element of the returned list is exported.
        entry = data[-1]
        return {
            "llama_cache_tokens": _as_float(entry.get("cache_tokens", 0)),
            "llama_input_tokens": _as_float(entry.get("input_tokens", 0)),
            "llama_output_tokens": _as_float(entry.get("output_tokens", 0)),
            # Rates are coerced like every other number so that a JSON null or
            # string can never end up verbatim in the exposition output.
            "llama_prompt_per_second": _as_float(entry.get("prompt_per_second", 0)),
            "llama_tokens_per_second": _as_float(entry.get("tokens_per_second", 0)),
            "llama_duration_ms": _as_float(entry.get("duration_ms", 0)),
            "llama_model": entry.get("model", "unknown"),
            "llama_has_capture": 1 if entry.get("has_capture") else 0,
        }
    except Exception as e:
        # Deliberately broad: a failed scrape must not take the exporter down.
        print(f"Error scraping llama-swap: {e}")
        return {}
def format_metrics(metrics):
    """Render scraped values as Prometheus text exposition output.

    Args:
        metrics: Mapping of metric name to value; ``None`` or an empty
            mapping is accepted. The ``llama_model`` entry is not emitted as
            a sample of its own but becomes the ``model`` label of every
            other sample. Entries whose value is ``None`` are skipped.

    Returns:
        The exposition text. It always ends with a newline and always
        contains the ``llama_last_scrape_timestamp_seconds`` sample.
    """
    if not metrics:
        metrics = {}
    # Label values must have backslash, double quote and newline escaped,
    # otherwise an unusual model name corrupts the whole output.
    model = (
        str(metrics.get("llama_model", "unknown"))
        .replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
    )
    labels = f'{{model="{model}"}}'
    lines = []
    for name, value in metrics.items():
        if name == "llama_model":
            continue
        if value is None:
            # "None" is not a valid sample value; omit the metric instead.
            continue
        if "second" in name:
            lines.append(f"# HELP {name} Rate from llama-swap")
            lines.append(f"# TYPE {name} gauge")
            lines.append(f"{name}{labels} {value}")
        elif "tokens" in name:
            lines.append(f"# HELP {name} Total tokens from llama-swap")
            lines.append(f"# TYPE {name} gauge")
            lines.append(f"{name}{labels} {value}")
        elif "duration" in name:
            # The value arrives in milliseconds and is exported in seconds
            # under a fixed metric name.
            value_s = value / 1000.0
            lines.append("# HELP llama_duration_seconds Inference duration")
            lines.append("# TYPE llama_duration_seconds gauge")
            lines.append(f"llama_duration_seconds{labels} {value_s}")
        elif name == "llama_has_capture":
            lines.append("# HELP llama_has_capture Whether capture is available")
            lines.append("# TYPE llama_has_capture gauge")
            lines.append(f"llama_has_capture{labels} {value}")
    lines.append(
        "# HELP llama_last_scrape_timestamp_seconds When metrics were last scraped"
    )
    lines.append("# TYPE llama_last_scrape_timestamp_seconds gauge")
    # NOTE(review): this is the time of rendering, not of the upstream scrape;
    # the two differ whenever a cached result is served.
    lines.append(f"llama_last_scrape_timestamp_seconds {time.time()}")
    return "\n".join(lines) + "\n"
class MetricsHandler(BaseHTTPRequestHandler):
    """HTTP handler exposing ``/metrics`` and ``/health``.

    The scrape cache lives in the module-level ``last_metrics`` and
    ``last_scrape_time`` variables that are initialised together with the
    other module settings; this class only reads and refreshes them.
    """

    def do_GET(self):
        """Serve one GET request.

        ``/metrics`` returns the exposition text, refreshing the cached
        scrape first when it is older than ``SCRAPE_INTERVAL`` seconds.
        ``/health`` answers ``OK``. Any other path yields 404.
        """
        global last_scrape_time, last_metrics
        # Ignore a query string so that e.g. "/metrics?x=1" is still routed.
        route = self.path.split("?", 1)[0]
        if route == "/metrics":
            # Refresh only here: health probes and unknown paths must not
            # trigger (and wait for) a request to llama-swap.
            if time.time() - last_scrape_time > SCRAPE_INTERVAL:
                last_metrics = scrape_llama_swap()
                last_scrape_time = time.time()
            self.send_response(200)
            self.send_header("Content-Type", "text/plain; charset=utf-8")
            self.end_headers()
            self.wfile.write(format_metrics(last_metrics).encode())
        elif route == "/health":
            self.send_response(200)
            self.send_header("Content-Type", "text/plain")
            self.end_headers()
            self.wfile.write(b"OK")
        else:
            self.send_response(404)
            self.end_headers()

    def log_message(self, format, *args):
        """Suppress the base class's per-request logging."""
        pass
if __name__ != "__main__":
    # Imported as a module: nothing is started, only the load is reported.
    print("Exporter loaded")
else:
    # Executed as a script: report the effective settings, then listen on
    # every interface and serve requests until the process is stopped.
    print(f"Starting llama-swap exporter on port {PROMETHEUS_PORT}")
    print(f"Scraping llama-swap from: {LLAMA_SWAP_URL}")
    server = HTTPServer(("0.0.0.0", PROMETHEUS_PORT), MetricsHandler)
    server.serve_forever()

View file

@ -0,0 +1,10 @@
"""Packaging script that installs ``exporter.py`` as an executable script."""
from setuptools import find_packages, setup

setup(
    name="llama-swap-exporter",
    # Same version as the one declared by the Nix derivation for this source.
    version="0.1.0",
    # Modules to import from other scripts:
    packages=find_packages(),
    # Executables
    scripts=["exporter.py"],
)