mirror of
https://github.com/danbulant/dotfiles
synced 2026-06-24 17:22:10 +00:00
basic metrics
This commit is contained in:
parent
1f5861fa72
commit
52a2ea6904
6 changed files with 223 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
result
|
||||||
58
modules/llama-swap-exporter.nix
Normal file
58
modules/llama-swap-exporter.nix
Normal file
|
|
@ -0,0 +1,58 @@
|
||||||
|
# NixOS module for the llama-swap Prometheus exporter.
#
# Declares the `services.llama-swap-exporter.*` options and, when enabled,
# a systemd unit that runs the packaged exporter script. Settings are
# handed to the script through environment variables.
{
  config,
  lib,
  pkgs,
  ...
}:

let
  cfg = config.services.llama-swap-exporter;
  exporter = pkgs.callPackage ../servers/eisen/llama-swap-exporter/default.nix { };
in

{
  options.services.llama-swap-exporter = {
    enable = lib.mkEnableOption "llama-swap Prometheus exporter";

    port = lib.mkOption {
      type = lib.types.port;
      default = 9409;
      description = "Port for the Prometheus metrics endpoint.";
    };

    url = lib.mkOption {
      type = lib.types.str;
      default = "http://localhost:8080/api/metrics";
      description = "llama-swap metrics endpoint URL.";
    };

    interval = lib.mkOption {
      # Unsigned rather than plain int: a negative interval is meaningless,
      # while 0 (fetch on every request) stays valid.
      type = lib.types.ints.unsigned;
      default = 15;
      description = "Scrape interval in seconds.";
    };
  };

  config = lib.mkIf cfg.enable {
    systemd.services.llama-swap-exporter = {
      description = "llama-swap Prometheus exporter";
      wantedBy = [ "multi-user.target" ];
      after = [ "network.target" ];

      serviceConfig = {
        Type = "simple";
        ExecStart = "${exporter}/bin/exporter.py";
        Restart = "on-failure";
        RestartSec = "5s";

        # The exporter only issues HTTP requests and listens on an
        # unprivileged port, so it does not need root. DynamicUser gives it
        # a throwaway account and enables systemd's sandboxing defaults
        # (including a private /tmp).
        DynamicUser = true;

        Environment = [
          "PROMETHEUS_PORT=${toString cfg.port}"
          "LLAMA_SWAP_URL=${cfg.url}"
          "SCRAPE_INTERVAL=${toString cfg.interval}"
          # The script logs with print(); unbuffered stdout makes those
          # lines reach the journal immediately.
          "PYTHONUNBUFFERED=1"
        ];
      };
    };
  };
}
|
||||||
|
|
@ -21,6 +21,7 @@ let
|
||||||
grafana = 3002;
|
grafana = 3002;
|
||||||
tolgee = 8200;
|
tolgee = 8200;
|
||||||
# ntfy = 3003;
|
# ntfy = 3003;
|
||||||
|
llama-swap = 8080;
|
||||||
};
|
};
|
||||||
internalPorts = {
|
internalPorts = {
|
||||||
prometheus-node = 9000;
|
prometheus-node = 9000;
|
||||||
|
|
@ -28,6 +29,7 @@ let
|
||||||
prometheus-sonarr = 9101;
|
prometheus-sonarr = 9101;
|
||||||
prometheus-radarr = 9102;
|
prometheus-radarr = 9102;
|
||||||
prometheus-prowlarr = 9103;
|
prometheus-prowlarr = 9103;
|
||||||
|
prometheus-llama-swap = 9409;
|
||||||
prometheus = 9090;
|
prometheus = 9090;
|
||||||
};
|
};
|
||||||
in
|
in
|
||||||
|
|
@ -42,6 +44,7 @@ in
|
||||||
imports = [
|
imports = [
|
||||||
nix-index-database.nixosModules.nix-index
|
nix-index-database.nixosModules.nix-index
|
||||||
./hardware-configuration.nix
|
./hardware-configuration.nix
|
||||||
|
../../modules/llama-swap-exporter.nix
|
||||||
];
|
];
|
||||||
|
|
||||||
nix = {
|
nix = {
|
||||||
|
|
@ -105,6 +108,12 @@ in
|
||||||
environmentFile = "/etc/secrets/karakeep.env";
|
environmentFile = "/etc/secrets/karakeep.env";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
llama-swap-exporter = {
|
||||||
|
enable = true;
|
||||||
|
url = "http://100.120.15.10:${toString ports.llama-swap}/api/metrics";
|
||||||
|
port = internalPorts.prometheus-llama-swap;
|
||||||
|
};
|
||||||
|
|
||||||
dnsmasq = {
|
dnsmasq = {
|
||||||
enable = true;
|
enable = true;
|
||||||
};
|
};
|
||||||
|
|
@ -185,6 +194,14 @@ in
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
job_name = "llama-swap";
|
||||||
|
static_configs = [
|
||||||
|
{
|
||||||
|
targets = [ "localhost:${toString internalPorts.prometheus-llama-swap}" ];
|
||||||
|
}
|
||||||
|
];
|
||||||
|
}
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
23
servers/eisen/llama-swap-exporter/default.nix
Normal file
23
servers/eisen/llama-swap-exporter/default.nix
Normal file
|
|
@ -0,0 +1,23 @@
|
||||||
|
# Package definition for the llama-swap Prometheus exporter script.
#
# Builds ./src (a setup.py project that installs exporter.py as a script)
# with the nixpkgs Python application builder.
{
  lib,
  pkgs ? import <nixpkgs> { },
  ...
}:

pkgs.python3Packages.buildPythonApplication {
  pname = "llama-swap-exporter";
  version = "0.1.0";

  src = ./src;
  pyproject = true;

  # setup.py only needs setuptools; it does not derive its version from
  # SCM metadata, so setuptools-scm is not required.
  build-system = [ pkgs.python3Packages.setuptools ];

  meta = {
    description = "Prometheus exporter for llama-swap metrics";
    license = lib.licenses.mit;
    maintainers = [ ];
    platforms = lib.platforms.linux;
    # Name of the installed executable in $out/bin.
    mainProgram = "exporter.py";
  };
}
|
||||||
114
servers/eisen/llama-swap-exporter/src/exporter.py
Normal file
114
servers/eisen/llama-swap-exporter/src/exporter.py
Normal file
|
|
@ -0,0 +1,114 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
"""Prometheus exporter for llama-swap metrics endpoint."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import urllib.request
|
||||||
|
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||||
|
|
||||||
|
# Runtime configuration, read once at import time from the environment.
# The fallback URL uses the /api/metrics path, matching the default of the
# NixOS module that launches this script.
LLAMA_SWAP_URL = os.environ.get("LLAMA_SWAP_URL", "http://localhost:8080/api/metrics")
PROMETHEUS_PORT = int(os.environ.get("PROMETHEUS_PORT", "9409"))
SCRAPE_INTERVAL = int(os.environ.get("SCRAPE_INTERVAL", "15"))

# Cache shared with the request handler: the most recent scrape result and
# the time.time() value at which it was taken (0 forces a scrape on the
# first request).
last_metrics = {}
last_scrape_time = 0
|
||||||
|
|
||||||
|
|
||||||
|
def _as_float(raw, default=0.0):
    """Coerce a JSON scalar to float, mapping JSON null to *default*."""
    if raw is None:
        return default
    return float(raw)


def scrape_llama_swap():
    """Fetch LLAMA_SWAP_URL and return the newest entry as a metrics dict.

    The endpoint is expected to return a JSON array of per-request records;
    only the last element is used.

    Returns:
        A dict keyed by exported metric name (plus ``"llama_model"``), or an
        empty dict when the request fails, the payload is empty, or the
        payload does not have the expected shape. Errors are printed, never
        raised, so one bad scrape cannot take the HTTP handler down.
    """
    try:
        req = urllib.request.Request(LLAMA_SWAP_URL)
        with urllib.request.urlopen(req, timeout=10) as resp:
            data = json.loads(resp.read().decode())

        if not data:
            print(
                "No data found when scraping (request successful, empty data)",
                flush=True,
            )
            return {}

        if not isinstance(data, list) or not isinstance(data[-1], dict):
            print(
                "Error scraping llama-swap: unexpected payload shape "
                f"({type(data).__name__})",
                flush=True,
            )
            return {}

        entry = data[-1]
        return {
            "llama_cache_tokens": _as_float(entry.get("cache_tokens")),
            "llama_input_tokens": _as_float(entry.get("input_tokens")),
            "llama_output_tokens": _as_float(entry.get("output_tokens")),
            # Rates are coerced like every other numeric field so that a
            # null or string value can never reach the exposition output.
            "llama_prompt_per_second": _as_float(entry.get("prompt_per_second")),
            "llama_tokens_per_second": _as_float(entry.get("tokens_per_second")),
            "llama_duration_ms": _as_float(entry.get("duration_ms")),
            "llama_model": entry.get("model") or "unknown",
            "llama_has_capture": 1 if entry.get("has_capture") else 0,
        }
    except Exception as e:
        # Deliberately broad: this is the boundary between the upstream
        # service and the exporter; network, decoding and conversion errors
        # are all reported the same way.
        print(f"Error scraping llama-swap: {e}", flush=True)
        return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _escape_label_value(raw):
    """Escape a label value for the Prometheus text exposition format."""
    return (
        str(raw)
        .replace("\\", "\\\\")  # backslash first, so later escapes survive
        .replace('"', '\\"')
        .replace("\n", "\\n")
    )


def format_metrics(metrics):
    """Render a scraped metrics dict as Prometheus text exposition output.

    Args:
        metrics: Mapping of metric name to value as produced by the scraper;
            the ``"llama_model"`` entry becomes the ``model`` label on every
            sample. ``None`` or an empty mapping yields only the timestamp
            metric.

    Returns:
        The exposition text, newline-terminated. Every sample is a gauge.
        ``llama_duration_ms`` is exported as ``llama_duration_seconds``.
        Entries whose value is ``None`` or whose name matches no known
        family are omitted.
    """
    metrics = metrics or {}
    model = _escape_label_value(metrics.get("llama_model", "unknown"))
    labels = f'{{model="{model}"}}'

    lines = []
    for name, value in metrics.items():
        if name == "llama_model" or value is None:
            continue

        # Family is chosen by substring; order matters because
        # "llama_tokens_per_second" contains both "second" and "tokens".
        if "second" in name:
            exported, help_text = name, "Rate from llama-swap"
        elif "tokens" in name:
            exported, help_text = name, "Total tokens from llama-swap"
        elif "duration" in name:
            exported, help_text = "llama_duration_seconds", "Inference duration"
            value = value / 1000.0  # milliseconds -> seconds
        elif name == "llama_has_capture":
            exported, help_text = name, "Whether capture is available"
        else:
            continue

        lines.append(f"# HELP {exported} {help_text}")
        lines.append(f"# TYPE {exported} gauge")
        lines.append(f"{exported}{labels} {value}")

    # NOTE(review): this reports the time of formatting, not the time of the
    # cached scrape, so it advances even while cached data is being served.
    lines.append(
        "# HELP llama_last_scrape_timestamp_seconds When metrics were last scraped"
    )
    lines.append("# TYPE llama_last_scrape_timestamp_seconds gauge")
    lines.append(f"llama_last_scrape_timestamp_seconds {time.time()}")

    return "\n".join(lines) + "\n"
|
||||||
|
|
||||||
|
|
||||||
|
# Result of the most recent scrape; None until a /metrics request fills it.
last_metrics = None


class MetricsHandler(BaseHTTPRequestHandler):
    """HTTP handler exposing ``/metrics`` and ``/health``.

    ``/metrics`` serves the cached scrape result, refreshing it from
    llama-swap when the cache is older than SCRAPE_INTERVAL seconds.
    ``/health`` answers ``OK`` without contacting llama-swap. Any other path
    returns 404. Query strings are ignored when matching the path.
    """

    def do_GET(self):
        """Dispatch a GET request by path."""
        global last_scrape_time, last_metrics

        route = self.path.split("?", 1)[0]

        if route == "/metrics":
            # Refresh only here, so health probes and stray requests never
            # wait on (or trigger) an upstream fetch.
            if time.time() - last_scrape_time > SCRAPE_INTERVAL:
                last_metrics = scrape_llama_swap()
                last_scrape_time = time.time()
            self._reply(
                200,
                format_metrics(last_metrics).encode(),
                "text/plain; charset=utf-8",
            )
        elif route == "/health":
            self._reply(200, b"OK", "text/plain")
        else:
            self._reply(404)

    def _reply(self, status, body=b"", content_type=None):
        """Send a complete response with an explicit Content-Length."""
        self.send_response(status)
        if content_type is not None:
            self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        if body:
            self.wfile.write(body)

    def log_message(self, format, *args):
        """Suppress the default per-request access log."""
        pass
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Flush explicitly: when stdout is not a terminal (e.g. under a service
    # manager) Python block-buffers it and these lines would otherwise sit
    # in the buffer while serve_forever() runs.
    print(f"Starting llama-swap exporter on port {PROMETHEUS_PORT}", flush=True)
    print(f"Scraping llama-swap from: {LLAMA_SWAP_URL}", flush=True)
    server = HTTPServer(("0.0.0.0", PROMETHEUS_PORT), MetricsHandler)
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C is a normal way to stop a foreground run; exit quietly.
        pass
    finally:
        # Release the listening socket on every exit path.
        server.server_close()
else:
    print("Exporter loaded")
|
||||||
10
servers/eisen/llama-swap-exporter/src/setup.py
Normal file
10
servers/eisen/llama-swap-exporter/src/setup.py
Normal file
|
|
@ -0,0 +1,10 @@
|
||||||
|
"""Packaging script: installs exporter.py as an executable script."""

from setuptools import find_packages, setup

setup(
    name="llama-swap-exporter",
    # Kept in step with the version declared by the Nix package definition.
    version="0.1.0",
    # Modules to import from other scripts:
    packages=find_packages(),
    # Executables
    scripts=["exporter.py"],
)
|
||||||
Loading…
Reference in a new issue