mirror of
https://github.com/danbulant/dotfiles
synced 2026-05-19 04:18:55 +00:00
basic metrics
This commit is contained in:
parent
1f5861fa72
commit
52a2ea6904
6 changed files with 223 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
result
|
||||
58
modules/llama-swap-exporter.nix
Normal file
58
modules/llama-swap-exporter.nix
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
# NixOS module that runs the llama-swap Prometheus exporter as a systemd
# service. The exporter is configured purely through environment variables
# (PROMETHEUS_PORT, LLAMA_SWAP_URL, SCRAPE_INTERVAL).
{
  config,
  lib,
  pkgs,
  ...
}:

let
  cfg = config.services.llama-swap-exporter;
  exporter = pkgs.callPackage ../servers/eisen/llama-swap-exporter/default.nix { };
in

{
  options.services.llama-swap-exporter = {
    enable = lib.mkEnableOption "llama-swap Prometheus exporter";

    port = lib.mkOption {
      type = lib.types.port;
      default = 9409;
      description = "Port for the Prometheus metrics endpoint.";
    };

    url = lib.mkOption {
      type = lib.types.str;
      default = "http://localhost:8080/api/metrics";
      description = "llama-swap metrics endpoint URL.";
    };

    interval = lib.mkOption {
      # A negative interval is meaningless; reject it at evaluation time.
      type = lib.types.ints.unsigned;
      default = 15;
      description = "Scrape interval in seconds.";
    };
  };

  config = lib.mkIf cfg.enable {
    systemd.services.llama-swap-exporter = {
      description = "llama-swap Prometheus exporter";
      wantedBy = [ "multi-user.target" ];
      after = [ "network.target" ];

      # NixOS-native way to pass environment variables to the unit.
      environment = {
        PROMETHEUS_PORT = toString cfg.port;
        LLAMA_SWAP_URL = cfg.url;
        SCRAPE_INTERVAL = toString cfg.interval;
      };

      serviceConfig = {
        Type = "simple";
        ExecStart = "${exporter}/bin/exporter.py";
        Restart = "on-failure";
        RestartSec = "5s";
        # The exporter only performs HTTP requests and listens on an
        # unprivileged port, so it does not need to run as root.
        # DynamicUser also gives the service a private /tmp.
        DynamicUser = true;
      };
    };
  };
}
|
||||
|
|
@ -21,6 +21,7 @@ let
|
|||
grafana = 3002;
|
||||
tolgee = 8200;
|
||||
# ntfy = 3003;
|
||||
llama-swap = 8080;
|
||||
};
|
||||
internalPorts = {
|
||||
prometheus-node = 9000;
|
||||
|
|
@ -28,6 +29,7 @@ let
|
|||
prometheus-sonarr = 9101;
|
||||
prometheus-radarr = 9102;
|
||||
prometheus-prowlarr = 9103;
|
||||
prometheus-llama-swap = 9409;
|
||||
prometheus = 9090;
|
||||
};
|
||||
in
|
||||
|
|
@ -42,6 +44,7 @@ in
|
|||
imports = [
|
||||
nix-index-database.nixosModules.nix-index
|
||||
./hardware-configuration.nix
|
||||
../../modules/llama-swap-exporter.nix
|
||||
];
|
||||
|
||||
nix = {
|
||||
|
|
@ -105,6 +108,12 @@ in
|
|||
environmentFile = "/etc/secrets/karakeep.env";
|
||||
};
|
||||
|
||||
llama-swap-exporter = {
|
||||
enable = true;
|
||||
url = "http://100.120.15.10:${toString ports.llama-swap}/api/metrics";
|
||||
port = internalPorts.prometheus-llama-swap;
|
||||
};
|
||||
|
||||
dnsmasq = {
|
||||
enable = true;
|
||||
};
|
||||
|
|
@ -185,6 +194,14 @@ in
|
|||
}
|
||||
];
|
||||
}
|
||||
{
|
||||
job_name = "llama-swap";
|
||||
static_configs = [
|
||||
{
|
||||
targets = [ "localhost:${toString internalPorts.prometheus-llama-swap}" ];
|
||||
}
|
||||
];
|
||||
}
|
||||
];
|
||||
};
|
||||
|
||||
|
|
|
|||
23
servers/eisen/llama-swap-exporter/default.nix
Normal file
23
servers/eisen/llama-swap-exporter/default.nix
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
# Package definition for the llama-swap Prometheus exporter.
#
# Usable through `pkgs.callPackage` (which supplies `pkgs` and `lib`) and
# standalone: both arguments carry defaults, so the file can be auto-called.
{
  pkgs ? import <nixpkgs> { },
  lib ? pkgs.lib,
  ...
}:

pkgs.python3Packages.buildPythonApplication {
  pname = "llama-swap-exporter";
  version = "0.1.0";

  src = ./src;
  pyproject = true;

  # setup.py hard-codes its version, so setuptools alone is sufficient.
  build-system = [ pkgs.python3Packages.setuptools ];

  meta = {
    description = "Prometheus exporter for llama-swap metrics";
    license = lib.licenses.mit;
    maintainers = [ ];
    platforms = lib.platforms.linux;
    # setup.py installs the script under this name (see `scripts=`).
    mainProgram = "exporter.py";
  };
}
|
||||
114
servers/eisen/llama-swap-exporter/src/exporter.py
Normal file
114
servers/eisen/llama-swap-exporter/src/exporter.py
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
"""Prometheus exporter for llama-swap metrics endpoint."""
|
||||
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import urllib.request
|
||||
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||
|
||||
# URL of llama-swap's JSON metrics endpoint. The default matches the default
# of the accompanying NixOS module option (`/api/metrics`).
LLAMA_SWAP_URL = os.environ.get("LLAMA_SWAP_URL", "http://localhost:8080/api/metrics")
# TCP port this exporter's own HTTP server listens on.
PROMETHEUS_PORT = int(os.environ.get("PROMETHEUS_PORT", "9409"))
# Minimum number of seconds between two upstream scrapes.
SCRAPE_INTERVAL = int(os.environ.get("SCRAPE_INTERVAL", "15"))

# Cache shared with the HTTP handler: the most recent scrape result and the
# time (seconds since the epoch) at which it was taken. 0 means "never".
last_metrics = {}
last_scrape_time = 0
|
||||
|
||||
|
||||
def _as_float(value, default=0.0):
    """Coerce a decoded JSON value to ``float``.

    Returns ``default`` when the value is ``None`` or cannot be converted,
    so a single malformed field does not discard the whole scrape.
    """
    if value is None:
        return default
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def scrape_llama_swap(url=None, timeout=10):
    """Fetch the llama-swap metrics endpoint and summarise its last entry.

    Args:
        url: Endpoint to query. Defaults to the module-level
            ``LLAMA_SWAP_URL``.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        A dict with the keys ``llama_cache_tokens``, ``llama_input_tokens``,
        ``llama_output_tokens``, ``llama_prompt_per_second``,
        ``llama_tokens_per_second``, ``llama_duration_ms`` (all ``float``),
        ``llama_model`` (``str``) and ``llama_has_capture`` (``0`` or ``1``),
        built from the last element of the JSON list the endpoint returns.
        An empty dict is returned when the request fails, the body is not
        valid JSON, the payload is empty, or it is not a list of objects.
    """
    if url is None:
        url = LLAMA_SWAP_URL

    try:
        req = urllib.request.Request(url)
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            data = json.loads(resp.read().decode())
    except Exception as e:
        # Deliberately broad: a failed scrape must never take down the
        # exporter; the caller just serves an empty metric set.
        print(f"Error scraping llama-swap: {e}", flush=True)
        return {}

    if not data:
        print(
            "No data found when scraping (request successful, empty data)",
            flush=True,
        )
        return {}

    if not isinstance(data, list) or not isinstance(data[-1], dict):
        print(
            "Error scraping llama-swap: unexpected payload shape "
            f"({type(data).__name__})",
            flush=True,
        )
        return {}

    entry = data[-1]
    return {
        "llama_cache_tokens": _as_float(entry.get("cache_tokens")),
        "llama_input_tokens": _as_float(entry.get("input_tokens")),
        "llama_output_tokens": _as_float(entry.get("output_tokens")),
        "llama_prompt_per_second": _as_float(entry.get("prompt_per_second")),
        "llama_tokens_per_second": _as_float(entry.get("tokens_per_second")),
        "llama_duration_ms": _as_float(entry.get("duration_ms")),
        # Fall back to "unknown" for a missing, null or empty model name.
        "llama_model": str(entry.get("model") or "unknown"),
        "llama_has_capture": 1 if entry.get("has_capture") else 0,
    }
|
||||
|
||||
|
||||
def _escape_label_value(value):
    """Escape a label value for the Prometheus text exposition format.

    Backslash, newline and double quote are the characters that must be
    escaped inside a quoted label value.
    """
    return (
        str(value)
        .replace("\\", "\\\\")
        .replace("\n", "\\n")
        .replace('"', '\\"')
    )


def format_metrics(metrics, scrape_time=None):
    """Render scraped values in the Prometheus text exposition format.

    Args:
        metrics: Mapping as produced by ``scrape_llama_swap``; ``None`` or an
            empty mapping yields only the scrape-timestamp metric.
        scrape_time: Unix timestamp reported as
            ``llama_last_scrape_timestamp_seconds``. Defaults to the current
            time when omitted.

    Returns:
        The exposition text, terminated by a newline. Every sample except the
        timestamp carries a ``model`` label. ``llama_duration_ms`` is exported
        in seconds as ``llama_duration_seconds``. Entries whose value is not a
        number, or whose name matches none of the known patterns, are skipped.
    """
    metrics = metrics or {}
    if scrape_time is None:
        scrape_time = time.time()

    model = _escape_label_value(metrics.get("llama_model", "unknown"))
    labels = f'{{model="{model}"}}'
    lines = []

    for name, value in metrics.items():
        if name == "llama_model":
            continue
        if isinstance(value, bool):
            value = int(value)
        elif not isinstance(value, (int, float)):
            # e.g. None from a null upstream field: not a valid sample value.
            continue

        # Order matters: "llama_tokens_per_second" contains both "second" and
        # "tokens" and must be classified as a rate.
        if "second" in name:
            out_name, help_text = name, "Rate from llama-swap"
        elif "tokens" in name:
            out_name, help_text = name, "Total tokens from llama-swap"
        elif "duration" in name:
            out_name, help_text = "llama_duration_seconds", "Inference duration"
            value = value / 1000.0  # milliseconds -> seconds
        elif name == "llama_has_capture":
            out_name, help_text = name, "Whether capture is available"
        else:
            continue

        lines.append(f"# HELP {out_name} {help_text}")
        lines.append(f"# TYPE {out_name} gauge")
        lines.append(f"{out_name}{labels} {value}")

    lines.append(
        "# HELP llama_last_scrape_timestamp_seconds When metrics were last scraped"
    )
    lines.append("# TYPE llama_last_scrape_timestamp_seconds gauge")
    lines.append(f"llama_last_scrape_timestamp_seconds {scrape_time}")

    return "\n".join(lines) + "\n"


# Reset the cache; it is filled by the first /metrics request.
last_metrics = None


class MetricsHandler(BaseHTTPRequestHandler):
    """HTTP handler exposing ``/metrics`` and ``/health``.

    ``/metrics`` serves the cached scrape result and refreshes it from
    llama-swap when it is older than ``SCRAPE_INTERVAL`` seconds. ``/health``
    always answers ``OK``. Every other path gets a 404.
    """

    def do_GET(self):
        """Dispatch a GET request by path, ignoring any query string."""
        global last_scrape_time, last_metrics
        route = self.path.split("?", 1)[0]

        if route == "/metrics":
            # Only the metrics route needs upstream data; health checks and
            # unknown paths must not trigger (or wait for) a scrape.
            if time.time() - last_scrape_time > SCRAPE_INTERVAL:
                last_metrics = scrape_llama_swap()
                last_scrape_time = time.time()
            body = format_metrics(last_metrics, scrape_time=last_scrape_time)
            self._reply(
                200,
                "text/plain; version=0.0.4; charset=utf-8",
                body.encode(),
            )
        elif route == "/health":
            self._reply(200, "text/plain", b"OK")
        else:
            self.send_response(404)
            self.end_headers()

    def _reply(self, status, content_type, body):
        """Send a complete response with the given status, type and body."""
        self.send_response(status)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, format, *args):
        """Suppress the default per-request access log."""
        pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # flush=True: stdout is block-buffered when it is not a terminal (as when
    # run as a service), which would delay these startup messages.
    print(f"Starting llama-swap exporter on port {PROMETHEUS_PORT}", flush=True)
    print(f"Scraping llama-swap from: {LLAMA_SWAP_URL}", flush=True)
    server = HTTPServer(("0.0.0.0", PROMETHEUS_PORT), MetricsHandler)
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C is a normal way to stop the exporter; exit without a traceback.
        pass
    finally:
        # Release the listening socket on every exit path.
        server.server_close()
else:
    print("Exporter loaded")
|
||||
10
servers/eisen/llama-swap-exporter/src/setup.py
Normal file
10
servers/eisen/llama-swap-exporter/src/setup.py
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
from setuptools import find_packages, setup
|
||||
|
||||
setup(
    name="llama-swap-exporter",
    # Keep in sync with `version` in ../default.nix.
    version="0.1.0",
    # Modules to import from other scripts:
    packages=find_packages(),
    # Executables
    scripts=["exporter.py"],
)
|
||||
Loading…
Reference in a new issue