Compare commits

..

4 commits

Author SHA1 Message Date
Daniel Bulant
52a2ea6904
basic metrics 2026-04-25 12:32:18 +02:00
Daniel Bulant
1f5861fa72
disable auto update 2026-04-25 11:31:16 +02:00
Daniel Bulant
b038e2ff25
opencode config 2026-04-25 10:48:43 +02:00
Daniel Bulant
81a32d1f38
add cuda cache 2026-04-23 23:38:46 +02:00
10 changed files with 307 additions and 0 deletions

View file

@ -104,6 +104,7 @@ exec-once = fcitx5
# Programs started through exec-once entries.
exec-once = easyeffects --gapplication-service
exec-once = dms run
exec-once = voxtype daemon
# OpenRGB is launched with --startminimized.
exec-once = openrgb --startminimized
# Commented-out swaylock invocation; the active $swaylock value is set on the next line.
#$swaylock = swaylock --screenshots --clock --indicator --effect-blur 6x6 --fade-in 0.2 --ring-color 4e9dc2 --key-hl-color 71b0ce
# $swaylock currently expands to hyprlock.
$swaylock = hyprlock

View file

@ -0,0 +1,72 @@
// opencode configuration: update policy, MCP servers, per-agent settings and a
// local OpenAI-compatible provider with its model list.
{
"$schema": "https://opencode.ai/config.json",
// Automatic updates are switched off.
"autoupdate": false,
// MCP servers: one remote (context7) and one started locally (firecrawl-mcp).
"mcp": {
"context7": {
"type": "remote",
"url": "https://mcp.context7.com/mcp",
"headers": {
// Left empty in this file; the key has to be supplied separately.
"CONTEXT7_API_KEY": "",
},
"enabled": true,
},
"firecrawl-mcp": {
"type": "local",
// Launched as `bun x firecrawl-mcp`.
"command": ["bun", "x", "firecrawl-mcp"],
"environment": {
// Left empty in this file; the key has to be supplied separately.
"FIRECRAWL_API_KEY": "",
},
},
},
// NOTE(review): every entry below uses the key "_model" rather than "model" —
// presumably a way to keep the override around while disabled; confirm intent.
"agent": {
"explore": {
"_model": "github-copilot/gpt-5-mini",
},
"title": {
"_model": "github-copilot/gpt-5-mini",
},
"summary": {
"_model": "github-copilot/gpt-5-mini",
},
},
"provider": {
// NOTE(review): the provider is labelled "LM Studio" but points at port 8080,
// which is the port llama-swap is given elsewhere in this change set — confirm
// which server is actually meant to answer here.
"lmstudio": {
"npm": "@ai-sdk/openai-compatible",
"name": "LM Studio (local)",
"options": {
"baseURL": "http://127.0.0.1:8080/v1",
},
// Every model below declares the same limits: 100000 context, 8192 output.
"models": {
"gemma-4-26B-A4B": {
"name": "Gemma 4 26B A4B",
"limit": {
"context": 100000,
"output": 8192,
},
},
"qwen3.6-35B-A3B": {
"name": "Qwen 3.6 35B A3B",
"limit": {
"context": 100000,
"output": 8192,
},
},
"qwen3.5-9B": {
"name": "Qwen 3.5 9B",
"limit": {
"context": 100000,
"output": 8192,
},
},
"qwen3.5-9B-sushi": {
"name": "Qwen 3.5 9B Sushi coder",
"limit": {
"context": 100000,
"output": 8192,
},
},
},
},
},
}

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
result

View file

@ -0,0 +1,58 @@
# NixOS module that runs the llama-swap Prometheus exporter as a systemd
# service. Configuration is handed to the exporter through environment
# variables (PROMETHEUS_PORT, LLAMA_SWAP_URL, SCRAPE_INTERVAL).
{
  config,
  lib,
  pkgs,
  ...
}:
let
  cfg = config.services.llama-swap-exporter;
in
{
  options.services.llama-swap-exporter = {
    enable = lib.mkEnableOption "llama-swap Prometheus exporter";

    package = lib.mkOption {
      type = lib.types.package;
      # Same derivation the module previously hard-coded; exposing it as an
      # option lets a host override the exporter without editing the module.
      default = pkgs.callPackage ../servers/eisen/llama-swap-exporter/default.nix { };
      defaultText = lib.literalExpression "pkgs.callPackage ../servers/eisen/llama-swap-exporter/default.nix { }";
      description = "Exporter package; it must provide `bin/exporter.py`.";
    };

    port = lib.mkOption {
      type = lib.types.port;
      default = 9409;
      description = "Port for the Prometheus metrics endpoint.";
    };

    url = lib.mkOption {
      type = lib.types.str;
      default = "http://localhost:8080/api/metrics";
      description = "llama-swap metrics endpoint URL.";
    };

    interval = lib.mkOption {
      # A negative interval has no meaning; reject it at evaluation time.
      type = lib.types.ints.unsigned;
      default = 15;
      description = "Scrape interval in seconds.";
    };
  };

  config = lib.mkIf cfg.enable {
    systemd.services.llama-swap-exporter = {
      description = "llama-swap Prometheus exporter";
      wantedBy = [ "multi-user.target" ];
      after = [ "network.target" ];

      serviceConfig = {
        Type = "simple";
        ExecStart = "${cfg.package}/bin/exporter.py";
        Restart = "on-failure";
        RestartSec = "5s";

        # The exporter only listens on a TCP port and issues HTTP requests, so
        # it does not need root. A transient unprivileged user is enough.
        DynamicUser = true;
        # Only needed if someone configures a privileged port.
        AmbientCapabilities = lib.optionals (cfg.port < 1024) [ "CAP_NET_BIND_SERVICE" ];

        Environment = [
          "PROMETHEUS_PORT=${toString cfg.port}"
          "LLAMA_SWAP_URL=${cfg.url}"
          "SCRAPE_INTERVAL=${toString cfg.interval}"
          # Without this, Python block-buffers stdout when it is not a TTY and
          # the exporter's log lines reach the journal late or not at all.
          "PYTHONUNBUFFERED=1"
        ];
      };
    };
  };
}

View file

@ -21,6 +21,7 @@ let
grafana = 3002;
tolgee = 8200;
# ntfy = 3003;
llama-swap = 8080;
};
internalPorts = {
prometheus-node = 9000;
@ -28,6 +29,7 @@ let
prometheus-sonarr = 9101;
prometheus-radarr = 9102;
prometheus-prowlarr = 9103;
prometheus-llama-swap = 9409;
prometheus = 9090;
};
in
@ -42,6 +44,7 @@ in
imports = [
nix-index-database.nixosModules.nix-index
./hardware-configuration.nix
../../modules/llama-swap-exporter.nix
];
nix = {
@ -105,6 +108,12 @@ in
environmentFile = "/etc/secrets/karakeep.env";
};
llama-swap-exporter = {
enable = true;
url = "http://100.120.15.10:${toString ports.llama-swap}/api/metrics";
port = internalPorts.prometheus-llama-swap;
};
dnsmasq = {
enable = true;
};
@ -185,6 +194,14 @@ in
}
];
}
{
job_name = "llama-swap";
static_configs = [
{
targets = [ "localhost:${toString internalPorts.prometheus-llama-swap}" ];
}
];
}
];
};

View file

@ -0,0 +1,23 @@
# Package definition for the llama-swap Prometheus exporter (sources in ./src).
{
  pkgs ? import <nixpkgs> { },
  # Fall back to the library of the selected package set so this file also
  # evaluates when imported directly instead of through callPackage.
  lib ? pkgs.lib,
  ...
}:
pkgs.python3Packages.buildPythonApplication {
  pname = "llama-swap-exporter";
  version = "0.1.0";
  src = ./src;

  pyproject = true;
  build-system = with pkgs.python3Packages; [
    setuptools
    setuptools-scm
  ];

  meta = {
    description = "Prometheus exporter for llama-swap metrics";
    license = lib.licenses.mit;
    maintainers = [ ];
    platforms = lib.platforms.linux;
    # The setuptools `scripts` entry installs the executable under this name.
    mainProgram = "exporter.py";
  };
}

View file

@ -0,0 +1,114 @@
#!/usr/bin/env python
"""Prometheus exporter for llama-swap metrics endpoint."""
import json
import os
import time
import urllib.request
from http.server import BaseHTTPRequestHandler, HTTPServer
# Runtime configuration, read once from the environment when the module loads.
# The fallback URL uses the /api/metrics path, the same path the NixOS module
# passes by default, so a standalone run talks to the same JSON endpoint.
LLAMA_SWAP_URL = os.environ.get("LLAMA_SWAP_URL", "http://localhost:8080/api/metrics")
PROMETHEUS_PORT = int(os.environ.get("PROMETHEUS_PORT", "9409"))
SCRAPE_INTERVAL = int(os.environ.get("SCRAPE_INTERVAL", "15"))

# Cache shared with the request handler: the most recent scrape result and the
# time.time() value recorded when it was taken (0 means "never scraped").
last_metrics = {}
last_scrape_time = 0
def scrape_llama_swap():
    """Fetch the llama-swap metrics document and flatten its last entry.

    Returns:
        A dict keyed by metric name built from the final element of the
        decoded JSON payload, or an empty dict when the payload is empty or
        any step (request, decoding, conversion) raises.
    """
    try:
        request = urllib.request.Request(LLAMA_SWAP_URL)
        with urllib.request.urlopen(request, timeout=10) as response:
            payload = json.loads(response.read().decode())

        if not payload:
            print("No data found when scraping (request successful, empty data)")
            return {}

        # Only the last element of the payload is exported.
        latest = payload[-1]
        cache_tokens = float(latest.get("cache_tokens", 0))
        input_tokens = float(latest.get("input_tokens", 0))
        output_tokens = float(latest.get("output_tokens", 0))
        prompt_rate = latest.get("prompt_per_second", 0)
        token_rate = latest.get("tokens_per_second", 0)
        duration_ms = float(latest.get("duration_ms", 0))
        model_name = latest.get("model", "unknown")
        has_capture = int(bool(latest.get("has_capture")))

        return {
            "llama_cache_tokens": cache_tokens,
            "llama_input_tokens": input_tokens,
            "llama_output_tokens": output_tokens,
            "llama_prompt_per_second": prompt_rate,
            "llama_tokens_per_second": token_rate,
            "llama_duration_ms": duration_ms,
            "llama_model": model_name,
            "llama_has_capture": has_capture,
        }
    except Exception as e:
        # Best effort: any failure is reported and treated as "no metrics".
        print(f"Error scraping llama-swap: {e}")
        return {}
def _escape_label_value(value):
    """Escape a label value for the Prometheus text exposition format."""
    return (
        str(value)
        .replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
    )


def format_metrics(metrics):
    """Render a scraped metrics dict as Prometheus text exposition output.

    Args:
        metrics: Mapping of metric name to value, optionally containing a
            ``llama_model`` entry that becomes the ``model`` label. ``None``
            or an empty mapping yields only the timestamp metric.

    Returns:
        The exposition text, terminated by a newline.
    """
    if not metrics:
        metrics = {}
    lines = []
    # The model name comes from the upstream payload; escape it so a quote,
    # backslash or newline cannot break the label syntax.
    model = _escape_label_value(metrics.get("llama_model", "unknown"))
    labels = f'{{model="{model}"}}'
    for name, value in metrics.items():
        if name == "llama_model":
            continue
        # Branch order matters: "llama_tokens_per_second" contains both
        # "second" and "tokens" and must be treated as a rate.
        if "second" in name:
            lines.append(f"# HELP {name} Rate from llama-swap")
            lines.append(f"# TYPE {name} gauge")
            lines.append(f"{name}{labels} {value}")
        elif "tokens" in name:
            lines.append(f"# HELP {name} Total tokens from llama-swap")
            lines.append(f"# TYPE {name} gauge")
            lines.append(f"{name}{labels} {value}")
        elif "duration" in name:
            # The source value is in milliseconds; export it in seconds.
            value_s = value / 1000.0
            lines.append("# HELP llama_duration_seconds Inference duration")
            lines.append("# TYPE llama_duration_seconds gauge")
            lines.append(f"llama_duration_seconds{labels} {value_s}")
        elif name == "llama_has_capture":
            lines.append("# HELP llama_has_capture Whether capture is available")
            lines.append("# TYPE llama_has_capture gauge")
            lines.append(f"llama_has_capture{labels} {value}")
    lines.append(
        "# HELP llama_last_scrape_timestamp_seconds When metrics were last scraped"
    )
    lines.append("# TYPE llama_last_scrape_timestamp_seconds gauge")
    # NOTE(review): this is the time of rendering, not the time the cached
    # values were fetched from llama-swap — confirm which one is intended.
    lines.append(f"llama_last_scrape_timestamp_seconds {time.time()}")
    return "\n".join(lines) + "\n"
last_metrics = None
class MetricsHandler(BaseHTTPRequestHandler):
    """HTTP handler exposing the cached llama-swap metrics.

    Routes:
        /metrics: Prometheus text rendering of the most recent scrape.
        /health: constant ``OK`` body.
        anything else: 404 with an empty body.
    """

    def do_GET(self):
        """Serve one GET request, refreshing the cache only for /metrics."""
        global last_scrape_time, last_metrics
        if self.path == "/metrics":
            # Refresh lazily, and only on this route: the upstream request can
            # block for its full timeout, which must not slow down /health or
            # be triggered by requests for unknown paths.
            if time.time() - last_scrape_time > SCRAPE_INTERVAL:
                last_metrics = scrape_llama_swap()
                last_scrape_time = time.time()
            self.send_response(200)
            self.send_header("Content-Type", "text/plain; charset=utf-8")
            self.end_headers()
            self.wfile.write(format_metrics(last_metrics).encode())
        elif self.path == "/health":
            self.send_response(200)
            self.send_header("Content-Type", "text/plain")
            self.end_headers()
            self.wfile.write(b"OK")
        else:
            self.send_response(404)
            self.end_headers()

    def log_message(self, format, *args):
        """Discard the base class's per-request log output."""
        pass
# Entry point: serve forever when executed as a script; when the module is
# merely imported, only announce that it was loaded.
if __name__ != "__main__":
    print("Exporter loaded")
else:
    print(f"Starting llama-swap exporter on port {PROMETHEUS_PORT}")
    print(f"Scraping llama-swap from: {LLAMA_SWAP_URL}")
    bind_address = ("0.0.0.0", PROMETHEUS_PORT)
    HTTPServer(bind_address, MetricsHandler).serve_forever()

View file

@ -0,0 +1,10 @@
from setuptools import find_packages, setup
# Packaging metadata; exporter.py is installed as an executable script.
setup(
    name="llama-swap-exporter",
    # Kept identical to the version declared in the Nix package definition.
    version="0.1.0",
    # Modules to import from other scripts:
    packages=find_packages(),
    # Executables
    scripts=["exporter.py"],
)

View file

@ -1,5 +1,7 @@
{
pkgs,
lib,
config,
...
}:
@ -113,7 +115,9 @@ in
];
services.llama-swap = {
enable = true;
openFirewall = true;
settings = {
# listen = "0.0.0.0:8080";
macros = {
llama = ''
${pkgs.lib.getExe' llama-cpp "llama-server"} \
@ -161,6 +165,11 @@ in
DynamicUser = pkgs.lib.mkForce false;
User = pkgs.lib.mkForce "dan";
Group = pkgs.lib.mkForce "users"; # or dan's primary group
ExecStart = lib.mkForce ''
${lib.getExe pkgs.llama-swap} --listen 0.0.0.0:${toString config.services.llama-swap.port} --config ${
(pkgs.formats.yaml { }).generate "config.yaml" config.services.llama-swap.settings
}
'';
};
};
hardware.nvidia = {

View file

@ -196,6 +196,7 @@ in
"https://cache.garnix.io"
"https://rusic.cachix.org"
"https://cuda-maintainers.cachix.org"
"https://cache.nixos-cuda.org"
];
trusted-public-keys = [
"hyprland.cachix.org-1:a7pgxzMz7+chwVL3/pzj6jIBMioiJM7ypFP8PwtkuGc="
@ -206,6 +207,7 @@ in
"cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY="
"rusic.cachix.org-1:WXMpGpamblLUiJtcoxBxGGGGwIcWxGPJBUxarLiqWmw="
"cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E="
"cache.nixos-cuda.org:74DUi4Ye579gUqzH4ziL9IyiJBlDpMRn9MBN8oNan9M="
];
};