# Mirror of https://github.com/danbulant/dotfiles
# Synced 2026-05-19 04:18:55 +00:00 (236 lines, 6.7 KiB, Nix)
{
  pkgs,
  lib,
  config,
  ...
}:

let
  # llama.cpp pinned to the upstream tag "b<version>" and built with CUDA
  # enabled (ROCm and Metal disabled), tuned for the CPU/GPU of the build host.
  llama-cpp =
    (pkgs.llama-cpp.override {
      cudaSupport = true;
      rocmSupport = false;
      metalSupport = false;
      blasSupport = true;
    }).overrideAttrs
      (prevAttrs: rec {
        # Exported ahead of any preConfigure the base package already defines.
        # NOTE(review): presumably this stops the nixpkgs compiler wrapper from
        # stripping -march=native (see GGML_NATIVE below) — confirm.
        preConfigure = ''
          export NIX_ENFORCE_NO_NATIVE=0
          ${prevAttrs.preConfigure or ""}
        '';

        version = "8999";

        src = pkgs.fetchFromGitHub {
          owner = "ggml-org";
          repo = "llama.cpp";
          tag = "b${version}";
          hash = "sha256-EgJ3Die/WpVm9dtQ2kwXoV4RAWNY9x7lT4wun79qqCI=";
          # Keep .git only long enough to record the short commit hash in
          # $out/COMMIT, then remove every .git directory from the output.
          leaveDotGit = true;
          postFetch = ''
            git -C "$out" rev-parse --short HEAD > $out/COMMIT
            find "$out" -name .git -print0 | xargs -0 rm -rf
          '';
        };

        npmDepsHash = "sha256-k62LIbyY2DXvs7XXbX0lNPiYxuYzeJUyQtS4eA+68f8=";

        # This assignment replaces prevAttrs.cmakeFlags instead of extending it,
        # so only the flags listed here come from this override.
        cmakeFlags = with lib; [
          # Optimise for the build host's CPU (-march=native). The result is
          # not reproducible across machines with different CPUs.
          (cmakeBool "GGML_NATIVE" true)
          (cmakeBool "LLAMA_BUILD_EXAMPLES" false)
          (cmakeBool "LLAMA_BUILD_SERVER" true)
          (cmakeBool "LLAMA_BUILD_TESTS" false)
          (cmakeBool "LLAMA_OPENSSL" true)
          (cmakeBool "BUILD_SHARED_LIBS" true)
          # (cmakeBool "GGML_BLAS" false)
          (cmakeBool "GGML_LTO" true)
          # NOTE(review): GGML_CLBLAST may no longer exist upstream; if so this
          # flag is silently ignored — confirm.
          (cmakeBool "GGML_CLBLAST" true)
          (cmakeBool "GGML_CUDA" true)
          (cmakeBool "GGML_CUDA_GRAPHS" true)
          (cmakeBool "GGML_CUDA_F16" true)
          (cmakeBool "GGML_CUDA_FA_ALL_QUANTS" true)
          # (cmakeBool "GGML_HIP" false)
          # (cmakeBool "GGML_METAL" false)
          # (cmakeBool "GGML_RPC" false)
          # (cmakeBool "GGML_VULKAN" false)
          # Derived from `version` so the build number reported by the binaries
          # cannot drift from the tag that is actually fetched.
          (cmakeFeature "LLAMA_BUILD_NUMBER" version)
          # Only generate GPU code for this one CUDA architecture.
          (cmakeFeature "CMAKE_CUDA_ARCHITECTURES" "120")
        ];
      });
in
{
  services.hardware.openrgb.enable = true;

  # ssh -R (remote port forward) to this server should listen publicly
  services.openssh.settings.GatewayPorts = "yes";

  boot = {
    kernelParams = [
      # attempt to fix nvidia perf
      "nvidia_drm.fbdev=1"
      "nvidia_drm.modeset=1"
      "module_blacklist=i915"
      "delayacct"
      "initcall_blacklist=sysfb_init"
      #"quiet"
      #"splash"
      "boot.shell_on_fail"
      "loglevel=3"
      "rd.systemd.show_status=false"
      "rd.udev.log_level=3"
      "udev.log_priority=3"
    ];
  };

  # vr
  # services.monado = {
  #   enable = false;
  #   defaultRuntime = true; # Register as default OpenXR runtime
  # };
  # systemd.user.services.monado.environment = {
  #   STEAMVR_LH_ENABLE = "1";
  #   XRT_COMPOSITOR_COMPUTE = "1";
  #   WMR_HANDTRACKING = "0";
  #   VIT_SYSTEM_LIBRARY_PATH = "${pkgs.basalt-monado}/lib/libbasalt.so";
  # };
  programs.steam = {
    enable = true;
    package = pkgs.steam.override {
      extraProfile = ''
        # Fixes timezones on VRChat
        unset TZ
        # Allows Monado/WiVRn to be used
        export PRESSURE_VESSEL_IMPORT_OPENXR_1_RUNTIMES=1
      '';
    };
  };

  services.paseo = {
    enable = true;
    relay.enable = false;
    user = "dan";
    group = "users";
    port = 5656;
    openFirewall = true;
  };

  programs.kdeconnect.enable = true;

  hardware.cpu.amd.updateMicrocode = true;

  hardware.graphics = {
    enable = true;
    # package = unstable-pkgs.mesa.drivers;
    # Steam support
    enable32Bit = true;
    # package32 = unstable-pkgs.pkgsi686Linux.mesa.drivers;
    extraPackages = with pkgs; [
      nvidia-vaapi-driver
    ];
  };

  environment.systemPackages = with pkgs; [
    android-studio-full
    nvitop
    # basalt-monado
    cudaPackages.cuda_nvcc
    # The locally overridden build from the `let` block, not pkgs.llama-cpp.
    llama-cpp
  ];

  services.llama-swap = {
    enable = true;
    openFirewall = true;
    settings = {
      # listen = "0.0.0.0:8080";
      macros = {
        # llama-server command line shared by every model entry below.
        # ''${PORT} is escaped so the literal text ${PORT} reaches the
        # generated config instead of being interpolated by Nix.
        llama = ''
          ${lib.getExe' llama-cpp "llama-server"} \
          --port ''${PORT} \
          --alias "unsloth/qwen" \
          --no-webui \
          --ctx-size 131072 \
          --fit on --fit-ctx 131072 --fit-target 256 \
          --temp 1.0 --top-p 0.95 --top-k 64 \
          --repeat-penalty 1.0 \
          -ctk q8_0 -ctv q8_0 \
          --flash-attn on \
          --batch-size 1024 --ubatch-size 512 \
          --threads 12 --threads-batch 12 \
          --no-mmap --mlock \
          --parallel 1 --prio 2 --no-warmup --jinja
        '';
        # NOTE(review): not referenced by any `cmd` in this file; the model
        # paths below are absolute.
        models_dir = "\${env.HOME}/models";
      };
      globalTTL = 3600;
      # Each entry appends its GGUF file to the shared `llama` macro.
      models = {
        # qwen3-embedding-8b = {
        # };
        # "qwen3-embedding-0.6" = { };
        "qwen3.6-35B-A3B" = {
          cmd = "\${llama} -m /home/dan/.lmstudio/models/unsloth/Qwen3.6-35B-A3B-GGUF/Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf";
        };
        "gemma-4-26B-A4B" = {
          cmd = "\${llama} -m /home/dan/.lmstudio/models/lmstudio-community/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-Q4_K_M.gguf";
        };
        "qwen3.5-9B" = {
          cmd = "\${llama} -m /home/dan/.lmstudio/models/lmstudio-community/Qwen3.5-9B-GGUF/Qwen3.5-9B-Q4_K_M.gguf";
        };
        "qwen3.5-9B-sushi" = {
          cmd = "\${llama} -m /home/dan/.lmstudio/models/bigatuna/Qwen3.5-9b-Sushi-Coder-RL-GGUF/Qwen3.5-9b-Sushi-Coder-RL.Q4_K_M.gguf";
        };
      };
    };
  };

  # Override the llama-swap unit so it runs as `dan` with access to /home,
  # because the model files configured above live under /home/dan.
  systemd.services.llama-swap = {
    environment = {
      HOME = "/home/dan";
    };
    serviceConfig = {
      ProtectHome = lib.mkForce false;
      DynamicUser = lib.mkForce false;
      User = lib.mkForce "dan";
      Group = lib.mkForce "users"; # or dan's primary group
      # The `llama` macro passes --mlock; locking model-sized buffers needs a
      # memlock limit far above systemd's default.
      # NOTE(review): assumes the upstream unit does not already define
      # LimitMEMLOCK; wrap in lib.mkForce if evaluation reports a conflict.
      LimitMEMLOCK = "infinity";
      # Same config file the settings above generate, but listening on all
      # interfaces (0.0.0.0) at the configured port.
      ExecStart = lib.mkForce ''
        ${lib.getExe pkgs.llama-swap} --listen 0.0.0.0:${toString config.services.llama-swap.port} --config ${
          (pkgs.formats.yaml { }).generate "config.yaml" config.services.llama-swap.settings
        }
      '';
    };
  };

  services.nix-serve = {
    enable = true;
    secretKeyFile = "/var/secrets/cache-private-key.pem";
  };

  # Plain-HTTP (port 80) virtual hosts that reverse-proxy to the local
  # llama-swap and nix-serve ports.
  services.caddy = {
    enable = true;

    virtualHosts = {
      "llama.fern.danbulant.cloud:80" = {
        extraConfig = ''
          reverse_proxy http://localhost:${toString config.services.llama-swap.port}
        '';
      };
      "nix.fern.danbulant.cloud:80" = {
        extraConfig = ''
          reverse_proxy http://localhost:${toString config.services.nix-serve.port}
        '';
      };
    };
  };

  nix.optimise = {
    automatic = true;
    persistent = true;
  };
  nix.gc = {
    automatic = true;
    persistent = true;
  };

  hardware.nvidia = {
    open = true;
    modesetting.enable = true;
    # powerManagement.enable = true;
    nvidiaSettings = true;
  };
  services.xserver.videoDrivers = [ "nvidia" ];
  # powerManagement.enable = true;
  hardware.nvidia-container-toolkit.enable = true;
  virtualisation.docker.daemon.settings.features.cdi = true;
}