diff --git a/analysis/collect_network_libraries.py b/analysis/collect_network_libraries.py index b3da0f8..8c3599f 100644 --- a/analysis/collect_network_libraries.py +++ b/analysis/collect_network_libraries.py @@ -73,6 +73,131 @@ CONTAINER_ROOTS = [ GITHUB_RE = re.compile(r"github\.com[:/](?P[^/]+)/(?P[^/#?]+?)(?:\.git|/|#|\?|$)") STORE_HASH_PREFIX_RE = re.compile(r"^[0-9a-z]{32}-(?P.+)$") +COMMON_UPSTREAMS = { + "acl": ("https://git.savannah.nongnu.org/cgit/acl.git", "C"), + "attr": ("https://git.savannah.nongnu.org/cgit/attr.git", "C"), + "avahi": ("https://github.com/avahi/avahi", "C"), + "bluez": ("https://git.kernel.org/pub/scm/bluetooth/bluez.git", "C"), + "bzip2": ("https://sourceware.org/git/bzip2.git", "C"), + "curl": ("https://github.com/curl/curl", "C"), + "dbus": ("https://gitlab.freedesktop.org/dbus/dbus", "C"), + "double-conversion": ("https://github.com/google/double-conversion", "C++"), + "ffmpeg": ("https://git.ffmpeg.org/ffmpeg.git", "C"), + "fuse": ("https://github.com/libfuse/libfuse", "C"), + "glib": ("https://gitlab.gnome.org/GNOME/glib", "C"), + "glibc": ("https://sourceware.org/git/glibc.git", "C"), + "graphviz": ("https://gitlab.com/graphviz/graphviz", "C"), + "gtk+3": ("https://gitlab.gnome.org/GNOME/gtk", "C"), + "libarchive": ("https://github.com/libarchive/libarchive", "C"), + "libbpf": ("https://github.com/libbpf/libbpf", "C"), + "libbsd": ("https://gitlab.freedesktop.org/libbsd/libbsd", "C"), + "libcbor": ("https://github.com/PJK/libcbor", "C"), + "libedit": ("https://www.thrysoee.dk/editline/", "C"), + "libfido2": ("https://github.com/Yubico/libfido2", "C"), + "libmnl": ("https://git.netfilter.org/libmnl", "C"), + "libnftnl": ("https://git.netfilter.org/libnftnl", "C"), + "libpcap": ("https://github.com/the-tcpdump-group/libpcap", "C"), + "libuv": ("https://github.com/libuv/libuv", "C"), + "libxml2": ("https://gitlab.gnome.org/GNOME/libxml2", "C"), + "libxslt": ("https://gitlab.gnome.org/GNOME/libxslt", "C"), + "ncurses": 
("https://invisible-island.net/ncurses/", "C"), + "oniguruma": ("https://github.com/kkos/oniguruma", "C"), + "openssl": ("https://github.com/openssl/openssl", "C"), + "pcre2": ("https://github.com/PCRE2Project/pcre2", "C"), + "pcsclite": ("https://pcsclite.apdu.fr/", "C"), + "rhash": ("https://github.com/rhash/RHash", "C"), + "sqlite": ("https://sqlite.org/src", "C"), + "systemd": ("https://github.com/systemd/systemd", "C"), + "xz": ("https://git.tukaani.org/xz.git", "C"), + "zlib": ("https://github.com/madler/zlib", "C"), +} + +NUGET_NAME_PREFIXES = ( + "AngleSharp", + "AspNetCore", + "Azure.", + "BouncyCastle", + "Castle.", + "Dapper", + "DryIoc", + "Fluent", + "HarfBuzzSharp", + "ICU4N", + "Jellyfin.", + "MailKit", + "MetaBrainz.", + "Microsoft.", + "Mono.", + "NETStandard.", + "Newtonsoft.", + "NLog", + "NodaTime", + "NuGet.", + "NUnit", + "RestSharp", + "Serilog", + "Servarr.", + "SkiaSharp", + "SQLitePCLRaw", + "StyleCop.", + "System.", + "runtime.", +) + +NUGET_REPO_OVERRIDES = { + "AngleSharp": "https://github.com/AngleSharp/AngleSharp", + "AngleSharp.Xml": "https://github.com/AngleSharp/AngleSharp.Xml", + "BitFaster.Caching": "https://github.com/bitfaster/BitFaster.Caching", + "BlurHashSharp": "https://github.com/MarkusPalcer/BlurHashSharp", + "BlurHashSharp.SkiaSharp": "https://github.com/MarkusPalcer/BlurHashSharp", + "BouncyCastle.Cryptography": "https://github.com/bcgit/bc-csharp", + "Castle.Core": "https://github.com/castleproject/Core", + "Dapper": "https://github.com/DapperLib/Dapper", + "DryIoc.dll": "https://github.com/dadhi/DryIoc", + "DryIoc.Microsoft.DependencyInjection": "https://github.com/dadhi/DryIoc", + "FluentAssertions": "https://github.com/fluentassertions/fluentassertions", + "FluentMigrator": "https://github.com/fluentmigrator/fluentmigrator", + "FluentMigrator.Abstractions": "https://github.com/fluentmigrator/fluentmigrator", + "FluentMigrator.Extensions.Postgres": "https://github.com/fluentmigrator/fluentmigrator", + 
"FluentMigrator.Runner.Core": "https://github.com/fluentmigrator/fluentmigrator", + "FluentMigrator.Runner.Postgres": "https://github.com/fluentmigrator/fluentmigrator", + "FluentMigrator.Runner.SQLite": "https://github.com/fluentmigrator/fluentmigrator", + "FluentValidation": "https://github.com/FluentValidation/FluentValidation", + "HarfBuzzSharp": "https://github.com/mono/SkiaSharp", + "HarfBuzzSharp.NativeAssets.Linux": "https://github.com/mono/SkiaSharp", + "HarfBuzzSharp.NativeAssets.Win32": "https://github.com/mono/SkiaSharp", + "HarfBuzzSharp.NativeAssets.macOS": "https://github.com/mono/SkiaSharp", + "ICU4N": "https://github.com/NightOwl888/ICU4N", + "ICU4N.Transliterator": "https://github.com/NightOwl888/ICU4N", + "MailKit": "https://github.com/jstedfast/MailKit", + "MetaBrainz.Common": "https://github.com/Zastai/MetaBrainz.Common", + "MetaBrainz.Common.Json": "https://github.com/Zastai/MetaBrainz.Common.Json", + "MetaBrainz.MusicBrainz": "https://github.com/Zastai/MetaBrainz.MusicBrainz", + "Microsoft.Data.SqlClient": "https://github.com/dotnet/SqlClient", + "Microsoft.Data.SqlClient.SNI.runtime": "https://github.com/dotnet/SqlClient", + "Microsoft.Data.Sqlite": "https://github.com/dotnet/efcore", + "Microsoft.Data.Sqlite.Core": "https://github.com/dotnet/efcore", + "Newtonsoft.Json": "https://github.com/JamesNK/Newtonsoft.Json", + "NLog": "https://github.com/NLog/NLog", + "NodaTime": "https://github.com/nodatime/nodatime", + "NUnit": "https://github.com/nunit/nunit", + "NUnit3TestAdapter": "https://github.com/nunit/nunit3-vs-adapter", + "RestSharp": "https://github.com/restsharp/RestSharp", + "RestSharp.Serializers.SystemTextJson": "https://github.com/restsharp/RestSharp", + "Sentry": "https://github.com/getsentry/sentry-dotnet", + "Serilog": "https://github.com/serilog/serilog", + "SkiaSharp": "https://github.com/mono/SkiaSharp", + "SkiaSharp.HarfBuzz": "https://github.com/mono/SkiaSharp", + "SkiaSharp.NativeAssets.Linux": 
"https://github.com/mono/SkiaSharp", + "SkiaSharp.NativeAssets.Win32": "https://github.com/mono/SkiaSharp", + "SkiaSharp.NativeAssets.macOS": "https://github.com/mono/SkiaSharp", + "SQLitePCLRaw.bundle_e_sqlite3": "https://github.com/ericsink/SQLitePCL.raw", + "SQLitePCLRaw.core": "https://github.com/ericsink/SQLitePCL.raw", + "SQLitePCLRaw.lib.e_sqlite3": "https://github.com/ericsink/SQLitePCL.raw", + "SQLitePCLRaw.provider.e_sqlite3": "https://github.com/ericsink/SQLitePCL.raw", + "StyleCop.Analyzers": "https://github.com/DotNetAnalyzers/StyleCopAnalyzers", +} + def run(cmd: list[str], *, timeout: int = 120) -> str: proc = subprocess.run( @@ -225,6 +350,64 @@ def infer_language(name: str, env: dict[str, str]) -> str | None: return None +def static_upstream(name: str) -> dict[str, str] | None: + base = re.sub(r"-\d+(?:\.\d+).*$", "", name) + if base in COMMON_UPSTREAMS: + source, language = COMMON_UPSTREAMS[base] + return {"source_link": source, "language": language} + if name.startswith("qt") or name in {"qca", "phonon", "poppler"}: + return {"source_link": f"https://code.qt.io/cgit/qt/{base}.git", "language": "C++"} + if name.startswith("gst-") or name == "gstreamer": + project = "gstreamer" if name == "gstreamer" else base + return {"source_link": f"https://gitlab.freedesktop.org/gstreamer/{project}", "language": "C"} + kde_prefixes = ( + "karchive", + "kauth", + "kbookmarks", + "kcmutils", + "kcodecs", + "kcompletion", + "kconfig", + "kconfigwidgets", + "kcoreaddons", + "kcrash", + "kdbusaddons", + "kdeclarative", + "kded", + "kdnssd", + "kdoctools", + "kfilemetadata", + "kguiaddons", + "ki18n", + "kiconthemes", + "kidletime", + "kio", + "kirigami", + "kitemmodels", + "kitemviews", + "kjobwidgets", + "knotifications", + "kpackage", + "kparts", + "kpeople", + "kpty", + "kservice", + "kstatusnotifieritem", + "ksvg", + "ktextwidgets", + "kwallet", + "kwidgetsaddons", + "kwindowsystem", + "kxmlgui", + "solid", + "sonnet", + "syntax-highlighting", + ) + if 
base.startswith(kde_prefixes): + return {"source_link": f"https://invent.kde.org/frameworks/{base}", "language": "C++"} + return None + + def github_repo(*values: str | None) -> str | None: for value in values: if not value: @@ -318,7 +501,8 @@ def parse_ecosystem(row: dict[str, Any]) -> tuple[str | None, str | None, str | name = row["library"] version = row.get("version_in_use") or None drv = row.get("drv_path", "") - if ".nupkg" in drv or row["root_name"].lower() in ("jellyfin", "sonarr", "radarr", "prowlarr"): + is_nuget_like = name.startswith(NUGET_NAME_PREFIXES) + if ".nupkg" in drv or is_nuget_like: # The derivation rows have clean name/version; the raw .nupkg rows are # filtered from review but can still be enriched in summary/deps. if not version and ".nupkg" in drv: @@ -339,13 +523,33 @@ def parse_ecosystem(row: dict[str, Any]) -> tuple[str | None, str | None, str | return None, None, None +def apply_ecosystem_overrides(ecosystem: str, package: str, result: dict[str, Any]) -> dict[str, Any]: + if ecosystem == "nuget": + source = NUGET_REPO_OVERRIDES.get(package) + if not source and package.startswith("Microsoft.AspNetCore."): + source = "https://github.com/dotnet/aspnetcore" + if not source and package.startswith("Microsoft.EntityFrameworkCore"): + source = "https://github.com/dotnet/efcore" + if not source and package.startswith("Microsoft.Build"): + source = "https://github.com/dotnet/msbuild" + if not source and package.startswith("Microsoft.Identity.Client"): + source = "https://github.com/AzureAD/microsoft-authentication-library-for-dotnet" + if not source and (package.startswith("Microsoft.") or package.startswith("System.") or package.startswith("runtime.")): + source = "https://github.com/dotnet/runtime" + if source: + result["source_link"] = source + result["github_repo"] = github_repo(source) + result.setdefault("language", "C#") + return result + + def release_date_from_pypi(files: list[dict[str, Any]]) -> str | None: dates = 
[f.get("upload_time_iso_8601") for f in files if f.get("upload_time_iso_8601")] return min(dates) if dates else None def enrich_ecosystem(ecosystem: str, package: str, version: str | None, cache: dict[str, Any]) -> dict[str, Any]: - key = f"{ecosystem}:{package}:{version or ''}" + key = ecosystem_cache_key(ecosystem, package, version) if key in cache: return cache[key] result: dict[str, Any] = {"ecosystem": ecosystem} @@ -435,12 +639,19 @@ def enrich_ecosystem(ecosystem: str, package: str, version: str | None, cache: d "latest_release_date": data.get("date"), } ) + if str(data.get("version")) == str(version): + result["release_date"] = data.get("date") result["github_repo"] = github_repo(result.get("source_link")) + result = apply_ecosystem_overrides(ecosystem, package, result) cache[key] = result return result +def ecosystem_cache_key(ecosystem: str | None, package: str | None, version: str | None) -> str: + return f"{ecosystem}:{package}:{version or ''}" + + def enrich_github(repo: str, cache: dict[str, Any], sleep: float) -> dict[str, Any]: if repo in cache: return cache[repo] @@ -482,6 +693,9 @@ def walk_deps(root: dict[str, Any], all_drvs: dict[str, Any], max_depth: int) -> continue seen.add(dep_key) meta = drv_meta(dep_key, all_drvs) + static = static_upstream(meta["name"]) or {} + source_link = meta["source_link"] or static.get("source_link") + language = meta["language"] or static.get("language") dep_path = path + [meta["name"]] rows.append( { @@ -495,9 +709,9 @@ def walk_deps(root: dict[str, Any], all_drvs: dict[str, Any], max_depth: int) -> "dependency_path": " -> ".join([root["rootName"]] + dep_path), "drv_path": dep_key, "homepage": meta["homepage"], - "source_link": meta["source_link"], - "language": meta["language"], - "github_repo": github_repo(meta["homepage"], meta["source_link"]), + "source_link": source_link, + "language": language, + "github_repo": github_repo(meta["homepage"], source_link), "github_stars": None, "ecosystem": None, 
"release_date": None, @@ -516,6 +730,16 @@ def write_csv(path: Path, rows: list[dict[str, Any]], fields: list[str]) -> None writer.writerows(rows) +def ecosystem_priority(ecosystem: str | None) -> int: + return { + "cpan": 0, + "npm": 1, + "pypi": 2, + "crates": 3, + "nuget": 4, + }.get(ecosystem or "", 9) + + def main() -> int: parser = argparse.ArgumentParser() parser.add_argument("--max-roots", type=int, default=18) @@ -542,6 +766,7 @@ def main() -> int: ecosystem_cache_path = OUT / "ecosystem-cache.json" ecosystem_cache = json.loads(ecosystem_cache_path.read_text()) if ecosystem_cache_path.exists() else {} ecosystem_keys = [] + ecosystem_key_scores: dict[tuple[str | None, str | None, str | None], tuple[int, int, str, str]] = {} ecosystem_rows: dict[tuple[str | None, str | None, str | None], list[dict[str, Any]]] = {} for row in dep_rows: ecosystem, package, version = parse_ecosystem(row) @@ -550,14 +775,30 @@ def main() -> int: ecosystem_rows.setdefault(key, []).append(row) if key not in ecosystem_keys: ecosystem_keys.append(key) - for idx, (ecosystem, package, version) in enumerate(ecosystem_keys[: args.ecosystem_limit], start=1): + review_score = 0 if not noisy_for_review(row) else 1 + score = (review_score, ecosystem_priority(ecosystem), package.lower(), version or "") + if key not in ecosystem_key_scores or score < ecosystem_key_scores[key]: + ecosystem_key_scores[key] = score + ecosystem_keys.sort(key=lambda key: ecosystem_key_scores.get(key, (9, 9, "", ""))) + selected_ecosystem_keys = ecosystem_keys if args.ecosystem_limit < 0 else ecosystem_keys[: args.ecosystem_limit] + for idx, (ecosystem, package, version) in enumerate(selected_ecosystem_keys, start=1): if idx % 25 == 1: - print(f"enriching ecosystem metadata {idx}/{min(len(ecosystem_keys), args.ecosystem_limit)}", file=sys.stderr) + print(f"enriching ecosystem metadata {idx}/{len(selected_ecosystem_keys)}", file=sys.stderr) meta = enrich_ecosystem(ecosystem, package, version, ecosystem_cache) for 
row in ecosystem_rows.get((ecosystem, package, version), []): row.update({k: v for k, v in meta.items() if v is not None and (not row.get(k) or k in {"ecosystem", "release_date"})}) if idx % 25 == 0: write_json_atomic(ecosystem_cache_path, ecosystem_cache) + selected_ecosystem_key_set = set(selected_ecosystem_keys) + cached_only_keys = [ + key + for key in ecosystem_keys + if ecosystem_cache_key(*key) in ecosystem_cache and key not in selected_ecosystem_key_set + ] + for ecosystem, package, version in cached_only_keys: + meta = ecosystem_cache[ecosystem_cache_key(ecosystem, package, version)] + for row in ecosystem_rows.get((ecosystem, package, version), []): + row.update({k: v for k, v in meta.items() if v is not None and (not row.get(k) or k in {"ecosystem", "release_date"})}) write_json_atomic(ecosystem_cache_path, ecosystem_cache) cache_path = OUT / "github-cache.json" diff --git a/analysis/low-star-network-path-packages.md b/analysis/low-star-network-path-packages.md new file mode 100644 index 0000000..8d39079 --- /dev/null +++ b/analysis/low-star-network-path-packages.md @@ -0,0 +1,222 @@ +# Low-Star Packages On Plausible Network/Data Paths + +Generated from `analysis/network-library-review.csv` and GitHub metadata on 2026-05-30. Star counts are GitHub stars at collection time. Dependency paths are Nix derivation/package paths, so they show that a package is reachable from the configured service package closure; they do not prove every library is loaded on every runtime request path. + +Selection criteria: GitHub-backed dependency with relatively low stars, used by a network-facing root (`nix-serve`, `prowlarr`, `jellyfin`, `sonarr`, `radarr`), and plausibly involved in HTTP parsing, socket handling, JSON/XML/HTML parsing, remote metadata parsing, text normalization, database access, or similar externally influenced data handling. 
+ +## Highest Priority + +| Project | Stars | Used by | Version in use | Latest seen | Why it may matter | +| --- | ---: | --- | --- | --- | --- | +| [kazeburo/HTTP-Entity-Parser](https://github.com/kazeburo/HTTP-Entity-Parser) | 5 | `nix-serve` | `0.25` | `0.25` | PSGI-compliant HTTP entity/body parser, directly adjacent to HTTP request handling. | +| [kazuho/p5-http-parser-xs](https://github.com/kazuho/p5-http-parser-xs) | 30 | `nix-serve` | `0.17` | `0.17` | Fast C/XS HTTP parser used through the Perl web stack; low-level parser code is a high-value review target. | +| [shlomif/perl-io-socket-inet6](https://github.com/shlomif/perl-io-socket-inet6) | 0 | `nix-serve` | `2.73` | `2.73` | IPv6 socket support library in the `nix-serve` Perl closure. Socket plumbing is network-path relevant. | +| [AngleSharp/AngleSharp.Xml](https://github.com/AngleSharp/AngleSharp.Xml) | 20 | `prowlarr` | `1.0.0` | `1.0.0` | XML and DTD parser extension for AngleSharp. Prowlarr handles indexer feeds/pages from remote sources. | +| [p5sagit/JSON-MaybeXS](https://github.com/p5sagit/JSON-MaybeXS) | 4 | `nix-serve` | `1.004005` | `1.004008` | JSON backend selection/compatibility module in the HTTP service closure. JSON parsing often receives externally supplied data. | + +## Medium Priority + +| Project | Stars | Used by | Version in use | Latest seen | Notes | +| --- | ---: | --- | --- | --- | --- | +| [madsen/io-html](https://github.com/madsen/io-html) | 3 | `nix-serve` | `1.004` | `1.004` | Perl module for opening files with automatic charset detection. Less directly exposed than HTTP parsers, but charset detection can be input-sensitive. | +| [Zastai/MetaBrainz.MusicBrainz](https://github.com/Zastai/MetaBrainz.MusicBrainz) | 41 | `jellyfin` | `6.1.0` | `v8.0.1` | Native .NET implementation of MusicBrainz client/data model. Jellyfin can ingest remote metadata responses. 
| +| [Zastai/MetaBrainz.Common.Json](https://github.com/Zastai/MetaBrainz.Common.Json) | 1 | `jellyfin` | `6.0.2` | `v7.2.0` | JSON helper classes for MetaBrainz packages. Relevant to parsing remote metadata. | +| [Zastai/MetaBrainz.Common](https://github.com/Zastai/MetaBrainz.Common) | 0 | `jellyfin` | `3.0.0` | `v4.1.1` | Shared classes for MetaBrainz packages. Low stars and in the metadata path, but not itself a parser entry point. | +| [NightOwl888/ICU4N](https://github.com/NightOwl888/ICU4N) | 44 | `jellyfin` | `60.1.0-alpha.356` | `60.1.0-alpha.439` | Unicode/text normalization and transliteration library. Useful to review because media metadata and filenames are attacker-influenced in many deployments. | + +## Lower Priority But Network-Adjacent + +| Project | Stars | Used by | Version in use | Latest seen | Notes | +| --- | ---: | --- | --- | --- | --- | +| [ericsink/SQLitePCL.raw](https://github.com/ericsink/SQLitePCL.raw) | 609 | `jellyfin` | `2.1.10` | `v3.0.3` | Low-level SQLite access layer. Not a network parser, but stores and queries data derived from remote/user-controlled metadata. | +| [dotnet/SqlClient](https://github.com/dotnet/SqlClient) | 974 | `sonarr`, `radarr` | `2.1.7`, `6.1.1`, SNI runtime `2.1.1`, `6.0.2` | `v7.0.1` | SQL Server connectivity. Relevant if these apps are configured to use SQL Server or process DB connection data, but less relevant for the default SQLite-style local deployment path. | + +## Candidate Details + +### kazeburo/HTTP-Entity-Parser + +Project: [https://github.com/kazeburo/HTTP-Entity-Parser](https://github.com/kazeburo/HTTP-Entity-Parser) + +Description: PSGI compliant HTTP Entity Parser.
+ +Used by: `nix-serve` + +Dependency path: `nix-serve -> perl-5.42.0-env -> HTTP-Entity-Parser` + +Version in use: `0.25` + +Latest/release data: latest `0.25`, latest release date `2020-11-28T02:35:43` + +Other data: Perl, 5 stars, 8 forks, 2 open issues, not archived, last pushed `2020-11-28T02:35:43Z`, license `NOASSERTION` + +Assessment: Directly relevant to HTTP body parsing for `nix-serve`; worth manual review if `nix-serve` is publicly exposed through Caddy. + +### kazuho/p5-http-parser-xs + +Project: [https://github.com/kazuho/p5-http-parser-xs](https://github.com/kazuho/p5-http-parser-xs) + +Description: Fast HTTP parser. + +Used by: `nix-serve` + +Dependency path: `nix-serve -> perl-5.42.0-env -> HTTP-Parser-XS` + +Version in use: `0.17` + +Latest/release data: latest `0.17`, latest release date `2014-12-15T07:53:06` + +Other data: C, 30 stars, 11 forks, 9 open issues, not archived, last pushed `2024-06-13T04:08:54Z` + +Assessment: Highest-value low-star item because it is C parser code close to HTTP request parsing. + +### shlomif/perl-io-socket-inet6 + +Project: [https://github.com/shlomif/perl-io-socket-inet6](https://github.com/shlomif/perl-io-socket-inet6) + +Description: CPAN IPv6 socket module mirror/repository. + +Used by: `nix-serve` + +Dependency path: `nix-serve -> perl-5.42.0-env -> IO-Socket-INET6` + +Version in use: `2.73` + +Latest/release data: latest `2.73`, latest release date `2021-12-10T07:31:35` + +Other data: Perl, 0 stars, 1 fork, 0 open issues, not archived, last pushed `2021-12-10T07:31:26Z`, license `NOASSERTION` + +Assessment: Network plumbing dependency. Lower parser risk than HTTP parsers, but the star count is effectively zero. + +### AngleSharp/AngleSharp.Xml + +Project: [https://github.com/AngleSharp/AngleSharp.Xml](https://github.com/AngleSharp/AngleSharp.Xml) + +Description: Library adding XML and DTD parsing capabilities to AngleSharp. 
+ +Used by: `prowlarr` + +Dependency path: `prowlarr -> AngleSharp.Xml` + +Version in use: `1.0.0` + +Latest/release data: latest `1.0.0`, release date `2023-01-15T12:45:03.84Z`, latest release date `2023-01-15T12:45:04Z` + +Other data: C#, 20 stars, 6 forks, 5 open issues, not archived, last pushed `2025-01-26T20:54:26Z`, license `MIT` + +Assessment: XML/DTD parsing in an indexer-facing service is plausibly exposed to remote feed/page content. Worth checking DTD/external entity behavior and parser limits. + +### p5sagit/JSON-MaybeXS + +Project: [https://github.com/p5sagit/JSON-MaybeXS](https://github.com/p5sagit/JSON-MaybeXS) + +Description: JSON backend compatibility/selecting module for Perl. + +Used by: `nix-serve` + +Dependency path: `nix-serve -> perl-5.42.0-env -> JSON-MaybeXS` + +Version in use: `1.004005` + +Latest/release data: latest `1.004008`, latest release date `2024-08-10T20:23:23` + +Other data: Perl, 4 stars, 6 forks, 1 open issue, not archived, last pushed `2024-12-27T11:55:18Z` + +Assessment: Probably a wrapper rather than the parser implementation itself, but it is in a web service closure and touches JSON handling. + +### madsen/io-html + +Project: [https://github.com/madsen/io-html](https://github.com/madsen/io-html) + +Description: Perl module that opens a file and performs automatic charset detection. + +Used by: `nix-serve` + +Dependency path: `nix-serve -> perl-5.42.0-env -> IO-HTML` + +Version in use: `1.004` + +Latest/release data: latest `1.004`, latest release date `2020-09-26T16:52:29` + +Other data: Perl, 3 stars, 1 fork, 0 open issues, not archived, last pushed `2020-09-26T16:51:31Z` + +Assessment: Charset detection can be input-sensitive, but this is lower priority unless `nix-serve` uses it on request-supplied content. 
+ +### Zastai MetaBrainz packages + +Projects: [MetaBrainz.Common](https://github.com/Zastai/MetaBrainz.Common), [MetaBrainz.Common.Json](https://github.com/Zastai/MetaBrainz.Common.Json), [MetaBrainz.MusicBrainz](https://github.com/Zastai/MetaBrainz.MusicBrainz) + +Descriptions: Shared classes, JSON helpers, and native .NET implementation of libmusicbrainz. + +Used by: `jellyfin` + +Dependency paths: `jellyfin -> MetaBrainz.Common`, `jellyfin -> MetaBrainz.Common.Json`, `jellyfin -> MetaBrainz.MusicBrainz` + +Versions in use: `3.0.0`, `6.0.2`, `6.1.0` + +Latest/release data: latest `v4.1.1`, `v7.2.0`, `v8.0.1`; latest release dates in 2026 for all three + +Other data: C#, 0/1/41 stars, 0/0/10 forks, not archived, MIT license + +Assessment: These are in Jellyfin metadata handling. They are not direct socket parsers, but they process metadata structures that can originate from remote services or media tags. + +### NightOwl888/ICU4N + +Project: [https://github.com/NightOwl888/ICU4N](https://github.com/NightOwl888/ICU4N) + +Description: International Components for Unicode for .NET. + +Used by: `jellyfin` + +Dependency paths: `jellyfin -> ICU4N`, `jellyfin -> ICU4N.Transliterator` + +Version in use: `60.1.0-alpha.356` + +Latest/release data: latest `60.1.0-alpha.439` for `ICU4N`; latest `60.1.0-alpha.356` for `ICU4N.Transliterator`; NuGet release dates were not exposed in the cached data + +Other data: C#, 44 stars, 8 forks, 22 open issues, not archived, last pushed `2026-05-08T23:25:53Z`, license `Apache-2.0` + +Assessment: Text normalization/transliteration libraries can receive untrusted metadata, filenames, subtitles, and tags. Alpha-version package in use is notable. + +### ericsink/SQLitePCL.raw + +Project: [https://github.com/ericsink/SQLitePCL.raw](https://github.com/ericsink/SQLitePCL.raw) + +Description: Portable Class Library for low-level raw access to SQLite. 
+ +Used by: `jellyfin` + +Dependency paths: `jellyfin -> SQLitePCLRaw.core`, `jellyfin -> SQLitePCLRaw.bundle_e_sqlite3`, `jellyfin -> SQLitePCLRaw.lib.e_sqlite3`, `jellyfin -> SQLitePCLRaw.provider.e_sqlite3` + +Version in use: `2.1.10` + +Latest/release data: latest `v3.0.3`, release dates around `2024-09-11`, latest release date `2026-05-07T17:28:57Z` + +Other data: C#, 609 stars, 134 forks, 36 open issues, not archived, last pushed `2026-05-07T17:23:42Z`, license `Apache-2.0` + +Assessment: Not a network parser, but stores and queries data derived from network/media metadata. Lower priority than parser/socket libraries. + +### dotnet/SqlClient + +Project: [https://github.com/dotnet/SqlClient](https://github.com/dotnet/SqlClient) + +Description: Microsoft.Data.SqlClient provides database connectivity to SQL Server for .NET applications. + +Used by: `sonarr`, `radarr` + +Dependency paths: `sonarr -> Microsoft.Data.SqlClient`, `radarr -> Microsoft.Data.SqlClient`, and corresponding `Microsoft.Data.SqlClient.SNI.runtime` rows + +Versions in use: `2.1.7`, `6.1.1`, SNI runtime `2.1.1`, `6.0.2` + +Latest/release data: latest `v7.0.1`, latest release date `2026-04-24T19:34:24Z` + +Other data: C#, 974 stars, 330 forks, 276 open issues, not archived, last pushed `2026-05-30T11:30:25Z`, license `MIT` + +Assessment: Network-adjacent database client. Relevant mainly if Sonarr/Radarr are configured to use SQL Server or expose database connection handling. + +## Low-Star Items Not Prioritized + +These appeared in the low-star scan but are less plausibly on a network/data parsing path: [garu/data-dump](https://github.com/garu/data-dump), [garu/Clone](https://github.com/garu/Clone), Serilog extension/sink packages, NUnit test adapters, and `buildcatrust`. They may still matter for build integrity or diagnostics, but they are not obvious request/response parser or socket-facing dependencies from the current dependency paths. 
+ +## Suggested Follow-Up + +Review `nix-serve` first because it is exposed through Caddy and has several very low-star Perl HTTP/socket parser dependencies. Then check `prowlarr` XML/HTML parsing behavior, especially external entity handling and parser size/time limits. Finally, decide whether Jellyfin remote metadata providers are enabled and exposed enough to justify deeper review of the MetaBrainz and ICU4N paths. diff --git a/analysis/nix-serve-security-entry-points.md b/analysis/nix-serve-security-entry-points.md new file mode 100644 index 0000000..0d9599f --- /dev/null +++ b/analysis/nix-serve-security-entry-points.md @@ -0,0 +1,207 @@ +# nix-serve security entry points + +Target deployment: + +- Local: `nix-serve` / Starman directly on `:5000`. +- Public: `nix.fern.danbulant.cloud:80` via Caddy `reverse_proxy http://localhost:${config.services.nix-serve.port}` in `servers/fern/configuration.nix`. + +Primary application code: + +- `analysis/nix-serve/nix-serve.psgi` +- `analysis/p5-http-parser-xs/XS.xs` +- `analysis/p5-http-parser-xs/picohttpparser/picohttpparser.c` +- `analysis/HTTP-Entity-Parser/lib/HTTP/Entity/Parser*.pm` + +## Request Flow + +External request reaches Caddy, then Starman, then the PSGI app. Starman uses `HTTP::Parser::XS` to parse the request line and headers into PSGI env values. `nix-serve.psgi` routes only on `$env->{PATH_INFO}` and ignores method, query string, and request body. + +Application routes in `nix-serve.psgi`: + +- `/nix-cache-info`: static cache metadata. +- `/<hash>.narinfo`: maps hash prefix to a store path and returns NAR metadata/signatures. +- `/nar/<hash>-<narhash>.nar`: maps hash prefix, checks NAR hash, then spawns `nix store dump-path -- <storePath>`. +- `/nar/<hash>.nar`: legacy endpoint, maps hash prefix, then spawns `nix store dump-path -- <storePath>` without the NAR hash check. +- `/log/<hash>-<name>`: constructs `/nix/store/<hash>-<name>` and spawns `nix log <storePath>` without first proving that the path is valid or present. + +## Confirmed Active Behaviors + +### A.
Incomplete `Content-Length` kills a Starman worker + +Starman reads request bodies before dispatching to `nix-serve.psgi`, even though the app ignores bodies. In `Starman/Server.pm:450-462`, a positive `CONTENT_LENGTH` causes `_prepare_env` to read until the declared length is consumed. If the client closes early, it executes `die "Read error: $!\n"` outside an eval around request processing. + +Confirmed behavior: + +- Direct `:5000`: sending `Content-Length: 999999` with a one-byte body and closing replaces one worker process. +- Through Caddy: the same incomplete request to `nix.fern.danbulant.cloud:80` also replaces one Starman worker. +- The master process respawns the worker, so this is a repeatable worker-crash / availability issue rather than a one-shot full service crash. + +Observed worker replacement example: + +```text +before: 2529 2530 2532 2533 1239489 +after: 2530 2532 2533 1239489 1240067 +``` + +Root cause code: + +```perl +elsif (my $cl = $env->{CONTENT_LENGTH}) { + my $buf = Plack::TempBuffer->new($cl); + while ($cl > 0) { + my($chunk, $read) = $get_chunk->(); + if ( !defined $read || $read == 0 ) { + die "Read error: $!\n"; + } + $cl -= $read; + $buf->print($chunk); + } + $env->{'psgi.input'} = $buf->rewind; +} +``` + +### B. Direct Starman accepts invalid `Content-Length` values + +`HTTP::Parser::XS` copies `Content-Length` as a header value and Starman relies on Perl numeric coercion instead of strict decimal validation. + +Parser-level results: + +```text +CL=-1 ret=48 CONTENT_LENGTH=-1 +CL=1x ret=48 CONTENT_LENGTH=1x +CL=1e9 ret=49 CONTENT_LENGTH=1e9 +CL=+1 ret=48 CONTENT_LENGTH=+1 +``` + +Confirmed direct behavior: + +- `Content-Length: -1` to direct `:5000` returns `200 OK` for `/nix-cache-info`. +- `Content-Length: 1x` with a one-byte body to direct `:5000` returns `200 OK`. 
+ +Confirmed Caddy behavior: + +- Caddy rejects these invalid content lengths with `400 Bad Request`, so this is currently direct-port-only unless another frontend forwards such requests. + +### C. `%00` in path actively changes the routed endpoint + +As noted below, `%00` truncates `PATH_INFO`. This is not just parser API behavior: both direct Starman and Caddy route `GET /nix-cache-info%00suffix HTTP/1.1` to the `/nix-cache-info` handler and return `200 OK`. + +Current impact is endpoint confusion rather than data exposure because Caddy has no path-level allow/deny rules and the suffix does not select a protected app route. It would become a bypass if path filtering were added at Caddy or middleware while Starman still receives the raw encoded target. + +### D. Missing `/log/...` returns `200 OK` with an empty body + +Requests for a non-existent valid-looking log path return `200 OK` and an empty body through both direct Starman and Caddy: + +```text +GET /log/00000000000000000000000000000000-test -> HTTP/1.1 200 OK +``` + +This is not data exposure, but it is undesired behavior for clients and monitoring because errors from `nix log` are not converted into HTTP errors. The route streams the child stdout without checking exit status. + +## Candidate Entry Points + +### 1. Percent-decoded `PATH_INFO` before routing + +`HTTP::Parser::XS::parse_http_request` stores the original target in `REQUEST_URI`, then percent-decodes the path portion into `PATH_INFO` before `nix-serve.psgi` sees it: + +- `XS.xs:186-201` +- `nix-serve.psgi:24` + +This makes encoded delimiters and control bytes relevant to app routing. The app regexes are written as if `$path` is a normal textual URL path, but it is already decoded by the server parser. + +Most interesting subcase: `%00`. In `XS.xs:136-144`, decoded values are stored with `newSVpv(decoded, 0)`. `newSVpv(..., 0)` treats the decoded buffer as a C string, so an embedded NUL produced from `%00` truncates the Perl scalar. 
A request target such as `/nix-cache-info%00suffix` becomes `PATH_INFO == "/nix-cache-info"` at the PSGI layer. + +Confirmed with the packaged parser: + +```text +ret=50 +PATH_INFO=/nix-cache-info len=15 +REQUEST_URI=/nix-cache-info%00suffix len=24 +``` + +Confirmed through both local Starman and the Caddy reverse proxy: `GET /nix-cache-info%00suffix HTTP/1.1` returns the `/nix-cache-info` response. + +Impact: + +- Route suffix bypasses if any future route-level filtering is added before Starman decoding is understood. +- Caddy/Starman disagreement: Caddy forwards the raw target, while Starman truncates `PATH_INFO` after decoding. +- Possible endpoint confusion for `/nar/...` or `/log/...` where suffix data is invisible to the app but present in `REQUEST_URI` and access logs. + +### 2. Out-of-bounds read on malformed percent escapes + +`url_decode` in `XS.xs:97-128` scans for `%`, allocates `len - 1`, then reads `s[i + 1]` and `s[i + 2]` without first checking that two bytes remain: + +```c +if ((hi = hex_decode(s[i + 1])) == -1 + || (lo = hex_decode(s[i + 2])) == -1) { +``` + +For a path ending in `%` or `%0`, this reads past the logical end of the Perl string. Perl SV buffers are usually NUL-terminated, so this is likely a small out-of-bounds read rather than an immediate crash, but it is still memory-unsafe C on request-controlled input. Packaged-parser behavior for `/%`, `/%0`, `/%GG`, and `/%0G` is `ret=-1` with no env entries; ASAN/debug-allocator validation is still needed for the actual memory read. + +Impact to investigate: + +- Whether ASAN or a hardened allocator catches reads for trailing `%` / `%0`. +- Whether Caddy rejects those targets before forwarding; direct `:5000` remains exposed locally. + +### 3. `/log/...` prefix regex and subprocess spawning + +`nix-serve.psgi:82-86` matches logs with: + +```perl +elsif ($path =~ /^\/log\/([0-9a-z]+-[0-9a-zA-Z\+\-\.\_\?\=]+)/) { +``` + +The regex is not anchored at the end. 
Any path beginning with a valid-looking store basename is accepted, and the suffix is ignored. The route then runs `nix log $storePath` for the captured value without checking it with `queryPathFromHashPart` or `queryPathInfo` first. + +There is no shell injection because `open` is called with an argument list, not a shell string. The interesting angle is resource use and Nix behavior on attacker-chosen valid-looking store paths. + +Local command timing for a missing valid-looking path: + +```text +$ time nix --extra-experimental-features nix-command log /nix/store/00000000000000000000000000000000-test +error: build log of '/nix/store/00000000000000000000000000000000-test' is not available +real 0m1.892s +``` + +That is enough per request to make `/log/...` a plausible low-rate process/CPU DoS surface, especially because the app does not validate the path against the store before spawning `nix log`. + +Impact: + +- CPU/process exhaustion from repeated `nix log` subprocesses. +- Missing paths still cost roughly 1.9s locally in a direct command test. +- Open question: whether ignored suffixes create Caddy/Starman/app log ambiguity. + +### 4. `/nar/...` subprocess fan-out + +Both NAR routes spawn a `nix store dump-path` process per request: + +- Checked route: `nix-serve.psgi:58-68` +- Legacy unchecked-hash route: `nix-serve.psgi:72-79` + +The checked route validates that the requested NAR hash matches current path info. The legacy route only checks the hash prefix maps to a path and then dumps it. + +Impact to investigate: + +- Bandwidth/process DoS by repeatedly requesting large store paths. +- Whether the legacy route should be disabled in this deployment. +- Whether Caddy should apply rate limits or buffering constraints. + +### 5. Request body parser inconsistencies in `HTTP::Entity::Parser` + +`nix-serve.psgi` does not call `HTTP::Entity::Parser`, so this is probably not reachable through the current app unless Starman/Plack middleware invokes it. 
It is still in the service closure and should be treated as a package-level finding. + +Potential issues: + +- `HTTP::Entity::Parser.pm:76-90`: if both `Content-Length` and `Transfer-Encoding: chunked` exist, `Content-Length` wins. RFC 7230 says transfer coding overrides content length; this can create request-smuggling-style disagreement when another component follows the standard. +- `HTTP::Entity::Parser.pm:77-88`: `CONTENT_LENGTH` is not strictly parsed as decimal digits. Perl numeric coercion can accept weird values like `10foo`, scientific notation, or negative values differently from other components. +- `HTTP::Entity::Parser.pm:102-115`: chunk header parsing accepts `^(([0-9a-fA-F]+).*\r\n)` and does not require the CRLF after chunk data; it merely tries to remove it with `s/^\r\n//`. Malformed chunk bodies can be accepted with parser state disagreement. +- `UrlEncoded.pm` and `JSON.pm` accumulate the full body in memory before final parsing. Large bodies are memory DoS if an app registers these parsers without external limits. +- `MultiPart.pm` writes uploaded file parts to temp files and accumulates non-file fields in memory. There are no per-field, per-file, part-count, or aggregate limits here. + +## Initial Priority + +1. Decide whether to block `%00` at Caddy or patch/replace `HTTP::Parser::XS` path decoding. +2. Validate malformed percent escape behavior under ASAN or a debug allocator if practical. +3. Inspect `nix log` behavior for missing/attacker-chosen valid-looking store paths and decide whether `/log` needs validation/rate-limiting. +4. Decide whether the legacy `/nar/<hash>.nar` route is still needed. +5. Treat `HTTP::Entity::Parser` as lower priority for this app unless a middleware path is found that parses request bodies. 
diff --git a/steam-heroic-shortcuts.md b/steam-heroic-shortcuts.md new file mode 100644 index 0000000..7025fac --- /dev/null +++ b/steam-heroic-shortcuts.md @@ -0,0 +1,96 @@ +# Steam Heroic Direct Shortcuts + +Reference for the non-declarative Steam shortcuts added for Heroic-managed games. +These live in Steam config files, not Nix, so this file records the important state for future recovery. + +## Files + +- Steam shortcuts: `/home/dan/.local/share/Steam/userdata/238310127/config/shortcuts.vdf` +- Steam compatibility mapping: `/home/dan/.local/share/Steam/config/config.vdf` +- Steam compatdata: `/home/dan/.local/share/Steam/steamapps/compatdata/` +- Heroic game config: `/home/dan/.config/heroic/GamesConfig/` + +## Steam Shortcuts + +All direct shortcuts are configured to use `DW-Proton Latest`. + +| Name | App ID | Exe | StartDir | Launch options | +| --- | ---: | --- | --- | --- | +| `Arknights: Endfield (Direct Launcher)` | `3532200938` | `/home/dan/Games/Heroic/ArknightsEndfieldgowoU/Launcher.exe` | `/home/dan/Games/Heroic/ArknightsEndfieldgowoU` | empty | +| `Arknights: Endfield (Direct Game)` | `2506976826` | `/home/dan/Games/Heroic/ArknightsEndfieldgowoU/games/EndField Game/Endfield.exe` | `/home/dan/Games/Heroic/ArknightsEndfieldgowoU/games/EndField Game` | empty | +| `Zenless Zone Zero (Direct Launcher)` | `2568476331` | `/home/dan/Games/Heroic/ZenlessZoneZero/launcher_epic.exe` | `/home/dan/Games/Heroic/ZenlessZoneZero` | `UMU_ID=umu-zenlesszonezero UMU_USE_STEAM=1 WINE_DISABLE_VULKAN_OPWR=1 %command% {enable_pay:true}` | +| `Zenless Zone Zero (Direct Game)` | `4264951319` | `/home/dan/Games/Heroic/ZenlessZoneZero/games/ZenlessZoneZero Game/ZenlessZoneZero.exe` | `/home/dan/Games/Heroic/ZenlessZoneZero/games/ZenlessZoneZero Game` | `UMU_ID=umu-zenlesszonezero UMU_USE_STEAM=1 WINE_DISABLE_VULKAN_OPWR=1 %command%` | + +The original Heroic-generated shortcuts were left in place: + +| Name | App ID | Exe | Launch options | +| --- | ---: | --- | --- | +| 
`Zenless Zone Zero` | `2928100415` | `heroic` | `--no-gui --no-sandbox "heroic://launch?appName=525aa0efd70f4399b9f64bcd2a5b38c7&runner=legendary"` | +| `Arknights: Endfield` | `2465091319` | `heroic` | `--no-gui --no-sandbox "heroic://launch?appName=bcd55b0d87c245dd867f5b1bd496f1df&runner=legendary"` | + +## Compatibility Mapping + +The app IDs above were added under `InstallConfigStore.Software.Valve.Steam.CompatToolMapping` in Steam's `config.vdf`: + +```vdf +"CompatToolMapping" +{ + "3532200938" + { + "name" "DW-Proton Latest" + "config" "" + "priority" "250" + } + "2506976826" + { + "name" "DW-Proton Latest" + "config" "" + "priority" "250" + } + "2568476331" + { + "name" "DW-Proton Latest" + "config" "" + "priority" "250" + } + "4264951319" + { + "name" "DW-Proton Latest" + "config" "" + "priority" "250" + } +} +``` + +## Prefix Links + +The direct Steam shortcuts use the existing Heroic prefixes by symlinking each shortcut's `pfx` directory: + +```sh +ln -s "/home/dan/Games/Heroic/Prefixes/default/Zenless Zone Zero" \ + "/home/dan/.local/share/Steam/steamapps/compatdata/2568476331/pfx" + +ln -s "/home/dan/Games/Heroic/Prefixes/default/Zenless Zone Zero" \ + "/home/dan/.local/share/Steam/steamapps/compatdata/4264951319/pfx" + +ln -s "/home/dan/Games/Heroic/Prefixes/default/Arknights Endfield" \ + "/home/dan/.local/share/Steam/steamapps/compatdata/3532200938/pfx" + +ln -s "/home/dan/Games/Heroic/Prefixes/default/Arknights Endfield" \ + "/home/dan/.local/share/Steam/steamapps/compatdata/2506976826/pfx" +``` + +If Steam already created a fresh prefix, move it aside before creating the symlink: + +```sh +mv "/home/dan/.local/share/Steam/steamapps/compatdata/4264951319/pfx" \ + "/home/dan/.local/share/Steam/steamapps/compatdata/4264951319/pfx.steam-empty-bak" +``` + +## Notes + +- `gamescope` is installed declaratively in `servers/ui-mode/home.nix` for optional testing. 
+- `Zenless Zone Zero (Direct Game)` is the most promising shortcut: the game process starts and Steam starts `gameoverlayui` for it. +- The Heroic-generated `heroic://launch` shortcuts can start the games, but Steam Overlay/Input may not attach because Steam tracks Heroic/Electron rather than the final game process. +- Zenless launcher mode needs fresh Epic exchange-code arguments from Heroic/Legendary, so the direct launcher shortcut may not be reliable. +- A `wine64-preloader`/`rpcss.exe` `SIGSYS` coredump was seen during startup, but the game continued and overlay was started; treat it as non-fatal unless the game crashes.