diff --git a/src/psyc/lines/topology_export.py b/src/psyc/lines/topology_export.py
new file mode 100644
index 0000000..7de2aed
--- /dev/null
+++ b/src/psyc/lines/topology_export.py
@@ -0,0 +1,259 @@
+"""Topology export — sanitized public docker snapshot.
+
+The cockpit's `docker_view.topology()` returns a rich daemon view useful to
+the local operator: container env vars, volume mounts, internal IPs, labels,
+gateways. None of that may leave the node. This module wraps `docker_view`
+with a strict whitelist: only container names, images, states, network names
+and high-level driver/health metadata are exposed. Anything not listed in
+the Pydantic schemas below is dropped before serialization.
+
+Used by `/federation/topology` so peer admin pages can render every node's
+container topology side-by-side with their own.
+"""
+
+from __future__ import annotations
+
+import re
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional
+
+from pydantic import BaseModel, Field
+
+from psyc import log
+from psyc.cockpit import docker_view
+from psyc.lines import federation
+
+
+_log = log.get(__name__)
+
+
+# Caps keep the response bounded — a runaway node with thousands of
+# containers shouldn't blow up the peer's panel.
+MAX_CONTAINERS = 200
+MAX_NETWORKS = 50
+
+
+# ---------- data model --------------------------------------------------
+
+class TopologyContainer(BaseModel):
+    """One container — sanitized.
+
+    Strict whitelist: name, short_id, image (tag-only), state, health,
+    network names, compose service label, started_at. No env vars, no
+    volumes, no IPs, no MACs, no port mappings, no full labels dict.
+    """
+    name: str
+    short_id: str
+    image: str
+    state: str
+    health: str
+    networks: List[str] = Field(default_factory=list)
+    service: Optional[str] = None
+    started_at: Optional[str] = None
+
+
+class TopologyNetwork(BaseModel):
+    """One docker network — sanitized.
+
+    Whitelist: name, driver, internal flag, container_count. No subnet,
+    no gateway, no labels, no attached-container details (those are
+    surfaced via the container.networks list).
+    """
+    name: str
+    driver: str
+    internal: bool
+    container_count: int
+
+
+class TopologyExport(BaseModel):
+    """Whole-node container snapshot, public-safe."""
+    node_fingerprint: str
+    generated_at: str
+    host_name: str
+    container_count: int
+    network_count: int
+    containers: List[TopologyContainer] = Field(default_factory=list)
+    networks: List[TopologyNetwork] = Field(default_factory=list)
+
+
+# ---------- sanitizers --------------------------------------------------
+
+# Matches everything up to and including the LAST "@" of the leading
+# (registry) component. Greedy on purpose: a password may itself contain
+# "@", and cutting at the first one would leak the password's tail.
+_BASIC_AUTH_RE = re.compile(r"^[^/]*@")
+
+
+def _filter_image_name(s: str) -> str:
+    """Strip credentials from an image reference and drop digests.
+
+    Docker accepts `user:pass@registry/image:tag` for registries with HTTP
+    basic auth — we strip everything up to and including the last `@` of
+    the registry component so leaked creds never reach a peer, even when
+    the password itself contains an `@`. We also cut content-addressable
+    digests (`...@sha256:...`) to a clean tag-only form.
+
+    Returns the cleaned `repo/image:tag` string, capped at 160 characters.
+    Empty or `None` input → "".
+    """
+    if not s:
+        return ""
+    raw = str(s).strip()
+    if not raw:
+        return ""
+    # Drop digest suffix, e.g. "nginx:1.25@sha256:abcd…" → "nginx:1.25".
+    if "@sha256:" in raw:
+        raw = raw.split("@sha256:", 1)[0]
+    # Strip basic-auth prefix on the registry component.
+    # "user:pass@host/repo:tag" → "host/repo:tag" (we never want creds out).
+    raw = _BASIC_AUTH_RE.sub("", raw, count=1)
+    # Cap length defensively.
+    return raw[:160]
+
+
+def _short_id(raw: Any) -> str:
+    """First 12 characters of a container id ("" when missing)."""
+    s = str(raw or "")
+    return s[:12]
+
+
+def _short_host_name(raw: Any) -> str:
+    """Reduce a docker host name to its first DNS label, max 24 characters.
+
+    The daemon may report the machine's fully-qualified hostname; everything
+    from the first "." onward (the domain) is dropped so it never leaves
+    the node, whatever the length of the leading label.
+    """
+    label = str(raw or "").split(".", 1)[0]
+    return label[:24]
+
+
+def _dict_entries(value: Any, cap: int) -> List[Dict[str, Any]]:
+    """Return the dict items among the first `cap` entries of a list.
+
+    Anything that is not a list yields [], and non-dict entries are skipped,
+    so one malformed record cannot crash the public endpoint.
+    """
+    if not isinstance(value, list):
+        return []
+    return [item for item in value[:cap] if isinstance(item, dict)]
+
+
+def _parse_health(status: str) -> str:
+    """Extract a healthcheck word from the docker "Status" line if present.
+
+    docker's container-list "Status" string includes "(healthy)" or
+    "(unhealthy)" when a healthcheck is configured. We surface just that
+    one-word state and fall back to "—" otherwise — no other free-form
+    text from the daemon leaks out.
+    """
+    if not status:
+        return "—"
+    low = status.lower()
+    if "(healthy)" in low:
+        return "healthy"
+    if "(unhealthy)" in low:
+        return "unhealthy"
+    if "(starting)" in low or "(health: starting)" in low:
+        return "starting"
+    return "—"
+
+
+def _now_iso() -> str:
+    """Current UTC time as an ISO-8601 string."""
+    return datetime.now(timezone.utc).isoformat()
+
+
+def _empty_export(node_fp: str) -> TopologyExport:
+    """Zero-container export, used when docker data is unavailable."""
+    return TopologyExport(
+        node_fingerprint=node_fp,
+        generated_at=_now_iso(),
+        host_name="",
+        container_count=0,
+        network_count=0,
+        containers=[],
+        networks=[],
+    )
+
+
+# ---------- builder ------------------------------------------------------
+
+def build_export() -> TopologyExport:
+    """Sanitized snapshot of this node's docker topology.
+
+    Calls `docker_view.topology()` and re-projects every field through the
+    Pydantic whitelist above. If the proxy is unreachable (e.g. dev box
+    without docker-socket-proxy), reports an error, or returns something
+    that is not a dict, we return an empty export rather than raising —
+    the public endpoint must never 500. Malformed container/network
+    entries are skipped for the same reason.
+    """
+    try:
+        node_fp = federation.node_fingerprint()
+    except Exception as exc:  # noqa: BLE001 — keep endpoint defensive
+        _log.warning("topology_export.fp.error", error=str(exc))
+        node_fp = ""
+
+    try:
+        raw = docker_view.topology()
+    except Exception as exc:  # noqa: BLE001 — docker proxy may be down
+        _log.warning("topology_export.docker.error", error=str(exc))
+        return _empty_export(node_fp)
+
+    # docker_view.topology() returns a dict with `containers`, `networks`,
+    # `host`, `error` fields. We treat any truthy error as "empty export"
+    # rather than partially leaking through whatever did succeed.
+    if not isinstance(raw, dict) or raw.get("error"):
+        return _empty_export(node_fp)
+
+    # The docker host name can be the actual machine hostname: keep only a
+    # short first label, never the domain.
+    raw_host = raw.get("host")
+    host_name = _short_host_name(
+        raw_host.get("name") if isinstance(raw_host, dict) else None
+    )
+
+    containers: List[TopologyContainer] = []
+    for c in _dict_entries(raw.get("containers"), MAX_CONTAINERS):
+        nets_raw = c.get("networks")
+        net_names: List[str] = []
+        for nd in nets_raw if isinstance(nets_raw, list) else []:
+            nm = nd.get("name") if isinstance(nd, dict) else None
+            if nm:
+                net_names.append(str(nm)[:64])
+        containers.append(TopologyContainer(
+            name=str(c.get("name") or "?")[:64],
+            short_id=_short_id(c.get("id")),
+            image=_filter_image_name(c.get("image") or ""),
+            state=str(c.get("state") or "")[:24],
+            health=_parse_health(str(c.get("status") or "")),
+            networks=net_names[:12],
+            # docker_view doesn't currently surface the compose service label
+            # or started_at; leave them None until that lands.
+            service=None,
+            started_at=None,
+        ))
+
+    networks: List[TopologyNetwork] = []
+    for n in _dict_entries(raw.get("networks"), MAX_NETWORKS):
+        attached = n.get("containers")
+        networks.append(TopologyNetwork(
+            name=str(n.get("name") or "")[:64],
+            driver=str(n.get("driver") or "")[:24],
+            internal=bool(n.get("internal")),
+            # Only the number of attachments is public, never their details.
+            container_count=(
+                len(attached) if isinstance(attached, (list, dict)) else 0
+            ),
+        ))
+
+    return TopologyExport(
+        node_fingerprint=node_fp,
+        generated_at=_now_iso(),
+        host_name=host_name,
+        container_count=len(containers),
+        network_count=len(networks),
+        containers=containers,
+        networks=networks,
+    )
diff --git a/tests/test_topology_export.py b/tests/test_topology_export.py
new file mode 100644
index 0000000..c31f665
--- /dev/null
+++ b/tests/test_topology_export.py
@@ -0,0 +1,281 @@
+"""Topology export — whitelist sanitization unit tests.
+
+The big invariant: nothing from docker_view.topology() escapes that isn't
+in the Pydantic schema. We assert via model_fields introspection AND via a
+JSON-dump scan over a fixture that contains every dangerous field.
+
+The /federation/topology endpoint contract lives in the sibling tests added
+alongside it; this module covers the builder + the sanitizer in isolation.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any, Dict
+from unittest.mock import patch
+
+import pytest
+from sqlalchemy import create_engine
+
+from psyc import db
+from psyc.cockpit import docker_view
+from psyc.lines import federation, topology_export
+from psyc.lines.topology_export import (
+    TopologyContainer,
+    TopologyExport,
+    TopologyNetwork,
+    _filter_image_name,
+    build_export,
+)
+
+
+# ---------- fixtures ----------------------------------------------------
+
+@pytest.fixture
+def fresh_db(tmp_path, monkeypatch):
+    test_db = tmp_path / "test.db"
+    eng = create_engine(f"sqlite:///{test_db}", future=True)
+    db._metadata.create_all(eng, checkfirst=True)
+    monkeypatch.setattr(db, "_engine", eng)
+    monkeypatch.setattr(db, "DB_PATH", test_db)
+    yield test_db
+
+
+@pytest.fixture
+def fed_dir(tmp_path, monkeypatch):
+    d = tmp_path / "federation"
+    monkeypatch.setattr(federation, "FED_DIR", d)
+    monkeypatch.setattr(federation, "PRIVATE_KEY_PATH", d / "node.key")
+    monkeypatch.setattr(federation, "PUBLIC_KEY_PATH", d / "node.pub")
+    yield d
+
+
+# ---------- fixture data: hostile docker_view output --------------------
+
+# This payload has every leaky field docker_view *could* surface, plus
+# nested env-style data — used to prove the export is whitelist-only.
+_LEAKY_TOPOLOGY: Dict[str, Any] = {
+    "containers": [
+        {
+            "id": "abcdef1234567890ffff",
+            "name": "psyc-cockpit-1",
+            "image": "registry.example/psyc:1.2",
+            "state": "running",
+            "status": "Up 5 minutes (healthy)",
+            "networks": [
+                {"name": "backend", "ip": "172.20.0.5", "gateway": "172.20.0.1", "mac": "02:42:ac:14:00:05"},
+                {"name": "frontend", "ip": "172.21.0.7", "gateway": "172.21.0.1", "mac": "02:42:ac:15:00:07"},
+            ],
+            "ports": ["0.0.0.0:8767->8767/tcp"],
+            "published_ports": ["8767/tcp"],
+            # These are NOT current docker_view fields but defend in depth —
+            # if a future docker_view change adds them, sanitizer drops them.
+            "env": ["SECRET_TOKEN=abc123", "DB_PASSWORD=hunter2"],
+            "mounts": ["/var/run/docker.sock", "/etc/secrets:/secrets"],
+            "labels": {"com.docker.compose.project": "psyc", "secret_label": "shh"},
+        },
+        {
+            "id": "fedcba0987654321",
+            "name": "some-stopped",
+            "image": "alpine",
+            "state": "exited",
+            "status": "Exited (0) 2 hours ago",
+            "networks": [],
+            "ports": [],
+            "published_ports": [],
+        },
+    ],
+    "networks": [
+        {
+            "id": "n1", "name": "backend", "driver": "bridge", "scope": "local",
+            "internal": False, "subnet": "172.20.0.0/16", "gateway": "172.20.0.1",
+            "containers": [
+                {"id": "abcdef123456", "name": "psyc-cockpit-1", "ip": "172.20.0.5", "mac": "02:42:ac:14:00:05"},
+            ],
+        },
+        {
+            "id": "n2", "name": "internal-only", "driver": "bridge", "scope": "local",
+            "internal": True, "subnet": "10.99.0.0/16", "gateway": "10.99.0.1",
+            "containers": [],
+        },
+    ],
+    "host": {"name": "docker-host-secret-internal.example.com", "os": "linux", "ncpu": 8},
+    "error": None,
+    "proxy": "http://docker-socket-proxy:2375",
+}
+
+
+# Sensitive strings that MUST NOT appear anywhere in the export JSON.
+_FORBIDDEN_STRINGS = (
+    "SECRET_TOKEN", "DB_PASSWORD", "hunter2", "abc123",
+    "/var/run/docker.sock", "/etc/secrets",
+    "secret_label", "shh",
+    "172.20.0.5", "172.21.0.7",  # IPs
+    "02:42:ac",  # MAC prefix
+    "172.20.0.1",  # gateway
+    "172.20.0.0/16", "10.99.0.0/16",  # subnets
+    "0.0.0.0:8767",  # port mapping
+    "internal.example.com",  # full host
+)
+
+
+# ---------- model field introspection -----------------------------------
+
+def test_container_model_has_no_dangerous_fields():
+    fields = set(TopologyContainer.model_fields.keys())
+    # whitelist — must match the design contract exactly
+    assert fields == {
+        "name", "short_id", "image", "state", "health",
+        "networks", "service", "started_at",
+    }
+    # explicit deny-list, double-belt
+    for forbidden in ("env", "environment", "mounts", "volumes",
+                      "labels", "ip", "ip_address", "ipaddress",
+                      "ports", "published_ports", "mac", "gateway"):
+        assert forbidden not in fields, f"{forbidden} must not be a field"
+
+
+def test_network_model_has_no_dangerous_fields():
+    fields = set(TopologyNetwork.model_fields.keys())
+    assert fields == {"name", "driver", "internal", "container_count"}
+    for forbidden in ("subnet", "gateway", "labels", "ipam",
+                      "containers", "scope", "id"):
+        assert forbidden not in fields, f"{forbidden} must not be a field"
+
+
+def test_export_model_top_level_fields():
+    fields = set(TopologyExport.model_fields.keys())
+    assert fields == {
+        "node_fingerprint", "generated_at", "host_name",
+        "container_count", "network_count", "containers", "networks",
+    }
+
+
+# ---------- image-name filter -------------------------------------------
+
+def test_filter_image_strips_basic_auth_prefix():
+    # user:pass@host/repo:tag → host/repo:tag (creds gone)
+    assert _filter_image_name("user:pass@host/repo:tag") == "host/repo:tag"
+
+
+def test_filter_image_strips_credentials_when_password_contains_at():
+    # The cut must happen at the LAST "@" of the registry component,
+    # otherwise the tail of the password ("ss") leaks.
+    assert _filter_image_name("user:p@ss@host/repo:tag") == "host/repo:tag"
+
+
+def test_filter_image_drops_digest_suffix():
+    assert _filter_image_name(
+        "nginx:1.25@sha256:abcdef0123"
+    ) == "nginx:1.25"
+
+
+def test_filter_image_passes_clean_refs_untouched():
+    assert _filter_image_name("psyc:latest") == "psyc:latest"
+    assert _filter_image_name(
+        "ghcr.io/example/psyc:v0.3.1"
+    ) == "ghcr.io/example/psyc:v0.3.1"
+
+
+def test_filter_image_handles_empty():
+    assert _filter_image_name("") == ""
+    assert _filter_image_name(None) == ""  # type: ignore[arg-type]
+
+
+# ---------- build_export contract ---------------------------------------
+
+def test_build_export_returns_empty_when_docker_view_raises(fresh_db, fed_dir, monkeypatch):
+    def boom():
+        raise docker_view.DockerProxyError("connection refused")
+    monkeypatch.setattr(docker_view, "topology", boom)
+    out = build_export()
+    assert isinstance(out, TopologyExport)
+    assert out.container_count == 0
+    assert out.containers == []
+    assert out.networks == []
+    # fingerprint is still real (federation key was generated)
+    assert len(out.node_fingerprint) == 32
+
+
+def test_build_export_returns_empty_when_docker_view_reports_error(fresh_db, fed_dir, monkeypatch):
+    monkeypatch.setattr(docker_view, "topology", lambda: {
+        "containers": [], "networks": [], "host": {"name": "x"},
+        "error": "containers: refused", "proxy": "x",
+    })
+    out = build_export()
+    assert out.container_count == 0
+    assert out.containers == []
+
+
+def test_build_export_sanitizes_every_field(fresh_db, fed_dir, monkeypatch):
+    monkeypatch.setattr(docker_view, "topology", lambda: _LEAKY_TOPOLOGY)
+    out = build_export()
+    # Containers came through, but as TopologyContainer (no leaky attrs).
+    assert out.container_count == 2
+    by_name = {c.name: c for c in out.containers}
+    cp = by_name["psyc-cockpit-1"]
+    assert cp.short_id == "abcdef123456"
+    assert cp.image == "registry.example/psyc:1.2"
+    assert cp.state == "running"
+    assert cp.health == "healthy"
+    assert cp.networks == ["backend", "frontend"]
+    assert cp.service is None
+    # Networks came through, sanitized.
+    assert out.network_count == 2
+    by_net = {n.name: n for n in out.networks}
+    assert by_net["backend"].driver == "bridge"
+    assert by_net["backend"].internal is False
+    assert by_net["backend"].container_count == 1
+    assert by_net["internal-only"].internal is True
+
+
+def test_export_json_contains_no_dangerous_strings(fresh_db, fed_dir, monkeypatch):
+    """Strict no-leak: serialize and grep for everything sensitive."""
+    monkeypatch.setattr(docker_view, "topology", lambda: _LEAKY_TOPOLOGY)
+    out = build_export()
+    blob = json.dumps(out.model_dump(mode="json"))
+    for forbidden in _FORBIDDEN_STRINGS:
+        assert forbidden not in blob, f"leak: {forbidden!r} appeared in export JSON"
+
+
+def test_build_export_caps_at_max_containers(fresh_db, fed_dir, monkeypatch):
+    fake = {
+        "containers": [
+            {"id": f"id{i:04d}", "name": f"c{i}", "image": "x", "state": "running", "status": "Up", "networks": []}
+            for i in range(topology_export.MAX_CONTAINERS + 50)
+        ],
+        "networks": [], "host": {"name": "h"}, "error": None, "proxy": "",
+    }
+    monkeypatch.setattr(docker_view, "topology", lambda: fake)
+    out = build_export()
+    assert out.container_count == topology_export.MAX_CONTAINERS
+
+
+def test_build_export_drops_host_domain_even_for_short_names(fresh_db, fed_dir, monkeypatch):
+    monkeypatch.setattr(docker_view, "topology", lambda: {
+        "containers": [], "networks": [],
+        "host": {"name": "db1.corp.example.com"}, "error": None, "proxy": "",
+    })
+    out = build_export()
+    assert out.host_name == "db1"
+
+
+def test_build_export_skips_malformed_entries(fresh_db, fed_dir, monkeypatch):
+    monkeypatch.setattr(docker_view, "topology", lambda: {
+        "containers": [None, "junk", {"id": "abc", "name": "ok", "networks": "nope"}],
+        "networks": [42, {"name": "backend", "driver": "bridge", "containers": None}],
+        "host": "not-a-dict", "error": None, "proxy": "",
+    })
+    out = build_export()
+    assert [c.name for c in out.containers] == ["ok"]
+    assert out.containers[0].networks == []
+    assert [n.name for n in out.networks] == ["backend"]
+    assert out.networks[0].container_count == 0
+    assert out.host_name == ""
+
+
+def test_build_export_returns_empty_when_docker_view_returns_non_dict(fresh_db, fed_dir, monkeypatch):
+    monkeypatch.setattr(docker_view, "topology", lambda: None)
+    out = build_export()
+    assert out.container_count == 0
+    assert out.networks == []