stage-topo-a topology-export: sanitized public docker snapshot module + tests

This commit is contained in:
m17hr1l
2026-06-07 01:55:49 +02:00
parent 8587e079bb
commit a8216d00ef
2 changed files with 473 additions and 0 deletions

View File

@@ -0,0 +1,228 @@
"""Topology export — sanitized public docker snapshot.
The cockpit's `docker_view.topology()` returns a rich daemon view useful to
the local operator: container env vars, volume mounts, internal IPs, labels,
gateways. None of that may leave the node. This module wraps `docker_view`
with a strict whitelist: only container names, images, states, network names
and high-level driver/health metadata are exposed. Anything not listed in
the Pydantic schemas below is dropped before serialization.
Used by `/federation/topology` so peer admin pages can render every node's
container topology side-by-side with their own.
"""
from __future__ import annotations
import re
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field
from psyc import log
from psyc.cockpit import docker_view
from psyc.lines import federation
# Module-level logger, obtained from psyc.log under this module's name.
_log = log.get(__name__)
# Caps keep the response bounded — a runaway node with thousands of
# containers shouldn't blow up the peer's panel. build_export() slices the
# raw docker_view lists to these lengths before projecting them.
MAX_CONTAINERS = 200  # upper bound on containers in one export
MAX_NETWORKS = 50  # upper bound on networks in one export
# ---------- data model --------------------------------------------------
class TopologyContainer(BaseModel):
    """Public-safe projection of a single container.

    The schema is the whitelist: only the attributes declared below are
    serialized. Env vars, volumes, IPs, MACs, port mappings and the full
    labels dict have no field here and therefore cannot be exported.
    """

    # Identity.
    name: str
    short_id: str
    image: str
    # Lifecycle.
    state: str
    health: str
    # Names of attached networks only — never addresses.
    networks: List[str] = Field(default_factory=list)
    # Optional metadata; absent values serialize as null.
    service: Optional[str] = None
    started_at: Optional[str] = None
class TopologyNetwork(BaseModel):
    """Public-safe projection of a single docker network.

    Exposes the network's name, driver, internal flag and the number of
    attached containers. Subnet, gateway, labels and per-container
    attachment details have no field here; which container sits on which
    network is conveyed through ``TopologyContainer.networks`` instead.
    """

    name: str
    driver: str
    # True when docker marks the network as internal.
    internal: bool
    # Count only — the attached containers themselves are not listed.
    container_count: int
class TopologyExport(BaseModel):
    """Public-safe snapshot of one node's container topology."""

    # Which node produced the snapshot, and when.
    node_fingerprint: str
    generated_at: str
    host_name: str
    # Sizes of the two lists below.
    container_count: int
    network_count: int
    # Sanitized payload.
    containers: List[TopologyContainer] = Field(default_factory=list)
    networks: List[TopologyNetwork] = Field(default_factory=list)
# ---------- sanitizers --------------------------------------------------
# Matches a leading "userinfo@" only when the userinfo contains neither "/"
# nor "@". _filter_image_name no longer relies on it (it missed credentials
# containing those characters); the name is kept so that any existing
# importer of this private constant keeps working.
_BASIC_AUTH_RE = re.compile(r"^[^/@]+@")
# Markers that introduce a content-addressable digest suffix.
_DIGEST_MARKERS = ("@sha256:", "@sha384:", "@sha512:")


def _filter_image_name(s: Optional[str]) -> str:
    """Strip credentials from an image reference and drop digests.

    Two things are removed, in this order:

    1. A digest suffix (``...@sha256:...``, likewise sha384/sha512), leaving
       the tag-only form.
    2. Everything up to and including the last remaining ``@``. Once the
       digest is gone a clean reference contains no ``@`` at all, so
       anything before one is treated as userinfo
       (``user:pass@registry/image:tag``, also with a ``scheme://`` prefix
       or with ``/`` / ``@`` inside the password) and never reaches a peer.

    Args:
        s: Raw image reference; ``None`` or empty yields ``""``. Non-string
            values are converted with ``str()``.

    Returns:
        The cleaned ``repo/image:tag`` string, capped at 160 characters.
    """
    if not s:
        return ""
    raw = str(s).strip()
    # Drop digest suffix, e.g. "nginx:1.25@sha256:abcd…" → "nginx:1.25".
    for marker in _DIGEST_MARKERS:
        raw = raw.partition(marker)[0]
    # Fail closed: whatever precedes a remaining "@" is credentials.
    # rpartition leaves the string untouched when there is no "@".
    raw = raw.rpartition("@")[2]
    # Cap length defensively.
    return raw[:160]
def _short_id(raw: Any) -> str:
    """Return the first 12 characters of *raw* as text; falsy input gives ""."""
    if not raw:
        return ""
    return str(raw)[:12]
def _parse_health(status: str) -> str:
    """Reduce a docker "Status" line to a single healthcheck word.

    The status text is matched case-insensitively against the parenthesised
    markers docker appends when a healthcheck is configured. The result is
    one of "healthy", "unhealthy", "starting", or "" when no marker is found
    or the input is empty, so no other free-form daemon text is passed on.
    """
    if not status:
        return ""
    lowered = status.lower()
    # Checked in order; the first marker found decides the result.
    marker_to_word = (
        ("(healthy)", "healthy"),
        ("(unhealthy)", "unhealthy"),
        ("(starting)", "starting"),
        ("(health: starting)", "starting"),
    )
    for marker, word in marker_to_word:
        if marker in lowered:
            return word
    return ""
def _now_iso() -> str:
    """Return the current time in UTC as an ISO-8601 string with offset."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
def _empty_export(node_fp: str) -> TopologyExport:
    """Build an export that carries only the fingerprint and a timestamp.

    Used as the fallback result: host name is blank, both counts are zero
    and both lists are empty.
    """
    blank_fields = {
        "node_fingerprint": node_fp,
        "generated_at": _now_iso(),
        "host_name": "",
        "container_count": 0,
        "network_count": 0,
        "containers": [],
        "networks": [],
    }
    return TopologyExport(**blank_fields)
# ---------- builder ------------------------------------------------------
def _as_list(value: Any) -> List[Any]:
    """Return *value* as a list when it is a list/tuple, else an empty list."""
    return list(value) if isinstance(value, (list, tuple)) else []


def _public_host_name(raw_host: Any) -> str:
    """Reduce the docker host record to a short label without a domain."""
    if not isinstance(raw_host, dict):
        return ""
    full_name = str(raw_host.get("name") or "").strip()
    # The docker host id can be the actual machine hostname. Keep only the
    # first DNS label so a short FQDN (or a dotted IP) cannot slip through
    # the length cap intact, then cap the length.
    return full_name.split(".", 1)[0][:24]


def _project_container(c: Dict[str, Any]) -> TopologyContainer:
    """Project one raw container dict onto the whitelisted schema."""
    net_names = [
        str(nd["name"])[:64]
        for nd in _as_list(c.get("networks"))
        if isinstance(nd, dict) and nd.get("name")
    ]
    return TopologyContainer(
        name=str(c.get("name") or "?")[:64],
        short_id=_short_id(c.get("id")),
        image=_filter_image_name(c.get("image") or ""),
        state=str(c.get("state") or "")[:24],
        health=_parse_health(str(c.get("status") or "")),
        networks=net_names[:12],
        # docker_view doesn't currently surface the compose service label
        # or started_at; leave them None until that lands.
        service=None,
        started_at=None,
    )


def _project_network(n: Dict[str, Any]) -> TopologyNetwork:
    """Project one raw network dict onto the whitelisted schema."""
    attached = n.get("containers")
    # Only the number of attachments is exported; tolerate either a list or
    # a mapping of attachments, and count anything else as zero.
    attached_count = len(attached) if isinstance(attached, (list, tuple, dict)) else 0
    return TopologyNetwork(
        name=str(n.get("name") or "")[:64],
        driver=str(n.get("driver") or "")[:24],
        internal=bool(n.get("internal")),
        container_count=attached_count,
    )


def build_export() -> TopologyExport:
    """Sanitized snapshot of this node's docker topology.

    Calls `docker_view.topology()` and re-projects every field through the
    Pydantic whitelist models. The public endpoint must never 500, so every
    failure mode degrades instead of raising:

    * fingerprint lookup fails → logged, fingerprint becomes "";
    * `docker_view.topology()` raises, returns a non-dict, or reports a
      truthy `error` → logged, an empty export is returned (nothing that
      partially succeeded is passed through);
    * malformed entries (non-dict containers/networks, non-list
      collections) are skipped rather than crashing the projection.

    Returns:
        A `TopologyExport` holding at most `MAX_CONTAINERS` containers and
        `MAX_NETWORKS` networks; the counts reflect the exported lists.
    """
    try:
        node_fp = federation.node_fingerprint()
    except Exception as exc:  # noqa: BLE001 — keep endpoint defensive
        _log.warning("topology_export.fp.error", error=str(exc))
        node_fp = ""
    try:
        raw = docker_view.topology()
    except Exception as exc:  # noqa: BLE001 — docker proxy may be down
        _log.warning("topology_export.docker.error", error=str(exc))
        return _empty_export(node_fp)
    if not isinstance(raw, dict):
        _log.warning(
            "topology_export.docker.error",
            error=f"unexpected topology payload: {type(raw).__name__}",
        )
        return _empty_export(node_fp)
    # docker_view.topology() returns a dict with `containers`, `networks`,
    # `host`, `error` fields. We treat any non-None error as "empty export"
    # rather than partially leaking through whatever did succeed.
    reported_error = raw.get("error")
    if reported_error:
        _log.warning("topology_export.docker.error", error=str(reported_error))
        return _empty_export(node_fp)
    # Cap first, then drop malformed entries, so a hostile payload cannot
    # make us walk an unbounded list.
    containers: List[TopologyContainer] = [
        _project_container(c)
        for c in _as_list(raw.get("containers"))[:MAX_CONTAINERS]
        if isinstance(c, dict)
    ]
    networks: List[TopologyNetwork] = [
        _project_network(n)
        for n in _as_list(raw.get("networks"))[:MAX_NETWORKS]
        if isinstance(n, dict)
    ]
    return TopologyExport(
        node_fingerprint=node_fp,
        generated_at=_now_iso(),
        host_name=_public_host_name(raw.get("host")),
        container_count=len(containers),
        network_count=len(networks),
        containers=containers,
        networks=networks,
    )

View File

@@ -0,0 +1,245 @@
"""Topology export — whitelist sanitization unit tests.
The big invariant: nothing from docker_view.topology() escapes that isn't
in the Pydantic schema. We assert via model_fields introspection AND via a
JSON-dump scan over a fixture that contains every dangerous field.
The /federation/topology endpoint contract lives in the sibling tests added
alongside it; this module covers the builder + the sanitizer in isolation.
"""
from __future__ import annotations
import json
from typing import Any, Dict
from unittest.mock import patch
import pytest
from sqlalchemy import create_engine
from psyc import db
from psyc.cockpit import docker_view
from psyc.lines import federation, topology_export
from psyc.lines.topology_export import (
TopologyContainer,
TopologyExport,
TopologyNetwork,
_filter_image_name,
build_export,
)
# ---------- fixtures ----------------------------------------------------
@pytest.fixture
def fresh_db(tmp_path, monkeypatch):
    """Give the test a throw-away SQLite database.

    Creates the schema from `db._metadata` in a file under `tmp_path`,
    patches `db._engine` and `db.DB_PATH` to point at it, and yields the
    database path. On teardown the engine is disposed so no SQLite handle
    outlives the test.
    """
    test_db = tmp_path / "test.db"
    eng = create_engine(f"sqlite:///{test_db}", future=True)
    db._metadata.create_all(eng, checkfirst=True)
    monkeypatch.setattr(db, "_engine", eng)
    monkeypatch.setattr(db, "DB_PATH", test_db)
    yield test_db
    # Release pooled connections; otherwise the file handle stays open
    # until the engine is garbage-collected.
    eng.dispose()
@pytest.fixture
def fed_dir(tmp_path, monkeypatch):
    """Redirect the federation module's directory and key paths into tmp_path."""
    base = tmp_path / "federation"
    overrides = {
        "FED_DIR": base,
        "PRIVATE_KEY_PATH": base / "node.key",
        "PUBLIC_KEY_PATH": base / "node.pub",
    }
    for attr_name, target in overrides.items():
        monkeypatch.setattr(federation, attr_name, target)
    yield base
# ---------- fixture data: hostile docker_view output --------------------
# This payload has every leaky field docker_view *could* surface, plus
# nested env-style data — used to prove the export is whitelist-only.
# Every sensitive value planted here should have a matching entry in
# _FORBIDDEN_STRINGS so the JSON scan test can catch it.
_LEAKY_TOPOLOGY: Dict[str, Any] = {
    "containers": [
        {
            "id": "abcdef1234567890ffff",
            "name": "psyc-cockpit-1",
            "image": "registry.example/psyc:1.2",
            "state": "running",
            "status": "Up 5 minutes (healthy)",
            "networks": [
                {"name": "backend", "ip": "172.20.0.5", "gateway": "172.20.0.1", "mac": "02:42:ac:14:00:05"},
                {"name": "frontend", "ip": "172.21.0.7", "gateway": "172.21.0.1", "mac": "02:42:ac:15:00:07"},
            ],
            "ports": ["0.0.0.0:8767->8767/tcp"],
            "published_ports": ["8767/tcp"],
            # These are NOT current docker_view fields but defend in depth —
            # if a future docker_view change adds them, sanitizer drops them.
            "env": ["SECRET_TOKEN=abc123", "DB_PASSWORD=hunter2"],
            "mounts": ["/var/run/docker.sock", "/etc/secrets:/secrets"],
            "labels": {"com.docker.compose.project": "psyc", "secret_label": "shh"},
        },
        {
            # A stopped container with no healthcheck marker in its status.
            "id": "fedcba0987654321",
            "name": "some-stopped",
            "image": "alpine",
            "state": "exited",
            "status": "Exited (0) 2 hours ago",
            "networks": [],
            "ports": [],
            "published_ports": [],
        },
    ],
    "networks": [
        {
            "id": "n1", "name": "backend", "driver": "bridge", "scope": "local",
            "internal": False, "subnet": "172.20.0.0/16", "gateway": "172.20.0.1",
            "containers": [
                {"id": "abcdef123456", "name": "psyc-cockpit-1", "ip": "172.20.0.5", "mac": "02:42:ac:14:00:05"},
            ],
        },
        {
            "id": "n2", "name": "internal-only", "driver": "bridge", "scope": "local",
            "internal": True, "subnet": "10.99.0.0/16", "gateway": "10.99.0.1",
            "containers": [],
        },
    ],
    # Fully-qualified host name: the export must not carry the domain part.
    "host": {"name": "docker-host-secret-internal.example.com", "os": "linux", "ncpu": 8},
    "error": None,
    "proxy": "http://docker-socket-proxy:2375",
}
# Sensitive strings that MUST NOT appear anywhere in the export JSON.
# Each entry corresponds to a value planted in _LEAKY_TOPOLOGY; keep the two
# in sync when the fixture grows.
_FORBIDDEN_STRINGS = (
    "SECRET_TOKEN", "DB_PASSWORD", "hunter2", "abc123",  # env vars
    "/var/run/docker.sock", "/etc/secrets",  # mounts
    "secret_label", "shh", "com.docker.compose.project",  # labels
    "172.20.0.5", "172.21.0.7",  # IPs
    "02:42:ac",  # MAC prefix
    "172.20.0.1", "172.21.0.1", "10.99.0.1",  # gateways
    "172.20.0.0/16", "10.99.0.0/16",  # subnets
    "0.0.0.0:8767",  # port mapping
    "internal.example.com",  # full host
    "docker-socket-proxy",  # proxy URL
)
# ---------- model field introspection -----------------------------------
def test_container_model_has_no_dangerous_fields():
    """TopologyContainer declares exactly the whitelisted fields."""
    declared = set(TopologyContainer.model_fields)
    # whitelist — must match the design contract exactly
    expected = {
        "name", "short_id", "image", "state", "health",
        "networks", "service", "started_at",
    }
    assert declared == expected
    # explicit deny-list, double-belt
    denied = (
        "env", "environment", "mounts", "volumes",
        "labels", "ip", "ip_address", "ipaddress",
        "ports", "published_ports", "mac", "gateway",
    )
    for forbidden in denied:
        assert forbidden not in declared, f"{forbidden} must not be a field"
def test_network_model_has_no_dangerous_fields():
    """TopologyNetwork declares exactly the whitelisted fields."""
    declared = set(TopologyNetwork.model_fields)
    assert declared == {"name", "driver", "internal", "container_count"}
    denied = ("subnet", "gateway", "labels", "ipam", "containers", "scope", "id")
    for forbidden in denied:
        assert forbidden not in declared, f"{forbidden} must not be a field"
def test_export_model_top_level_fields():
    """TopologyExport declares exactly the documented top-level fields."""
    expected = {
        "node_fingerprint", "generated_at", "host_name",
        "container_count", "network_count", "containers", "networks",
    }
    declared = set(TopologyExport.model_fields)
    assert declared == expected
# ---------- image-name filter -------------------------------------------
def test_filter_image_strips_basic_auth_prefix():
    """Credentials before the registry host are removed."""
    # user:pass@host/repo:tag → host/repo:tag (creds gone)
    cleaned = _filter_image_name("user:pass@host/repo:tag")
    assert cleaned == "host/repo:tag"
def test_filter_image_drops_digest_suffix():
    """A trailing @sha256 digest is cut, leaving the tag-only reference."""
    cleaned = _filter_image_name("nginx:1.25@sha256:abcdef0123")
    assert cleaned == "nginx:1.25"
def test_filter_image_passes_clean_refs_untouched():
    """References without credentials or digest come back unchanged."""
    short_ref = "psyc:latest"
    assert _filter_image_name(short_ref) == "psyc:latest"
    registry_ref = "ghcr.io/example/psyc:v0.3.1"
    assert _filter_image_name(registry_ref) == "ghcr.io/example/psyc:v0.3.1"
def test_filter_image_handles_empty():
    """Empty string and None both map to the empty string."""
    from_empty = _filter_image_name("")
    assert from_empty == ""
    from_none = _filter_image_name(None)  # type: ignore[arg-type]
    assert from_none == ""
# ---------- build_export contract ---------------------------------------
def test_build_export_returns_empty_when_docker_view_raises(fresh_db, fed_dir, monkeypatch):
    """A raising docker_view.topology() yields an empty export, not an error."""

    def raise_proxy_error():
        raise docker_view.DockerProxyError("connection refused")

    monkeypatch.setattr(docker_view, "topology", raise_proxy_error)
    export = build_export()
    assert isinstance(export, TopologyExport)
    assert export.container_count == 0
    assert export.containers == []
    assert export.networks == []
    # fingerprint is still real (federation key was generated)
    assert len(export.node_fingerprint) == 32
def test_build_export_returns_empty_when_docker_view_reports_error(fresh_db, fed_dir, monkeypatch):
    """A payload whose `error` field is set yields an empty export."""
    failed_payload = {
        "containers": [], "networks": [], "host": {"name": "x"},
        "error": "containers: refused", "proxy": "x",
    }
    monkeypatch.setattr(docker_view, "topology", lambda: failed_payload)
    export = build_export()
    assert export.container_count == 0
    assert export.containers == []
def test_build_export_sanitizes_every_field(fresh_db, fed_dir, monkeypatch):
    """The leaky fixture is projected onto the whitelisted models."""
    monkeypatch.setattr(docker_view, "topology", lambda: _LEAKY_TOPOLOGY)
    export = build_export()

    # Containers came through, but as TopologyContainer (no leaky attrs).
    assert export.container_count == 2
    containers_by_name = {item.name: item for item in export.containers}
    cockpit = containers_by_name["psyc-cockpit-1"]
    assert cockpit.short_id == "abcdef123456"
    assert cockpit.image == "registry.example/psyc:1.2"
    assert cockpit.state == "running"
    assert cockpit.health == "healthy"
    assert cockpit.networks == ["backend", "frontend"]
    assert cockpit.service is None

    # Networks came through, sanitized.
    assert export.network_count == 2
    networks_by_name = {item.name: item for item in export.networks}
    backend = networks_by_name["backend"]
    assert backend.driver == "bridge"
    assert backend.internal is False
    assert backend.container_count == 1
    assert networks_by_name["internal-only"].internal is True
def test_export_json_contains_no_dangerous_strings(fresh_db, fed_dir, monkeypatch):
    """Strict no-leak: serialize and grep for everything sensitive."""
    monkeypatch.setattr(docker_view, "topology", lambda: _LEAKY_TOPOLOGY)
    export = build_export()
    payload = export.model_dump(mode="json")
    blob = json.dumps(payload)
    for forbidden in _FORBIDDEN_STRINGS:
        assert forbidden not in blob, f"leak: {forbidden!r} appeared in export JSON"
def test_build_export_caps_at_max_containers(fresh_db, fed_dir, monkeypatch):
    """More containers than MAX_CONTAINERS are truncated to the cap."""
    cap = topology_export.MAX_CONTAINERS
    oversized = []
    for i in range(cap + 50):
        oversized.append(
            {"id": f"id{i:04d}", "name": f"c{i}", "image": "x", "state": "running", "status": "Up", "networks": []}
        )
    fake = {
        "containers": oversized,
        "networks": [], "host": {"name": "h"}, "error": None, "proxy": "",
    }
    monkeypatch.setattr(docker_view, "topology", lambda: fake)
    export = build_export()
    assert export.container_count == cap