stage-14: pytest test suite over the worker lines

38 tests covering the pure worker-line logic: Classifyline rules; Routeline TLP, country, and incident-type gates; Sealine seal/unseal round-trip; Proofline confidence scoring; Mapline CVEResolver escalation; Trainline dataset well-posedness (the v1/v3 input-signal bugs are now regression-guarded); and the Scoutline feed parsers. pytest is added as a dev extra.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-18 23:36:41 +02:00
parent bc61b9a3a1
commit e504b3dbcf
9 changed files with 403 additions and 0 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,12 +20,18 @@ dependencies = [
    "sqlalchemy>=2.0",
 ]

+[project.optional-dependencies]
+dev = ["pytest>=8.0"]
+
 [project.scripts]
 psyc = "psyc.cli:app"

 [tool.hatch.build.targets.wheel]
 packages = ["src/psyc"]

+[tool.pytest.ini_options]
+testpaths = ["tests"]
+
 [tool.ruff]
 line-length = 120

--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -0,0 +1,40 @@
+"""Shared test fixtures — Case builders for the worker-line tests."""
+
+from __future__ import annotations
+
+from datetime import datetime, timedelta, timezone
+from typing import Optional
+
+import pytest
+
+from psyc.models import Case, Classification, IncidentType, Observables, Severity, TLP
+
+
+def make_case(
+    feed: str = "urlhaus",
+    incident_type: Optional[IncidentType] = None,
+    severity: Optional[Severity] = None,
+    tlp: TLP = TLP.AMBER,
+    country: str = "",
+    age_days: int = 1,
+    **observables: list,
+) -> Case:
+    """Build a Case with a chosen feed, classification, victim country, age, and observables."""
+    # age_days back-dates observed_at from the current UTC time; extra keywords are passed to Observables.
+    observed_at = datetime.now(timezone.utc) - timedelta(days=age_days)
+    iocs = Observables(**observables)
+    verdict = Classification(incident_type=incident_type, severity=severity, tlp=tlp)
+    case_id = f"TEST-{feed}-{age_days}"
+    summary = f"test case from {feed}"
+    case = Case(case_id=case_id, summary=summary, source_type="abuse_feed", observed_at=observed_at,
+                observables=iocs, classification=verdict)
+    case.source_metadata["feed"] = feed
+    case.victim.country = country
+    return case
+
+
+@pytest.fixture
+def urlhaus_case() -> Case:
+    case = make_case(feed="urlhaus", urls=["http://1.2.3.4/x"], domains=["1.2.3.4"], ips=["1.2.3.4"])
+    case.source_metadata["url_status"] = "online"  # URL, domain, and IP observables all use 1.2.3.4
+    return case
--- a/tests/test_classify.py
+++ b/tests/test_classify.py
@@ -0,0 +1,59 @@
+"""Classifyline rule tests."""
+
+from __future__ import annotations
+
+from psyc.lines.classify import classify
+from psyc.models import IncidentType, InternalClass, Severity, TLP
+from conftest import make_case
+
+
+def test_urlhaus_feed_is_malware():
+    verdict = classify(make_case(feed="urlhaus", urls=["http://1.2.3.4/x"])).classification
+    assert verdict.incident_type is IncidentType.MALWARE
+    assert verdict.tlp is TLP.GREEN  # checked after classify(); the make_case default is TLP.AMBER
+
+
+def test_cisa_kev_feed_is_exploit():
+    kev = make_case(feed="cisa-kev", cves=["CVE-2026-0001"])  # make_case leaves incident_type unset
+    assert classify(kev).classification.incident_type is IncidentType.EXPLOIT
+
+
+def test_feodo_feed_is_botnet():
+    feodo = make_case(feed="feodo", ips=["1.2.3.4"])  # make_case leaves incident_type unset
+    assert classify(feodo).classification.incident_type is IncidentType.BOTNET
+
+
+def test_malware_severity_tracks_url_status():
+    """URLhaus severity follows url_status: "online" gives HIGH, "offline" gives MEDIUM."""
+    expected = {"online": Severity.HIGH, "offline": Severity.MEDIUM}
+
+    for status, severity in expected.items():
+        case = make_case(feed="urlhaus", urls=["http://1.2.3.4/x"])
+        case.source_metadata["url_status"] = status
+        assert classify(case).classification.severity is severity
+
+
+def test_ransomware_kev_is_critical():
+    kev = make_case(feed="cisa-kev", cves=["CVE-2026-0001"])
+    kev.source_metadata["ransomware"] = "Known"  # same key/value the Scoutline KEV parser test asserts
+    assert classify(kev).classification.severity is Severity.CRITICAL
+
+
+def test_critical_infrastructure_forces_critical():
+    infra = make_case(feed="urlhaus", urls=["http://1.2.3.4/x"])
+    infra.victim.critical_infrastructure = True  # note: no url_status is set on this case
+    assert classify(infra).classification.severity is Severity.CRITICAL
+
+
+def test_internal_class_from_severity():
+    assert classify(make_case(feed="cisa-kev", cves=["CVE-2026-1"])).classification.internal_class is InternalClass.C
+    infra = make_case(feed="urlhaus", urls=["http://1.2.3.4/x"])
+    infra.victim.critical_infrastructure = True  # the sibling test expects this flag to yield CRITICAL severity
+    assert classify(infra).classification.internal_class is InternalClass.A
+
+
+def test_classify_is_idempotent():
+    case = classify(make_case(feed="urlhaus", urls=["http://1.2.3.4/x"]))
+    first = case.classification.model_copy(deep=True)  # snapshot, so in-place mutation cannot alias it
+    # Assert on the RETURNED case: the check stays meaningful even if classify() returns a new Case.
+    assert classify(case).classification == first
--- a/tests/test_map.py
+++ b/tests/test_map.py
@@ -0,0 +1,42 @@
+"""Mapline tests — CVEResolver KEV cross-check."""
+
+from __future__ import annotations
+
+from psyc.lines.map import _looks_like_ip, kev_cve_set, resolve_cves
+from psyc.models import Severity
+from conftest import make_case
+
+
+def test_kev_cve_set_only_from_kev_cases():
+    cases = [make_case(feed="cisa-kev", cves=["CVE-2026-0001"]), make_case(feed="urlhaus", cves=["CVE-2099-9999"])]
+    # Only the cisa-kev case may contribute; the urlhaus CVE is not KEV-sourced.
+    assert kev_cve_set(cases) == {"CVE-2026-0001"}
+
+
+def test_resolve_cves_flags_and_escalates_non_kev_case():
+    """A urlhaus case carrying a CVE from the KEV set is tagged with it and raised from LOW to HIGH."""
+    case = make_case(feed="urlhaus", cves=["CVE-2026-0001"], severity=Severity.LOW)
+    resolve_cves(case, {"CVE-2026-0001"})
+    assert case.source_metadata["kev_cves"] == "CVE-2026-0001"
+    assert case.classification.severity is Severity.HIGH
+
+
+def test_resolve_cves_does_not_escalate_kev_source_case():
+    """A cisa-kev case keeps its severity: its own CVE is in KEV by definition, so no self-escalation."""
+    case = make_case(feed="cisa-kev", cves=["CVE-2026-0001"], severity=Severity.LOW)
+    resolve_cves(case, {"CVE-2026-0001"})
+    severity_after = case.classification.severity
+    assert severity_after is Severity.LOW
+
+
+def test_resolve_cves_noop_without_match():
+    unmatched = make_case(feed="urlhaus", cves=["CVE-2099-9999"], severity=Severity.MEDIUM)
+    resolve_cves(unmatched, {"CVE-2026-0001"})  # the KEV set shares no CVE with the case
+    assert "kev_cves" not in unmatched.source_metadata
+    assert unmatched.classification.severity is Severity.MEDIUM
+
+
+def test_looks_like_ip():
+    """"8.8.8.8" is accepted; a hostname and an out-of-range octet ("999.1.1.1") are rejected."""
+    verdicts = {text: bool(_looks_like_ip(text)) for text in ("8.8.8.8", "example.com", "999.1.1.1")}
+    assert verdicts == {"8.8.8.8": True, "example.com": False, "999.1.1.1": False}
--- a/tests/test_proof.py
+++ b/tests/test_proof.py
@@ -0,0 +1,42 @@
+"""Proofline confidence-scoring tests."""
+
+from __future__ import annotations
+
+from psyc.lines.proof import prove
+from conftest import make_case
+
+
+def test_kev_source_is_high_confidence():
+    confidence = prove(make_case(feed="cisa-kev", cves=["CVE-2026-0001"], age_days=1)).confidence
+    assert confidence.source_reliability == "A"
+    assert confidence.level == "high"  # one-day-old case; the 200-day KEV test expects "medium"
+
+
+def test_urlhaus_source_is_medium_confidence():
+    confidence = prove(make_case(feed="urlhaus", urls=["http://1.2.3.4/x"], age_days=1)).confidence
+    assert confidence.source_reliability == "B"
+    assert confidence.level == "medium"  # same one-day age as the KEV test, so only the feed differs
+
+
+def test_freshness_buckets():
+    """Ages of 1, 7, 30, and 200 days land in the new, recent, stale, and resurfaced buckets."""
+    expected = {1: "new", 7: "recent", 30: "stale", 200: "resurfaced"}
+    observed = {days: prove(make_case(age_days=days)).confidence.freshness for days in expected}
+    assert observed == expected
+
+
+def test_stale_kev_case_is_docked_to_medium():
+    stale_kev = make_case(feed="cisa-kev", cves=["CVE-2026-0001"], age_days=200)
+    assert prove(stale_kev).confidence.level == "medium"  # high docked by staleness
+
+
+def test_malformed_ioc_drops_confidence_to_low():
+    """An IP with out-of-range octets marks the IOCs invalid and puts the level at "low"."""
+    confidence = prove(make_case(feed="cisa-kev", ips=["999.999.0.1"], age_days=1)).confidence
+    assert confidence.iocs_valid is False
+    assert confidence.level == "low"
+
+
+def test_valid_iocs_pass():
+    well_formed = make_case(feed="urlhaus", ips=["8.8.8.8"], cves=["CVE-2026-1234"], age_days=1)
+    assert prove(well_formed).confidence.iocs_valid is True
--- a/tests/test_route.py
+++ b/tests/test_route.py
@@ -0,0 +1,52 @@
+"""Routeline policy-gate tests."""
+
+from __future__ import annotations
+
+from psyc.lines.route import plan
+from psyc.models import IncidentType, Severity, TLP
+from conftest import make_case
+
+
+def _dest_names(routes):
+    return {route.destination_name for route in routes}  # a set: callers only test membership/subset
+
+
+def _blocked_reasons(blocked):
+    return {entry.destination_name: entry.reason for entry in blocked}  # destination name -> block reason
+
+
+def test_green_malware_routes_to_misp_and_urlhaus():
+    case = make_case(incident_type=IncidentType.MALWARE, severity=Severity.HIGH, tlp=TLP.GREEN)
+    routes, _ = plan(case)  # the blocked list is not under test here
+    assert {"MISP-Community", "URLhaus"} <= _dest_names(routes)  # subset: other destinations may be routed too
+
+
+def test_tlp_ceiling_blocks_abuseipdb():
+    """AbuseIPDB accepts at most TLP CLEAR, so a GREEN case must be blocked there."""
+    green_case = make_case(incident_type=IncidentType.MALWARE, severity=Severity.HIGH, tlp=TLP.GREEN)
+    _, refused = plan(green_case)
+    assert _blocked_reasons(refused).get("AbuseIPDB") == "tlp_exceeded"
+
+
+def test_country_gate_blocks_cert_bund_when_not_de():
+    foreign = make_case(incident_type=IncidentType.MALWARE, severity=Severity.HIGH, tlp=TLP.AMBER, country="CN")
+    _, refused = plan(foreign)
+    assert _blocked_reasons(refused).get("CERT-Bund") == "country_mismatch"
+
+
+def test_country_gate_allows_cert_bund_for_de():
+    german = make_case(incident_type=IncidentType.MALWARE, severity=Severity.HIGH, tlp=TLP.AMBER, country="DE")
+    accepted, _ = plan(german)
+    assert "CERT-Bund" in _dest_names(accepted)
+
+
+def test_incident_type_gate_blocks_urlhaus_for_non_malware():
+    botnet = make_case(incident_type=IncidentType.BOTNET, severity=Severity.HIGH, tlp=TLP.GREEN)
+    _, refused = plan(botnet)
+    assert _blocked_reasons(refused).get("URLhaus") == "incident_type_mismatch"
+
+
+def test_routes_sorted_by_priority():
+    de_case = make_case(incident_type=IncidentType.MALWARE, severity=Severity.HIGH, tlp=TLP.GREEN, country="DE")
+    routes, _ = plan(de_case)
+    assert [route.priority for route in routes] == sorted(route.priority for route in routes)  # ascending order
--- a/tests/test_scout.py
+++ b/tests/test_scout.py
@@ -0,0 +1,49 @@
+"""Scoutline parser tests — feed rows to normalized Case objects."""
+
+from __future__ import annotations
+
+from psyc.lines.scout import _feodo_record_to_case, _kev_vuln_to_case, _parse_urlhaus_csv
+
+URLHAUS_CSV = """\
+# comment line
+"3846688","2026-05-14 11:01:14","http://1.2.3.4/x","online","2026-05-14","malware_download","elf,mirai","https://urlhaus.abuse.ch/url/3846688/","reporter1"
+"""
+
+
+def test__parse_urlhaus_csv_skips_comments_and_parses_rows():
+    """The leading "#" line is dropped and the single data row exposes its url and url_status."""
+    (row,) = _parse_urlhaus_csv(URLHAUS_CSV)  # unpacking fails unless exactly one row is produced
+    assert row["url"] == "http://1.2.3.4/x"
+    assert row["url_status"] == "online"
+
+
+def test_kev_vuln_to_case():
+    """A KEV entry becomes a case with a PSYC-KEV-prefixed id, its CVE, and feed/ransomware metadata."""
+    vuln = {
+        "cveID": "CVE-2026-0300",
+        "vendorProject": "Microsoft",
+        "product": "Exchange",
+        "vulnerabilityName": "Exchange XSS",
+        "dateAdded": "2026-05-15",
+        "knownRansomwareCampaignUse": "Known",
+    }
+    case = _kev_vuln_to_case(vuln)
+    assert case.case_id == "PSYC-KEV-CVE-2026-0300"
+    assert case.observables.cves == ["CVE-2026-0300"]
+    assert (case.source_metadata["feed"], case.source_metadata["ransomware"]) == ("cisa-kev", "Known")
+
+
+def test_feodo_record_to_case():
+    """A Feodo record becomes a case holding its IP plus feed, malware, and status metadata."""
+    record = {
+        "ip_address": "162.243.103.246",
+        "port": 8080,
+        "status": "online",
+        "malware": "Emotet",
+        "country": "US",
+        "first_seen": "2022-06-04 21:24:53",
+    }
+    case = _feodo_record_to_case(record)
+    assert case.observables.ips == ["162.243.103.246"]
+    expected_meta = {"feed": "feodo", "malware": "Emotet", "status": "online"}
+    assert {key: case.source_metadata[key] for key in expected_meta} == expected_meta
--- a/tests/test_seal.py
+++ b/tests/test_seal.py
@@ -0,0 +1,58 @@
+"""Sealine — sealed-box encryption round-trip tests."""
+
+from __future__ import annotations
+
+import pytest
+
+from psyc.lines import seal
+from psyc.result import Err, Ok
+
+
+@pytest.fixture(autouse=True)
+def _isolate_seal_dirs(tmp_path, monkeypatch):
+    for attr, subdir in (("KEYS_DIR", "keys"), ("SEALED_DIR", "sealed")):
+        monkeypatch.setattr(seal, attr, tmp_path / subdir)  # point both seal directories at this test's tmp_path
+
+
+def test_seal_unseal_round_trip():
+    recipient, plaintext = "CERT-Test", b'{"case": "evidence", "secret": true}'
+    seal.generate_recipient_keys(recipient)
+    pkg = seal.seal(plaintext, [recipient])
+    assert isinstance(pkg, Ok)
+    out = seal.unseal(pkg.value.package_id, recipient)  # unseal as the recipient the package was sealed for
+    assert isinstance(out, Ok)
+    assert out.value == plaintext
+
+
+def test_seal_to_unknown_recipient_errors():
+    outcome = seal.seal(b"data", ["Nobody"])  # no keys were generated for "Nobody" in this test
+    assert isinstance(outcome, Err)
+
+
+def test_unseal_with_wrong_recipient_errors():
+    """A recipient that has keys but is not named on the package gets an Err from unseal."""
+    for name in ("CERT-A", "CERT-B"):
+        seal.generate_recipient_keys(name)
+    pkg = seal.seal(b"data", ["CERT-A"])
+    assert isinstance(pkg, Ok)
+    assert isinstance(seal.unseal(pkg.value.package_id, "CERT-B"), Err)
+
+
+def test_multi_recipient_each_can_unseal():
+    recipients, plaintext = ["CERT-Bund", "MISP"], b"shared evidence"
+    for name in recipients:
+        seal.generate_recipient_keys(name)
+    pkg = seal.seal(plaintext, recipients)
+    assert isinstance(pkg, Ok)
+    for name in recipients:  # every recipient named on the package must recover the same bytes
+        out = seal.unseal(pkg.value.package_id, name)
+        assert isinstance(out, Ok) and out.value == plaintext
+
+
+def test_plaintext_hash_recorded():
+    """The sealed package records the SHA-256 hex digest of the plaintext."""
+    from hashlib import sha256
+    seal.generate_recipient_keys("R")
+    pkg = seal.seal(b"hash me", ["R"])
+    assert isinstance(pkg, Ok)
+    assert pkg.value.plaintext_hash == sha256(b"hash me").hexdigest()
--- a/tests/test_train.py
+++ b/tests/test_train.py
@@ -0,0 +1,55 @@
+"""Trainline dataset-builder tests — the well-posedness properties."""
+
+from __future__ import annotations
+
+import json
+
+from psyc.lines.train import (
+    _ex_ioc_extraction,
+    _ex_severity_classification,
+    quality_gate,
+)
+from psyc.models import IncidentType, Severity, TLP
+from conftest import make_case
+
+
+def test_ioc_extraction_is_well_posed():
+    """Every IOC in the output must also appear in the input — the v1 bug."""
+    case = make_case(feed="urlhaus", urls=["http://1.2.3.4:8080/x"], domains=["1.2.3.4"], ips=["1.2.3.4"])
+    example = _ex_ioc_extraction(case)
+    assert example is not None
+    # The output is JSON whose values are lists of IOCs; walk them all as one flat stream.
+    buckets = json.loads(example.output).values()
+    for ioc in (item for bucket in buckets for item in bucket):
+        assert ioc in example.input, f"{ioc!r} not derivable from the input"
+
+
+def test_ioc_extraction_includes_cve_only_cases():
+    """A case whose only observable is a CVE still yields an example with that CVE in its input."""
+    example = _ex_ioc_extraction(make_case(feed="cisa-kev", cves=["CVE-2026-0001"]))
+    assert example is not None
+    assert "CVE-2026-0001" in example.input
+
+
+def test_severity_input_carries_status_signal():
+    """The severity task input must contain the online/offline status (v3 bug)."""
+    botnet = make_case(feed="feodo", ips=["1.2.3.4"], severity=Severity.HIGH, incident_type=IncidentType.BOTNET)
+    # Same "status" metadata key that the Scoutline Feodo parser test expects on parsed cases.
+    botnet.source_metadata["status"] = "online"
+    example = _ex_severity_classification(botnet)
+    assert example is not None
+    assert "online" in example.input
+
+
+def test_quality_gate_drops_tlp_red():
+    red = make_case(tlp=TLP.RED, urls=["http://1.2.3.4/x"])
+    example = _ex_ioc_extraction(red)
+    assert example is not None  # the example is still built; rejection is expected from the gate
+    assert quality_gate(example, red) == "tlp_red"
+
+
+def test_quality_gate_passes_clean_example():
+    clean = make_case(feed="urlhaus", tlp=TLP.GREEN, urls=["http://1.2.3.4/x"], ips=["1.2.3.4"])
+    example = _ex_ioc_extraction(clean)
+    assert example is not None
+    assert quality_gate(example, clean) is None  # None is the pass signal; the TLP:RED test expects "tlp_red"