"""Trainline dataset-builder tests — the well-posedness properties.""" from __future__ import annotations import json from psyc.lines.train import ( _ex_ioc_extraction, _ex_severity_classification, quality_gate, ) from psyc.models import IncidentType, Severity, TLP from conftest import make_case def test_ioc_extraction_is_well_posed(): """Every IOC in the output must also appear in the input — the v1 bug.""" case = make_case(feed="urlhaus", urls=["http://1.2.3.4:8080/x"], domains=["1.2.3.4"], ips=["1.2.3.4"]) ex = _ex_ioc_extraction(case) assert ex is not None output = json.loads(ex.output) for bucket in output.values(): for ioc in bucket: assert ioc in ex.input, f"{ioc!r} not derivable from the input" def test_ioc_extraction_includes_cve_only_cases(): case = make_case(feed="cisa-kev", cves=["CVE-2026-0001"]) ex = _ex_ioc_extraction(case) assert ex is not None assert "CVE-2026-0001" in ex.input def test_severity_input_carries_status_signal(): """The severity task input must contain the online/offline status (v3 bug).""" case = make_case(feed="feodo", ips=["1.2.3.4"], severity=Severity.HIGH, incident_type=IncidentType.BOTNET) case.source_metadata["status"] = "online" ex = _ex_severity_classification(case) assert ex is not None assert "online" in ex.input def test_quality_gate_drops_tlp_red(): case = make_case(tlp=TLP.RED, urls=["http://1.2.3.4/x"]) ex = _ex_ioc_extraction(case) assert ex is not None assert quality_gate(ex, case) == "tlp_red" def test_quality_gate_passes_clean_example(): case = make_case(feed="urlhaus", tlp=TLP.GREEN, urls=["http://1.2.3.4/x"], ips=["1.2.3.4"]) ex = _ex_ioc_extraction(case) assert ex is not None assert quality_gate(ex, case) is None