From 85830be9fa0cc597fc1b3d6026e329845e8a49ae Mon Sep 17 00:00:00 2001 From: m17hr1l Date: Wed, 20 May 2026 22:25:56 +0200 Subject: [PATCH] =?UTF-8?q?stage-19-fix:=20ThreatFox=20+=20MalwareBazaar?= =?UTF-8?q?=20=E2=80=94=20real=20API=20shape?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Live test against abuse.ch revealed two issues with the stage-19 wiring: - ThreatFox returns `ioc` (not `ioc_value`) and `first_seen` (not `first_seen_utc`); the names in parentheses are older field names taken from stale docs. The parser now reads the real names and falls back to the old aliases defensively. It also captures `malware_malpedia` (per-family writeup URL) and `threat_type_desc` for richer downstream prose. - MalwareBazaar's API expects form-encoded bodies, unlike ThreatFox's JSON. Extended _http with a form_body= parameter; the MalwareBazaar fetcher now uses it. Verified live: 10 ThreatFox cases landed with mixed botnet/malware classification (4/6 split from threat_type signal — first real incident-type diversity from a single feed). 10 MalwareBazaar cases landed with sha256+sha1 hash observables and exe/file_type metadata. 
Co-Authored-By: Claude Opus 4.7 --- src/psyc/lines/scout.py | 24 +++++++++++++++++++----- tests/test_scout.py | 10 +++++----- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/src/psyc/lines/scout.py b/src/psyc/lines/scout.py index ba740de..917670f 100644 --- a/src/psyc/lines/scout.py +++ b/src/psyc/lines/scout.py @@ -34,13 +34,22 @@ OTX_PULSES_API = "https://otx.alienvault.com/api/v1/pulses/subscribed" _log = log.get(__name__) -def _http(method: str, url: str, headers: Optional[Dict[str, str]] = None, json_body: Optional[Dict[str, Any]] = None) -> httpx.Response: +def _http( + method: str, + url: str, + headers: Optional[Dict[str, str]] = None, + json_body: Optional[Dict[str, Any]] = None, + form_body: Optional[Dict[str, Any]] = None, +) -> httpx.Response: h = {"User-Agent": USER_AGENT} if headers: h.update(headers) with httpx.Client(timeout=HTTP_TIMEOUT, headers=h, follow_redirects=True) as client: if method.upper() == "POST": - resp = client.post(url, json=json_body) + if form_body is not None: + resp = client.post(url, data=form_body) + else: + resp = client.post(url, json=json_body) else: resp = client.get(url) resp.raise_for_status() @@ -168,7 +177,8 @@ THREATFOX_THREAT_TYPE: Dict[str, IncidentType] = { def _threatfox_row_to_case(r: Dict[str, Any]) -> Optional[Case]: - ioc_value = str(r.get("ioc_value") or "").strip() + # API field is `ioc` (the `_value` alias is older docs); date is `first_seen`. 
+ ioc_value = str(r.get("ioc") or r.get("ioc_value") or "").strip() ioc_type = str(r.get("ioc_type") or "").lower() if not ioc_value or not ioc_type: return None @@ -197,6 +207,7 @@ def _threatfox_row_to_case(r: Dict[str, Any]) -> Optional[Case]: threat_label = threat_type.replace("_", " ") or "malware" summary = f"ThreatFox: {malware} {threat_label} — {ioc_value}" + first_seen = str(r.get("first_seen") or r.get("first_seen_utc") or "") return Case( case_id=f"PSYC-THREATFOX-{r.get('id', '')}", summary=summary, @@ -205,13 +216,15 @@ def _threatfox_row_to_case(r: Dict[str, Any]) -> Optional[Case]: source_metadata=dict( feed="threatfox", malware=malware, + malware_malpedia=str(r.get("malware_malpedia") or ""), threat_type=threat_type, + threat_type_desc=str(r.get("threat_type_desc") or ""), ioc_type=ioc_type, confidence_level=str(r.get("confidence_level", "")), tags=",".join(t for t in tags if t), reporter=str(r.get("reporter", "")), ), - observed_at=_parse_dt(str(r.get("first_seen_utc", "")), "%Y-%m-%d %H:%M:%S"), + observed_at=_parse_dt(first_seen, "%Y-%m-%d %H:%M:%S"), observables=obs, ) @@ -268,7 +281,8 @@ def _fetch_malware_bazaar() -> List[Case]: key = os.environ.get("THREATFOX_AUTH_KEY", "").strip() if not key: raise RuntimeError("THREATFOX_AUTH_KEY not set — abuse.ch auth-key from https://auth.abuse.ch/ also covers MalwareBazaar") - data = _http("POST", MALWARE_BAZAAR_API, headers={"Auth-Key": key}, json_body={"query": "get_recent", "selector": "100"}).json() + # MalwareBazaar expects form-encoded body (unlike ThreatFox which takes JSON). 
+ data = _http("POST", MALWARE_BAZAAR_API, headers={"Auth-Key": key}, form_body={"query": "get_recent", "selector": "100"}).json() rows = data.get("data") or [] out: List[Case] = [] for r in rows: diff --git a/tests/test_scout.py b/tests/test_scout.py index 5f6587b..592a225 100644 --- a/tests/test_scout.py +++ b/tests/test_scout.py @@ -59,11 +59,11 @@ def test_feodo_record_to_case(): def test_threatfox_row_url_to_case(): row = { "id": "1234567", - "ioc_value": "http://1.2.3.4/x.bin", + "ioc": "http://1.2.3.4/x.bin", "ioc_type": "url", "threat_type": "payload_delivery", "malware_printable": "Cobalt Strike", - "first_seen_utc": "2026-05-19 10:00:00", + "first_seen": "2026-05-19 10:00:00", "confidence_level": 100, "tags": ["c2", "stager"], "reporter": "anon", @@ -81,11 +81,11 @@ def test_threatfox_row_url_to_case(): def test_threatfox_row_ip_port_to_case(): row = { "id": "9999", - "ioc_value": "5.6.7.8:443", + "ioc": "5.6.7.8:443", "ioc_type": "ip:port", "threat_type": "botnet_cc", "malware_printable": "Qakbot", - "first_seen_utc": "2026-05-18 10:00:00", + "first_seen": "2026-05-18 10:00:00", } case = _threatfox_row_to_case(row) assert case is not None @@ -93,7 +93,7 @@ def test_threatfox_row_ip_port_to_case(): def test_threatfox_row_rejects_unknown_type(): - assert _threatfox_row_to_case({"id": "1", "ioc_value": "x", "ioc_type": "ja3_fp"}) is None + assert _threatfox_row_to_case({"id": "1", "ioc": "x", "ioc_type": "ja3_fp"}) is None def test_malware_bazaar_row_to_case():