stage-19-fix: ThreatFox + MalwareBazaar — real API shape

Live test against abuse.ch revealed two issues with the stage-19 wiring:

- ThreatFox returns `ioc` (not `ioc_value`) and `first_seen` (not
  `first_seen_utc`); the `ioc_value`/`first_seen_utc` names came from
  stale docs. The parser now reads the real names and falls back to the
  old aliases defensively. It also captures `malware_malpedia`
  (per-family writeup URL) and `threat_type_desc` for richer downstream
  prose.
- MalwareBazaar's API expects form-encoded request bodies, unlike
  ThreatFox, which takes JSON. Extended `_http` with a `form_body=`
  parameter; the MalwareBazaar fetcher now uses it.

Verified live: 10 ThreatFox cases landed with a mixed botnet/malware
classification (a 4/6 split driven by the `threat_type` signal — the
first real incident-type diversity from a single feed). 10 MalwareBazaar
cases landed with sha256 + sha1 hash observables and exe/file_type
metadata.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
m17hr1l
2026-05-20 22:25:56 +02:00
parent d87bd710bb
commit 85830be9fa
2 changed files with 24 additions and 10 deletions

View File

@@ -34,13 +34,22 @@ OTX_PULSES_API = "https://otx.alienvault.com/api/v1/pulses/subscribed"
_log = log.get(__name__) _log = log.get(__name__)
def _http(method: str, url: str, headers: Optional[Dict[str, str]] = None, json_body: Optional[Dict[str, Any]] = None) -> httpx.Response: def _http(
method: str,
url: str,
headers: Optional[Dict[str, str]] = None,
json_body: Optional[Dict[str, Any]] = None,
form_body: Optional[Dict[str, Any]] = None,
) -> httpx.Response:
h = {"User-Agent": USER_AGENT} h = {"User-Agent": USER_AGENT}
if headers: if headers:
h.update(headers) h.update(headers)
with httpx.Client(timeout=HTTP_TIMEOUT, headers=h, follow_redirects=True) as client: with httpx.Client(timeout=HTTP_TIMEOUT, headers=h, follow_redirects=True) as client:
if method.upper() == "POST": if method.upper() == "POST":
resp = client.post(url, json=json_body) if form_body is not None:
resp = client.post(url, data=form_body)
else:
resp = client.post(url, json=json_body)
else: else:
resp = client.get(url) resp = client.get(url)
resp.raise_for_status() resp.raise_for_status()
@@ -168,7 +177,8 @@ THREATFOX_THREAT_TYPE: Dict[str, IncidentType] = {
def _threatfox_row_to_case(r: Dict[str, Any]) -> Optional[Case]: def _threatfox_row_to_case(r: Dict[str, Any]) -> Optional[Case]:
ioc_value = str(r.get("ioc_value") or "").strip() # API field is `ioc` (the `_value` alias is older docs); date is `first_seen`.
ioc_value = str(r.get("ioc") or r.get("ioc_value") or "").strip()
ioc_type = str(r.get("ioc_type") or "").lower() ioc_type = str(r.get("ioc_type") or "").lower()
if not ioc_value or not ioc_type: if not ioc_value or not ioc_type:
return None return None
@@ -197,6 +207,7 @@ def _threatfox_row_to_case(r: Dict[str, Any]) -> Optional[Case]:
threat_label = threat_type.replace("_", " ") or "malware" threat_label = threat_type.replace("_", " ") or "malware"
summary = f"ThreatFox: {malware} {threat_label}{ioc_value}" summary = f"ThreatFox: {malware} {threat_label}{ioc_value}"
first_seen = str(r.get("first_seen") or r.get("first_seen_utc") or "")
return Case( return Case(
case_id=f"PSYC-THREATFOX-{r.get('id', '')}", case_id=f"PSYC-THREATFOX-{r.get('id', '')}",
summary=summary, summary=summary,
@@ -205,13 +216,15 @@ def _threatfox_row_to_case(r: Dict[str, Any]) -> Optional[Case]:
source_metadata=dict( source_metadata=dict(
feed="threatfox", feed="threatfox",
malware=malware, malware=malware,
malware_malpedia=str(r.get("malware_malpedia") or ""),
threat_type=threat_type, threat_type=threat_type,
threat_type_desc=str(r.get("threat_type_desc") or ""),
ioc_type=ioc_type, ioc_type=ioc_type,
confidence_level=str(r.get("confidence_level", "")), confidence_level=str(r.get("confidence_level", "")),
tags=",".join(t for t in tags if t), tags=",".join(t for t in tags if t),
reporter=str(r.get("reporter", "")), reporter=str(r.get("reporter", "")),
), ),
observed_at=_parse_dt(str(r.get("first_seen_utc", "")), "%Y-%m-%d %H:%M:%S"), observed_at=_parse_dt(first_seen, "%Y-%m-%d %H:%M:%S"),
observables=obs, observables=obs,
) )
@@ -268,7 +281,8 @@ def _fetch_malware_bazaar() -> List[Case]:
key = os.environ.get("THREATFOX_AUTH_KEY", "").strip() key = os.environ.get("THREATFOX_AUTH_KEY", "").strip()
if not key: if not key:
raise RuntimeError("THREATFOX_AUTH_KEY not set — abuse.ch auth-key from https://auth.abuse.ch/ also covers MalwareBazaar") raise RuntimeError("THREATFOX_AUTH_KEY not set — abuse.ch auth-key from https://auth.abuse.ch/ also covers MalwareBazaar")
data = _http("POST", MALWARE_BAZAAR_API, headers={"Auth-Key": key}, json_body={"query": "get_recent", "selector": "100"}).json() # MalwareBazaar expects form-encoded body (unlike ThreatFox which takes JSON).
data = _http("POST", MALWARE_BAZAAR_API, headers={"Auth-Key": key}, form_body={"query": "get_recent", "selector": "100"}).json()
rows = data.get("data") or [] rows = data.get("data") or []
out: List[Case] = [] out: List[Case] = []
for r in rows: for r in rows:

View File

@@ -59,11 +59,11 @@ def test_feodo_record_to_case():
def test_threatfox_row_url_to_case(): def test_threatfox_row_url_to_case():
row = { row = {
"id": "1234567", "id": "1234567",
"ioc_value": "http://1.2.3.4/x.bin", "ioc": "http://1.2.3.4/x.bin",
"ioc_type": "url", "ioc_type": "url",
"threat_type": "payload_delivery", "threat_type": "payload_delivery",
"malware_printable": "Cobalt Strike", "malware_printable": "Cobalt Strike",
"first_seen_utc": "2026-05-19 10:00:00", "first_seen": "2026-05-19 10:00:00",
"confidence_level": 100, "confidence_level": 100,
"tags": ["c2", "stager"], "tags": ["c2", "stager"],
"reporter": "anon", "reporter": "anon",
@@ -81,11 +81,11 @@ def test_threatfox_row_url_to_case():
def test_threatfox_row_ip_port_to_case(): def test_threatfox_row_ip_port_to_case():
row = { row = {
"id": "9999", "id": "9999",
"ioc_value": "5.6.7.8:443", "ioc": "5.6.7.8:443",
"ioc_type": "ip:port", "ioc_type": "ip:port",
"threat_type": "botnet_cc", "threat_type": "botnet_cc",
"malware_printable": "Qakbot", "malware_printable": "Qakbot",
"first_seen_utc": "2026-05-18 10:00:00", "first_seen": "2026-05-18 10:00:00",
} }
case = _threatfox_row_to_case(row) case = _threatfox_row_to_case(row)
assert case is not None assert case is not None
@@ -93,7 +93,7 @@ def test_threatfox_row_ip_port_to_case():
def test_threatfox_row_rejects_unknown_type(): def test_threatfox_row_rejects_unknown_type():
assert _threatfox_row_to_case({"id": "1", "ioc_value": "x", "ioc_type": "ja3_fp"}) is None assert _threatfox_row_to_case({"id": "1", "ioc": "x", "ioc_type": "ja3_fp"}) is None
def test_malware_bazaar_row_to_case(): def test_malware_bazaar_row_to_case():