stage-19-fix: ThreatFox + MalwareBazaar — real API shape
Live testing against abuse.ch revealed two issues with the stage-19 wiring:

- ThreatFox returns `ioc` (not `ioc_value`) and `first_seen` (not `first_seen_utc`); the older field names came from stale docs. The parser now reads the real names and falls back to the old aliases defensively. It also captures `malware_malpedia` (per-family writeup URL) and `threat_type_desc` for richer downstream prose.
- MalwareBazaar's API expects form-encoded bodies, unlike ThreatFox, which takes JSON. Extended `_http` with `form_body=`; the MalwareBazaar (MB) fetcher now uses it.

Verified live: 10 ThreatFox cases landed with mixed botnet/malware classification (a 4/6 split derived from the `threat_type` signal — the first real incident-type diversity from a single feed). 10 MalwareBazaar cases landed with sha256 + sha1 hash observables and exe/file_type metadata.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -34,12 +34,21 @@ OTX_PULSES_API = "https://otx.alienvault.com/api/v1/pulses/subscribed"
|
||||
_log = log.get(__name__)
|
||||
|
||||
|
||||
def _http(method: str, url: str, headers: Optional[Dict[str, str]] = None, json_body: Optional[Dict[str, Any]] = None) -> httpx.Response:
|
||||
def _http(
|
||||
method: str,
|
||||
url: str,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
json_body: Optional[Dict[str, Any]] = None,
|
||||
form_body: Optional[Dict[str, Any]] = None,
|
||||
) -> httpx.Response:
|
||||
h = {"User-Agent": USER_AGENT}
|
||||
if headers:
|
||||
h.update(headers)
|
||||
with httpx.Client(timeout=HTTP_TIMEOUT, headers=h, follow_redirects=True) as client:
|
||||
if method.upper() == "POST":
|
||||
if form_body is not None:
|
||||
resp = client.post(url, data=form_body)
|
||||
else:
|
||||
resp = client.post(url, json=json_body)
|
||||
else:
|
||||
resp = client.get(url)
|
||||
@@ -168,7 +177,8 @@ THREATFOX_THREAT_TYPE: Dict[str, IncidentType] = {
|
||||
|
||||
|
||||
def _threatfox_row_to_case(r: Dict[str, Any]) -> Optional[Case]:
|
||||
ioc_value = str(r.get("ioc_value") or "").strip()
|
||||
# API field is `ioc` (the `_value` alias is older docs); date is `first_seen`.
|
||||
ioc_value = str(r.get("ioc") or r.get("ioc_value") or "").strip()
|
||||
ioc_type = str(r.get("ioc_type") or "").lower()
|
||||
if not ioc_value or not ioc_type:
|
||||
return None
|
||||
@@ -197,6 +207,7 @@ def _threatfox_row_to_case(r: Dict[str, Any]) -> Optional[Case]:
|
||||
|
||||
threat_label = threat_type.replace("_", " ") or "malware"
|
||||
summary = f"ThreatFox: {malware} {threat_label} — {ioc_value}"
|
||||
first_seen = str(r.get("first_seen") or r.get("first_seen_utc") or "")
|
||||
return Case(
|
||||
case_id=f"PSYC-THREATFOX-{r.get('id', '')}",
|
||||
summary=summary,
|
||||
@@ -205,13 +216,15 @@ def _threatfox_row_to_case(r: Dict[str, Any]) -> Optional[Case]:
|
||||
source_metadata=dict(
|
||||
feed="threatfox",
|
||||
malware=malware,
|
||||
malware_malpedia=str(r.get("malware_malpedia") or ""),
|
||||
threat_type=threat_type,
|
||||
threat_type_desc=str(r.get("threat_type_desc") or ""),
|
||||
ioc_type=ioc_type,
|
||||
confidence_level=str(r.get("confidence_level", "")),
|
||||
tags=",".join(t for t in tags if t),
|
||||
reporter=str(r.get("reporter", "")),
|
||||
),
|
||||
observed_at=_parse_dt(str(r.get("first_seen_utc", "")), "%Y-%m-%d %H:%M:%S"),
|
||||
observed_at=_parse_dt(first_seen, "%Y-%m-%d %H:%M:%S"),
|
||||
observables=obs,
|
||||
)
|
||||
|
||||
@@ -268,7 +281,8 @@ def _fetch_malware_bazaar() -> List[Case]:
|
||||
key = os.environ.get("THREATFOX_AUTH_KEY", "").strip()
|
||||
if not key:
|
||||
raise RuntimeError("THREATFOX_AUTH_KEY not set — abuse.ch auth-key from https://auth.abuse.ch/ also covers MalwareBazaar")
|
||||
data = _http("POST", MALWARE_BAZAAR_API, headers={"Auth-Key": key}, json_body={"query": "get_recent", "selector": "100"}).json()
|
||||
# MalwareBazaar expects form-encoded body (unlike ThreatFox which takes JSON).
|
||||
data = _http("POST", MALWARE_BAZAAR_API, headers={"Auth-Key": key}, form_body={"query": "get_recent", "selector": "100"}).json()
|
||||
rows = data.get("data") or []
|
||||
out: List[Case] = []
|
||||
for r in rows:
|
||||
|
||||
@@ -59,11 +59,11 @@ def test_feodo_record_to_case():
|
||||
def test_threatfox_row_url_to_case():
|
||||
row = {
|
||||
"id": "1234567",
|
||||
"ioc_value": "http://1.2.3.4/x.bin",
|
||||
"ioc": "http://1.2.3.4/x.bin",
|
||||
"ioc_type": "url",
|
||||
"threat_type": "payload_delivery",
|
||||
"malware_printable": "Cobalt Strike",
|
||||
"first_seen_utc": "2026-05-19 10:00:00",
|
||||
"first_seen": "2026-05-19 10:00:00",
|
||||
"confidence_level": 100,
|
||||
"tags": ["c2", "stager"],
|
||||
"reporter": "anon",
|
||||
@@ -81,11 +81,11 @@ def test_threatfox_row_url_to_case():
|
||||
def test_threatfox_row_ip_port_to_case():
|
||||
row = {
|
||||
"id": "9999",
|
||||
"ioc_value": "5.6.7.8:443",
|
||||
"ioc": "5.6.7.8:443",
|
||||
"ioc_type": "ip:port",
|
||||
"threat_type": "botnet_cc",
|
||||
"malware_printable": "Qakbot",
|
||||
"first_seen_utc": "2026-05-18 10:00:00",
|
||||
"first_seen": "2026-05-18 10:00:00",
|
||||
}
|
||||
case = _threatfox_row_to_case(row)
|
||||
assert case is not None
|
||||
@@ -93,7 +93,7 @@ def test_threatfox_row_ip_port_to_case():
|
||||
|
||||
|
||||
def test_threatfox_row_rejects_unknown_type():
|
||||
assert _threatfox_row_to_case({"id": "1", "ioc_value": "x", "ioc_type": "ja3_fp"}) is None
|
||||
assert _threatfox_row_to_case({"id": "1", "ioc": "x", "ioc_type": "ja3_fp"}) is None
|
||||
|
||||
|
||||
def test_malware_bazaar_row_to_case():
|
||||
|
||||
Reference in New Issue
Block a user