stage-19-fix: ThreatFox + MalwareBazaar — real API shape
Live test against abuse.ch revealed two issues with the stage-19 wiring:

- ThreatFox returns `ioc` (not `ioc_value`) and `first_seen` (not `first_seen_utc`); the `ioc_value` / `first_seen_utc` names came from stale docs. The parser now reads the real field names first and falls back to the old aliases defensively. It also captures `malware_malpedia` (per-family writeup URL) and `threat_type_desc` for richer downstream prose.
- MalwareBazaar's API expects form-encoded request bodies, unlike ThreatFox, which takes JSON. Extended `_http` with a `form_body=` parameter and switched the MalwareBazaar fetcher to it.

Verified live: 10 ThreatFox cases landed with mixed botnet/malware classification (a 4/6 split derived from the `threat_type` signal — the first real incident-type diversity from a single feed). 10 MalwareBazaar cases landed with sha256 + sha1 hash observables and exe/file_type metadata.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -34,13 +34,22 @@ OTX_PULSES_API = "https://otx.alienvault.com/api/v1/pulses/subscribed"
|
||||
_log = log.get(__name__)
|
||||
|
||||
|
||||
def _http(method: str, url: str, headers: Optional[Dict[str, str]] = None, json_body: Optional[Dict[str, Any]] = None) -> httpx.Response:
|
||||
def _http(
|
||||
method: str,
|
||||
url: str,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
json_body: Optional[Dict[str, Any]] = None,
|
||||
form_body: Optional[Dict[str, Any]] = None,
|
||||
) -> httpx.Response:
|
||||
h = {"User-Agent": USER_AGENT}
|
||||
if headers:
|
||||
h.update(headers)
|
||||
with httpx.Client(timeout=HTTP_TIMEOUT, headers=h, follow_redirects=True) as client:
|
||||
if method.upper() == "POST":
|
||||
resp = client.post(url, json=json_body)
|
||||
if form_body is not None:
|
||||
resp = client.post(url, data=form_body)
|
||||
else:
|
||||
resp = client.post(url, json=json_body)
|
||||
else:
|
||||
resp = client.get(url)
|
||||
resp.raise_for_status()
|
||||
@@ -168,7 +177,8 @@ THREATFOX_THREAT_TYPE: Dict[str, IncidentType] = {
|
||||
|
||||
|
||||
def _threatfox_row_to_case(r: Dict[str, Any]) -> Optional[Case]:
|
||||
ioc_value = str(r.get("ioc_value") or "").strip()
|
||||
# API field is `ioc` (the `_value` alias is older docs); date is `first_seen`.
|
||||
ioc_value = str(r.get("ioc") or r.get("ioc_value") or "").strip()
|
||||
ioc_type = str(r.get("ioc_type") or "").lower()
|
||||
if not ioc_value or not ioc_type:
|
||||
return None
|
||||
@@ -197,6 +207,7 @@ def _threatfox_row_to_case(r: Dict[str, Any]) -> Optional[Case]:
|
||||
|
||||
threat_label = threat_type.replace("_", " ") or "malware"
|
||||
summary = f"ThreatFox: {malware} {threat_label} — {ioc_value}"
|
||||
first_seen = str(r.get("first_seen") or r.get("first_seen_utc") or "")
|
||||
return Case(
|
||||
case_id=f"PSYC-THREATFOX-{r.get('id', '')}",
|
||||
summary=summary,
|
||||
@@ -205,13 +216,15 @@ def _threatfox_row_to_case(r: Dict[str, Any]) -> Optional[Case]:
|
||||
source_metadata=dict(
|
||||
feed="threatfox",
|
||||
malware=malware,
|
||||
malware_malpedia=str(r.get("malware_malpedia") or ""),
|
||||
threat_type=threat_type,
|
||||
threat_type_desc=str(r.get("threat_type_desc") or ""),
|
||||
ioc_type=ioc_type,
|
||||
confidence_level=str(r.get("confidence_level", "")),
|
||||
tags=",".join(t for t in tags if t),
|
||||
reporter=str(r.get("reporter", "")),
|
||||
),
|
||||
observed_at=_parse_dt(str(r.get("first_seen_utc", "")), "%Y-%m-%d %H:%M:%S"),
|
||||
observed_at=_parse_dt(first_seen, "%Y-%m-%d %H:%M:%S"),
|
||||
observables=obs,
|
||||
)
|
||||
|
||||
@@ -268,7 +281,8 @@ def _fetch_malware_bazaar() -> List[Case]:
|
||||
key = os.environ.get("THREATFOX_AUTH_KEY", "").strip()
|
||||
if not key:
|
||||
raise RuntimeError("THREATFOX_AUTH_KEY not set — abuse.ch auth-key from https://auth.abuse.ch/ also covers MalwareBazaar")
|
||||
data = _http("POST", MALWARE_BAZAAR_API, headers={"Auth-Key": key}, json_body={"query": "get_recent", "selector": "100"}).json()
|
||||
# MalwareBazaar expects form-encoded body (unlike ThreatFox which takes JSON).
|
||||
data = _http("POST", MALWARE_BAZAAR_API, headers={"Auth-Key": key}, form_body={"query": "get_recent", "selector": "100"}).json()
|
||||
rows = data.get("data") or []
|
||||
out: List[Case] = []
|
||||
for r in rows:
|
||||
|
||||
Reference in New Issue
Block a user