stage-19-fix2: OTX — narrow by modified_since, longer timeout
The /pulses/subscribed endpoint enumerates every curated feed that a fresh account is auto-subscribed to. Unfiltered, that alone is enough to make OTX's backend return a 504 regardless of the client timeout. Narrowing by modified_since=now-7d brings the response back to a single-second fetch. Also, _http now accepts params and a per-call timeout override (OTX uses 120s). The CLI --limit option still slices the results after the fetch. Verified live: 10 OTX pulse-cases ingested, each carrying real paragraph-form descriptions (Mirai, macOS Stealer, FlowerStorm PhaaS, Vidar v1.5, manufacturing intrusion) — exactly the real-prose source the IOC extractor has been missing. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -11,7 +11,7 @@ from __future__ import annotations
|
|||||||
import csv
|
import csv
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
from typing import Any, Callable, Dict, Iterable, List, Optional
|
from typing import Any, Callable, Dict, Iterable, List, Optional
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
@@ -40,18 +40,20 @@ def _http(
|
|||||||
headers: Optional[Dict[str, str]] = None,
|
headers: Optional[Dict[str, str]] = None,
|
||||||
json_body: Optional[Dict[str, Any]] = None,
|
json_body: Optional[Dict[str, Any]] = None,
|
||||||
form_body: Optional[Dict[str, Any]] = None,
|
form_body: Optional[Dict[str, Any]] = None,
|
||||||
|
params: Optional[Dict[str, Any]] = None,
|
||||||
|
timeout: float = HTTP_TIMEOUT,
|
||||||
) -> httpx.Response:
|
) -> httpx.Response:
|
||||||
h = {"User-Agent": USER_AGENT}
|
h = {"User-Agent": USER_AGENT}
|
||||||
if headers:
|
if headers:
|
||||||
h.update(headers)
|
h.update(headers)
|
||||||
with httpx.Client(timeout=HTTP_TIMEOUT, headers=h, follow_redirects=True) as client:
|
with httpx.Client(timeout=timeout, headers=h, follow_redirects=True) as client:
|
||||||
if method.upper() == "POST":
|
if method.upper() == "POST":
|
||||||
if form_body is not None:
|
if form_body is not None:
|
||||||
resp = client.post(url, data=form_body)
|
resp = client.post(url, data=form_body, params=params)
|
||||||
else:
|
else:
|
||||||
resp = client.post(url, json=json_body)
|
resp = client.post(url, json=json_body, params=params)
|
||||||
else:
|
else:
|
||||||
resp = client.get(url)
|
resp = client.get(url, params=params)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
return resp
|
return resp
|
||||||
|
|
||||||
@@ -352,7 +354,16 @@ def _fetch_otx() -> List[Case]:
|
|||||||
key = os.environ.get("OTX_API_KEY", "").strip()
|
key = os.environ.get("OTX_API_KEY", "").strip()
|
||||||
if not key:
|
if not key:
|
||||||
raise RuntimeError("OTX_API_KEY not set — free key at https://otx.alienvault.com → settings → API")
|
raise RuntimeError("OTX_API_KEY not set — free key at https://otx.alienvault.com → settings → API")
|
||||||
data = _http("GET", OTX_PULSES_API, headers={"X-OTX-API-KEY": key}).json()
|
# OTX subscribes a new account to many curated feeds, so the unfiltered
|
||||||
|
# /pulses/subscribed page can 504 on its own backend. modified_since
|
||||||
|
# narrows to recent pulses; page size 20 caps the response.
|
||||||
|
since = (datetime.now(timezone.utc) - timedelta(days=7)).strftime("%Y-%m-%dT%H:%M:%S")
|
||||||
|
data = _http(
|
||||||
|
"GET", OTX_PULSES_API,
|
||||||
|
headers={"X-OTX-API-KEY": key},
|
||||||
|
params={"limit": 20, "modified_since": since},
|
||||||
|
timeout=120.0,
|
||||||
|
).json()
|
||||||
pulses = data.get("results") or []
|
pulses = data.get("results") or []
|
||||||
out: List[Case] = []
|
out: List[Case] = []
|
||||||
for p in pulses:
|
for p in pulses:
|
||||||
|
|||||||
Reference in New Issue
Block a user