stage-19-fix2: OTX — narrow by modified_since, longer timeout
The /pulses/subscribed endpoint enumerates every curated feed that a fresh account is auto-subscribed to. That unfiltered listing alone is enough to make OTX's backend return a 504, regardless of the client timeout. Narrowing the request with modified_since=now-7d (and a page size of limit=20) brings the response back to a single-second fetch. Also, _http now accepts query params and a per-call timeout override (OTX uses 120s). The CLI --limit still slices the results post-fetch. Verified live: 10 OTX pulse-cases ingested, each carrying real paragraph-form descriptions (Mirai, macOS Stealer, FlowerStorm PhaaS, Vidar v1.5, manufacturing intrusion) — exactly the real-prose source the IOC extractor has been missing. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -11,7 +11,7 @@ from __future__ import annotations
|
||||
import csv
|
||||
import io
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any, Callable, Dict, Iterable, List, Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
@@ -40,18 +40,20 @@ def _http(
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
json_body: Optional[Dict[str, Any]] = None,
|
||||
form_body: Optional[Dict[str, Any]] = None,
|
||||
params: Optional[Dict[str, Any]] = None,
|
||||
timeout: float = HTTP_TIMEOUT,
|
||||
) -> httpx.Response:
|
||||
h = {"User-Agent": USER_AGENT}
|
||||
if headers:
|
||||
h.update(headers)
|
||||
with httpx.Client(timeout=HTTP_TIMEOUT, headers=h, follow_redirects=True) as client:
|
||||
with httpx.Client(timeout=timeout, headers=h, follow_redirects=True) as client:
|
||||
if method.upper() == "POST":
|
||||
if form_body is not None:
|
||||
resp = client.post(url, data=form_body)
|
||||
resp = client.post(url, data=form_body, params=params)
|
||||
else:
|
||||
resp = client.post(url, json=json_body)
|
||||
resp = client.post(url, json=json_body, params=params)
|
||||
else:
|
||||
resp = client.get(url)
|
||||
resp = client.get(url, params=params)
|
||||
resp.raise_for_status()
|
||||
return resp
|
||||
|
||||
@@ -352,7 +354,16 @@ def _fetch_otx() -> List[Case]:
|
||||
key = os.environ.get("OTX_API_KEY", "").strip()
|
||||
if not key:
|
||||
raise RuntimeError("OTX_API_KEY not set — free key at https://otx.alienvault.com → settings → API")
|
||||
data = _http("GET", OTX_PULSES_API, headers={"X-OTX-API-KEY": key}).json()
|
||||
# OTX subscribes a new account to many curated feeds, so the unfiltered
|
||||
# /pulses/subscribed page can 504 on its own backend. modified_since
|
||||
# narrows to recent pulses; page size 20 caps the response.
|
||||
since = (datetime.now(timezone.utc) - timedelta(days=7)).strftime("%Y-%m-%dT%H:%M:%S")
|
||||
data = _http(
|
||||
"GET", OTX_PULSES_API,
|
||||
headers={"X-OTX-API-KEY": key},
|
||||
params={"limit": 20, "modified_since": since},
|
||||
timeout=120.0,
|
||||
).json()
|
||||
pulses = data.get("results") or []
|
||||
out: List[Case] = []
|
||||
for p in pulses:
|
||||
|
||||
Reference in New Issue
Block a user