From d7999150b3b2555d05e0c8746cbc32310b771e99 Mon Sep 17 00:00:00 2001 From: m17hr1l Date: Mon, 25 May 2026 16:56:27 +0200 Subject: [PATCH] stage-29: fetch-all resilience + Mozilla-compatible UA for CISA Two production-discovered fixes after first deploy: - CISA's CDN was 403'ing the "psyc/0.1 (defensive CTI; hackathon prototype)" User-Agent from the cloud.neuronetz.ai exit IP. Switched to a Mozilla-compatible UA that identifies us honestly while passing the CDN's UA filters. Overridable via PSYC_HTTP_USER_AGENT. - fetch-all aborted on the first HTTPStatusError, so a CISA hiccup killed the threatfox/malware-bazaar/otx legs that come after. The outer loop now catches any exception per-source, logs a skip, and moves on. Single-source failures no longer poison the rest of the pull. Co-Authored-By: Claude Opus 4.7 --- src/psyc/cli.py | 2 +- src/psyc/lines/scout.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/psyc/cli.py b/src/psyc/cli.py index 5a180b2..fbdccf8 100644 --- a/src/psyc/cli.py +++ b/src/psyc/cli.py @@ -114,7 +114,7 @@ def fetch_all() -> None: for source, limit in plan: try: _ingest(source, limit) - except RuntimeError as exc: + except Exception as exc: # noqa: BLE001 — keep going if one feed misbehaves typer.echo(f" skip {source}: {exc}", err=True) diff --git a/src/psyc/lines/scout.py b/src/psyc/lines/scout.py index e822a02..8c777ae 100644 --- a/src/psyc/lines/scout.py +++ b/src/psyc/lines/scout.py @@ -21,7 +21,13 @@ from psyc import log from psyc.models import Case, IncidentType, Observables -USER_AGENT = "psyc/0.1 (defensive CTI; hackathon prototype)" +# CISA's CDN 403s "exotic" UAs from some IPs; a Mozilla-compatible identifier +# is universally accepted and still identifies us honestly. Overridable via env +# if a feed ever wants a specific UA. +USER_AGENT = os.environ.get( + "PSYC_HTTP_USER_AGENT", + "Mozilla/5.0 (compatible; psyc/0.1; +https://psyc.neuronetz.ai)", +) HTTP_TIMEOUT = 30.0 URLHAUS_RECENT_CSV = "https://urlhaus.abuse.ch/downloads/csv_recent/"