diff --git a/src/psyc/cli.py b/src/psyc/cli.py index 5a180b2..fbdccf8 100644 --- a/src/psyc/cli.py +++ b/src/psyc/cli.py @@ -114,7 +114,7 @@ def fetch_all() -> None: for source, limit in plan: try: _ingest(source, limit) - except RuntimeError as exc: + except Exception as exc: # noqa: BLE001 — keep going if one feed misbehaves typer.echo(f" skip {source}: {exc}", err=True) diff --git a/src/psyc/lines/scout.py b/src/psyc/lines/scout.py index e822a02..8c777ae 100644 --- a/src/psyc/lines/scout.py +++ b/src/psyc/lines/scout.py @@ -21,7 +21,13 @@ from psyc import log from psyc.models import Case, IncidentType, Observables -USER_AGENT = "psyc/0.1 (defensive CTI; hackathon prototype)" +# CISA's CDN 403s "exotic" UAs from some IPs; a Mozilla-compatible identifier +# is universally accepted and still identifies us honestly. Overridable via env +# if a feed ever wants a specific UA. +USER_AGENT = os.environ.get( + "PSYC_HTTP_USER_AGENT", + "Mozilla/5.0 (compatible; psyc/0.1; +https://psyc.neuronetz.ai)", +) HTTP_TIMEOUT = 30.0 URLHAUS_RECENT_CSV = "https://urlhaus.abuse.ch/downloads/csv_recent/"