stage-33a pulse: scheduler module with pipeline registry
This commit is contained in:
380
src/psyc/lines/pulse.py
Normal file
380
src/psyc/lines/pulse.py
Normal file
@@ -0,0 +1,380 @@
|
|||||||
|
"""Pulseline — cron-style scheduler that drives every psyc pipeline on a cadence.
|
||||||
|
|
||||||
|
Each registered pipeline has an autonomy mode (manual / auto-propose /
|
||||||
|
auto-execute) and a cadence in seconds. tick() iterates every pipeline and
|
||||||
|
fires whichever ones are due (and enabled, and not manual, and the global kill
|
||||||
|
switch is off). State persists to SQLite via psyc.db so cadences survive
|
||||||
|
restarts. A background asyncio loop calls tick() at a fixed interval — the
|
||||||
|
cockpit lifespan attaches it.
|
||||||
|
|
||||||
|
NOTE: federation pipelines (peer-pull, vouch-refresh) are wired as placeholders
|
||||||
|
that return a no-op string. Real federation lands in a later stage.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import traceback
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Callable, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from psyc import db, log
|
||||||
|
|
||||||
|
|
||||||
|
_log = log.get(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class PulseMode(str, Enum):
|
||||||
|
MANUAL = "manual"
|
||||||
|
AUTO_PROPOSE = "auto-propose"
|
||||||
|
AUTO_EXECUTE = "auto-execute"
|
||||||
|
|
||||||
|
|
||||||
|
class Pipeline(BaseModel):
|
||||||
|
name: str
|
||||||
|
title: str
|
||||||
|
description: str
|
||||||
|
mode: PulseMode
|
||||||
|
cadence_seconds: int
|
||||||
|
enabled: bool = True
|
||||||
|
last_fired: Optional[datetime] = None
|
||||||
|
next_fire: Optional[datetime] = None
|
||||||
|
last_result: str = ""
|
||||||
|
last_outcome: str = "" # "ok" | "err" | "skipped" | ""
|
||||||
|
|
||||||
|
|
||||||
|
# ---------- pipeline runners --------------------------------------------------
|
||||||
|
|
||||||
|
def _run_fetch() -> str:
|
||||||
|
"""Fetch every enabled scout source; partial fetch is fine.
|
||||||
|
|
||||||
|
Skip-on-fail is critical: keyed feeds (threatfox, malware-bazaar, otx)
|
||||||
|
raise when their key isn't configured — we don't want one missing key to
|
||||||
|
block the public ones.
|
||||||
|
"""
|
||||||
|
from psyc.lines import scout
|
||||||
|
|
||||||
|
plan: Tuple[Tuple[str, Optional[int]], ...] = (
|
||||||
|
("urlhaus", 50),
|
||||||
|
("cisa-kev", 100),
|
||||||
|
("feodo", 50),
|
||||||
|
("threatfox", 200),
|
||||||
|
("malware-bazaar", 100),
|
||||||
|
("otx", 100),
|
||||||
|
)
|
||||||
|
total = 0
|
||||||
|
feeds_ok = 0
|
||||||
|
feeds_err: List[str] = []
|
||||||
|
for source, limit in plan:
|
||||||
|
try:
|
||||||
|
cases = scout.fetch_and_signal(source, limit=limit)
|
||||||
|
for c in cases:
|
||||||
|
db.upsert_case(c)
|
||||||
|
total += len(cases)
|
||||||
|
feeds_ok += 1
|
||||||
|
except Exception as exc: # noqa: BLE001 — partial fetch is the point
|
||||||
|
feeds_err.append(source)
|
||||||
|
_log.info("pulse.fetch.skip", source=source, error=str(exc)[:200])
|
||||||
|
tail = f" (skipped: {', '.join(feeds_err)})" if feeds_err else ""
|
||||||
|
return f"fetched {total} cases across {feeds_ok} feed(s){tail}"
|
||||||
|
|
||||||
|
|
||||||
|
def _run_classify() -> str:
|
||||||
|
from psyc.lines import classify
|
||||||
|
|
||||||
|
cases = db.list_cases(limit=10_000)
|
||||||
|
n = 0
|
||||||
|
for c in cases:
|
||||||
|
classify.classify(c)
|
||||||
|
db.upsert_case(c)
|
||||||
|
n += 1
|
||||||
|
return f"classified {n} case(s)"
|
||||||
|
|
||||||
|
|
||||||
|
def _run_prove() -> str:
|
||||||
|
from psyc.lines import proof
|
||||||
|
|
||||||
|
cases = db.list_cases(limit=10_000)
|
||||||
|
n = 0
|
||||||
|
for c in cases:
|
||||||
|
proof.prove(c)
|
||||||
|
db.upsert_case(c)
|
||||||
|
n += 1
|
||||||
|
return f"proved {n} case(s)"
|
||||||
|
|
||||||
|
|
||||||
|
def _run_reindex() -> str:
|
||||||
|
from psyc.lines import lookup
|
||||||
|
|
||||||
|
cases = db.list_cases(limit=1_000_000)
|
||||||
|
written = lookup.reindex(cases)
|
||||||
|
return f"indexed {written} IOC(s) from {len(cases)} case(s)"
|
||||||
|
|
||||||
|
|
||||||
|
def _run_respond_propose() -> str:
|
||||||
|
"""Propose response actions for high-severity cases that don't yet have any.
|
||||||
|
|
||||||
|
Strictly proposal-only: nothing is dispatched. The respond.propose_for_case
|
||||||
|
helper is already idempotent per case (skips cases that already have
|
||||||
|
actions), so re-running this on a tick is safe.
|
||||||
|
"""
|
||||||
|
from psyc.lines import respond
|
||||||
|
|
||||||
|
cases = db.list_cases(limit=10_000)
|
||||||
|
proposed = 0
|
||||||
|
touched = 0
|
||||||
|
for c in cases:
|
||||||
|
ids = respond.propose_for_case(c)
|
||||||
|
if ids:
|
||||||
|
proposed += len(ids)
|
||||||
|
touched += 1
|
||||||
|
return f"proposed {proposed} action(s) for {touched} case(s)"
|
||||||
|
|
||||||
|
|
||||||
|
def _run_peer_pull() -> str:
|
||||||
|
return "federation not yet active"
|
||||||
|
|
||||||
|
|
||||||
|
def _run_vouch_refresh() -> str:
|
||||||
|
return "federation not yet active"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------- registry ----------------------------------------------------------
|
||||||
|
|
||||||
|
_REGISTRY: Dict[str, Callable[[], str]] = {
|
||||||
|
"fetch": _run_fetch,
|
||||||
|
"classify": _run_classify,
|
||||||
|
"prove": _run_prove,
|
||||||
|
"reindex": _run_reindex,
|
||||||
|
"respond": _run_respond_propose,
|
||||||
|
"peer-pull": _run_peer_pull,
|
||||||
|
"vouch-refresh": _run_vouch_refresh,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Initial defaults — seeded once on first DB init. Tuples of
|
||||||
|
# (name, title, description, mode, cadence_seconds, enabled).
|
||||||
|
_DEFAULTS: Tuple[Tuple[str, str, str, PulseMode, int, bool], ...] = (
|
||||||
|
("fetch", "Scout · fetch feeds", "Pull every configured threat feed and ingest new cases.",
|
||||||
|
PulseMode.AUTO_EXECUTE, 900, True),
|
||||||
|
("classify", "Classify · label cases", "Assign incident type, severity, TLP, and internal class to every case.",
|
||||||
|
PulseMode.AUTO_EXECUTE, 300, True),
|
||||||
|
("prove", "Proof · score confidence", "Compute confidence (reliability · credibility · freshness) for every case.",
|
||||||
|
PulseMode.AUTO_EXECUTE, 300, True),
|
||||||
|
("reindex", "Lookup · rebuild IOC index","Rebuild the IOC reverse-index over the case corpus.",
|
||||||
|
PulseMode.AUTO_EXECUTE, 3600, True),
|
||||||
|
("respond", "Respond · propose actions", "Propose human-gated response actions for newly high-severity cases.",
|
||||||
|
PulseMode.AUTO_PROPOSE, 600, True),
|
||||||
|
("peer-pull", "Federation · peer pull", "(placeholder) Pull sealed cases from federated peers.",
|
||||||
|
PulseMode.MANUAL, 600, False),
|
||||||
|
("vouch-refresh","Federation · vouch refresh","(placeholder) Refresh per-peer vouching ledgers.",
|
||||||
|
PulseMode.MANUAL, 3600, False),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------- helpers -----------------------------------------------------------
|
||||||
|
|
||||||
|
def _now() -> datetime:
|
||||||
|
return datetime.now(timezone.utc)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_dt(value: Optional[str]) -> Optional[datetime]:
|
||||||
|
if not value:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return datetime.fromisoformat(value)
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _row_to_pipeline(row: dict) -> Pipeline:
|
||||||
|
return Pipeline(
|
||||||
|
name=row["name"],
|
||||||
|
title=row["title"],
|
||||||
|
description=row["description"],
|
||||||
|
mode=PulseMode(row["mode"]),
|
||||||
|
cadence_seconds=int(row["cadence_seconds"]),
|
||||||
|
enabled=bool(row["enabled"]),
|
||||||
|
last_fired=_parse_dt(row.get("last_fired")),
|
||||||
|
next_fire=_parse_dt(row.get("next_fire")),
|
||||||
|
last_result=row.get("last_result") or "",
|
||||||
|
last_outcome=row.get("last_outcome") or "",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _pipeline_to_row(p: Pipeline) -> dict:
|
||||||
|
return dict(
|
||||||
|
name=p.name,
|
||||||
|
title=p.title,
|
||||||
|
description=p.description,
|
||||||
|
mode=p.mode.value,
|
||||||
|
cadence_seconds=int(p.cadence_seconds),
|
||||||
|
enabled=bool(p.enabled),
|
||||||
|
last_fired=p.last_fired.isoformat() if p.last_fired else None,
|
||||||
|
next_fire=p.next_fire.isoformat() if p.next_fire else None,
|
||||||
|
last_result=p.last_result or "",
|
||||||
|
last_outcome=p.last_outcome or "",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def seed_defaults() -> None:
|
||||||
|
"""Insert any default pipelines that aren't already in the DB. Idempotent."""
|
||||||
|
existing = {row["name"] for row in db.get_pulse_state()}
|
||||||
|
for name, title, desc, mode, cadence, enabled in _DEFAULTS:
|
||||||
|
if name in existing:
|
||||||
|
continue
|
||||||
|
p = Pipeline(
|
||||||
|
name=name, title=title, description=desc,
|
||||||
|
mode=mode, cadence_seconds=cadence, enabled=enabled,
|
||||||
|
next_fire=_now(), # first tick after install fires immediately if due
|
||||||
|
)
|
||||||
|
db.upsert_pulse_pipeline(_pipeline_to_row(p))
|
||||||
|
_log.info("pulse.defaults.seeded", count=len(_DEFAULTS))
|
||||||
|
|
||||||
|
|
||||||
|
def state() -> List[Pipeline]:
|
||||||
|
"""Every pipeline, ordered by name. Seeds defaults on the first call."""
|
||||||
|
rows = db.get_pulse_state()
|
||||||
|
if not rows:
|
||||||
|
seed_defaults()
|
||||||
|
rows = db.get_pulse_state()
|
||||||
|
return [_row_to_pipeline(r) for r in rows]
|
||||||
|
|
||||||
|
|
||||||
|
def _get_pipeline(name: str) -> Optional[Pipeline]:
|
||||||
|
for p in state():
|
||||||
|
if p.name == name:
|
||||||
|
return p
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def set_mode(name: str, mode: PulseMode) -> None:
|
||||||
|
p = _get_pipeline(name)
|
||||||
|
if p is None:
|
||||||
|
raise ValueError(f"unknown pipeline: {name}")
|
||||||
|
p.mode = mode
|
||||||
|
db.upsert_pulse_pipeline(_pipeline_to_row(p))
|
||||||
|
|
||||||
|
|
||||||
|
def set_cadence(name: str, seconds: int) -> None:
|
||||||
|
if seconds <= 0:
|
||||||
|
raise ValueError("cadence must be > 0 seconds")
|
||||||
|
p = _get_pipeline(name)
|
||||||
|
if p is None:
|
||||||
|
raise ValueError(f"unknown pipeline: {name}")
|
||||||
|
p.cadence_seconds = int(seconds)
|
||||||
|
db.upsert_pulse_pipeline(_pipeline_to_row(p))
|
||||||
|
|
||||||
|
|
||||||
|
def set_enabled(name: str, enabled: bool) -> None:
|
||||||
|
p = _get_pipeline(name)
|
||||||
|
if p is None:
|
||||||
|
raise ValueError(f"unknown pipeline: {name}")
|
||||||
|
p.enabled = bool(enabled)
|
||||||
|
db.upsert_pulse_pipeline(_pipeline_to_row(p))
|
||||||
|
|
||||||
|
|
||||||
|
def set_kill_switch(armed: bool) -> None:
|
||||||
|
db.kill_switch_set(armed)
|
||||||
|
_log.warning("pulse.killswitch.changed", armed=bool(armed))
|
||||||
|
|
||||||
|
|
||||||
|
def kill_switch_state() -> bool:
|
||||||
|
return db.kill_switch_get()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------- the heartbeat -----------------------------------------------------
|
||||||
|
|
||||||
|
def _fire(p: Pipeline) -> Tuple[str, str]:
|
||||||
|
"""Run one pipeline. Returns (outcome, result_str). Persists the timestamps.
|
||||||
|
|
||||||
|
Outcome is "ok" if the runner returned, "err" if it raised.
|
||||||
|
"""
|
||||||
|
runner = _REGISTRY.get(p.name)
|
||||||
|
if runner is None:
|
||||||
|
outcome = "err"
|
||||||
|
result = f"no runner registered for '{p.name}'"
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
result = runner() or ""
|
||||||
|
outcome = "ok"
|
||||||
|
except Exception as exc: # noqa: BLE001 — log + record, don't crash the loop
|
||||||
|
outcome = "err"
|
||||||
|
result = f"{type(exc).__name__}: {exc}"
|
||||||
|
_log.warning("pulse.fire.error", name=p.name, error=result, trace=traceback.format_exc())
|
||||||
|
|
||||||
|
now = _now()
|
||||||
|
p.last_fired = now
|
||||||
|
p.next_fire = now + timedelta(seconds=max(1, p.cadence_seconds))
|
||||||
|
p.last_result = result[:500]
|
||||||
|
p.last_outcome = outcome
|
||||||
|
db.upsert_pulse_pipeline(_pipeline_to_row(p))
|
||||||
|
_log.info("pulse.fired", name=p.name, outcome=outcome, result=p.last_result)
|
||||||
|
return outcome, p.last_result
|
||||||
|
|
||||||
|
|
||||||
|
def _should_fire(p: Pipeline, now: datetime) -> bool:
|
||||||
|
if not p.enabled:
|
||||||
|
return False
|
||||||
|
if p.mode == PulseMode.MANUAL:
|
||||||
|
return False
|
||||||
|
if p.next_fire is None:
|
||||||
|
return True
|
||||||
|
return now >= p.next_fire
|
||||||
|
|
||||||
|
|
||||||
|
def tick() -> List[Tuple[str, str, str]]:
|
||||||
|
"""Single scheduler heartbeat. Returns (name, outcome, result_str) per pipeline.
|
||||||
|
|
||||||
|
Outcome is "ok" / "err" / "skipped" — every registered pipeline appears in
|
||||||
|
the return value so callers can see what was skipped and why.
|
||||||
|
"""
|
||||||
|
if kill_switch_state():
|
||||||
|
_log.info("pulse.tick.killed")
|
||||||
|
return [(p.name, "skipped", "kill switch armed") for p in state()]
|
||||||
|
|
||||||
|
now = _now()
|
||||||
|
out: List[Tuple[str, str, str]] = []
|
||||||
|
for p in state():
|
||||||
|
if not _should_fire(p, now):
|
||||||
|
out.append((p.name, "skipped", "not due"))
|
||||||
|
continue
|
||||||
|
outcome, result = _fire(p)
|
||||||
|
out.append((p.name, outcome, result))
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def run_now(name: str) -> Tuple[str, str]:
|
||||||
|
"""Manually fire one pipeline, bypassing cadence and mode. Honors kill switch.
|
||||||
|
|
||||||
|
Returns (outcome, result_str). Raises ValueError on unknown name.
|
||||||
|
"""
|
||||||
|
if kill_switch_state():
|
||||||
|
return ("skipped", "kill switch armed")
|
||||||
|
p = _get_pipeline(name)
|
||||||
|
if p is None:
|
||||||
|
raise ValueError(f"unknown pipeline: {name}")
|
||||||
|
return _fire(p)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------- background loop ---------------------------------------------------
|
||||||
|
|
||||||
|
async def start_background_loop(interval_seconds: int = 30) -> None:
|
||||||
|
"""Long-running scheduler — calls tick() every interval. Launched from FastAPI lifespan.
|
||||||
|
|
||||||
|
Designed to run for the life of the process; cancellation is the normal stop signal.
|
||||||
|
"""
|
||||||
|
_log.info("pulse.loop.starting", interval=interval_seconds)
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
tick()
|
||||||
|
except Exception as exc: # noqa: BLE001 — one bad tick must not kill the loop
|
||||||
|
_log.warning("pulse.loop.tick.error", error=str(exc), trace=traceback.format_exc())
|
||||||
|
try:
|
||||||
|
await asyncio.sleep(interval_seconds)
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
_log.info("pulse.loop.cancelled")
|
||||||
|
raise
|
||||||
Reference in New Issue
Block a user