stage-23: IOC index + lookup — the actionable keystone

New iocs table (value, type, case_id, feed, severity, first_seen) +
lines/lookup.py: normalize() (CVEs uppercased, everything else lowercased),
reindex() to rebuild the index from the corpus, lookup() (case-insensitive
via normalization, scans all types), export_blocklist() (deduped,
min-severity filter).

CLI: psyc reindex / lookup <indicator> / export-blocklist --type --min-severity.

Verified on the live corpus: 1288 IOCs from 598 cases; lookup of a real
IP/CVE resolves to its case+feed+severity; 8.8.8.8 correctly misses;
blocklist export yields 148 IPs / 289 domains / 150 URLs / 514 hashes /
108 CVEs. This primitive backs the upcoming search UI, asset matching,
and watchlist alerting.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
m17hr1l
2026-05-22 23:39:05 +02:00
parent f88db2fdf7
commit 9a2a31ec9a
4 changed files with 272 additions and 1 deletions

View File

@@ -13,7 +13,7 @@ from psyc import db, log
load_dotenv() # per-dev .env (API keys) is loaded into os.environ for venv CLI
from psyc.cockpit import inference
from psyc.lines import classify, courier, proof, route, scout, seal, train
from psyc.lines import classify, courier, lookup, proof, route, scout, seal, train
from psyc.lines import map as map_line
from psyc.models import Outcome
from psyc.result import Err, Ok
@@ -357,6 +357,45 @@ def reject(
typer.echo(f"rejected #{pending_id}{(': ' + reason) if reason else ''}")
@app.command("reindex")
def reindex() -> None:
    """Rebuild the IOC index from all cases."""
    # init_db is called first so the iocs table is present before writing to it.
    db.init_db()
    all_cases = db.list_cases(limit=1_000_000)
    indexed = lookup.reindex(all_cases)
    total = db.ioc_count()
    typer.echo(f"indexed {indexed} IOC(s) from {len(all_cases)} case(s). total: {total}")
@app.command("lookup")
def lookup_ioc(
    value: str = typer.Argument(..., help="indicator: IP, domain, URL, hash, or CVE"),
) -> None:
    """Look up an indicator across the case corpus."""
    rows = lookup.lookup(value)
    if not rows:
        typer.echo(f"'{value}' — not found in the corpus (no known-bad match)")
        return
    # Keep a separator between the quoted indicator and the match count so the
    # header reads like the "not found" line instead of running them together.
    typer.echo(f"'{value}' — {len(rows)} match(es):")
    for r in rows:
        # feed / severity / first_seen are nullable columns; show a placeholder.
        sev = r["severity"] or "?"
        feed = r["feed"] or "?"
        seen = (r["first_seen"] or "")[:10]  # first 10 chars of the stored timestamp string
        typer.echo(f" [{r['ioc_type']}] {r['case_id']} feed={feed} severity={sev} seen={seen}")
@app.command("export-blocklist")
def export_blocklist(
    ioc_type: str = typer.Option("ip", "--type", "-t", help=f"one of: {', '.join(lookup.IOC_TYPES)}"),
    min_severity: str = typer.Option("", "--min-severity", help="low | medium | high | critical"),
    out: str = typer.Option("", "--out", help="write to file instead of stdout"),
) -> None:
    """Emit a deduplicated blocklist of indicators (firewall/DNS/SIEM ingestion)."""
    try:
        # An empty --min-severity means "no filter", which the lookup line spells as None.
        values = lookup.export_blocklist(ioc_type, min_severity or None)
    except ValueError as exc:
        # lookup.export_blocklist rejects bad arguments (e.g. an unknown --type)
        # with ValueError; surface that as a clean CLI error, not a traceback.
        typer.echo(str(exc), err=True)
        raise typer.Exit(code=1) from exc
    text = "\n".join(values)
    if out:
        from pathlib import Path as _Path
        # One indicator per line, newline-terminated.
        _Path(out).write_text(text + "\n", encoding="utf-8")
        typer.echo(f"wrote {len(values)} {ioc_type}(s) → {out}")
    else:
        typer.echo(text)
@app.command("mock-cert")
def mock_cert_serve(host: str = "127.0.0.1", port: int = 8770) -> None:
uvicorn.run("psyc.mock_cert:app", host=host, port=port)

View File

@@ -82,6 +82,20 @@ pending = Table(
Index("pending_status_idx", pending.c.status)
Index("pending_case_idx", pending.c.case_id)
# IOC reverse index: maps an indicator value to the case that mentions it.
# lookup.reindex() fills it via replace_iocs(), de-duplicating on
# (value, ioc_type, case_id) before insert; the table itself declares no
# uniqueness constraint.
iocs = Table(
    "iocs", _metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    Column("value", String, nullable=False),      # normalized indicator
    Column("ioc_type", String, nullable=False),   # url | domain | ip | hash | cve
    Column("case_id", String, nullable=False),    # case the indicator was seen on
    Column("feed", String, nullable=True),        # case.source_metadata["feed"], may be absent
    Column("severity", String, nullable=True),    # case classification severity value, may be absent
    Column("first_seen", String, nullable=True),  # case.observed_at as an ISO-format string, may be absent
)
# Exact-match lookups (find_iocs) filter on value.
Index("iocs_value_idx", iocs.c.value)
# Per-type scans (iocs_by_type) filter on ioc_type.
Index("iocs_type_idx", iocs.c.ioc_type)
# Secondary index on the owning case id.
Index("iocs_case_idx", iocs.c.case_id)
_log = log.get(__name__)
_engine: Optional[Engine] = None
@@ -151,3 +165,34 @@ def case_count(db_path: Path = DB_PATH) -> int:
stmt = select(func.count()).select_from(cases)
with engine(db_path).connect() as conn:
return conn.execute(stmt).scalar_one()
# ---------- IOC index ----------------------------------------------------
def replace_iocs(rows: List[dict], db_path: Path = DB_PATH) -> int:
    """Swap the IOC index contents for ``rows``.

    Existing rows are deleted and the new ones inserted inside a single
    ``engine(...).begin()`` block. An empty ``rows`` just clears the index.

    Returns:
        The number of rows supplied, i.e. ``len(rows)``.
    """
    with engine(db_path).begin() as tx:
        tx.execute(iocs.delete())
        # Skip the bulk insert entirely when there is nothing to write.
        if rows:
            tx.execute(iocs.insert(), rows)
    return len(rows)
def find_iocs(value: str, db_path: Path = DB_PATH) -> List[dict]:
    """Return the index rows whose ``value`` column equals ``value`` exactly.

    No normalization happens here; the caller passes the already-normalized
    indicator. Rows are ordered by ``first_seen`` descending and returned as
    plain dicts keyed by column name.
    """
    query = select(iocs).where(iocs.c.value == value)
    query = query.order_by(iocs.c.first_seen.desc())
    with engine(db_path).connect() as conn:
        fetched = conn.execute(query).fetchall()
        return [dict(record._mapping) for record in fetched]
def iocs_by_type(ioc_type: str, db_path: Path = DB_PATH) -> List[dict]:
    """Return every index row of the given type, ordered by ``first_seen`` descending.

    Rows are returned as plain dicts; no filtering or de-duplication is done
    here — that is left to the caller.
    """
    query = select(iocs).where(iocs.c.ioc_type == ioc_type)
    query = query.order_by(iocs.c.first_seen.desc())
    with engine(db_path).connect() as conn:
        fetched = conn.execute(query).fetchall()
        return [dict(record._mapping) for record in fetched]
def ioc_count(db_path: Path = DB_PATH) -> int:
    """Return the number of rows currently stored in the IOC index."""
    count_query = select(func.count()).select_from(iocs)
    with engine(db_path).connect() as conn:
        result = conn.execute(count_query)
        return result.scalar_one()

102
src/psyc/lines/lookup.py Normal file
View File

@@ -0,0 +1,102 @@
"""Lookupline — IOC index over the case corpus.
Turns the collected cases into a reverse index: indicator -> which cases,
feeds, and severities mention it. This is the shared primitive behind
"paste an indicator, is it known-bad?", asset matching, and blocklist export.
Indicators are normalized so lookups are case- and format-insensitive.
"""
from __future__ import annotations
from typing import Dict, Iterable, List, Optional, Tuple
from psyc import db, log
from psyc.models import Case
_log = log.get(__name__)
# severity ordering for min-severity filters
_SEVERITY_RANK: Dict[str, int] = {"low": 0, "medium": 1, "high": 2, "critical": 3}
IOC_TYPES = ("url", "domain", "ip", "hash", "cve")
def normalize(value: str, ioc_type: str) -> str:
    """Return the canonical stored/lookup form of an indicator.

    Surrounding whitespace is stripped; CVE identifiers are upper-cased and
    every other indicator type is lower-cased.
    """
    trimmed = value.strip()
    return trimmed.upper() if ioc_type == "cve" else trimmed.lower()
def iter_case_iocs(case: Case) -> Iterable[Tuple[str, str]]:
    """Yield ``(normalized_value, ioc_type)`` for each observable on ``case``.

    Observables are walked in a fixed order: urls, domains, ips, hashes, cves.
    """
    observables = case.observables
    # (ioc_type, attribute on the observables object) in emission order.
    sources = (
        ("url", "urls"),
        ("domain", "domains"),
        ("ip", "ips"),
        ("hash", "hashes"),
        ("cve", "cves"),
    )
    for kind, attr in sources:
        # Attribute is read only when its turn comes, keeping the generator lazy.
        for raw in getattr(observables, attr):
            yield normalize(raw, kind), kind
def reindex(cases: Iterable[Case]) -> int:
    """Rebuild the whole IOC index from the given cases.

    Every observable on every case becomes one index row carrying the case's
    feed, severity and observation time. Empty indicators are skipped and
    duplicate (value, type, case) combinations are written only once. The
    existing index is replaced via ``db.replace_iocs``.

    Args:
        cases: Cases to index; consumed exactly once, so any iterable works.

    Returns:
        Number of rows written to the index.
    """
    rows: List[dict] = []
    seen: set = set()
    case_total = 0
    for case in cases:
        case_total += 1
        # Per-case attributes are copied onto each of the case's IOC rows.
        feed = case.source_metadata.get("feed")
        sev = case.classification.severity.value if case.classification.severity else None
        first_seen = case.observed_at.isoformat() if case.observed_at else None
        for value, ioc_type in iter_case_iocs(case):
            if not value:
                # Blank after normalization: nothing useful to index.
                continue
            key = (value, ioc_type, case.case_id)
            if key in seen:
                continue
            seen.add(key)
            rows.append(dict(
                value=value, ioc_type=ioc_type, case_id=case.case_id,
                feed=feed, severity=sev, first_seen=first_seen,
            ))
    written = db.replace_iocs(rows)
    # Log the number of cases processed. ``len(seen)`` would be the number of
    # distinct (value, type, case) keys — i.e. IOC rows — not cases.
    _log.info("lookup.reindexed", iocs=written, cases=case_total)
    return written
def lookup(value: str) -> List[dict]:
    """Find index rows matching ``value``, whatever its indicator type.

    The input is normalized once per known IOC type and each distinct
    normalized form is looked up, so the caller does not need to know the
    type. A row is returned at most once; the list is empty when nothing
    matches.
    """
    # A set collapses identical normalizations into a single query.
    variants = {normalize(value, kind) for kind in IOC_TYPES}
    matches: List[dict] = []
    emitted: set = set()
    for variant in variants:
        for hit in db.find_iocs(variant):
            row_id = hit["id"]
            if row_id in emitted:
                continue
            emitted.add(row_id)
            matches.append(hit)
    return matches
def export_blocklist(ioc_type: str, min_severity: Optional[str] = None) -> List[str]:
    """Return the distinct indicator values of one type, optionally severity-filtered.

    Args:
        ioc_type: One of ``IOC_TYPES``.
        min_severity: Lowest severity to include (a key of ``_SEVERITY_RANK``,
            matched case-insensitively). ``None`` or ``""`` disables the filter.

    Returns:
        Indicator values in the order ``db.iocs_by_type`` yields them, each
        value appearing once.

    Raises:
        ValueError: If ``ioc_type`` or a non-empty ``min_severity`` is unknown.
    """
    if ioc_type not in IOC_TYPES:
        raise ValueError(f"unknown ioc_type: {ioc_type}; choices: {', '.join(IOC_TYPES)}")

    floor = -1  # -1 means "no severity filter"
    if min_severity:
        wanted = min_severity.strip().lower()
        if wanted not in _SEVERITY_RANK:
            # A typo must not silently disable the filter and export everything.
            raise ValueError(
                f"unknown min_severity: {min_severity}; choices: {', '.join(_SEVERITY_RANK)}"
            )
        floor = _SEVERITY_RANK[wanted]

    values: List[str] = []
    seen: set = set()
    for row in db.iocs_by_type(ioc_type):
        if floor >= 0:
            # Rows with a missing or unrecognized severity rank as -1 and are
            # therefore dropped whenever a filter is active.
            rank = _SEVERITY_RANK.get(row["severity"] or "", -1)
            if rank < floor:
                continue
        v = row["value"]
        if v not in seen:
            seen.add(v)
            values.append(v)
    return values