stage-trans-a translog: append-only signed merkle chain + tests
translog.append computes
sha256(canonical({prev_hash, entry_type, entry_data, timestamp})) and
writes one row per call; the first entry uses prev_hash = "0"*64.
verify_chain walks rows in id order, re-hashes each, and returns
Err("broken at id=X expected=... got=...") on the first mismatch — so
tampering with either entry_data or prev_hash invalidates every
downstream row. recent / entries_after / head support peer sync and UI.
Tests cover: genesis prev_hash, chained prev_hash, full-chain verify,
tampered-data detection, tampered-prev_hash detection, slicing.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
161
src/psyc/lines/translog.py
Normal file
161
src/psyc/lines/translog.py
Normal file
@@ -0,0 +1,161 @@
|
|||||||
|
"""Transparency log — append-only signed merkle chain over federation signals.
|
||||||
|
|
||||||
|
Every signal we receive from a peer (case, IOC, or accepted vouch) is appended
|
||||||
|
as one `LogEntry`. Each entry's `entry_hash = sha256(canonical(prev_hash +
|
||||||
|
entry_type + entry_data + timestamp))` references the previous head, so any
|
||||||
|
tampering with a historical row invalidates every subsequent hash. The chain
|
||||||
|
is public — auditors can re-fetch it and re-run `verify_chain` to detect a
|
||||||
|
node that quietly mutated history (e.g. to hide a bad signal it accepted).
|
||||||
|
|
||||||
|
Hash format: lowercase hex SHA-256 of the canonical JSON of
|
||||||
|
``{"prev_hash": "...", "entry_type": "...", "entry_data": {...}, "timestamp": "..."}``.
|
||||||
|
Genesis entries use ``prev_hash = "0" * 64``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from psyc import db, log
|
||||||
|
from psyc.result import Err, Ok, Result
|
||||||
|
|
||||||
|
|
||||||
|
_log = log.get(__name__)
|
||||||
|
|
||||||
|
GENESIS_PREV_HASH = "0" * 64
|
||||||
|
|
||||||
|
|
||||||
|
class LogEntry(BaseModel):
|
||||||
|
id: int
|
||||||
|
prev_hash: str
|
||||||
|
entry_type: str
|
||||||
|
entry_data: Dict[str, Any] = Field(default_factory=dict)
|
||||||
|
timestamp: str
|
||||||
|
entry_hash: str
|
||||||
|
|
||||||
|
|
||||||
|
def _canonical_json(obj: Dict[str, Any]) -> bytes:
|
||||||
|
return json.dumps(obj, sort_keys=True, separators=(",", ":"), ensure_ascii=False).encode("utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
def compute_entry_hash(prev_hash: str, entry_type: str, entry_data: Dict[str, Any], timestamp: str) -> str:
|
||||||
|
"""Hex SHA-256 of canonical(prev_hash + entry_type + entry_data + timestamp)."""
|
||||||
|
payload: Dict[str, Any] = {
|
||||||
|
"prev_hash": prev_hash,
|
||||||
|
"entry_type": entry_type,
|
||||||
|
"entry_data": entry_data,
|
||||||
|
"timestamp": timestamp,
|
||||||
|
}
|
||||||
|
return hashlib.sha256(_canonical_json(payload)).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def _row_to_entry(row: Dict[str, Any]) -> LogEntry:
|
||||||
|
raw = row.get("entry_data") or "{}"
|
||||||
|
try:
|
||||||
|
data = json.loads(raw)
|
||||||
|
except Exception:
|
||||||
|
data = {}
|
||||||
|
return LogEntry(
|
||||||
|
id=int(row["id"]),
|
||||||
|
prev_hash=str(row["prev_hash"]),
|
||||||
|
entry_type=str(row["entry_type"]),
|
||||||
|
entry_data=data if isinstance(data, dict) else {},
|
||||||
|
timestamp=str(row["timestamp"]),
|
||||||
|
entry_hash=str(row["entry_hash"]),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def head(db_path: Path = db.DB_PATH) -> Optional[LogEntry]:
|
||||||
|
"""Latest log entry, or None if the chain is empty."""
|
||||||
|
row = db.translog_head(db_path=db_path)
|
||||||
|
return _row_to_entry(row) if row else None
|
||||||
|
|
||||||
|
|
||||||
|
def append(entry_type: str, entry_data: Dict[str, Any], db_path: Path = db.DB_PATH) -> LogEntry:
|
||||||
|
"""Atomically append one entry to the chain. Returns the persisted entry."""
|
||||||
|
prev = db.translog_head(db_path=db_path)
|
||||||
|
prev_hash = str(prev["entry_hash"]) if prev else GENESIS_PREV_HASH
|
||||||
|
timestamp = datetime.now(timezone.utc).isoformat()
|
||||||
|
entry_hash = compute_entry_hash(prev_hash, entry_type, entry_data, timestamp)
|
||||||
|
new_id = db.translog_append(
|
||||||
|
dict(
|
||||||
|
prev_hash=prev_hash,
|
||||||
|
entry_type=entry_type,
|
||||||
|
entry_data=json.dumps(entry_data, sort_keys=True),
|
||||||
|
timestamp=timestamp,
|
||||||
|
entry_hash=entry_hash,
|
||||||
|
),
|
||||||
|
db_path=db_path,
|
||||||
|
)
|
||||||
|
_log.info("translog.append", id=new_id, entry_type=entry_type, hash=entry_hash[:12])
|
||||||
|
return LogEntry(
|
||||||
|
id=new_id,
|
||||||
|
prev_hash=prev_hash,
|
||||||
|
entry_type=entry_type,
|
||||||
|
entry_data=entry_data,
|
||||||
|
timestamp=timestamp,
|
||||||
|
entry_hash=entry_hash,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def verify_chain(start: int = 0, end: Optional[int] = None, db_path: Path = db.DB_PATH) -> Result[int, str]:
|
||||||
|
"""Walk entries [start, end] in id order, recompute each hash, compare.
|
||||||
|
|
||||||
|
Returns Ok(n_verified) when every entry's recomputed hash equals the
|
||||||
|
stored one and each prev_hash matches the previous entry's stored hash.
|
||||||
|
Returns Err with the offending id + expected/got hashes otherwise.
|
||||||
|
"""
|
||||||
|
rows = db.translog_range(start=start, end=end, db_path=db_path)
|
||||||
|
if not rows:
|
||||||
|
return Ok(0)
|
||||||
|
# Establish the prior hash anchor — either genesis (if walking from id=1)
|
||||||
|
# or the entry just before `start`.
|
||||||
|
first_id = int(rows[0]["id"])
|
||||||
|
if first_id <= 1:
|
||||||
|
prior_hash = GENESIS_PREV_HASH
|
||||||
|
else:
|
||||||
|
anchor = db.translog_get(first_id - 1, db_path=db_path)
|
||||||
|
if anchor is None:
|
||||||
|
return Err(f"missing anchor entry id={first_id - 1}")
|
||||||
|
prior_hash = str(anchor["entry_hash"])
|
||||||
|
|
||||||
|
verified = 0
|
||||||
|
for row in rows:
|
||||||
|
stored_prev = str(row["prev_hash"])
|
||||||
|
if stored_prev != prior_hash:
|
||||||
|
return Err(
|
||||||
|
f"broken at id={row['id']} expected_prev={prior_hash} got_prev={stored_prev}"
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
data = json.loads(row.get("entry_data") or "{}")
|
||||||
|
except Exception:
|
||||||
|
return Err(f"broken at id={row['id']} entry_data not JSON")
|
||||||
|
if not isinstance(data, dict):
|
||||||
|
return Err(f"broken at id={row['id']} entry_data not an object")
|
||||||
|
recomputed = compute_entry_hash(
|
||||||
|
stored_prev, str(row["entry_type"]), data, str(row["timestamp"])
|
||||||
|
)
|
||||||
|
stored_hash = str(row["entry_hash"])
|
||||||
|
if recomputed != stored_hash:
|
||||||
|
return Err(
|
||||||
|
f"broken at id={row['id']} expected={recomputed} got={stored_hash}"
|
||||||
|
)
|
||||||
|
prior_hash = stored_hash
|
||||||
|
verified += 1
|
||||||
|
return Ok(verified)
|
||||||
|
|
||||||
|
|
||||||
|
def recent(limit: int = 100, db_path: Path = db.DB_PATH) -> List[LogEntry]:
|
||||||
|
"""The latest `limit` entries, newest first."""
|
||||||
|
return [_row_to_entry(r) for r in db.translog_recent(limit=limit, db_path=db_path)]
|
||||||
|
|
||||||
|
|
||||||
|
def entries_after(entry_id: int, db_path: Path = db.DB_PATH) -> List[LogEntry]:
|
||||||
|
"""All entries with id > entry_id, oldest first — for peer sync."""
|
||||||
|
return [_row_to_entry(r) for r in db.translog_after(entry_id, db_path=db_path)]
|
||||||
118
tests/test_translog.py
Normal file
118
tests/test_translog.py
Normal file
@@ -0,0 +1,118 @@
|
|||||||
|
"""Transparency log — append, verify, tamper detection, sync slices."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from sqlalchemy import create_engine, update
|
||||||
|
|
||||||
|
from psyc import db
|
||||||
|
from psyc.lines import translog
|
||||||
|
from psyc.lines.translog import GENESIS_PREV_HASH
|
||||||
|
from psyc.result import Err, Ok
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def fresh_db(tmp_path, monkeypatch):
|
||||||
|
test_db = tmp_path / "test.db"
|
||||||
|
eng = create_engine(f"sqlite:///{test_db}", future=True)
|
||||||
|
db._metadata.create_all(eng, checkfirst=True)
|
||||||
|
monkeypatch.setattr(db, "_engine", eng)
|
||||||
|
monkeypatch.setattr(db, "DB_PATH", test_db)
|
||||||
|
yield test_db
|
||||||
|
|
||||||
|
|
||||||
|
def test_first_append_uses_genesis_prev_hash(fresh_db):
|
||||||
|
e = translog.append("signal", {"x": 1})
|
||||||
|
assert e.prev_hash == GENESIS_PREV_HASH
|
||||||
|
assert e.id >= 1
|
||||||
|
assert e.entry_type == "signal"
|
||||||
|
assert e.entry_data == {"x": 1}
|
||||||
|
# head matches
|
||||||
|
h = translog.head()
|
||||||
|
assert h is not None
|
||||||
|
assert h.id == e.id
|
||||||
|
assert h.entry_hash == e.entry_hash
|
||||||
|
|
||||||
|
|
||||||
|
def test_append_chains_prev_hash(fresh_db):
|
||||||
|
e1 = translog.append("signal", {"a": 1})
|
||||||
|
e2 = translog.append("signal", {"b": 2})
|
||||||
|
e3 = translog.append("vouch", {"c": 3})
|
||||||
|
assert e2.prev_hash == e1.entry_hash
|
||||||
|
assert e3.prev_hash == e2.entry_hash
|
||||||
|
head = translog.head()
|
||||||
|
assert head is not None
|
||||||
|
assert head.entry_hash == e3.entry_hash
|
||||||
|
|
||||||
|
|
||||||
|
def test_verify_chain_ok_round_trip(fresh_db):
|
||||||
|
translog.append("signal", {"a": 1})
|
||||||
|
translog.append("signal", {"b": 2})
|
||||||
|
translog.append("vouch", {"c": 3})
|
||||||
|
result = translog.verify_chain()
|
||||||
|
assert isinstance(result, Ok)
|
||||||
|
assert result.value == 3
|
||||||
|
|
||||||
|
|
||||||
|
def test_verify_chain_empty_returns_zero(fresh_db):
|
||||||
|
result = translog.verify_chain()
|
||||||
|
assert isinstance(result, Ok)
|
||||||
|
assert result.value == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_verify_chain_detects_tampered_data(fresh_db):
|
||||||
|
e1 = translog.append("signal", {"a": 1})
|
||||||
|
e2 = translog.append("signal", {"b": 2})
|
||||||
|
|
||||||
|
# Mutate entry_data of the first row directly in the DB; entry_hash stays
|
||||||
|
# the same but no longer matches the recomputed hash.
|
||||||
|
with db.engine().begin() as conn:
|
||||||
|
conn.execute(
|
||||||
|
update(db.translog)
|
||||||
|
.where(db.translog.c.id == e1.id)
|
||||||
|
.values(entry_data=json.dumps({"a": 999}, sort_keys=True))
|
||||||
|
)
|
||||||
|
|
||||||
|
result = translog.verify_chain()
|
||||||
|
assert isinstance(result, Err)
|
||||||
|
assert "broken at id=" in result.reason
|
||||||
|
|
||||||
|
|
||||||
|
def test_verify_chain_detects_tampered_prev_hash(fresh_db):
|
||||||
|
translog.append("signal", {"a": 1})
|
||||||
|
e2 = translog.append("signal", {"b": 2})
|
||||||
|
# Flip e2.prev_hash so it no longer matches e1.entry_hash.
|
||||||
|
with db.engine().begin() as conn:
|
||||||
|
conn.execute(
|
||||||
|
update(db.translog)
|
||||||
|
.where(db.translog.c.id == e2.id)
|
||||||
|
.values(prev_hash="f" * 64)
|
||||||
|
)
|
||||||
|
result = translog.verify_chain()
|
||||||
|
assert isinstance(result, Err)
|
||||||
|
assert "broken at id=" in result.reason
|
||||||
|
|
||||||
|
|
||||||
|
def test_entries_after_returns_correct_slice(fresh_db):
|
||||||
|
e1 = translog.append("signal", {"a": 1})
|
||||||
|
e2 = translog.append("signal", {"b": 2})
|
||||||
|
e3 = translog.append("signal", {"c": 3})
|
||||||
|
|
||||||
|
after_zero = translog.entries_after(0)
|
||||||
|
assert [e.id for e in after_zero] == [e1.id, e2.id, e3.id]
|
||||||
|
|
||||||
|
after_e1 = translog.entries_after(e1.id)
|
||||||
|
assert [e.id for e in after_e1] == [e2.id, e3.id]
|
||||||
|
|
||||||
|
after_e3 = translog.entries_after(e3.id)
|
||||||
|
assert after_e3 == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_recent_newest_first(fresh_db):
|
||||||
|
e1 = translog.append("signal", {"a": 1})
|
||||||
|
e2 = translog.append("signal", {"b": 2})
|
||||||
|
e3 = translog.append("signal", {"c": 3})
|
||||||
|
recent = translog.recent(limit=10)
|
||||||
|
assert [e.id for e in recent] == [e3.id, e2.id, e1.id]
|
||||||
Reference in New Issue
Block a user