From c6655853ac1ef32016e343e226528cab1458bf18 Mon Sep 17 00:00:00 2001
From: m17hr1l <m17hr1l@wehackforyou.com>
Date: Sun, 17 May 2026 15:16:46 +0200
Subject: [PATCH] =?UTF-8?q?stage-3d:=20cockpit=20/train=20page=20=E2=80=94?=
 =?UTF-8?q?=20datasets=20+=20adapters=20+=20training=20metadata?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New /train route lists built JSONL datasets (examples, size) and trained
adapters with their base model, hyperparameters, dataset provenance, and
loss history. train_qlora.py now records train_loss + per-step loss_history
into training_meta.json so future runs show a loss table in the cockpit.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 scripts/train_qlora.py                | 10 +++-
 src/psyc/cockpit/app.py               |  8 ++++
 src/psyc/cockpit/templates/base.html  |  1 +
 src/psyc/cockpit/templates/train.html | 67 +++++++++++++++++++++++++++
 src/psyc/lines/train.py               | 27 +++++++++++
 5 files changed, 112 insertions(+), 1 deletion(-)
 create mode 100644 src/psyc/cockpit/templates/train.html
diff --git a/scripts/train_qlora.py b/scripts/train_qlora.py
index 8ae0748..35da0af 100644
--- a/scripts/train_qlora.py
+++ b/scripts/train_qlora.py
@@ -114,12 +114,18 @@ def main() -> None:
             report_to="none",
         ),
     )
-    trainer.train()
+    train_result = trainer.train()
 
     final_dir = output_dir / "final"
     final_dir.mkdir(parents=True, exist_ok=True)
     model.save_pretrained(str(final_dir))
     tokenizer.save_pretrained(str(final_dir))
+
+    loss_history = [
+        {"step": h["step"], "loss": h["loss"], "epoch": h.get("epoch")}
+        for h in trainer.state.log_history
+        if "loss" in h
+    ]
     (output_dir / "training_meta.json").write_text(json.dumps({
         "base_model": args.base_model,
         "lora_r": args.lora_r,
@@ -129,6 +135,8 @@ def main() -> None:
         "datasets": [str(p) for p in paths],
         "examples": len(examples),
         "seed": args.seed,
+        "train_loss": train_result.training_loss,
+        "loss_history": loss_history,
     }, indent=2))
     print(f"[psyc-train] adapter saved → {final_dir}")
 
diff --git a/src/psyc/cockpit/app.py b/src/psyc/cockpit/app.py
index 294dcad..9bf6423 100644
--- a/src/psyc/cockpit/app.py
+++ b/src/psyc/cockpit/app.py
@@ -14,6 +14,7 @@ from psyc import db, log
 from psyc.lines import ledger as ledger_line
 from psyc.lines import route as route_line
 from psyc.lines import seal as seal_line
+from psyc.lines import train as train_line
 from psyc.lines.route import BlockedRoute, Route
 from psyc.result import Err
 
@@ -75,6 +76,13 @@ def ledger_view(request: Request) -> HTMLResponse:
     return TEMPLATES.TemplateResponse(request, "ledger.html", {"entries": entries, "total": total})
 
 
+@app.get("/train", response_class=HTMLResponse)
+def train_view(request: Request) -> HTMLResponse:
+    """Render train.html with the dataset and adapter listings from the train line."""
+    context = {"datasets": train_line.list_datasets(), "adapters": train_line.list_adapters()}
+    return TEMPLATES.TemplateResponse(request, "train.html", context)
+
+
 @app.get("/healthz")
 def healthz() -> dict:
     return {"status": "ok"}
diff --git a/src/psyc/cockpit/templates/base.html b/src/psyc/cockpit/templates/base.html
index 2446d3f..91c16d4 100644
--- a/src/psyc/cockpit/templates/base.html
+++ b/src/psyc/cockpit/templates/base.html
@@ -16,6 +16,7 @@
     <nav class="nav">
       <a href="/cases">Cases</a>
       <a href="/ledger">Ledger</a>
+      <a href="/train">Trainline</a>
       <a href="/healthz">Health</a>
     </nav>
   </header>
diff --git a/src/psyc/cockpit/templates/train.html b/src/psyc/cockpit/templates/train.html
new file mode 100644
index 0000000..b90d3b3
--- /dev/null
+++ b/src/psyc/cockpit/templates/train.html
@@ -0,0 +1,67 @@
+{% extends "base.html" %}
+{% block title %}Trainline — psyc{% endblock %}
+{% block content %}
+<section class="panel">
+  <div class="panel-head">
+    <h1>Trainline — Datasets</h1>
+    <span class="count">{{ datasets|length }} dataset{{ '' if datasets|length == 1 else 's' }}</span>
+  </div>
+  {% if not datasets %}
+    <p class="empty">No datasets yet. Run <code>psyc train-build-all</code>.</p>
+  {% else %}
+  <table class="cases">
+    <thead>
+      <tr><th>Dataset</th><th>Examples</th><th>Size</th><th>Built</th></tr>
+    </thead>
+    <tbody>
+    {% for d in datasets %}
+      <tr>
+        <td><code>{{ d.name }}</code></td>
+        <td>{{ d.examples }}</td>
+        <td class="muted">{{ d.size_bytes }} B</td>
+        <td class="muted">{{ d.modified[:16] }}</td>
+      </tr>
+    {% endfor %}
+    </tbody>
+  </table>
+  {% endif %}
+</section>
+
+<section class="panel" style="margin-top: 20px;">
+  <div class="panel-head">
+    <h1>Trainline — Adapters</h1>
+    <span class="count">{{ adapters|length }} adapter{{ '' if adapters|length == 1 else 's' }}</span>
+  </div>
+  {% if not adapters %}
+    <p class="empty">No adapters yet. Build <code>Dockerfile.train</code> and run a QLoRA fine-tune.</p>
+  {% else %}
+  <div class="grid">
+    {% for a in adapters %}
+    <div class="card wide">
+      <h2>{{ a.name }}{% if a.has_adapter %} <span class="outcome-badge outcome-actioned">trained</span>{% else %} <span class="outcome-badge outcome-rejected">incomplete</span>{% endif %}</h2>
+      <dl>
+        <dt>Base model</dt><dd><code>{{ a.base_model }}</code></dd>
+        <dt>Examples</dt><dd>{{ a.examples }}</dd>
+        <dt>Epochs</dt><dd>{{ a.epochs }}</dd>
+        <dt>LoRA r</dt><dd>{{ a.lora_r }}</dd>
+        <dt>Learning rate</dt><dd>{{ a.lr }}</dd>
+        <dt>Final train loss</dt><dd>{% if a.train_loss is not none %}{{ '%.4f'|format(a.train_loss) }}{% else %}<span class="muted">— (trained before loss capture)</span>{% endif %}</dd>
+        <dt>Datasets</dt><dd>{% for ds in a.datasets %}<code>{{ ds }}</code> {% endfor %}{% if not a.datasets %}—{% endif %}</dd>
+      </dl>
+      {% if a.loss_history %}
+      <h3>Loss by step</h3>
+      <table class="cases">
+        <thead><tr><th>Step</th><th>Epoch</th><th>Loss</th></tr></thead>
+        <tbody>
+        {% for h in a.loss_history %}
+          <tr><td>{{ h.step }}</td><td class="muted">{{ h.epoch }}</td><td>{{ '%.4f'|format(h.loss) }}</td></tr>
+        {% endfor %}
+        </tbody>
+      </table>
+      {% endif %}
+    </div>
+    {% endfor %}
+  </div>
+  {% endif %}
+</section>
+{% endblock %}
diff --git a/src/psyc/lines/train.py b/src/psyc/lines/train.py
index 14ecd55..25d509b 100644
--- a/src/psyc/lines/train.py
+++ b/src/psyc/lines/train.py
@@ -31,6 +31,7 @@ from psyc.models import Case, TLP
 _log = log.get(__name__)
 
 DATASETS_DIR = DATA_DIR / "datasets"
+ADAPTERS_DIR = DATA_DIR / "adapters"
 
 TASKS = ("ioc_extraction", "severity_classification", "routing_decision", "tlp_assignment")
 
@@ -245,3 +246,29 @@ def list_datasets() -> List[Dict[str, str]]:
             "modified": datetime.fromtimestamp(p.stat().st_mtime, tz=timezone.utc).isoformat(),
         })
     return out
+
+
+def list_adapters() -> List[Dict[str, object]]:
+    """Return one summary dict per run directory under ADAPTERS_DIR, sorted by path."""
+    if not ADAPTERS_DIR.exists():
+        return []
+    out: List[Dict[str, object]] = []
+    for d in sorted(p for p in ADAPTERS_DIR.iterdir() if p.is_dir()):
+        try:  # a missing or half-written meta file must not break the whole listing
+            meta = json.loads((d / "training_meta.json").read_text(encoding="utf-8"))
+        except (OSError, ValueError):  # ValueError covers JSONDecodeError and bad UTF-8
+            meta = {}
+        meta = meta if isinstance(meta, dict) else {}  # tolerate a non-object JSON root
+        out.append({
+            "name": d.name,
+            "has_adapter": (d / "final" / "adapter_model.safetensors").exists(),
+            "base_model": meta.get("base_model", "—"),
+            "examples": meta.get("examples", 0),
+            "epochs": meta.get("epochs", 0),
+            "lora_r": meta.get("lora_r", 0),
+            "lr": meta.get("lr", 0),
+            "datasets": [Path(str(p)).name for p in meta.get("datasets") or []],
+            "train_loss": meta.get("train_loss"),
+            "loss_history": meta.get("loss_history") or [],
+        })
+    return out