stage-3d: cockpit /train page — datasets + adapters + training metadata
New /train route lists built JSONL datasets (examples, size) and trained adapters with their base model, hyperparameters, dataset provenance, and loss history. train_qlora.py now records train_loss and a loss_history entry for every logged step in training_meta.json, so that future runs surface their loss history in the cockpit. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -114,12 +114,18 @@ def main() -> None:
|
|||||||
report_to="none",
|
report_to="none",
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
trainer.train()
|
train_result = trainer.train()
|
||||||
|
|
||||||
final_dir = output_dir / "final"
|
final_dir = output_dir / "final"
|
||||||
final_dir.mkdir(parents=True, exist_ok=True)
|
final_dir.mkdir(parents=True, exist_ok=True)
|
||||||
model.save_pretrained(str(final_dir))
|
model.save_pretrained(str(final_dir))
|
||||||
tokenizer.save_pretrained(str(final_dir))
|
tokenizer.save_pretrained(str(final_dir))
|
||||||
|
|
||||||
|
loss_history = [
|
||||||
|
{"step": h["step"], "loss": h["loss"], "epoch": h.get("epoch")}
|
||||||
|
for h in trainer.state.log_history
|
||||||
|
if "loss" in h
|
||||||
|
]
|
||||||
(output_dir / "training_meta.json").write_text(json.dumps({
|
(output_dir / "training_meta.json").write_text(json.dumps({
|
||||||
"base_model": args.base_model,
|
"base_model": args.base_model,
|
||||||
"lora_r": args.lora_r,
|
"lora_r": args.lora_r,
|
||||||
@@ -129,6 +135,8 @@ def main() -> None:
|
|||||||
"datasets": [str(p) for p in paths],
|
"datasets": [str(p) for p in paths],
|
||||||
"examples": len(examples),
|
"examples": len(examples),
|
||||||
"seed": args.seed,
|
"seed": args.seed,
|
||||||
|
"train_loss": train_result.training_loss,
|
||||||
|
"loss_history": loss_history,
|
||||||
}, indent=2))
|
}, indent=2))
|
||||||
print(f"[psyc-train] adapter saved → {final_dir}")
|
print(f"[psyc-train] adapter saved → {final_dir}")
|
||||||
|
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ from psyc import db, log
|
|||||||
from psyc.lines import ledger as ledger_line
|
from psyc.lines import ledger as ledger_line
|
||||||
from psyc.lines import route as route_line
|
from psyc.lines import route as route_line
|
||||||
from psyc.lines import seal as seal_line
|
from psyc.lines import seal as seal_line
|
||||||
|
from psyc.lines import train as train_line
|
||||||
from psyc.lines.route import BlockedRoute, Route
|
from psyc.lines.route import BlockedRoute, Route
|
||||||
from psyc.result import Err
|
from psyc.result import Err
|
||||||
|
|
||||||
@@ -75,6 +76,13 @@ def ledger_view(request: Request) -> HTMLResponse:
|
|||||||
return TEMPLATES.TemplateResponse(request, "ledger.html", {"entries": entries, "total": total})
|
return TEMPLATES.TemplateResponse(request, "ledger.html", {"entries": entries, "total": total})
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/train", response_class=HTMLResponse)
def train_view(request: Request) -> HTMLResponse:
    """Render the Trainline page.

    The template receives two context entries: ``datasets`` from
    ``train_line.list_datasets()`` and ``adapters`` from
    ``train_line.list_adapters()``.
    """
    # Dict literals evaluate in source order, so datasets are still listed
    # before adapters.
    context = {
        "datasets": train_line.list_datasets(),
        "adapters": train_line.list_adapters(),
    }
    return TEMPLATES.TemplateResponse(request, "train.html", context)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/healthz")
def healthz() -> dict:
    """Health endpoint: always answers with a fixed ``ok`` status payload."""
    payload = {"status": "ok"}
    return payload
|
||||||
|
|||||||
@@ -16,6 +16,7 @@
|
|||||||
<nav class="nav">
|
<nav class="nav">
|
||||||
<a href="/cases">Cases</a>
|
<a href="/cases">Cases</a>
|
||||||
<a href="/ledger">Ledger</a>
|
<a href="/ledger">Ledger</a>
|
||||||
|
<a href="/train">Trainline</a>
|
||||||
<a href="/healthz">Health</a>
|
<a href="/healthz">Health</a>
|
||||||
</nav>
|
</nav>
|
||||||
</header>
|
</header>
|
||||||
|
|||||||
67
src/psyc/cockpit/templates/train.html
Normal file
67
src/psyc/cockpit/templates/train.html
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
{% block title %}Trainline — psyc{% endblock %}
|
||||||
|
{% block content %}
|
||||||
|
<section class="panel">
|
||||||
|
<div class="panel-head">
|
||||||
|
<h1>Trainline — Datasets</h1>
|
||||||
|
<span class="count">{{ datasets|length }} dataset{{ '' if datasets|length == 1 else 's' }}</span>
|
||||||
|
</div>
|
||||||
|
{% if not datasets %}
|
||||||
|
<p class="empty">No datasets yet. Run <code>psyc train-build-all</code>.</p>
|
||||||
|
{% else %}
|
||||||
|
<table class="cases">
|
||||||
|
<thead>
|
||||||
|
<tr><th>Dataset</th><th>Examples</th><th>Size</th><th>Built</th></tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for d in datasets %}
|
||||||
|
<tr>
|
||||||
|
<td><code>{{ d.name }}</code></td>
|
||||||
|
<td>{{ d.examples }}</td>
|
||||||
|
<td class="muted">{{ d.size_bytes }} B</td>
|
||||||
|
<td class="muted">{{ d.modified[:16] }}</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{% endif %}
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section class="panel" style="margin-top: 20px;">
|
||||||
|
<div class="panel-head">
|
||||||
|
<h1>Trainline — Adapters</h1>
|
||||||
|
<span class="count">{{ adapters|length }} adapter{{ '' if adapters|length == 1 else 's' }}</span>
|
||||||
|
</div>
|
||||||
|
{% if not adapters %}
|
||||||
|
<p class="empty">No adapters yet. Build <code>Dockerfile.train</code> and run a QLoRA fine-tune.</p>
|
||||||
|
{% else %}
|
||||||
|
<div class="grid">
|
||||||
|
{% for a in adapters %}
|
||||||
|
<div class="card wide">
|
||||||
|
<h2>{{ a.name }}{% if a.has_adapter %} <span class="outcome-badge outcome-actioned">trained</span>{% else %} <span class="outcome-badge outcome-rejected">incomplete</span>{% endif %}</h2>
|
||||||
|
<dl>
|
||||||
|
<dt>Base model</dt><dd><code>{{ a.base_model }}</code></dd>
|
||||||
|
<dt>Examples</dt><dd>{{ a.examples }}</dd>
|
||||||
|
<dt>Epochs</dt><dd>{{ a.epochs }}</dd>
|
||||||
|
<dt>LoRA r</dt><dd>{{ a.lora_r }}</dd>
|
||||||
|
<dt>Learning rate</dt><dd>{{ a.lr }}</dd>
|
||||||
|
<dt>Final train loss</dt><dd>{% if a.train_loss is not none %}{{ '%.4f'|format(a.train_loss) }}{% else %}<span class="muted">— (trained before loss capture)</span>{% endif %}</dd>
|
||||||
|
<dt>Datasets</dt><dd>{% for ds in a.datasets %}<code>{{ ds }}</code> {% endfor %}{% if not a.datasets %}—{% endif %}</dd>
|
||||||
|
</dl>
|
||||||
|
{% if a.loss_history %}
|
||||||
|
<h3>Loss by step</h3>
|
||||||
|
<table class="cases">
|
||||||
|
<thead><tr><th>Step</th><th>Epoch</th><th>Loss</th></tr></thead>
|
||||||
|
<tbody>
|
||||||
|
{% for h in a.loss_history %}
|
||||||
|
<tr><td>{{ h.step }}</td><td class="muted">{{ h.epoch }}</td><td>{{ '%.4f'|format(h.loss) }}</td></tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
</section>
|
||||||
|
{% endblock %}
|
||||||
@@ -31,6 +31,7 @@ from psyc.models import Case, TLP
|
|||||||
_log = log.get(__name__)
|
_log = log.get(__name__)
|
||||||
|
|
||||||
DATASETS_DIR = DATA_DIR / "datasets"
|
DATASETS_DIR = DATA_DIR / "datasets"
|
||||||
|
ADAPTERS_DIR = DATA_DIR / "adapters"
|
||||||
|
|
||||||
TASKS = ("ioc_extraction", "severity_classification", "routing_decision", "tlp_assignment")
|
TASKS = ("ioc_extraction", "severity_classification", "routing_decision", "tlp_assignment")
|
||||||
|
|
||||||
@@ -245,3 +246,29 @@ def list_datasets() -> List[Dict[str, str]]:
|
|||||||
"modified": datetime.fromtimestamp(p.stat().st_mtime, tz=timezone.utc).isoformat(),
|
"modified": datetime.fromtimestamp(p.stat().st_mtime, tz=timezone.utc).isoformat(),
|
||||||
})
|
})
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def list_adapters() -> List[Dict[str, object]]:
    """Summarise every adapter directory found under ``ADAPTERS_DIR``.

    Each sub-directory yields one dict with the keys ``name``,
    ``has_adapter``, ``base_model``, ``examples``, ``epochs``, ``lora_r``,
    ``lr``, ``datasets``, ``train_loss`` and ``loss_history``. Values are
    read from the directory's ``training_meta.json``; keys missing from that
    file fall back to placeholder defaults (``"—"``, ``0``, ``[]`` or
    ``None``).

    A metadata file that is absent, unreadable, not valid JSON, or not a JSON
    object is treated as empty, so one broken adapter directory cannot stop
    the remaining adapters from being listed.

    Returns:
        One summary dict per sub-directory, ordered by sorted path. The list
        is empty when ``ADAPTERS_DIR`` is not a directory.
    """
    # is_dir() also covers the "path exists but is a regular file" case, where
    # iterdir() would raise.
    if not ADAPTERS_DIR.is_dir():
        return []

    out: List[Dict[str, object]] = []
    for d in sorted(ADAPTERS_DIR.iterdir()):
        if not d.is_dir():
            continue

        meta: Dict[str, object] = {}
        try:
            loaded = json.loads(
                (d / "training_meta.json").read_text(encoding="utf-8")
            )
        except (OSError, ValueError):
            # OSError: file missing or unreadable. ValueError: malformed JSON
            # or invalid UTF-8 (e.g. a half-written file). Deliberate
            # best-effort: fall back to the placeholder defaults below.
            loaded = None
        if isinstance(loaded, dict):
            meta = loaded

        # Only iterate a real list; any other shape is shown as "no datasets".
        raw_datasets = meta.get("datasets")
        if not isinstance(raw_datasets, list):
            raw_datasets = []

        out.append({
            "name": d.name,
            "has_adapter": (d / "final" / "adapter_model.safetensors").exists(),
            "base_model": meta.get("base_model", "—"),
            "examples": meta.get("examples", 0),
            "epochs": meta.get("epochs", 0),
            "lora_r": meta.get("lora_r", 0),
            "lr": meta.get("lr", 0),
            # Keep only the file name of each recorded dataset path.
            "datasets": [Path(str(p)).name for p in raw_datasets],
            "train_loss": meta.get("train_loss"),
            "loss_history": meta.get("loss_history", []),
        })
    return out
|
||||||
|
|||||||
Reference in New Issue
Block a user