- Training data pipeline: convert, export, extract, load-to-db - Infra tooling: infra-audit, infra-gitea-link - RAG pipeline: rag-ingest, rag-query - Fine-tuning: finetune-lora, overnight-qwen3, install-unsloth - Transcripts: export-transcripts - Updated README with script index and token reduction strategy
65 lines
2.5 KiB
Bash
Executable File
65 lines
2.5 KiB
Bash
Executable File
#!/bin/bash
|
|
# Overnight research + self-improvement pass
|
|
# Runs nightly at 2 AM PT when Maxwell is not active
|
|
# Exports training data to DB VM, compacts memory, refreshes index
|
|
|
|
# Per-night log file; every step below appends its output here.
LOG="$HOME/self-improving/overnight-$(date +%Y%m%d).log"
# Create the log directory up front: without it every `>> "$LOG"` redirect
# below fails and the run leaves no log at all (e.g. first run on a new host).
mkdir -p "$(dirname "$LOG")"
echo "=== Overnight run: $(date) ===" >> "$LOG"
|
|
|
|
# 1. Export new training data to DB VM
# The four scripts run in a fixed order. Each one is best-effort: a non-zero
# exit status is recorded in the log and the remaining scripts still run,
# so a failed stage is visible in the log instead of passing silently.
echo "[1/4] Exporting training data..." >> "$LOG"
for script in \
    export-training-data.py \
    convert-training-data.py \
    export-transcripts.py \
    load-transcripts-to-db.py
do
    # `$?` on the right-hand side of `||` is the status of the failed python3.
    python3 "$HOME/scripts/$script" >> "$LOG" 2>&1 \
        || echo " WARNING: $script exited with status $?" >> "$LOG"
done
|
|
|
|
# 2. Check self-improving file sizes
# Logs a warning for any memory file longer than 100 lines. A missing or
# unreadable file is counted as 0 lines.
echo "[2/4] Checking memory file sizes..." >> "$LOG"
for f in ~/self-improving/corrections.md ~/self-improving/memory.md; do
    # Test readability first instead of relying on `wc -l < "$f" 2>/dev/null`:
    # redirections are applied left to right, so a failing `<` reports its
    # error before stderr has been silenced.
    lines=0
    if [ -r "$f" ]; then
        lines=$(wc -l < "$f") || lines=0
    fi

    if [ "$lines" -gt 100 ]; then
        echo " WARNING: $f has $lines lines — needs compaction" >> "$LOG"
    else
        echo " OK: $f ($lines lines)" >> "$LOG"
    fi
done
|
|
|
|
# 3. Refresh index.md counts
# Rewrites ~/self-improving/index.md with a timestamp, the line counts of
# memory.md and corrections.md, and the number of *.md files under domains/,
# projects/ and archive/. Output and errors of the helper go to the log.
echo "[3/4] Refreshing index..." >> "$LOG"
python3 - << 'PYEOF' >> "$LOG" 2>&1
import glob
import os
from datetime import datetime

base = os.path.expanduser("~/self-improving")


def count_lines(path):
    """Return the number of lines in path, or 0 when it does not exist."""
    if not os.path.exists(path):
        return 0
    # Close the handle deterministically and stream the file instead of
    # loading it. Undecodable bytes are replaced rather than aborting the
    # refresh, since only the line count matters here.
    with open(path, encoding="utf-8", errors="replace") as fh:
        return sum(1 for _ in fh)


lines = [
    "# Self-Improving Index",
    f"Updated: {datetime.now().strftime('%Y-%m-%d %H:%M')}",
    "",
]
for name in ("memory.md", "corrections.md"):
    lines.append(f"- {name}: {count_lines(os.path.join(base, name))} lines")
for sub in ("domains", "projects", "archive"):
    found = glob.glob(os.path.join(base, sub, "*.md"))
    lines.append(f"- {sub}/: {len(found)} files")

with open(os.path.join(base, "index.md"), "w", encoding="utf-8") as out:
    out.write("\n".join(lines) + "\n")
print(" index.md refreshed")
PYEOF
|
|
|
|
# 4. Update heartbeat state timestamp
# Sets `last_overnight_run: <UTC timestamp>` in
# ~/self-improving/heartbeat-state.md, replacing existing entries or appending
# one. If the file does not exist the step is skipped and that is logged.
echo "[4/4] Updating heartbeat state..." >> "$LOG"
python3 - << 'PYEOF' >> "$LOG" 2>&1
import os
import re
from datetime import datetime, timezone

path = os.path.expanduser("~/self-improving/heartbeat-state.md")
ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

if os.path.exists(path):
    with open(path) as fh:
        content = fh.read()
    # Let the regex itself decide whether an entry exists. A plain substring
    # test for "last_overnight_run" can succeed where the pattern (which needs
    # the trailing colon) matches nothing, leaving the file unchanged while
    # still reporting an update.
    content, replaced = re.subn(
        r"last_overnight_run:.*", f"last_overnight_run: {ts}", content
    )
    if not replaced:
        content += f"\nlast_overnight_run: {ts}\n"
    # Explicit context manager so the write is flushed and closed for certain.
    with open(path, "w") as fh:
        fh.write(content)
    print(f" Updated last_overnight_run: {ts}")
else:
    print(" heartbeat-state.md not found, skipping")
PYEOF
|
|
|
|
# Footer line marking the end of the run, stamped with the current date/time.
printf '=== Done: %s ===\n' "$(date)" >> "$LOG"
|