feat: initial import of all helper scripts from ~/scripts/
- Training data pipeline: convert, export, extract, load-to-db
- Infra tooling: infra-audit, infra-gitea-link
- RAG pipeline: rag-ingest, rag-query
- Fine-tuning: finetune-lora, overnight-qwen3, install-unsloth
- Transcripts: export-transcripts
- Updated README with script index and token reduction strategy
This commit is contained in:
64
overnight-research.sh
Executable file
64
overnight-research.sh
Executable file
@@ -0,0 +1,64 @@
|
||||
#!/bin/bash
# Overnight research + self-improvement pass
# Runs nightly at 2 AM PT when Maxwell is not active
# Exports training data to DB VM, compacts memory, refreshes index

# One log file per calendar day. Create the parent directory first:
# every later `>> "$LOG"` redirect would fail on a host where
# ~/self-improving does not exist yet.
LOG="$HOME/self-improving/overnight-$(date +%Y%m%d).log"
mkdir -p "$(dirname "$LOG")"
echo "=== Overnight run: $(date) ===" >> "$LOG"
# 1. Export new training data to DB VM
echo "[1/4] Exporting training data..." >> "$LOG"
# Run each pipeline stage in order, best-effort: a failing stage is
# captured in the log (stdout+stderr) and the next stage still runs.
for stage in export-training-data convert-training-data export-transcripts load-transcripts-to-db; do
    python3 ~/scripts/"$stage".py >> "$LOG" 2>&1
done
# 2. Check self-improving file sizes
echo "[2/4] Checking memory file sizes..." >> "$LOG"
# Files past 100 lines are flagged for compaction; a missing file
# counts as 0 lines (wc's error is discarded and `echo 0` substitutes).
for memfile in ~/self-improving/corrections.md ~/self-improving/memory.md; do
  linecount=$(wc -l < "$memfile" 2>/dev/null || echo 0)
  if [ "$linecount" -le 100 ]; then
    echo " OK: $memfile ($linecount lines)" >> "$LOG"
  else
    echo " WARNING: $memfile has $linecount lines — needs compaction" >> "$LOG"
  fi
done
# 3. Refresh index.md counts
echo "[3/4] Refreshing index..." >> "$LOG"
# Rebuild ~/self-improving/index.md with current line/file counts.
python3 - << 'PYEOF' >> "$LOG" 2>&1
import glob
import os
from datetime import datetime  # proper import instead of __import__() inline hack

base = os.path.expanduser("~/self-improving")
lines = [
    "# Self-Improving Index",
    f"Updated: {datetime.now().strftime('%Y-%m-%d %H:%M')}",
    "",
]

# Line counts for the top-level memory files; a missing file is 0 lines.
for name in ["memory.md", "corrections.md"]:
    path = os.path.join(base, name)
    if os.path.exists(path):
        # Stream the file and close it deterministically — the original
        # leaked the handle and materialized every line via readlines().
        with open(path) as fh:
            count = sum(1 for _ in fh)
    else:
        count = 0
    lines.append(f"- {name}: {count} lines")

# Markdown file counts per subdirectory.
for sub in ("domains", "projects", "archive"):
    n_files = len(glob.glob(os.path.join(base, sub, "*.md")))
    lines.append(f"- {sub}/: {n_files} files")

with open(os.path.join(base, "index.md"), "w") as out:
    out.write("\n".join(lines) + "\n")
print(" index.md refreshed")
PYEOF
# 4. Update heartbeat state timestamp
echo "[4/4] Updating heartbeat state..." >> "$LOG"
# Stamp this run's UTC time into heartbeat-state.md: replace the
# existing `last_overnight_run:` line in place, or append one if absent.
python3 - << 'PYEOF' >> "$LOG" 2>&1
import os
import re
from datetime import datetime, timezone

path = os.path.expanduser("~/self-improving/heartbeat-state.md")
ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

if os.path.exists(path):
    # Use context managers on both the read and the write — the original
    # left both handles open and relied on interpreter teardown to flush.
    with open(path) as fh:
        content = fh.read()
    if "last_overnight_run" in content:
        # Rewrite only the value; the key keeps its position in the file.
        content = re.sub(r"last_overnight_run:.*", f"last_overnight_run: {ts}", content)
    else:
        content += f"\nlast_overnight_run: {ts}\n"
    with open(path, "w") as fh:
        fh.write(content)
    print(f" Updated last_overnight_run: {ts}")
else:
    print(" heartbeat-state.md not found, skipping")
PYEOF

echo "=== Done: $(date) ===" >> "$LOG"
Reference in New Issue
Block a user