feat: initial import of all helper scripts from ~/scripts/

- Training data pipeline: convert, export, extract, load-to-db
- Infra tooling: infra-audit, infra-gitea-link
- RAG pipeline: rag-ingest, rag-query
- Fine-tuning: finetune-lora, overnight-qwen3, install-unsloth
- Transcripts: export-transcripts
- Updated README with script index and token reduction strategy
This commit is contained in:
Grace
2026-03-16 22:32:48 -07:00
parent 462f5298e6
commit 014ec8bd5c
15 changed files with 2979 additions and 1 deletion

144
overnight-qwen3.py Executable file
View File

@@ -0,0 +1,144 @@
#!/usr/bin/env python3
"""
Overnight autonomous research agent using local qwen3:1.7b via Ollama.
Runs nightly at 1 AM PT when Maxwell is not active.
Writes research output to ~/self-improving/domains/ and Joplin notes.
Model: qwen3:1.7b on memory-engine container (127.0.0.1:11434)
"""
import json
import os
import subprocess
import urllib.request
import urllib.error
from datetime import datetime, timezone
# Base URL of the local Ollama HTTP API (memory-engine container, per module docstring).
OLLAMA_URL = "http://127.0.0.1:11434"
# Model tag requested from Ollama for every research prompt.
MODEL = "qwen3:1.7b"
# Directory where generated domain markdown docs are written.
DOMAINS_DIR = os.path.expanduser("~/self-improving/domains")
# One log file per calendar day; NOTE the date is resolved once at import time.
LOG_FILE = os.path.expanduser(f"~/self-improving/overnight-qwen3-{datetime.now().strftime('%Y%m%d')}.log")
# Research queue. Each entry is written to DOMAINS_DIR/<filename>; entries whose
# output file already exists and is less than 7 days old are skipped (see
# domain_exists_and_fresh).
TASK_LIST = [
    # (filename, research prompt)
    ("gitea.md", "Document the Gitea REST API and `tea` CLI: key endpoints for issues, PRs, repos, labels. Include curl examples and common tea commands. Focus on what a homelab automation agent would use daily."),
    ("influxdb.md", "Document InfluxDB 2.x: Flux query language basics, writing data via API, key concepts (bucket, org, token). Include curl examples for write and query endpoints."),
    ("grafana.md", "Document Grafana HTTP API: dashboard CRUD, datasource management, annotations, alerting. Include curl examples. Focus on automation use cases."),
    ("prometheus.md", "Document Prometheus HTTP API: instant queries, range queries, targets, rules, alerts endpoints. Include curl examples for common monitoring queries."),
    ("alertmanager.md", "Document Alertmanager API and config: silence management, alert routing, receiver config (Telegram webhook). Include config YAML examples."),
    ("home-assistant.md", "Document Home Assistant REST API and websocket API: entity states, services, automations. Include curl examples for common operations."),
    ("n8n.md", "Document n8n workflow automation: REST API, webhook triggers, key nodes. Focus on homelab automation use cases."),
    ("ollama.md", "Document Ollama REST API: model management, generate, chat, embeddings endpoints. Include curl examples. Note: also covers memory-engine container at port 11434."),
    ("joplin-server.md", "Document Joplin Server and Joplin REST API (port 41184): notes CRUD, search, folders, tags. Include curl examples."),
    ("dashy.md", "Document Dashy configuration: config.yml structure, adding services, sections, icons. Focus on programmatic config management."),
    ("traefik.md", "Document Traefik v3: dynamic config, routers, services, middlewares, API dashboard. Include examples for homelab reverse proxy setup."),
    ("llama-cpp-server.md", "Document llama-server (llama.cpp) HTTP API: /completion, /chat/completions, /health, /props, /metrics endpoints. Include curl examples and key startup flags."),
    ("litellm.md", "Document LiteLLM proxy: config.yaml structure, model routing, OpenAI-compatible API. Focus on routing to local models."),
]
def log(msg):
    """Print *msg* with a [HH:MM:SS] prefix and append it to LOG_FILE.

    Args:
        msg: Text to log; the timestamp prefix is added automatically.
    """
    ts = datetime.now().strftime("%H:%M:%S")
    line = f"[{ts}] {msg}"
    print(line)
    # Ensure the log directory exists so the first call on a fresh host
    # doesn't crash before anything useful has been logged.
    os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
    # utf-8 explicitly: log messages contain non-ASCII characters ("✓", "—")
    # and the platform default encoding may not accept them.
    with open(LOG_FILE, "a", encoding="utf-8") as f:
        f.write(line + "\n")
def ollama_generate(prompt, max_tokens=2000):
    """Call local Ollama qwen3:1.7b with /think disabled for factual output.

    Args:
        prompt: Research prompt to send to the model.
        max_tokens: Cap on generated tokens (Ollama ``num_predict``).

    Returns:
        The generated text, stripped — or a string beginning with "ERROR:"
        when the request fails for any reason.
    """
    body = {
        "model": MODEL,
        "prompt": prompt,
        "stream": False,
        "think": False,
        "options": {
            "num_predict": max_tokens,
            "temperature": 0.3,
            "top_p": 0.9,
        },
    }
    request = urllib.request.Request(
        f"{OLLAMA_URL}/api/generate",
        data=json.dumps(body).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        # Generous timeout: a 1.7B model can take minutes on long prompts.
        with urllib.request.urlopen(request, timeout=300) as resp:
            parsed = json.load(resp)
    except Exception as e:
        # Callers detect failure via the "ERROR:" prefix (see main()).
        return f"ERROR: {e}"
    return parsed.get("response", "").strip()
def domain_exists_and_fresh(filename):
    """Return True when the domain doc exists and is under 7 days old.

    Used by main() to skip docs that were regenerated recently.
    """
    doc_path = os.path.join(DOMAINS_DIR, filename)
    if not os.path.exists(doc_path):
        return False
    seconds_old = datetime.now().timestamp() - os.path.getmtime(doc_path)
    return seconds_old / 86400 < 7
def write_domain_doc(filename, content):
    """Write *content* to DOMAINS_DIR/<filename> under a generated title header.

    Args:
        filename: Target markdown filename, e.g. "gitea.md"; the title is
            derived from it ("gitea.md" -> "Gitea").
        content: Model-generated markdown body.
    """
    os.makedirs(DOMAINS_DIR, exist_ok=True)
    path = os.path.join(DOMAINS_DIR, filename)
    header = f"# {filename.replace('.md','').replace('-',' ').title()}\n\n"
    header += f"*Generated by overnight-qwen3 agent — {datetime.now().strftime('%Y-%m-%d')}*\n\n"
    # utf-8 explicitly: the header contains an em dash and the model output
    # may include arbitrary Unicode; the platform default encoding can raise
    # UnicodeEncodeError here under a non-UTF-8 locale.
    with open(path, "w", encoding="utf-8") as f:
        f.write(header + content)
    log(f" Wrote {path} ({len(content)} chars)")
def main():
    """Run the nightly loop: one Ollama generation per stale domain doc.

    Aborts early when Ollama is unreachable; otherwise iterates TASK_LIST,
    skipping fresh docs, and finally records the run in the heartbeat file.
    """
    log("=== Overnight Qwen3 research agent starting ===")
    log(f"Model: {MODEL} @ {OLLAMA_URL}")
    # Verify Ollama is up before doing any work.
    try:
        # Context manager closes the probe response — the original leaked
        # the socket by never closing the urlopen() result.
        with urllib.request.urlopen(f"{OLLAMA_URL}/api/tags", timeout=5):
            pass
        log("Ollama reachable ✓")
    except Exception as e:
        log(f"Ollama not reachable: {e} — aborting")
        return
    completed = 0
    skipped = 0
    errors = 0
    for filename, prompt in TASK_LIST:
        if domain_exists_and_fresh(filename):
            # Bug fix: these two f-strings had no placeholder and logged a
            # literal "(unknown)" instead of the file being processed.
            log(f"SKIP: {filename} (exists, < 7 days old)")
            skipped += 1
            continue
        log(f"Researching: {filename}")
        result = ollama_generate(prompt)
        if result.startswith("ERROR:"):
            log(f" FAILED: {result}")
            errors += 1
            continue
        write_domain_doc(filename, result)
        completed += 1
    log(f"=== Done: {completed} written, {skipped} skipped, {errors} errors ===")
    _update_heartbeat_state()


def _update_heartbeat_state():
    """Record this run's UTC timestamp in the heartbeat state file (best effort).

    Updates the ``last_qwen3_run:`` line in ~/self-improving/heartbeat-state.md,
    appending one if absent. No-op when the file does not exist.
    """
    state_path = os.path.expanduser("~/self-improving/heartbeat-state.md")
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    if not os.path.exists(state_path):
        return
    import re
    with open(state_path, encoding="utf-8") as f:
        content = f.read()
    if "last_qwen3_run" in content:
        content = re.sub(r"last_qwen3_run:.*", f"last_qwen3_run: {ts}", content)
    else:
        content += f"\nlast_qwen3_run: {ts}\n"
    with open(state_path, "w", encoding="utf-8") as f:
        f.write(content)
# Script entry point: intended to be run nightly (see module docstring).
if __name__ == "__main__":
    main()