feat: initial import of all helper scripts from ~/scripts/

- Training data pipeline: convert, export, extract, load-to-db
- Infra tooling: infra-audit, infra-gitea-link
- RAG pipeline: rag-ingest, rag-query
- Fine-tuning: finetune-lora, overnight-qwen3, install-unsloth
- Transcripts: export-transcripts
- Updated README with script index and token reduction strategy
This commit is contained in:
Grace
2026-03-16 22:32:48 -07:00
parent 462f5298e6
commit 014ec8bd5c
15 changed files with 2979 additions and 1 deletion

144
overnight-qwen3.py Executable file
View File

@@ -0,0 +1,144 @@
#!/usr/bin/env python3
"""
Overnight autonomous research agent using local qwen3:1.7b via Ollama.
Runs nightly at 1 AM PT when Maxwell is not active.
Writes research output to ~/self-improving/domains/ and Joplin notes.
Model: qwen3:1.7b on memory-engine container (127.0.0.1:11434)
"""
import json
import os
import subprocess
import urllib.request
import urllib.error
from datetime import datetime, timezone
# Base URL of the local Ollama HTTP API (memory-engine container, per module docstring).
OLLAMA_URL = "http://127.0.0.1:11434"
# Model tag requested from Ollama for every research prompt.
MODEL = "qwen3:1.7b"
# Directory where generated domain markdown docs are written.
DOMAINS_DIR = os.path.expanduser("~/self-improving/domains")
# One log file per calendar day; NOTE the date is resolved once at import time.
LOG_FILE = os.path.expanduser(f"~/self-improving/overnight-qwen3-{datetime.now().strftime('%Y%m%d')}.log")
# Research queue. Each entry is written to DOMAINS_DIR/<filename>; entries whose
# output file already exists and is less than 7 days old are skipped (see
# domain_exists_and_fresh).
TASK_LIST = [
    # (filename, research prompt)
    ("gitea.md", "Document the Gitea REST API and `tea` CLI: key endpoints for issues, PRs, repos, labels. Include curl examples and common tea commands. Focus on what a homelab automation agent would use daily."),
    ("influxdb.md", "Document InfluxDB 2.x: Flux query language basics, writing data via API, key concepts (bucket, org, token). Include curl examples for write and query endpoints."),
    ("grafana.md", "Document Grafana HTTP API: dashboard CRUD, datasource management, annotations, alerting. Include curl examples. Focus on automation use cases."),
    ("prometheus.md", "Document Prometheus HTTP API: instant queries, range queries, targets, rules, alerts endpoints. Include curl examples for common monitoring queries."),
    ("alertmanager.md", "Document Alertmanager API and config: silence management, alert routing, receiver config (Telegram webhook). Include config YAML examples."),
    ("home-assistant.md", "Document Home Assistant REST API and websocket API: entity states, services, automations. Include curl examples for common operations."),
    ("n8n.md", "Document n8n workflow automation: REST API, webhook triggers, key nodes. Focus on homelab automation use cases."),
    ("ollama.md", "Document Ollama REST API: model management, generate, chat, embeddings endpoints. Include curl examples. Note: also covers memory-engine container at port 11434."),
    ("joplin-server.md", "Document Joplin Server and Joplin REST API (port 41184): notes CRUD, search, folders, tags. Include curl examples."),
    ("dashy.md", "Document Dashy configuration: config.yml structure, adding services, sections, icons. Focus on programmatic config management."),
    ("traefik.md", "Document Traefik v3: dynamic config, routers, services, middlewares, API dashboard. Include examples for homelab reverse proxy setup."),
    ("llama-cpp-server.md", "Document llama-server (llama.cpp) HTTP API: /completion, /chat/completions, /health, /props, /metrics endpoints. Include curl examples and key startup flags."),
    ("litellm.md", "Document LiteLLM proxy: config.yaml structure, model routing, OpenAI-compatible API. Focus on routing to local models."),
]
def log(msg):
    """Print *msg* with a [HH:MM:SS] prefix and append it to LOG_FILE.

    Args:
        msg: Text to log; the timestamp prefix is added automatically.
    """
    ts = datetime.now().strftime("%H:%M:%S")
    line = f"[{ts}] {msg}"
    print(line)
    # Ensure the log directory exists so the first call on a fresh host
    # doesn't crash before anything useful has been logged.
    os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
    # utf-8 explicitly: log messages contain non-ASCII characters ("✓", "—")
    # and the platform default encoding may not accept them.
    with open(LOG_FILE, "a", encoding="utf-8") as f:
        f.write(line + "\n")
def ollama_generate(prompt, max_tokens=2000):
    """Call local Ollama qwen3:1.7b with /think disabled for factual output.

    Args:
        prompt: Research prompt to send to the model.
        max_tokens: Cap on generated tokens (Ollama ``num_predict``).

    Returns:
        The generated text, stripped — or a string beginning with "ERROR:"
        when the request fails for any reason.
    """
    body = {
        "model": MODEL,
        "prompt": prompt,
        "stream": False,
        "think": False,
        "options": {
            "num_predict": max_tokens,
            "temperature": 0.3,
            "top_p": 0.9,
        },
    }
    request = urllib.request.Request(
        f"{OLLAMA_URL}/api/generate",
        data=json.dumps(body).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        # Generous timeout: a 1.7B model can take minutes on long prompts.
        with urllib.request.urlopen(request, timeout=300) as resp:
            parsed = json.load(resp)
    except Exception as e:
        # Callers detect failure via the "ERROR:" prefix (see main()).
        return f"ERROR: {e}"
    return parsed.get("response", "").strip()
def domain_exists_and_fresh(filename):
    """Return True when the domain doc exists and is under 7 days old.

    Used by main() to skip docs that were regenerated recently.
    """
    doc_path = os.path.join(DOMAINS_DIR, filename)
    if not os.path.exists(doc_path):
        return False
    seconds_old = datetime.now().timestamp() - os.path.getmtime(doc_path)
    return seconds_old / 86400 < 7
def write_domain_doc(filename, content):
    """Write *content* to DOMAINS_DIR/<filename> under a generated title header.

    Args:
        filename: Target markdown filename, e.g. "gitea.md"; the title is
            derived from it ("gitea.md" -> "Gitea").
        content: Model-generated markdown body.
    """
    os.makedirs(DOMAINS_DIR, exist_ok=True)
    path = os.path.join(DOMAINS_DIR, filename)
    header = f"# {filename.replace('.md','').replace('-',' ').title()}\n\n"
    header += f"*Generated by overnight-qwen3 agent — {datetime.now().strftime('%Y-%m-%d')}*\n\n"
    # utf-8 explicitly: the header contains an em dash and the model output
    # may include arbitrary Unicode; the platform default encoding can raise
    # UnicodeEncodeError here under a non-UTF-8 locale.
    with open(path, "w", encoding="utf-8") as f:
        f.write(header + content)
    log(f" Wrote {path} ({len(content)} chars)")
def main():
    """Run the nightly loop: one Ollama generation per stale domain doc.

    Aborts early when Ollama is unreachable; otherwise iterates TASK_LIST,
    skipping fresh docs, and finally records the run in the heartbeat file.
    """
    log("=== Overnight Qwen3 research agent starting ===")
    log(f"Model: {MODEL} @ {OLLAMA_URL}")
    # Verify Ollama is up before doing any work.
    try:
        # Context manager closes the probe response — the original leaked
        # the socket by never closing the urlopen() result.
        with urllib.request.urlopen(f"{OLLAMA_URL}/api/tags", timeout=5):
            pass
        log("Ollama reachable ✓")
    except Exception as e:
        log(f"Ollama not reachable: {e} — aborting")
        return
    completed = 0
    skipped = 0
    errors = 0
    for filename, prompt in TASK_LIST:
        if domain_exists_and_fresh(filename):
            # Bug fix: these two f-strings had no placeholder and logged a
            # literal "(unknown)" instead of the file being processed.
            log(f"SKIP: {filename} (exists, < 7 days old)")
            skipped += 1
            continue
        log(f"Researching: {filename}")
        result = ollama_generate(prompt)
        if result.startswith("ERROR:"):
            log(f" FAILED: {result}")
            errors += 1
            continue
        write_domain_doc(filename, result)
        completed += 1
    log(f"=== Done: {completed} written, {skipped} skipped, {errors} errors ===")
    _update_heartbeat_state()


def _update_heartbeat_state():
    """Record this run's UTC timestamp in the heartbeat state file (best effort).

    Updates the ``last_qwen3_run:`` line in ~/self-improving/heartbeat-state.md,
    appending one if absent. No-op when the file does not exist.
    """
    state_path = os.path.expanduser("~/self-improving/heartbeat-state.md")
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    if not os.path.exists(state_path):
        return
    import re
    with open(state_path, encoding="utf-8") as f:
        content = f.read()
    if "last_qwen3_run" in content:
        content = re.sub(r"last_qwen3_run:.*", f"last_qwen3_run: {ts}", content)
    else:
        content += f"\nlast_qwen3_run: {ts}\n"
    with open(state_path, "w", encoding="utf-8") as f:
        f.write(content)
# Script entry point: intended to be run nightly (see module docstring).
if __name__ == "__main__":
    main()