feat: initial import of all helper scripts from ~/scripts/
- Training data pipeline: convert, export, extract, load-to-db
- Infra tooling: infra-audit, infra-gitea-link
- RAG pipeline: rag-ingest, rag-query
- Fine-tuning: finetune-lora, overnight-qwen3, install-unsloth
- Transcripts: export-transcripts
- Updated README with script index and token reduction strategy
This commit is contained in:
252
finetune-lora.py
Executable file
252
finetune-lora.py
Executable file
@@ -0,0 +1,252 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Grace LoRA fine-tuning script using Unsloth.
|
||||
Model: Qwen3-8B (loaded from local GGUF or HuggingFace)
|
||||
GPU: GPU 1 (GTX 1080, 8GB VRAM) — GPU 0 reserved for live inference
|
||||
Data: ~/training-data/cleaned/ (SFT)
|
||||
~/training-data/dpo/ (DPO preference pairs)
|
||||
|
||||
Usage:
|
||||
source ~/unsloth-env/bin/activate
|
||||
python3 ~/scripts/finetune-lora.py [--dpo] [--dry-run]
|
||||
|
||||
Output:
|
||||
~/models/grace-lora-YYYYMMDD/ (LoRA adapter)
|
||||
Copied to grace@192.168.20.87:~/models/
|
||||
|
||||
IMPORTANT: Do not run this until we have 200+ clean examples.
|
||||
Current count is tracked in ~/self-improving/convert-state.json
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
from datetime import datetime, timezone
|
||||
|
||||
# ── Config ──────────────────────────────────────────────────────────────────
NFS_BASE = "/mnt/ai-storage/grace"
CLEAN_DIR = os.path.join(NFS_BASE, "training-data/cleaned")
DPO_DIR = os.path.join(NFS_BASE, "training-data/dpo")
OUTPUT_BASE = os.path.join(NFS_BASE, "models")

# Use GPU 1 only — GPU 0 is running inference
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# LoRA hyperparameters (safe defaults for Qwen3-8B on 8GB VRAM)
LORA_CONFIG = {
    "r": 16,
    "lora_alpha": 16,
    "lora_dropout": 0.1,
    "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj",
                       "gate_proj", "up_proj", "down_proj"],
}

# SFT training config
SFT_CONFIG = {
    "learning_rate": 1e-4,
    "num_train_epochs": 1,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "max_seq_length": 8192,
    # NOTE(review): kl_coef is recorded in run-meta.json but is never passed
    # to SFTConfig in run_sft() — no KL anchor is actually applied yet.
    # Kept here so the metadata schema is unchanged; wire it up or drop it.
    "kl_coef": 0.05,
    "warmup_ratio": 0.05,
    "lr_scheduler_type": "cosine",
    # BUG FIX: the GTX 1080 (Pascal) has no bf16 support, and transformers
    # does NOT silently fall back — bf16=True errors out on unsupported
    # hardware. Train in fp16, consistent with run_dpo's hard-coded fp16.
    "fp16": True,
    "bf16": False,
}

# DPO config
DPO_CONFIG = {
    "beta": 0.1,  # start conservative; sweep upward if needed
    "learning_rate": 2e-5,
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 8,
    "max_length": 4096,
}

# Minimum example counts before training is allowed to start.
MIN_EXAMPLES_SFT = 200
MIN_EXAMPLES_DPO = 50
|
||||
|
||||
|
||||
def count_examples(directory: str) -> int:
    """Return the total number of non-blank lines across all *.jsonl files in *directory*.

    Each non-blank line is treated as one training example. A directory with
    no JSONL files (or that does not exist) yields 0.
    """
    def _nonblank_lines(path: str) -> int:
        # One example per non-blank line.
        with open(path) as handle:
            return sum(1 for row in handle if row.strip())

    pattern = os.path.join(directory, "*.jsonl")
    return sum(_nonblank_lines(path) for path in glob.glob(pattern))
|
||||
|
||||
|
||||
def load_dataset_from_dir(directory: str):
    """Load all JSONL files in *directory* into a single HuggingFace dataset.

    Files are globbed and sorted so example order is deterministic across runs.

    Args:
        directory: path containing one or more ``*.jsonl`` files.

    Returns:
        A ``datasets.Dataset`` (the ``train`` split of a JSON load).

    Raises:
        FileNotFoundError: if *directory* contains no ``*.jsonl`` files.
    """
    files = sorted(glob.glob(os.path.join(directory, "*.jsonl")))
    if not files:
        # Fail fast BEFORE importing `datasets` — the import is heavy and
        # pointless when there is nothing to load, and this gives the caller
        # the intended FileNotFoundError rather than depending on the
        # third-party library being importable first.
        raise FileNotFoundError(f"No JSONL files found in {directory}")
    from datasets import load_dataset  # deferred: heavy third-party import
    return load_dataset("json", data_files=files, split="train")
|
||||
|
||||
|
||||
def run_sft(model, tokenizer, output_dir: str, dry_run: bool = False):
    """Supervised fine-tuning (SFT) stage using unsloth + TRL.

    Loads every JSONL file from CLEAN_DIR, renders each example's
    "conversations" list through the Qwen-3 chat template into a flat
    "text" field, trains with SFTTrainer, and saves the LoRA adapter and
    tokenizer into *output_dir*.

    Args:
        model: PEFT-wrapped model from FastLanguageModel.get_peft_model.
        tokenizer: matching tokenizer; re-wrapped here with the qwen-3 template.
        output_dir: directory the adapter + tokenizer are saved into.
        dry_run: if True, only report the dataset size and return.

    NOTE(review): despite the "KL anchor" wording elsewhere in this file,
    SFT_CONFIG["kl_coef"] is never passed to SFTConfig below — no KL
    regularization is actually applied in this stage. TODO: confirm intent.
    """
    from trl import SFTTrainer, SFTConfig
    from unsloth.chat_templates import get_chat_template

    # Rebind the tokenizer with the qwen-3 chat template so
    # apply_chat_template renders conversations as the base model expects.
    tokenizer = get_chat_template(tokenizer, chat_template="qwen-3")

    dataset = load_dataset_from_dir(CLEAN_DIR)
    print(f"SFT dataset: {len(dataset)} examples")

    if dry_run:
        print("[DRY RUN] Would train SFT on", len(dataset), "examples")
        return

    def format_chat(example):
        # Flatten one example's message list into a single "text" string,
        # consumed by SFTTrainer via dataset_text_field="text" below.
        # Assumes each row carries a "conversations" column — TODO confirm
        # against the exporter that writes CLEAN_DIR.
        convs = example["conversations"]
        text = tokenizer.apply_chat_template(
            convs, tokenize=False, add_generation_prompt=False
        )
        return {"text": text}

    # remove_columns drops the original schema so only "text" remains.
    dataset = dataset.map(format_chat, remove_columns=dataset.column_names)

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        args=SFTConfig(
            output_dir=output_dir,
            learning_rate=SFT_CONFIG["learning_rate"],
            num_train_epochs=SFT_CONFIG["num_train_epochs"],
            per_device_train_batch_size=SFT_CONFIG["per_device_train_batch_size"],
            gradient_accumulation_steps=SFT_CONFIG["gradient_accumulation_steps"],
            max_seq_length=SFT_CONFIG["max_seq_length"],
            warmup_ratio=SFT_CONFIG["warmup_ratio"],
            lr_scheduler_type=SFT_CONFIG["lr_scheduler_type"],
            fp16=SFT_CONFIG["fp16"],
            bf16=SFT_CONFIG["bf16"],
            dataset_text_field="text",
            save_strategy="epoch",
            logging_steps=10,
        ),
    )
    trainer.train()
    # Saves only the LoRA adapter weights (PEFT model), not the full base model.
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)
    print(f"SFT adapter saved → {output_dir}")
|
||||
|
||||
|
||||
def run_dpo(model, tokenizer, output_dir: str, dry_run: bool = False):
    """DPO preference-tuning stage using TRL's DPOTrainer.

    Loads preference pairs from DPO_DIR and continues training the
    (already SFT-tuned) adapter; the result is saved to *output_dir* + "-dpo".

    Args:
        model: model returned by (and typically already trained by) run_sft.
        tokenizer: the matching tokenizer.
        output_dir: base output path; "-dpo" is appended for this stage.
        dry_run: if True, only report the pair count and return.

    NOTE(review): the dataset is presumed to carry standard DPO columns
    (prompt/chosen/rejected) — not verifiable from this file; confirm
    against the exporter that writes DPO_DIR.
    """
    from trl import DPOTrainer, DPOConfig

    dataset = load_dataset_from_dir(DPO_DIR)
    print(f"DPO dataset: {len(dataset)} pairs")

    if dry_run:
        print("[DRY RUN] Would train DPO on", len(dataset), "pairs")
        return

    trainer = DPOTrainer(
        model=model,
        ref_model=None,  # unsloth handles reference model internally
        tokenizer=tokenizer,
        train_dataset=dataset,
        args=DPOConfig(
            output_dir=output_dir + "-dpo",
            beta=DPO_CONFIG["beta"],
            learning_rate=DPO_CONFIG["learning_rate"],
            num_train_epochs=DPO_CONFIG["num_train_epochs"],
            per_device_train_batch_size=DPO_CONFIG["per_device_train_batch_size"],
            gradient_accumulation_steps=DPO_CONFIG["gradient_accumulation_steps"],
            max_length=DPO_CONFIG["max_length"],
            # NOTE(review): fp16 is hard-coded here, unlike run_sft which
            # reads precision flags from SFT_CONFIG — intentional? confirm.
            fp16=True,
            save_strategy="epoch",
            logging_steps=5,
        ),
    )
    trainer.train()
    # Saves the adapter weights only, into the "-dpo" sibling directory.
    model.save_pretrained(output_dir + "-dpo")
    print(f"DPO adapter saved → {output_dir}-dpo")
|
||||
|
||||
|
||||
def main():
    """Entry point: pre-flight data checks, model load, SFT, optional DPO.

    Flow:
      1. Count SFT/DPO examples; below MIN_EXAMPLES_SFT, abort (unless
         --dry-run, which continues so the full report still prints).
      2. --dry-run stops after the pre-flight report.
      3. Load Qwen3-8B in 4-bit (QLoRA) on GPU 1 and wrap with LoRA adapters.
      4. Stage 1: SFT. Stage 2 (only with --dpo and enough pairs): DPO.
      5. Write run-meta.json next to the adapter for reproducibility.
    """
    parser = argparse.ArgumentParser(description="Grace LoRA fine-tuning")
    parser.add_argument("--dpo", action="store_true", help="Also run DPO after SFT")
    parser.add_argument("--dry-run", action="store_true", help="Check data counts, don't train")
    args = parser.parse_args()

    # ── Pre-flight checks ──
    sft_count = count_examples(CLEAN_DIR)
    dpo_count = count_examples(DPO_DIR)
    print(f"Training data: {sft_count} SFT examples, {dpo_count} DPO pairs")

    if sft_count < MIN_EXAMPLES_SFT:
        print(f"⚠️ Not enough SFT data yet ({sft_count}/{MIN_EXAMPLES_SFT} minimum).")
        print(" Keep having conversations with Grace — the exporter runs nightly.")
        # Dry runs are allowed past this point so the report still completes.
        if not args.dry_run:
            return

    # Not enough pairs is non-fatal: DPO is simply skipped, SFT still runs.
    if args.dpo and dpo_count < MIN_EXAMPLES_DPO:
        print(f"⚠️ Not enough DPO pairs yet ({dpo_count}/{MIN_EXAMPLES_DPO} minimum). Skipping DPO.")
        args.dpo = False

    if args.dry_run:
        print("\n[DRY RUN] Pre-flight check complete. Run without --dry-run to train.")
        return

    # ── Load model ──
    # Deferred import: unsloth is heavy and touches the GPU at import time.
    from unsloth import FastLanguageModel

    # NOTE(review): module docstring promises "grace-lora-YYYYMMDD" but the
    # directory name also embeds _HHMMSS — harmless, but docs and code disagree.
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    output_dir = os.path.join(OUTPUT_BASE, f"grace-lora-{timestamp}")
    os.makedirs(output_dir, exist_ok=True)

    print("\nLoading Qwen3-8B with unsloth (4-bit, GPU 1)...")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name="Qwen/Qwen3-8B-Instruct",
        max_seq_length=SFT_CONFIG["max_seq_length"],
        dtype=None,  # auto-detect
        load_in_4bit=True,  # QLoRA — fits in 8GB VRAM
    )

    model = FastLanguageModel.get_peft_model(
        model,
        **LORA_CONFIG,
        bias="none",
        use_gradient_checkpointing="unsloth",
    )

    # ── SFT ──
    print("\n=== Stage 1: SFT with KL anchor ===")
    # args.dry_run is always False by this point (handled above); the flag is
    # still threaded through so the stage functions remain usable standalone.
    run_sft(model, tokenizer, output_dir, dry_run=args.dry_run)

    # ── DPO (optional) ──
    if args.dpo:
        print("\n=== Stage 2: DPO preference tuning ===")
        run_dpo(model, tokenizer, output_dir, dry_run=args.dry_run)

    # Output is already on NFS — no copy needed
    print(f"\nAdapter saved to NFS: {output_dir}")

    # ── Save run metadata ──
    # Full hyperparameter record so any run can be reproduced later.
    meta = {
        "timestamp": timestamp,
        "output_dir": output_dir,
        "sft_examples": sft_count,
        "dpo_pairs": dpo_count if args.dpo else 0,
        "lora_config": LORA_CONFIG,
        "sft_config": SFT_CONFIG,
        "dpo_config": DPO_CONFIG if args.dpo else None,
    }
    with open(os.path.join(output_dir, "run-meta.json"), "w") as f:
        json.dump(meta, f, indent=2)

    print(f"\n✅ Done. Adapter at: {output_dir}")
    print("To use: load the adapter with llama.cpp --lora or swap into docker-compose")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user