feat: initial import of all helper scripts from ~/scripts/
- Training data pipeline: convert, export, extract, load-to-db
- Infra tooling: infra-audit, infra-gitea-link
- RAG pipeline: rag-ingest, rag-query
- Fine-tuning: finetune-lora, overnight-qwen3, install-unsloth
- Transcripts: export-transcripts
- Updated README with script index and token reduction strategy
This commit is contained in:
252
finetune-lora.py
Executable file
252
finetune-lora.py
Executable file
@@ -0,0 +1,252 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Grace LoRA fine-tuning script using Unsloth.
|
||||
Model: Qwen3-8B (loaded from local GGUF or HuggingFace)
|
||||
GPU: GPU 1 (GTX 1080, 8GB VRAM) — GPU 0 reserved for live inference
|
||||
Data: ~/training-data/cleaned/ (SFT)
|
||||
~/training-data/dpo/ (DPO preference pairs)
|
||||
|
||||
Usage:
|
||||
source ~/unsloth-env/bin/activate
|
||||
python3 ~/scripts/finetune-lora.py [--dpo] [--dry-run]
|
||||
|
||||
Output:
|
||||
~/models/grace-lora-YYYYMMDD/ (LoRA adapter)
|
||||
Copied to grace@192.168.20.87:~/models/
|
||||
|
||||
IMPORTANT: Do not run this until we have 200+ clean examples.
|
||||
Current count is tracked in ~/self-improving/convert-state.json
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
from datetime import datetime, timezone
|
||||
|
||||
# ── Config ──────────────────────────────────────────────────────────────────
NFS_BASE = "/mnt/ai-storage/grace"
CLEAN_DIR = os.path.join(NFS_BASE, "training-data/cleaned")
DPO_DIR = os.path.join(NFS_BASE, "training-data/dpo")
OUTPUT_BASE = os.path.join(NFS_BASE, "models")

# Use GPU 1 only — GPU 0 is running inference
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# LoRA hyperparameters (safe defaults for Qwen3-8B on 8GB VRAM)
LORA_CONFIG = {
    "r": 16,
    "lora_alpha": 16,
    "lora_dropout": 0.1,
    "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj",
                       "gate_proj", "up_proj", "down_proj"],
}

# SFT training config
SFT_CONFIG = {
    "learning_rate": 1e-4,
    "num_train_epochs": 1,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "max_seq_length": 8192,
    # NOTE(review): kl_coef is recorded in run-meta.json but is never passed
    # to SFTConfig in run_sft() — no KL anchor is actually applied yet.
    # Kept here so the metadata schema is unchanged; wire it up or drop it.
    "kl_coef": 0.05,
    "warmup_ratio": 0.05,
    "lr_scheduler_type": "cosine",
    # BUG FIX: the GTX 1080 (Pascal) has no bf16 support, and transformers
    # does NOT silently fall back — bf16=True errors out on unsupported
    # hardware. Train in fp16, consistent with run_dpo's hard-coded fp16.
    "fp16": True,
    "bf16": False,
}

# DPO config
DPO_CONFIG = {
    "beta": 0.1,  # start conservative; sweep upward if needed
    "learning_rate": 2e-5,
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 8,
    "max_length": 4096,
}

# Minimum example counts before training is allowed to start.
MIN_EXAMPLES_SFT = 200
MIN_EXAMPLES_DPO = 50
|
||||
|
||||
|
||||
def count_examples(directory: str) -> int:
    """Return the total number of non-blank lines across all *.jsonl files in *directory*.

    Each non-blank line is treated as one training example. A directory with
    no JSONL files (or that does not exist) yields 0.
    """
    def _nonblank_lines(path: str) -> int:
        # One example per non-blank line.
        with open(path) as handle:
            return sum(1 for row in handle if row.strip())

    pattern = os.path.join(directory, "*.jsonl")
    return sum(_nonblank_lines(path) for path in glob.glob(pattern))
|
||||
|
||||
|
||||
def load_dataset_from_dir(directory: str):
    """Load all JSONL files in *directory* into a single HuggingFace dataset.

    Files are globbed and sorted so example order is deterministic across runs.

    Args:
        directory: path containing one or more ``*.jsonl`` files.

    Returns:
        A ``datasets.Dataset`` (the ``train`` split of a JSON load).

    Raises:
        FileNotFoundError: if *directory* contains no ``*.jsonl`` files.
    """
    files = sorted(glob.glob(os.path.join(directory, "*.jsonl")))
    if not files:
        # Fail fast BEFORE importing `datasets` — the import is heavy and
        # pointless when there is nothing to load, and this gives the caller
        # the intended FileNotFoundError rather than depending on the
        # third-party library being importable first.
        raise FileNotFoundError(f"No JSONL files found in {directory}")
    from datasets import load_dataset  # deferred: heavy third-party import
    return load_dataset("json", data_files=files, split="train")
|
||||
|
||||
|
||||
def run_sft(model, tokenizer, output_dir: str, dry_run: bool = False):
    """Supervised fine-tuning (SFT) stage using unsloth + TRL.

    Loads every JSONL file from CLEAN_DIR, renders each example's
    "conversations" list through the Qwen-3 chat template into a flat
    "text" field, trains with SFTTrainer, and saves the LoRA adapter and
    tokenizer into *output_dir*.

    Args:
        model: PEFT-wrapped model from FastLanguageModel.get_peft_model.
        tokenizer: matching tokenizer; re-wrapped here with the qwen-3 template.
        output_dir: directory the adapter + tokenizer are saved into.
        dry_run: if True, only report the dataset size and return.

    NOTE(review): despite the "KL anchor" wording elsewhere in this file,
    SFT_CONFIG["kl_coef"] is never passed to SFTConfig below — no KL
    regularization is actually applied in this stage. TODO: confirm intent.
    """
    from trl import SFTTrainer, SFTConfig
    from unsloth.chat_templates import get_chat_template

    # Rebind the tokenizer with the qwen-3 chat template so
    # apply_chat_template renders conversations as the base model expects.
    tokenizer = get_chat_template(tokenizer, chat_template="qwen-3")

    dataset = load_dataset_from_dir(CLEAN_DIR)
    print(f"SFT dataset: {len(dataset)} examples")

    if dry_run:
        print("[DRY RUN] Would train SFT on", len(dataset), "examples")
        return

    def format_chat(example):
        # Flatten one example's message list into a single "text" string,
        # consumed by SFTTrainer via dataset_text_field="text" below.
        # Assumes each row carries a "conversations" column — TODO confirm
        # against the exporter that writes CLEAN_DIR.
        convs = example["conversations"]
        text = tokenizer.apply_chat_template(
            convs, tokenize=False, add_generation_prompt=False
        )
        return {"text": text}

    # remove_columns drops the original schema so only "text" remains.
    dataset = dataset.map(format_chat, remove_columns=dataset.column_names)

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        args=SFTConfig(
            output_dir=output_dir,
            learning_rate=SFT_CONFIG["learning_rate"],
            num_train_epochs=SFT_CONFIG["num_train_epochs"],
            per_device_train_batch_size=SFT_CONFIG["per_device_train_batch_size"],
            gradient_accumulation_steps=SFT_CONFIG["gradient_accumulation_steps"],
            max_seq_length=SFT_CONFIG["max_seq_length"],
            warmup_ratio=SFT_CONFIG["warmup_ratio"],
            lr_scheduler_type=SFT_CONFIG["lr_scheduler_type"],
            fp16=SFT_CONFIG["fp16"],
            bf16=SFT_CONFIG["bf16"],
            dataset_text_field="text",
            save_strategy="epoch",
            logging_steps=10,
        ),
    )
    trainer.train()
    # Saves only the LoRA adapter weights (PEFT model), not the full base model.
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)
    print(f"SFT adapter saved → {output_dir}")
|
||||
|
||||
|
||||
def run_dpo(model, tokenizer, output_dir: str, dry_run: bool = False):
    """DPO preference-tuning stage using TRL's DPOTrainer.

    Loads preference pairs from DPO_DIR and continues training the
    (already SFT-tuned) adapter; the result is saved to *output_dir* + "-dpo".

    Args:
        model: model returned by (and typically already trained by) run_sft.
        tokenizer: the matching tokenizer.
        output_dir: base output path; "-dpo" is appended for this stage.
        dry_run: if True, only report the pair count and return.

    NOTE(review): the dataset is presumed to carry standard DPO columns
    (prompt/chosen/rejected) — not verifiable from this file; confirm
    against the exporter that writes DPO_DIR.
    """
    from trl import DPOTrainer, DPOConfig

    dataset = load_dataset_from_dir(DPO_DIR)
    print(f"DPO dataset: {len(dataset)} pairs")

    if dry_run:
        print("[DRY RUN] Would train DPO on", len(dataset), "pairs")
        return

    trainer = DPOTrainer(
        model=model,
        ref_model=None,  # unsloth handles reference model internally
        tokenizer=tokenizer,
        train_dataset=dataset,
        args=DPOConfig(
            output_dir=output_dir + "-dpo",
            beta=DPO_CONFIG["beta"],
            learning_rate=DPO_CONFIG["learning_rate"],
            num_train_epochs=DPO_CONFIG["num_train_epochs"],
            per_device_train_batch_size=DPO_CONFIG["per_device_train_batch_size"],
            gradient_accumulation_steps=DPO_CONFIG["gradient_accumulation_steps"],
            max_length=DPO_CONFIG["max_length"],
            # NOTE(review): fp16 is hard-coded here, unlike run_sft which
            # reads precision flags from SFT_CONFIG — intentional? confirm.
            fp16=True,
            save_strategy="epoch",
            logging_steps=5,
        ),
    )
    trainer.train()
    # Saves the adapter weights only, into the "-dpo" sibling directory.
    model.save_pretrained(output_dir + "-dpo")
    print(f"DPO adapter saved → {output_dir}-dpo")
|
||||
|
||||
|
||||
def main():
    """Entry point: pre-flight data checks, model load, SFT, optional DPO.

    Flow:
      1. Count SFT/DPO examples; below MIN_EXAMPLES_SFT, abort (unless
         --dry-run, which continues so the full report still prints).
      2. --dry-run stops after the pre-flight report.
      3. Load Qwen3-8B in 4-bit (QLoRA) on GPU 1 and wrap with LoRA adapters.
      4. Stage 1: SFT. Stage 2 (only with --dpo and enough pairs): DPO.
      5. Write run-meta.json next to the adapter for reproducibility.
    """
    parser = argparse.ArgumentParser(description="Grace LoRA fine-tuning")
    parser.add_argument("--dpo", action="store_true", help="Also run DPO after SFT")
    parser.add_argument("--dry-run", action="store_true", help="Check data counts, don't train")
    args = parser.parse_args()

    # ── Pre-flight checks ──
    sft_count = count_examples(CLEAN_DIR)
    dpo_count = count_examples(DPO_DIR)
    print(f"Training data: {sft_count} SFT examples, {dpo_count} DPO pairs")

    if sft_count < MIN_EXAMPLES_SFT:
        print(f"⚠️ Not enough SFT data yet ({sft_count}/{MIN_EXAMPLES_SFT} minimum).")
        print(" Keep having conversations with Grace — the exporter runs nightly.")
        # Dry runs are allowed past this point so the report still completes.
        if not args.dry_run:
            return

    # Not enough pairs is non-fatal: DPO is simply skipped, SFT still runs.
    if args.dpo and dpo_count < MIN_EXAMPLES_DPO:
        print(f"⚠️ Not enough DPO pairs yet ({dpo_count}/{MIN_EXAMPLES_DPO} minimum). Skipping DPO.")
        args.dpo = False

    if args.dry_run:
        print("\n[DRY RUN] Pre-flight check complete. Run without --dry-run to train.")
        return

    # ── Load model ──
    # Deferred import: unsloth is heavy and touches the GPU at import time.
    from unsloth import FastLanguageModel

    # NOTE(review): module docstring promises "grace-lora-YYYYMMDD" but the
    # directory name also embeds _HHMMSS — harmless, but docs and code disagree.
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    output_dir = os.path.join(OUTPUT_BASE, f"grace-lora-{timestamp}")
    os.makedirs(output_dir, exist_ok=True)

    print("\nLoading Qwen3-8B with unsloth (4-bit, GPU 1)...")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name="Qwen/Qwen3-8B-Instruct",
        max_seq_length=SFT_CONFIG["max_seq_length"],
        dtype=None,  # auto-detect
        load_in_4bit=True,  # QLoRA — fits in 8GB VRAM
    )

    model = FastLanguageModel.get_peft_model(
        model,
        **LORA_CONFIG,
        bias="none",
        use_gradient_checkpointing="unsloth",
    )

    # ── SFT ──
    print("\n=== Stage 1: SFT with KL anchor ===")
    # args.dry_run is always False by this point (handled above); the flag is
    # still threaded through so the stage functions remain usable standalone.
    run_sft(model, tokenizer, output_dir, dry_run=args.dry_run)

    # ── DPO (optional) ──
    if args.dpo:
        print("\n=== Stage 2: DPO preference tuning ===")
        run_dpo(model, tokenizer, output_dir, dry_run=args.dry_run)

    # Output is already on NFS — no copy needed
    print(f"\nAdapter saved to NFS: {output_dir}")

    # ── Save run metadata ──
    # Full hyperparameter record so any run can be reproduced later.
    meta = {
        "timestamp": timestamp,
        "output_dir": output_dir,
        "sft_examples": sft_count,
        "dpo_pairs": dpo_count if args.dpo else 0,
        "lora_config": LORA_CONFIG,
        "sft_config": SFT_CONFIG,
        "dpo_config": DPO_CONFIG if args.dpo else None,
    }
    with open(os.path.join(output_dir, "run-meta.json"), "w") as f:
        json.dump(meta, f, indent=2)

    print(f"\n✅ Done. Adapter at: {output_dir}")
    print("To use: load the adapter with llama.cpp --lora or swap into docker-compose")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user