#!/usr/bin/env bash
# Final project training launcher for SmolLM3 fine-tuning with hf jobs.
# Adapts the competition-ready workflow documented in final_plan.md.
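#
# Usage (illustrative; adjust the script name to match your checkout):
#   export HF_TOKEN=hf_...           # token with write access to the Hub
#   export HF_USERNAME=your-username
#   ./launch_training.sh
# Every variable below can be overridden inline, e.g.:
#   MAX_STEPS=2000 FLAVOR=a100-large ./launch_training.sh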
set -euo pipefail
# --- Required configuration --------------------------------------------------
: "${HF_TOKEN:?HF_TOKEN must be exported with write permissions.}"
: "${HF_USERNAME:?HF_USERNAME must be exported before running this script.}"
MODEL_NAME="${MODEL_NAME:-smollm3-sft-math-tuned}"
BASE_MODEL="${BASE_MODEL:-HuggingFaceTB/SmolLM3-3B-Base}"
DATASET_NAME="${DATASET_NAME:-${HF_USERNAME}/MetaMathQA-formatted}"
HUB_MODEL_ID="${HUB_MODEL_ID:-${HF_USERNAME}/${MODEL_NAME}}"
# --- Training hyperparameters (override via env when needed) -----------------
LEARNING_RATE="${LEARNING_RATE:-5e-5}"
PER_DEVICE_BATCH="${PER_DEVICE_BATCH:-2}"
GRAD_ACCUM="${GRAD_ACCUM:-8}"
MAX_STEPS="${MAX_STEPS:-1000}"
EVAL_STEPS="${EVAL_STEPS:-200}"
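# Note: EVAL_STEPS only takes effect if the TRL script enables evaluation
# (an eval_strategy plus an eval split in the dataset); otherwise it is inert.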
SAVE_STEPS="${SAVE_STEPS:-200}"
SEED="${SEED:-42}"
OUTPUT_DIR="${OUTPUT_DIR:-smollm3-sft-jobs-math}"
# --- LoRA configuration ------------------------------------------------------
LORA_R="${LORA_R:-8}"
LORA_ALPHA="${LORA_ALPHA:-32}"
LORA_DROPOUT="${LORA_DROPOUT:-0.05}"
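# Target modules arrive as one space-separated string and are split into an array,
# e.g. LORA_TARGET_MODULES="q_proj v_proj" restricts LoRA to two attention projections.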
IFS=' ' read -r -a LORA_TARGET_MODULES_ARR <<< "${LORA_TARGET_MODULES:-q_proj k_proj v_proj o_proj gate_proj up_proj down_proj}"
# --- Job settings ------------------------------------------------------------
FLAVOR="${FLAVOR:-a10g-large}"
TIMEOUT="${TIMEOUT:-2h}"
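# DEBUG=1 swaps in a short, cheap smoke test so the full configuration can be
# validated end-to-end before spending GPU hours on the real run.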
if [[ "${DEBUG:-0}" != "0" ]]; then
  echo "[DEBUG] Running shortened smoke test."
  MAX_STEPS="${DEBUG_MAX_STEPS:-20}"
  # T4 GPUs lack bf16 support, which the --bf16 flag below requires; default to a small A10G.
  FLAVOR="${DEBUG_FLAVOR:-a10g-small}"
  TIMEOUT="${DEBUG_TIMEOUT:-20m}"
fi
echo "Submitting training job:"
echo " Hub model id : ${HUB_MODEL_ID}"
echo " Base model : ${BASE_MODEL}"
echo " Dataset : ${DATASET_NAME}"
echo " Flavor : ${FLAVOR}"
echo " Max steps : ${MAX_STEPS}"
hf jobs uv run \
  --flavor "${FLAVOR}" \
  --timeout "${TIMEOUT}" \
  --secrets HF_TOKEN \
  "https://raw.githubusercontent.com/huggingface/trl/main/trl/scripts/sft.py" \
  --model_name_or_path "${BASE_MODEL}" \
  --dataset_name "${DATASET_NAME}" \
  --learning_rate "${LEARNING_RATE}" \
  --per_device_train_batch_size "${PER_DEVICE_BATCH}" \
  --gradient_accumulation_steps "${GRAD_ACCUM}" \
  --max_steps "${MAX_STEPS}" \
  --eval_steps "${EVAL_STEPS}" \
  --save_steps "${SAVE_STEPS}" \
  --seed "${SEED}" \
  --output_dir "${OUTPUT_DIR}" \
  --hub_model_id "${HUB_MODEL_ID}" \
  --report_to none \
  --use_peft \
  --lora_r "${LORA_R}" \
  --lora_alpha "${LORA_ALPHA}" \
  --lora_dropout "${LORA_DROPOUT}" \
  --lora_target_modules "${LORA_TARGET_MODULES_ARR[@]}" \
  --bf16 True \
  --push_to_hub
echo "Training job queued. Monitor progress on the Hugging Face Hub Jobs dashboard."