robbiemu committed
Commit 3eb9439 · verified · 1 Parent(s): 2ca1daf

scripts used in generating my submission


I built these scripts (with help from LLMs: Gemini 2.5 Pro and GPT-5); they encapsulate the work done in creating my submission. The only thing I am not sharing is the planning documents I wrote for my submission, because 1. I currently appear to have first place, and 2. that may not last, and my planning documents include improvements I can make, if I am so inspired.

Files changed (4)
  1. evaluate.sh +49 -0
  2. format_dataset.py +38 -0
  3. merge.sh +90 -0
  4. train.sh +74 -0
evaluate.sh ADDED
@@ -0,0 +1,49 @@
+ #!/usr/bin/env bash
+ # Final project evaluation launcher for gsm8k using lighteval + vLLM on hf jobs.
+ set -euo pipefail
+
+ : "${HF_TOKEN:?HF_TOKEN must be exported with write permissions.}"
+ : "${HF_USERNAME:?HF_USERNAME must be exported before running this script.}"
+
+ MODEL_NAME="${MODEL_NAME:-smollm3-sft-math-tuned}"
+ HUB_MODEL_ID="${HUB_MODEL_ID:-${HF_USERNAME}/${MODEL_NAME}}"
+ RESULTS_TEMPLATE_DEFAULT='{org}/details_{org}__{model}_private'
+ RESULTS_PATH_TEMPLATE="${RESULTS_PATH_TEMPLATE:-$RESULTS_TEMPLATE_DEFAULT}"
+ HUB_MODEL_REVISION="${HUB_MODEL_REVISION:-main}"
+
+ FLAVOR="${FLAVOR:-a10g-large}"
+ TIMEOUT="${TIMEOUT:-60m}"
+ TASK_SPEC="${TASK_SPEC:-lighteval|gsm8k|0}"
+
+ if [[ "${DEBUG:-0}" != "0" ]]; then
+   echo "[DEBUG] Running evaluation on a reduced setup."
+   FLAVOR="${DEBUG_FLAVOR:-t4-small}"
+   TIMEOUT="${DEBUG_TIMEOUT:-20m}"
+ fi
+
+ echo "Submitting evaluation job:"
+ echo "  Model            : ${HUB_MODEL_ID}"
+ echo "  Results template : ${RESULTS_PATH_TEMPLATE}"
+ echo "  Hardware         : ${FLAVOR}"
+ echo "  Task spec        : ${TASK_SPEC}"
+
+ MODEL_ARGS="model_name=${HUB_MODEL_ID}"
+ if [[ -n "${HUB_MODEL_REVISION}" && "${HUB_MODEL_REVISION}" != "none" ]]; then
+   MODEL_ARGS="${MODEL_ARGS},revision=${HUB_MODEL_REVISION}"
+ fi
+
+ hf jobs uv run \
+   --flavor "${FLAVOR}" \
+   --timeout "${TIMEOUT}" \
+   --with "git+https://github.com/huggingface/lighteval@main#egg=lighteval[vllm,gsm8k]" \
+   --with emoji \
+   --secrets HF_TOKEN \
+   lighteval vllm \
+   "${MODEL_ARGS}" \
+   "${TASK_SPEC}" \
+   --push-to-hub \
+   --results-org "${HF_USERNAME}" \
+   --results-path-template "${RESULTS_PATH_TEMPLATE}"
+
+ echo "Evaluation job queued. Check your datasets under ${HF_USERNAME}/ for the 'details_*' entry once complete."
+ echo "Recommendation: apply self-consistency by sampling multiple completions locally and majority-voting the final answer."
format_dataset.py ADDED
@@ -0,0 +1,38 @@
+ import os
+ from datasets import load_dataset
+
+ # --- Configuration ---
+ # Your Hugging Face username is needed to create the new dataset repo.
+ # Make sure you are logged in via `huggingface-cli login`.
+ HF_USERNAME = os.getenv("HF_USERNAME")
+ if not HF_USERNAME:
+     raise ValueError("Please set the HF_USERNAME environment variable.")
+
+ SOURCE_DATASET = "meta-math/MetaMathQA"
+ NEW_DATASET_NAME = f"{HF_USERNAME}/MetaMathQA-formatted"
+
+
+ # --- Formatting Function ---
+ # This function creates the single 'text' column the training script needs.
+ def format_prompt(example):
+     # The prompt format should ideally match the base model's training style.
+     # A simple question/answer format is a good starting point.
+     return {"text": f"Question: {example['query']}\n\nAnswer: {example['response']}"}
+
+
+ # --- Main Script ---
+ if __name__ == "__main__":
+     print(f"Loading original dataset '{SOURCE_DATASET}'...")
+     dataset = load_dataset(SOURCE_DATASET, split="train")
+
+     print("Formatting dataset...")
+     formatted_dataset = dataset.map(format_prompt)
+
+     # Optional: remove old columns to keep the dataset clean
+     formatted_dataset = formatted_dataset.remove_columns(["query", "response", "type"])
+
+     print(f"Pushing formatted dataset to '{NEW_DATASET_NAME}'...")
+     formatted_dataset.push_to_hub(NEW_DATASET_NAME)
+
+     print("\n✅ Success! Your formatted dataset is ready on the Hub.")
+     print("You can now update your train.sh script.")
merge.sh ADDED
@@ -0,0 +1,90 @@
+ #!/usr/bin/env bash
+ # HF Job to merge a LoRA adapter with its base model and push to the Hub.
+ set -euo pipefail
+
+ # --- Configuration (update if necessary) -------------------------------------
+ : "${HF_TOKEN:?HF_TOKEN must be exported with write permissions.}"
+ : "${HF_USERNAME:?HF_USERNAME must be exported before running this script.}"
+
+ # The base model you trained from
+ BASE_MODEL="${BASE_MODEL:-HuggingFaceTB/SmolLM3-3B-Base}"
+ # The repo containing your LoRA adapter files
+ ADAPTER_ID="${ADAPTER_ID:-${HF_USERNAME}/smollm3-sft-math-tuned}"
+ # The repo where the final MERGED model will be saved.
+ # This should be the same as your adapter repo to replace it.
+ HUB_MODEL_ID="${HUB_MODEL_ID:-${ADAPTER_ID}}"
+
+ # --- Job settings ------------------------------------------------------------
+ # Merging a 3B model can be memory intensive. a10g-large is a safe choice.
+ FLAVOR="${FLAVOR:-a10g-large}"
+ TIMEOUT="${TIMEOUT:-30m}"
+
+ echo "Submitting LoRA merge job:"
+ echo "  Base model     : ${BASE_MODEL}"
+ echo "  Adapter repo   : ${ADAPTER_ID}"
+ echo "  Final Hub repo : ${HUB_MODEL_ID}"
+ echo "  Hardware       : ${FLAVOR}"
+
+ # --- hf jobs command ---
+ hf jobs uv run \
+   --flavor "${FLAVOR}" \
+   --timeout "${TIMEOUT}" \
+   --secrets HF_TOKEN \
+   --with "torch" \
+   --with "peft" \
+   --with "transformers>=4.38.0" \
+   --with "accelerate" \
+   --with "huggingface_hub" \
+   --env "BASE_MODEL=${BASE_MODEL}" \
+   --env "ADAPTER_ID=${ADAPTER_ID}" \
+   --env "HUB_MODEL_ID=${HUB_MODEL_ID}" \
+   python -c "$(cat <<'EOF'
+ import os
+ import torch
+ from peft import PeftModel
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from huggingface_hub import HfApi, create_repo
+
+ # --- Get variables from environment ---
+ base_model_id = os.getenv("BASE_MODEL")
+ adapter_id = os.getenv("ADAPTER_ID")
+ hub_model_id = os.getenv("HUB_MODEL_ID")
+ output_dir = "merged-model"  # Local temp directory in the job runner
+
+ # --- Load Models and Merge ---
+ print(f"Loading base model: {base_model_id}")
+ base_model = AutoModelForCausalLM.from_pretrained(
+     base_model_id,
+     torch_dtype=torch.bfloat16,
+     device_map="auto",
+ )
+
+ print(f"Loading adapter: {adapter_id}")
+ model = PeftModel.from_pretrained(base_model, adapter_id)
+
+ print("Merging adapter weights...")
+ model = model.merge_and_unload()
+
+ tokenizer = AutoTokenizer.from_pretrained(base_model_id)
+
+ # --- Save and Upload ---
+ print(f"Saving merged model locally to '{output_dir}'")
+ model.save_pretrained(output_dir)
+ tokenizer.save_pretrained(output_dir)
+
+ print(f"Uploading merged model to the Hub at {hub_model_id}")
+ # Ensure the repo exists and then upload the contents of our folder
+ create_repo(hub_model_id, repo_type="model", exist_ok=True)
+ api = HfApi()
+ api.upload_folder(
+     folder_path=output_dir,
+     repo_id=hub_model_id,
+     repo_type="model",
+     commit_message="Upload merged LoRA model",
+ )
+
+ print("Job complete.")
+ EOF
+ )"
+
+ echo "Merge job queued. Check your model repo '${HUB_MODEL_ID}' for the merged files upon completion."
train.sh ADDED
@@ -0,0 +1,74 @@
+ #!/usr/bin/env bash
+ # Final project training launcher for SmolLM3 fine-tuning with hf jobs.
+ # Adapts the competition-ready workflow documented in final_plan.md.
+ set -euo pipefail
+
+ # --- Required configuration --------------------------------------------------
+ : "${HF_TOKEN:?HF_TOKEN must be exported with write permissions.}"
+ : "${HF_USERNAME:?HF_USERNAME must be exported before running this script.}"
+
+ MODEL_NAME="${MODEL_NAME:-smollm3-sft-math-tuned}"
+ BASE_MODEL="${BASE_MODEL:-HuggingFaceTB/SmolLM3-3B-Base}"
+ DATASET_NAME="${DATASET_NAME:-${HF_USERNAME}/MetaMathQA-formatted}"
+ HUB_MODEL_ID="${HUB_MODEL_ID:-${HF_USERNAME}/${MODEL_NAME}}"
+
+ # --- Training hyperparameters (override via env when needed) -----------------
+ LEARNING_RATE="${LEARNING_RATE:-5e-5}"
+ PER_DEVICE_BATCH="${PER_DEVICE_BATCH:-2}"
+ GRAD_ACCUM="${GRAD_ACCUM:-8}"
+ MAX_STEPS="${MAX_STEPS:-1000}"
+ EVAL_STEPS="${EVAL_STEPS:-200}"
+ SAVE_STEPS="${SAVE_STEPS:-200}"
+ SEED="${SEED:-42}"
+ OUTPUT_DIR="${OUTPUT_DIR:-smollm3-sft-jobs-math}"
+
+ # --- LoRA configuration ------------------------------------------------------
+ LORA_R="${LORA_R:-8}"
+ LORA_ALPHA="${LORA_ALPHA:-32}"
+ LORA_DROPOUT="${LORA_DROPOUT:-0.05}"
+ IFS=' ' read -r -a LORA_TARGET_MODULES_ARR <<< "${LORA_TARGET_MODULES:-q_proj k_proj v_proj o_proj gate_proj up_proj down_proj}"
+
+ # --- Job settings ------------------------------------------------------------
+ FLAVOR="${FLAVOR:-a10g-large}"
+ TIMEOUT="${TIMEOUT:-2h}"
+
+ if [[ "${DEBUG:-0}" != "0" ]]; then
+   echo "[DEBUG] Running shortened smoke test."
+   MAX_STEPS="${DEBUG_MAX_STEPS:-20}"
+   FLAVOR="${DEBUG_FLAVOR:-t4-small}"
+   TIMEOUT="${DEBUG_TIMEOUT:-20m}"
+ fi
+
+ echo "Submitting training job:"
+ echo "  Hub model id : ${HUB_MODEL_ID}"
+ echo "  Base model   : ${BASE_MODEL}"
+ echo "  Dataset      : ${DATASET_NAME}"
+ echo "  Flavor       : ${FLAVOR}"
+ echo "  Max steps    : ${MAX_STEPS}"
+
+ hf jobs uv run \
+   --flavor "${FLAVOR}" \
+   --timeout "${TIMEOUT}" \
+   --secrets HF_TOKEN \
+   "https://raw.githubusercontent.com/huggingface/trl/main/trl/scripts/sft.py" \
+   --model_name_or_path "${BASE_MODEL}" \
+   --dataset_name "${DATASET_NAME}" \
+   --learning_rate "${LEARNING_RATE}" \
+   --per_device_train_batch_size "${PER_DEVICE_BATCH}" \
+   --gradient_accumulation_steps "${GRAD_ACCUM}" \
+   --max_steps "${MAX_STEPS}" \
+   --eval_steps "${EVAL_STEPS}" \
+   --save_steps "${SAVE_STEPS}" \
+   --seed "${SEED}" \
+   --output_dir "${OUTPUT_DIR}" \
+   --hub_model_id "${HUB_MODEL_ID}" \
+   --report_to none \
+   --use_peft \
+   --lora_r "${LORA_R}" \
+   --lora_alpha "${LORA_ALPHA}" \
+   --lora_dropout "${LORA_DROPOUT}" \
+   --lora_target_modules "${LORA_TARGET_MODULES_ARR[@]}" \
+   --bf16 True \
+   --push_to_hub
+
+ echo "Training job queued. Monitor progress on the Hugging Face Hub Jobs dashboard."