#!/usr/bin/env bash
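#
# Submit a Hugging Face Job that merges a LoRA adapter into its base model
# and uploads the merged weights to the Hub.
#
# Usage (illustrative; the script name here is a placeholder):
#
#   export HF_TOKEN=hf_...            # needs write access
#   export HF_USERNAME=your-username
#   ./submit-merge-job.sh
#
# Any of the variables defined below (BASE_MODEL, ADAPTER_ID, HUB_MODEL_ID,
# FLAVOR, TIMEOUT) can be overridden from the environment.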

set -euo pipefail
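
# Required environment. The ":" no-op combined with ${VAR:?message} aborts the
# script with the given message if the variable is unset or empty.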
: "${HF_TOKEN:?HF_TOKEN must be exported with write permissions.}"
: "${HF_USERNAME:?HF_USERNAME must be exported before running this script.}"
BASE_MODEL="${BASE_MODEL:-HuggingFaceTB/SmolLM3-3B-Base}"
ADAPTER_ID="${ADAPTER_ID:-${HF_USERNAME}/smollm3-sft-math-tuned}"
HUB_MODEL_ID="${HUB_MODEL_ID:-${ADAPTER_ID}}"
FLAVOR="${FLAVOR:-a10g-large}"
TIMEOUT="${TIMEOUT:-30m}"

echo "Submitting LoRA merge job:"
echo "  Base model     : ${BASE_MODEL}"
echo "  Adapter repo   : ${ADAPTER_ID}"
echo "  Final Hub repo : ${HUB_MODEL_ID}"
echo "  Hardware       : ${FLAVOR}"
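
# Submit the job. --secrets forwards HF_TOKEN into the job's environment so
# the upload can authenticate; --with declares the Python dependencies the
# inline script needs; the quoted heredoc ('EOF') keeps this shell from
# expanding anything inside the Python source before submission.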
hf jobs uv run \
  --flavor "${FLAVOR}" \
  --timeout "${TIMEOUT}" \
  --secrets HF_TOKEN \
  --with "torch" \
  --with "peft" \
  --with "transformers>=4.38.0" \
  --with "accelerate" \
  --with "huggingface_hub" \
  --env "BASE_MODEL=${BASE_MODEL}" \
  --env "ADAPTER_ID=${ADAPTER_ID}" \
  --env "HUB_MODEL_ID=${HUB_MODEL_ID}" \
  python -c "$(cat <<'EOF'
import os

import torch
from huggingface_hub import HfApi, create_repo
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# --- Get variables from environment (fail fast with KeyError if missing) ---
base_model_id = os.environ["BASE_MODEL"]
adapter_id = os.environ["ADAPTER_ID"]
hub_model_id = os.environ["HUB_MODEL_ID"]
output_dir = "merged-model"  # Local temp directory in the job runner

# --- Load Models and Merge ---
print(f"Loading base model: {base_model_id}")
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

print(f"Loading adapter: {adapter_id}")
model = PeftModel.from_pretrained(base_model, adapter_id)
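
# merge_and_unload() folds the LoRA deltas into the base weights and returns a
# plain transformers model, so consumers of the merged repo do not need peft.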
print("Merging adapter weights...")
model = model.merge_and_unload()
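
# Load the tokenizer from the base repo so the merged repo is self-contained.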
tokenizer = AutoTokenizer.from_pretrained(base_model_id)

# --- Save and Upload ---
print(f"Saving merged model locally to '{output_dir}'")
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
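
# HfApi() picks up the HF_TOKEN forwarded via --secrets, so no explicit login
# is needed; create_repo(..., exist_ok=True) makes the upload idempotent.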
print(f"Uploading merged model to the Hub at {hub_model_id}")
# Ensure the repo exists and then upload the contents of our folder
create_repo(hub_model_id, repo_type="model", exist_ok=True)
api = HfApi()
api.upload_folder(
    folder_path=output_dir,
    repo_id=hub_model_id,
    repo_type="model",
    commit_message="Upload merged LoRA model",
)

print("Job complete.")
EOF
)"

echo "Merge job queued. Check your model repo '${HUB_MODEL_ID}' for the merged files upon completion."