#!/usr/bin/env bash
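#
# Submit a Hugging Face Job that merges a LoRA adapter into its base model
# and uploads the merged weights to the Hub.
#
# Usage (illustrative; the script name here is a placeholder):
#
#   export HF_TOKEN=hf_...            # needs write access
#   export HF_USERNAME=your-username
#   ./submit-merge-job.sh
#
# Any of the variables defined below (BASE_MODEL, ADAPTER_ID, HUB_MODEL_ID,
# FLAVOR, TIMEOUT) can be overridden from the environment.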

set -euo pipefail
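
# Required environment. The ":" no-op combined with ${VAR:?message} aborts the
# script with the given message if the variable is unset or empty.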
: "${HF_TOKEN:?HF_TOKEN must be exported with write permissions.}"
: "${HF_USERNAME:?HF_USERNAME must be exported before running this script.}"
BASE_MODEL="${BASE_MODEL:-HuggingFaceTB/SmolLM3-3B-Base}"
ADAPTER_ID="${ADAPTER_ID:-${HF_USERNAME}/smollm3-sft-math-tuned}"
HUB_MODEL_ID="${HUB_MODEL_ID:-${ADAPTER_ID}}"
FLAVOR="${FLAVOR:-a10g-large}"
TIMEOUT="${TIMEOUT:-30m}"

echo "Submitting LoRA merge job:"
echo "  Base model     : ${BASE_MODEL}"
echo "  Adapter repo   : ${ADAPTER_ID}"
echo "  Final Hub repo : ${HUB_MODEL_ID}"
echo "  Hardware       : ${FLAVOR}"
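
# Submit the job. --secrets forwards HF_TOKEN into the job's environment so
# the upload can authenticate; --with declares the Python dependencies the
# inline script needs; the quoted heredoc ('EOF') keeps this shell from
# expanding anything inside the Python source before submission.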
hf jobs uv run \
  --flavor "${FLAVOR}" \
  --timeout "${TIMEOUT}" \
  --secrets HF_TOKEN \
  --with "torch" \
  --with "peft" \
  --with "transformers>=4.38.0" \
  --with "accelerate" \
  --with "huggingface_hub" \
  --env "BASE_MODEL=${BASE_MODEL}" \
  --env "ADAPTER_ID=${ADAPTER_ID}" \
  --env "HUB_MODEL_ID=${HUB_MODEL_ID}" \
  python -c "$(cat <<'EOF'
import os

import torch
from huggingface_hub import HfApi, create_repo
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# --- Get variables from environment (fail fast with KeyError if missing) ---
base_model_id = os.environ["BASE_MODEL"]
adapter_id = os.environ["ADAPTER_ID"]
hub_model_id = os.environ["HUB_MODEL_ID"]
output_dir = "merged-model"  # Local temp directory in the job runner

# --- Load Models and Merge ---
print(f"Loading base model: {base_model_id}")
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

print(f"Loading adapter: {adapter_id}")
model = PeftModel.from_pretrained(base_model, adapter_id)
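
# merge_and_unload() folds the LoRA deltas into the base weights and returns a
# plain transformers model, so consumers of the merged repo do not need peft.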
print("Merging adapter weights...")
model = model.merge_and_unload()
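
# Load the tokenizer from the base repo so the merged repo is self-contained.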
tokenizer = AutoTokenizer.from_pretrained(base_model_id)

# --- Save and Upload ---
print(f"Saving merged model locally to '{output_dir}'")
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
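
# HfApi() picks up the HF_TOKEN forwarded via --secrets, so no explicit login
# is needed; create_repo(..., exist_ok=True) makes the upload idempotent.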
print(f"Uploading merged model to the Hub at {hub_model_id}")
# Ensure the repo exists and then upload the contents of our folder
create_repo(hub_model_id, repo_type="model", exist_ok=True)
api = HfApi()
api.upload_folder(
    folder_path=output_dir,
    repo_id=hub_model_id,
    repo_type="model",
    commit_message="Upload merged LoRA model",
)

print("Job complete.")
EOF
)"

echo "Merge job queued. Check your model repo '${HUB_MODEL_ID}' for the merged files upon completion."