Update app.py
app.py
CHANGED
```diff
@@ -58,28 +58,27 @@ def get_quantization_recipe(method, model_architecture):
     raise ValueError(f"Unsupported quantization method: {method}")
 
 # --------------------------------------------------------------------------------
-# CHANGE #1:
+# CHANGE #1: Reverted to the correct function signature that accepts the OAuthToken
 # --------------------------------------------------------------------------------
-def compress_and_upload(model_id: str, quant_method: str):
+def compress_and_upload(model_id: str, quant_method: str, oauth_token: gr.OAuthToken | None):
     """
     Compresses a model using llm-compressor and uploads it to a new HF repo.
     """
     if not model_id:
         raise gr.Error("Please select a model from the search bar.")
+
+    if oauth_token is None:
+        raise gr.Error("Authentication error. Please log in to continue.")
+
+    token = oauth_token.token
 
-    # Check for login status by calling whoami(). It will raise an error if not logged in.
     try:
-        user_info = whoami()
-        if user_info is None:
-            raise gr.Error("Authentication error. Please log in to continue.")
-        username = user_info["name"]
-    except Exception as e:
-        raise gr.Error(f"Authentication error. Please log in to continue. Details: {e}")
+        # Use the provided token for all hub interactions
+        username = whoami(token=token)["name"]
 
-    try:
         # --- 1. Load Model and Tokenizer ---
-        model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map=None)
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map=None, token=token)
+        tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
 
         output_dir = f"{model_id.split('/')[-1]}-{quant_method}"
 
```
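For context, the restored signature leans on Gradio's built-in Hugging Face OAuth support: in a Space with `hf_oauth: true` in its README metadata, a handler parameter annotated `gr.OAuthToken | None` is filled in by Gradio from the visitor's login session, and stays `None` until they sign in through a `gr.LoginButton`. A minimal sketch of the same guard, with a hypothetical `greet` handler standing in for `compress_and_upload`:

```python
import gradio as gr
from huggingface_hub import whoami

def greet(oauth_token: gr.OAuthToken | None) -> str:
    # Gradio injects this parameter from the login session; it is None
    # until the visitor has authenticated via the LoginButton below.
    if oauth_token is None:
        raise gr.Error("Authentication error. Please log in to continue.")
    token = oauth_token.token
    return f"Hello, {whoami(token=token)['name']}!"

with gr.Blocks() as demo:
    gr.LoginButton()
    out = gr.Textbox(label="Result")
    gr.Button("Greet").click(fn=greet, inputs=None, outputs=out)

demo.launch()
```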
```diff
@@ -100,8 +99,7 @@ def compress_and_upload(model_id: str, quant_method: str):
         )
 
         # --- 4. Create Repo and Upload ---
-
-        api = HfApi()
+        api = HfApi(token=token)
         repo_id = f"{username}/{output_dir}"
 
         repo_url = api.create_repo(repo_id=repo_id, exist_ok=True)
```
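The repo-creation step above follows the stock `huggingface_hub` pattern. A self-contained sketch of the same calls; the `upload_folder` line is an assumption about the upload step, which falls outside the lines shown in this hunk:

```python
from huggingface_hub import HfApi

def push_outputs(output_dir: str, username: str, token: str) -> str:
    api = HfApi(token=token)  # every hub call now authenticates as the logged-in user
    repo_id = f"{username}/{output_dir}"
    # exist_ok=True keeps repeated runs from failing on an existing repo
    repo_url = api.create_repo(repo_id=repo_id, exist_ok=True)
    # Assumed upload step: push the locally saved model folder to the new repo
    api.upload_folder(folder_path=output_dir, repo_id=repo_id)
    return str(repo_url)
```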
```diff
@@ -134,7 +132,7 @@ This conversion was performed by the `llm-compressor-my-repo` Hugging Face Space
 For more details on the recipe used, refer to the `recipe.yaml` file in this repository.
 """
         card = ModelCard(card_content)
-        card.push_to_hub(repo_id)
+        card.push_to_hub(repo_id, token=token)
 
         return f'<h1>✅ Success!</h1><br/>Model compressed and saved to your new repo: <a href="{repo_url}" target="_blank" style="text-decoration:underline">{repo_id}</a>'
 
```
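The fixed line both closes the call and passes the user's token explicitly. A minimal sketch of the card step in isolation; the card text here is a placeholder, not the Space's real template:

```python
from huggingface_hub import ModelCard

def push_card(repo_id: str, token: str) -> None:
    card_content = (
        "# Compressed model\n\n"
        "For more details on the recipe used, refer to the "
        "`recipe.yaml` file in this repository.\n"
    )
    # push_to_hub writes the card as README.md in the target repo
    ModelCard(card_content).push_to_hub(repo_id, token=token)
```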
```diff
@@ -168,20 +166,14 @@ with gr.Blocks(css="footer {display: none !important;}") as demo:
     output_html = gr.HTML(label="Result")
 
     # --------------------------------------------------------------------------------
-    # CHANGE #2: The
+    # CHANGE #2: The `login_button` is correctly passed as an input.
     # --------------------------------------------------------------------------------
     compress_button.click(
         fn=compress_and_upload,
-        inputs=[model_input, quant_method_dropdown],
+        inputs=[model_input, quant_method_dropdown, login_button],
         outputs=output_html
     )
-
-    gr.Examples(
-        examples=[
-            ["mistralai/Mistral-7B-Instruct-v0.2", "AWQ"],
-            ["meta-llama/Llama-2-7b-chat-hf", "GPTQ"],
-        ],
-        inputs=[model_input, quant_method_dropdown],
-    )
+
+    # CHANGE #3: Removed the gr.Examples component to prevent the TypeError.
 
 demo.queue(max_size=5).launch()
```
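For reference, `gr.Examples` itself works when instantiated as a single keyword-argument call; whether the removed block's TypeError came from its own wiring or from elsewhere in the app is not visible in this diff. A standalone sketch of the component's documented usage, reusing the example rows from the deleted lines (the surrounding layout is illustrative, not the Space's actual UI):

```python
import gradio as gr

with gr.Blocks() as demo:
    model_input = gr.Textbox(label="Model ID")
    quant_method_dropdown = gr.Dropdown(choices=["AWQ", "GPTQ"], label="Quantization method")
    # Clicking an example row populates the two inputs above
    gr.Examples(
        examples=[
            ["mistralai/Mistral-7B-Instruct-v0.2", "AWQ"],
            ["meta-llama/Llama-2-7b-chat-hf", "GPTQ"],
        ],
        inputs=[model_input, quant_method_dropdown],
    )

demo.launch()
```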