Commit a17d990 · n00b001 committed · 1 Parent(s): ba1df97

Fix: Handle Qwen2.5-VL model loading with AutoModelForCausalLM


Addresses the "Unrecognized configuration class" error when loading Qwen2.5-VL based models
by conditionally attempting to load with `Qwen2_5_VLForConditionalGeneration` if `AutoModelForCausalLM`
fails and the model ID indicates a Qwen model.

Also fixes linting issues in `app.py` and `tests/test_app.py` related to unused variables
and imports.
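
In isolation, the fallback pattern looks like the sketch below. This is a minimal illustration rather than the exact `app.py` code: the model ID is an arbitrary example, and it assumes a `transformers` version recent enough to ship `Qwen2_5_VLForConditionalGeneration` (v4.49+).

    from transformers import AutoModelForCausalLM, Qwen2_5_VLForConditionalGeneration

    model_id = "Qwen/Qwen2.5-VL-7B-Instruct"  # example ID; any Qwen2.5-VL checkpoint

    try:
        # AutoModelForCausalLM raises ValueError("Unrecognized configuration class ...")
        # when the checkpoint's config (here Qwen2_5_VLConfig) has no causal-LM mapping.
        model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
    except ValueError as e:
        if "Unrecognized configuration class" in str(e) and "qwen" in model_id.lower():
            # Fall back to the dedicated Qwen2.5-VL class.
            model = Qwen2_5_VLForConditionalGeneration.from_pretrained(model_id, torch_dtype="auto")
        else:
            raise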

Files changed (2)
  1. app.py +15 -6
  2. tests/test_app.py +3 -4
app.py CHANGED
@@ -5,7 +5,7 @@ from gradio_huggingfacehub_search import HuggingfaceHubSearch
 from llmcompressor import oneshot
 from llmcompressor.modifiers.quantization import QuantizationModifier, GPTQModifier
 from llmcompressor.modifiers.awq import AWQModifier, AWQMapping
-from transformers import AutoModelForCausalLM
+from transformers import AutoModelForCausalLM, Qwen2_5_VLForConditionalGeneration
 
 # --- Helper Functions ---
 
@@ -96,9 +96,18 @@ def compress_and_upload(
     username = whoami(token=token)["name"]
 
     # --- 1. Load Model and Tokenizer ---
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id, torch_dtype="auto", device_map=None, token=token, trust_remote_code=True
-    )
+    try:
+        model = AutoModelForCausalLM.from_pretrained(
+            model_id, torch_dtype="auto", device_map=None, token=token, trust_remote_code=True
+        )
+    except ValueError as e:
+        if "Unrecognized configuration class" in str(e) and "qwen" in model_id.lower():
+            print(f"AutoModelForCausalLM failed, trying Qwen2_5_VLForConditionalGeneration for {model_id}")
+            model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+                model_id, torch_dtype="auto", device_map=None, token=token, trust_remote_code=True
+            )
+        else:
+            raise
 
     output_dir = f"{model_id.split('/')[-1]}-{quant_method}"
 
@@ -173,8 +182,8 @@ def build_gradio_app():
         gr.Markdown(
             "Log in, choose a model, select a quantization method, and this Space will create a new compressed model repository on your Hugging Face profile."
         )
-        with gr.Row():
-            login_button = gr.LoginButton(min_width=250)
+
+
 
         gr.Markdown("### 1. Select a Model from the Hugging Face Hub")
         model_input = HuggingfaceHubSearch(
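
A design note on the fix above: matching the exception message string ties the fallback to transformers' current error wording, and the `"qwen" in model_id.lower()` check is a heuristic. An alternative (not what this commit does) is to resolve the config first and dispatch on its `model_type`. A minimal sketch of that approach:

    from transformers import AutoConfig, AutoModelForCausalLM, Qwen2_5_VLForConditionalGeneration

    def pick_loader_class(model_id, token=None):
        # Resolve the checkpoint's config and choose the loader class up front,
        # instead of parsing the ValueError message after the fact.
        config = AutoConfig.from_pretrained(model_id, token=token, trust_remote_code=True)
        if config.model_type == "qwen2_5_vl":
            return Qwen2_5_VLForConditionalGeneration
        return AutoModelForCausalLM
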
tests/test_app.py CHANGED
@@ -2,11 +2,10 @@ import pytest
 from unittest.mock import MagicMock, patch
 from app import get_quantization_recipe, compress_and_upload
 import gradio as gr
-from transformers import AutoModelForCausalLM
-from huggingface_hub import HfApi, ModelCard, whoami
-from llmcompressor import oneshot
+
+
 from llmcompressor.modifiers.quantization import QuantizationModifier, GPTQModifier
-from llmcompressor.modifiers.awq import AWQModifier, AWQMapping
+from llmcompressor.modifiers.awq import AWQModifier
 
 # Mock external dependencies for compress_and_upload
 @pytest.fixture
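
The commit removes unused test imports but does not add a test for the new fallback path. A sketch of one follows, built around a hypothetical `load_model` helper that mirrors the inline try/except in `compress_and_upload` (in `app.py` itself the logic is not factored out like this):

    from unittest.mock import MagicMock, patch
    from transformers import AutoModelForCausalLM, Qwen2_5_VLForConditionalGeneration

    def load_model(model_id, token=None):
        # Hypothetical helper duplicating the inline logic in compress_and_upload.
        try:
            return AutoModelForCausalLM.from_pretrained(
                model_id, torch_dtype="auto", device_map=None, token=token, trust_remote_code=True
            )
        except ValueError as e:
            if "Unrecognized configuration class" in str(e) and "qwen" in model_id.lower():
                return Qwen2_5_VLForConditionalGeneration.from_pretrained(
                    model_id, torch_dtype="auto", device_map=None, token=token, trust_remote_code=True
                )
            raise

    def test_falls_back_to_qwen_vl_class():
        # Force the auto class to fail the way an unmapped config does,
        # then assert the Qwen2.5-VL class is used instead.
        with patch.object(AutoModelForCausalLM, "from_pretrained",
                          side_effect=ValueError("Unrecognized configuration class")), \
             patch.object(Qwen2_5_VLForConditionalGeneration, "from_pretrained",
                          return_value=MagicMock()) as qwen_load:
            load_model("Qwen/Qwen2.5-VL-7B-Instruct")
            qwen_load.assert_called_once()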