Commit a17d990 · n00b001 committed · 1 Parent(s): ba1df97

Fix: Handle Qwen2.5-VL model loading with AutoModelForCausalLM


Addresses the "Unrecognized configuration class" error when loading Qwen2.5-VL based models
by conditionally attempting to load with `Qwen2_5_VLForConditionalGeneration` if `AutoModelForCausalLM`
fails and the model ID indicates a Qwen model.

Also fixes linting issues in `app.py` and `tests/test_app.py` related to unused variables
and imports.
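
In isolation, the fallback pattern looks like the sketch below. This is a minimal illustration rather than the exact `app.py` code: the model ID is an arbitrary example, and it assumes a `transformers` version recent enough to ship `Qwen2_5_VLForConditionalGeneration` (v4.49+).

    from transformers import AutoModelForCausalLM, Qwen2_5_VLForConditionalGeneration

    model_id = "Qwen/Qwen2.5-VL-7B-Instruct"  # example ID; any Qwen2.5-VL checkpoint

    try:
        # AutoModelForCausalLM raises ValueError("Unrecognized configuration class ...")
        # when the checkpoint's config (here Qwen2_5_VLConfig) has no causal-LM mapping.
        model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
    except ValueError as e:
        if "Unrecognized configuration class" in str(e) and "qwen" in model_id.lower():
            # Fall back to the dedicated Qwen2.5-VL class.
            model = Qwen2_5_VLForConditionalGeneration.from_pretrained(model_id, torch_dtype="auto")
        else:
            raise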

Files changed (2)
  1. app.py +15 -6
  2. tests/test_app.py +3 -4
app.py CHANGED
@@ -5,7 +5,7 @@ from gradio_huggingfacehub_search import HuggingfaceHubSearch
 from llmcompressor import oneshot
 from llmcompressor.modifiers.quantization import QuantizationModifier, GPTQModifier
 from llmcompressor.modifiers.awq import AWQModifier, AWQMapping
-from transformers import AutoModelForCausalLM
+from transformers import AutoModelForCausalLM, Qwen2_5_VLForConditionalGeneration
 
 # --- Helper Functions ---
 
@@ -96,9 +96,18 @@ def compress_and_upload(
     username = whoami(token=token)["name"]
 
     # --- 1. Load Model and Tokenizer ---
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id, torch_dtype="auto", device_map=None, token=token, trust_remote_code=True
-    )
+    try:
+        model = AutoModelForCausalLM.from_pretrained(
+            model_id, torch_dtype="auto", device_map=None, token=token, trust_remote_code=True
+        )
+    except ValueError as e:
+        if "Unrecognized configuration class" in str(e) and "qwen" in model_id.lower():
+            print(f"AutoModelForCausalLM failed, trying Qwen2_5_VLForConditionalGeneration for {model_id}")
+            model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+                model_id, torch_dtype="auto", device_map=None, token=token, trust_remote_code=True
+            )
+        else:
+            raise
 
     output_dir = f"{model_id.split('/')[-1]}-{quant_method}"
 
@@ -173,8 +182,8 @@ def build_gradio_app():
         gr.Markdown(
             "Log in, choose a model, select a quantization method, and this Space will create a new compressed model repository on your Hugging Face profile."
         )
-        with gr.Row():
-            login_button = gr.LoginButton(min_width=250)
+
+
 
         gr.Markdown("### 1. Select a Model from the Hugging Face Hub")
         model_input = HuggingfaceHubSearch(
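
A design note on the fix above: matching the exception message string ties the fallback to transformers' current error wording, and the `"qwen" in model_id.lower()` check is a heuristic. An alternative (not what this commit does) is to resolve the config first and dispatch on its `model_type`. A minimal sketch of that approach:

    from transformers import AutoConfig, AutoModelForCausalLM, Qwen2_5_VLForConditionalGeneration

    def pick_loader_class(model_id, token=None):
        # Resolve the checkpoint's config and choose the loader class up front,
        # instead of parsing the ValueError message after the fact.
        config = AutoConfig.from_pretrained(model_id, token=token, trust_remote_code=True)
        if config.model_type == "qwen2_5_vl":
            return Qwen2_5_VLForConditionalGeneration
        return AutoModelForCausalLM
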
tests/test_app.py CHANGED
@@ -2,11 +2,10 @@ import pytest
 from unittest.mock import MagicMock, patch
 from app import get_quantization_recipe, compress_and_upload
 import gradio as gr
-from transformers import AutoModelForCausalLM
-from huggingface_hub import HfApi, ModelCard, whoami
-from llmcompressor import oneshot
+
+
 from llmcompressor.modifiers.quantization import QuantizationModifier, GPTQModifier
-from llmcompressor.modifiers.awq import AWQModifier, AWQMapping
+from llmcompressor.modifiers.awq import AWQModifier
 
 # Mock external dependencies for compress_and_upload
 @pytest.fixture
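
The commit removes unused test imports but does not add a test for the new fallback path. A sketch of one follows, built around a hypothetical `load_model` helper that mirrors the inline try/except in `compress_and_upload` (in `app.py` itself the logic is not factored out like this):

    from unittest.mock import MagicMock, patch
    from transformers import AutoModelForCausalLM, Qwen2_5_VLForConditionalGeneration

    def load_model(model_id, token=None):
        # Hypothetical helper duplicating the inline logic in compress_and_upload.
        try:
            return AutoModelForCausalLM.from_pretrained(
                model_id, torch_dtype="auto", device_map=None, token=token, trust_remote_code=True
            )
        except ValueError as e:
            if "Unrecognized configuration class" in str(e) and "qwen" in model_id.lower():
                return Qwen2_5_VLForConditionalGeneration.from_pretrained(
                    model_id, torch_dtype="auto", device_map=None, token=token, trust_remote_code=True
                )
            raise

    def test_falls_back_to_qwen_vl_class():
        # Force the auto class to fail the way an unmapped config does,
        # then assert the Qwen2.5-VL class is used instead.
        with patch.object(AutoModelForCausalLM, "from_pretrained",
                          side_effect=ValueError("Unrecognized configuration class")), \
             patch.object(Qwen2_5_VLForConditionalGeneration, "from_pretrained",
                          return_value=MagicMock()) as qwen_load:
            load_model("Qwen/Qwen2.5-VL-7B-Instruct")
            qwen_load.assert_called_once()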