Spaces:
Running
Running
File size: 3,889 Bytes
d95ff5b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
#!/usr/bin/env python
"""
Final verification script: confirms that AWQ is rejected for Qwen2.5-VL models
while the other quantization methods (and AWQ on Llama) still produce a recipe.
"""
from app import get_quantization_recipe
def test_qwen2_5_vl_compatible_methods():
    """
    Check that each non-AWQ quantization method yields a recipe for Qwen2.5-VL.

    Calls ``get_quantization_recipe`` once per method with the
    ``Qwen2_5_VLForConditionalGeneration`` architecture name and prints one
    status line per method. For a successful call, the first recipe entry's
    ``sequential_targets`` (when present and non-empty) and ``ignore``
    attributes are printed as well.

    Returns:
        bool: True if no method raised an exception, False otherwise.
    """
    print("Testing quantization methods compatible with Qwen2.5-VL models...")
    architecture = "Qwen2_5_VLForConditionalGeneration"
    # Methods that should work
    compatible_methods = ["GPTQ", "W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8", "FP8"]
    all_passed = True
    for method in compatible_methods:
        # Broad catch is deliberate: any failure (including a recipe that is
        # empty or lacks the expected attributes) is reported for this method
        # and the remaining methods are still exercised.
        try:
            recipe = get_quantization_recipe(method, architecture)
            print(f"✓ {method} works with {architecture}")
            # NOTE(review): assumes the recipe is an indexable sequence whose
            # first entry exposes `ignore` — confirm against app.py.
            first_entry = recipe[0]
            sequential_targets = getattr(first_entry, "sequential_targets", None)
            if sequential_targets:
                print(f" - Uses sequential onloading: {sequential_targets}")
            print(f" - Ignore patterns: {first_entry.ignore}")
        except Exception as e:
            print(f"✗ {method} failed: {e}")
            all_passed = False
    return all_passed
def test_awq_incompatibility():
    """
    Check that requesting an AWQ recipe for Qwen2.5-VL is rejected.

    The test passes only when ``get_quantization_recipe`` raises a
    ``ValueError`` whose message contains both ``"not compatible"`` and
    ``"rotary positional embeddings"``.

    Returns:
        bool: True for the expected ValueError; False if the call succeeds,
        raises a ValueError with a different message, or raises any other
        exception type.
    """
    print("\nTesting AWQ incompatibility with Qwen2.5-VL models...")
    try:
        # Only the call itself can raise; its result is irrelevant here.
        get_quantization_recipe("AWQ", "Qwen2_5_VLForConditionalGeneration")
    except ValueError as e:
        message = str(e)
        if "not compatible" in message and "rotary positional embeddings" in message:
            print(f"✓ AWQ properly fails for Qwen2.5-VL: {e}")
            return True
        print(f"✗ AWQ failed but with wrong error: {e}")
        return False
    except Exception as e:
        # Report instead of propagating so one unexpected exception type does
        # not abort the remaining checks; the other tests in this file use the
        # same report-and-return-False style.
        print(f"✗ AWQ failed with unexpected {type(e).__name__}: {e}")
        return False
    print("✗ AWQ unexpectedly succeeded for Qwen2.5-VL (should have failed)")
    return False
def test_awq_still_works_for_llama():
    """
    Check that an AWQ recipe can still be built for ``LlamaForCausalLM``.

    On success the ``ignore`` attribute of the first recipe entry is printed.

    Returns:
        bool: True if the recipe was built and inspected without an exception,
        False otherwise.
    """
    print("\nTesting AWQ still works for Llama models...")
    # Broad catch is deliberate: any failure is reported as a failed check
    # rather than aborting the script.
    try:
        recipe = get_quantization_recipe("AWQ", "LlamaForCausalLM")
        print("✓ AWQ still works for LlamaForCausalLM")
        # NOTE(review): assumes the first recipe entry exposes `ignore` —
        # confirm against app.py.
        print(f" - Ignore patterns: {recipe[0].ignore}")
        return True
    except Exception as e:
        print(f"✗ AWQ failed for LlamaForCausalLM: {e}")
        return False
def test_target_model():
    """
    Count how many non-AWQ methods produce a recipe for the target architecture.

    Calls ``get_quantization_recipe`` for each method with
    ``Qwen2_5_VLForConditionalGeneration``; a call that raises is printed and
    not counted. A summary line with the success count is printed at the end.

    Returns:
        bool: True if every method succeeded, False otherwise.
    """
    architecture = "Qwen2_5_VLForConditionalGeneration"
    print(f"\nTesting with target model architecture: {architecture}")
    # All methods except AWQ should work
    methods = ["GPTQ", "W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8", "FP8"]
    success_count = 0
    for method in methods:
        try:
            # Only whether the call raises matters; the recipe is not inspected.
            get_quantization_recipe(method, architecture)
        except Exception as e:
            print(f"Method {method} failed: {e}")
        else:
            success_count += 1
    print(f"✓ {success_count}/{len(methods)} methods work for target model")
    return success_count == len(methods)
# Script entry point: run all four checks, print a summary, and report the
# overall outcome through the process exit status.
if __name__ == "__main__":
    print("Final verification after fixing AWQ incompatibility issue\n")
    # Build the list eagerly so every check runs and prints its own report,
    # even when an earlier one has already failed.
    results = [
        test_qwen2_5_vl_compatible_methods(),
        test_awq_incompatibility(),
        test_awq_still_works_for_llama(),
        test_target_model(),
    ]
    all_passed = all(results)
    separator = "=" * 60
    print(f"\n{separator}")
    if all_passed:
        print("✅ ALL TESTS PASSED")
        print("\nSOLUTION SUMMARY:")
        print("• AWQ is now properly blocked for Qwen2.5-VL models due to incompatibility")
        print("• All other methods (GPTQ, W4A16, W8A16, W8A8_INT8, W8A8_FP8, FP8) work for Qwen2.5-VL")
        print("• AWQ still works for Llama models as expected")
        print("• Sequential onloading is preserved for memory efficiency")
        print("• Users will get clear error messages when trying incompatible methods")
    else:
        print("❌ SOME TESTS FAILED")
    print(separator)
    if not all_passed:
        # Non-zero exit status lets shells and CI detect a failed verification.
        raise SystemExit(1)