File size: 3,889 Bytes
d95ff5b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env python
"""
Final verification script: checks that AWQ is rejected for Qwen2.5-VL models
while the other quantization methods (and AWQ on Llama models) still work.
"""

from app import get_quantization_recipe

def test_qwen2_5_vl_compatible_methods():
    """
    Check that every supported method yields a recipe for Qwen2.5-VL.

    Calls ``get_quantization_recipe`` once per method with the
    ``Qwen2_5_VLForConditionalGeneration`` architecture and prints a
    pass/fail line for each. On success it also prints the first recipe
    entry's ``sequential_targets`` (when present and non-empty) and its
    ``ignore`` patterns.

    Returns:
        bool: True when no method raised an exception, False otherwise.
    """
    print("Testing quantization methods compatible with Qwen2.5-VL models...")

    architecture = "Qwen2_5_VLForConditionalGeneration"

    # Methods that should work
    compatible_methods = ["GPTQ", "W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8", "FP8"]

    all_passed = True
    for method in compatible_methods:
        # The whole inspection stays inside the try: a recipe that cannot be
        # indexed or lacks ``ignore`` counts as a failure for this method.
        try:
            recipe = get_quantization_recipe(method, architecture)
            print(f"✓ {method} works with {architecture}")
            first_entry = recipe[0]
            sequential_targets = getattr(first_entry, "sequential_targets", None)
            if sequential_targets:
                print(f"  - Uses sequential onloading: {sequential_targets}")
            print(f"  - Ignore patterns: {first_entry.ignore}")
        except Exception as e:
            print(f"✗ {method} failed: {e}")
            all_passed = False

    return all_passed

def test_awq_incompatibility():
    """
    Check that requesting AWQ for Qwen2.5-VL is rejected with a clear error.

    The call to ``get_quantization_recipe`` is expected to raise a
    ``ValueError`` whose message contains both "not compatible" and
    "rotary positional embeddings".

    Returns:
        bool: True only when that ``ValueError`` is raised. False when the
        call succeeds, when the message does not match, or when a different
        exception type is raised.
    """
    print("\nTesting AWQ incompatibility with Qwen2.5-VL models...")

    try:
        get_quantization_recipe("AWQ", "Qwen2_5_VLForConditionalGeneration")
    except ValueError as e:
        message = str(e)
        if "not compatible" in message and "rotary positional embeddings" in message:
            print(f"✓ AWQ properly fails for Qwen2.5-VL: {e}")
            return True
        print(f"✗ AWQ failed but with wrong error: {e}")
        return False
    except Exception as e:
        # Any other exception type is also the wrong kind of failure. Report
        # it, as the sibling checks do, instead of aborting the whole script.
        print(f"✗ AWQ failed but with wrong error: {type(e).__name__}: {e}")
        return False

    print("✗ AWQ unexpectedly succeeded for Qwen2.5-VL (should have failed)")
    return False

def test_awq_still_works_for_llama():
    """
    Check that AWQ still produces a recipe for Llama models.

    Calls ``get_quantization_recipe("AWQ", "LlamaForCausalLM")`` and prints
    the ``ignore`` patterns of the first recipe entry.

    Returns:
        bool: True when the call and the inspection of the first recipe
        entry raise nothing, False otherwise.
    """
    print("\nTesting AWQ still works for Llama models...")

    try:
        recipe = get_quantization_recipe("AWQ", "LlamaForCausalLM")
        print("✓ AWQ still works for LlamaForCausalLM")
        print(f"  - Ignore patterns: {recipe[0].ignore}")
    except Exception as e:
        print(f"✗ AWQ failed for LlamaForCausalLM: {e}")
        return False
    return True

def test_target_model():
    """
    Count how many non-AWQ methods produce a recipe for the target model.

    Calls ``get_quantization_recipe`` for each method with the
    ``Qwen2_5_VLForConditionalGeneration`` architecture, prints a line for
    each method that raises, and finishes with a summary line.

    Returns:
        bool: True when every method succeeded, False otherwise.
    """
    print("\nTesting with target model architecture: Qwen2_5_VLForConditionalGeneration")

    # All methods except AWQ should work
    methods = ["GPTQ", "W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8", "FP8"]

    success_count = 0
    for method in methods:
        try:
            get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
        except Exception as e:
            print(f"Method {method} failed: {e}")
        else:
            success_count += 1

    all_succeeded = success_count == len(methods)
    # Show a success mark only when every method worked, so a partial result
    # is not presented as a pass.
    marker = "✓" if all_succeeded else "✗"
    print(f"{marker} {success_count}/{len(methods)} methods work for target model")
    return all_succeeded

if __name__ == "__main__":
    # Script entry point: run all four checks, print a summary, and exit
    # with a non-zero status when any check failed.
    print("Final verification after fixing AWQ incompatibility issue\n")

    # Run every check up front (no short-circuiting) so each one prints its
    # own report even when an earlier check has already failed.
    results = [
        test_qwen2_5_vl_compatible_methods(),
        test_awq_incompatibility(),
        test_awq_still_works_for_llama(),
        test_target_model(),
    ]
    all_ok = all(results)

    separator = "=" * 60
    print(f"\n{separator}")
    if all_ok:
        print("✅ ALL TESTS PASSED")
        print("\nSOLUTION SUMMARY:")
        print("• AWQ is now properly blocked for Qwen2.5-VL models due to incompatibility")
        print("• All other methods (GPTQ, W4A16, W8A16, W8A8_INT8, W8A8_FP8, FP8) work for Qwen2.5-VL")
        print("• AWQ still works for Llama models as expected")
        print("• Sequential onloading is preserved for memory efficiency")
        print("• Users will get clear error messages when trying incompatible methods")
    else:
        print("❌ SOME TESTS FAILED")
    print(separator)

    if not all_ok:
        # Signal failure to the shell or CI; success keeps exit status 0.
        raise SystemExit(1)