Spaces:

n00b001
/

llm-compressor-my-repo

Running

File size: 3,889 Bytes

d95ff5b

#!/usr/bin/env python
"""
Final verification test after implementing proper AWQ incompatibility with Qwen2.5-VL models
"""

from app import get_quantization_recipe

def test_qwen2_5_vl_compatible_methods():
    """
    Check every quantization method expected to work with Qwen2.5-VL.

    Requests a recipe for each method against the
    ``Qwen2_5_VLForConditionalGeneration`` architecture and prints a
    per-method report. Any exception raised while building or inspecting a
    recipe counts as a failure for that method.

    Returns:
        bool: True when no method raised, False otherwise.
    """
    print("Testing quantization methods compatible with Qwen2.5-VL models...")

    failed_methods = []
    for method in ("GPTQ", "W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8", "FP8"):
        try:
            recipe = get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
            print(f"✓ {method} works with Qwen2_5_VLForConditionalGeneration")
            # Only the first recipe entry is inspected for the report.
            first_entry = recipe[0]
            targets = getattr(first_entry, "sequential_targets", None)
            if targets:
                print(f"  - Uses sequential onloading: {targets}")
            print(f"  - Ignore patterns: {first_entry.ignore}")
        except Exception as err:
            # Reporting problems (e.g. a missing attribute) are counted as
            # failures too, because they happen inside the same try block.
            print(f"✗ {method} failed: {err}")
            failed_methods.append(method)

    return not failed_methods

def test_awq_incompatibility():
    """
    Verify that requesting an AWQ recipe for Qwen2.5-VL is rejected.

    The check passes only when ``get_quantization_recipe`` raises a
    ``ValueError`` whose message contains both "not compatible" and
    "rotary positional embeddings". A successful call, a ``ValueError`` with
    a different message, or any other exception type is reported as a
    failure.

    Returns:
        bool: True when AWQ is rejected with the expected error, else False.
    """
    print("\nTesting AWQ incompatibility with Qwen2.5-VL models...")

    try:
        get_quantization_recipe("AWQ", "Qwen2_5_VLForConditionalGeneration")
    except ValueError as e:
        message = str(e)
        if "not compatible" in message and "rotary positional embeddings" in message:
            print(f"✓ AWQ properly fails for Qwen2.5-VL: {e}")
            return True
        print(f"✗ AWQ failed but with wrong error: {e}")
        return False
    except Exception as e:
        # Report unexpected exception types instead of letting them escape,
        # so this check returns a result like the other checks in this file.
        print(f"✗ AWQ failed but with wrong error type ({type(e).__name__}): {e}")
        return False

    print("✗ AWQ unexpectedly succeeded for Qwen2.5-VL (should have failed)")
    return False

def test_awq_still_works_for_llama():
    """
    Confirm that an AWQ recipe can still be built for ``LlamaForCausalLM``.

    Any exception raised while requesting the recipe or printing its ignore
    patterns is reported and treated as a failure.

    Returns:
        bool: True when the recipe was built and reported, False otherwise.
    """
    print("\nTesting AWQ still works for Llama models...")

    passed = False
    try:
        llama_recipe = get_quantization_recipe("AWQ", "LlamaForCausalLM")
        print("✓ AWQ still works for LlamaForCausalLM")
        print(f"  - Ignore patterns: {llama_recipe[0].ignore}")
        # Only reached when both the lookup and the report succeeded.
        passed = True
    except Exception as err:
        print(f"✗ AWQ failed for LlamaForCausalLM: {err}")
    return passed

def test_target_model():
    """
    Count how many non-AWQ methods yield a recipe for the target model.

    Requests a recipe for each method against
    ``Qwen2_5_VLForConditionalGeneration``; a method counts as working when
    the call does not raise. The summary line is prefixed with "✓" only when
    every method worked, and with "✗" otherwise.

    Returns:
        bool: True when every listed method produced a recipe.
    """
    print("\nTesting with target model architecture: Qwen2_5_VLForConditionalGeneration")

    # All methods except AWQ should work
    methods = ["GPTQ", "W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8", "FP8"]

    success_count = 0
    for method in methods:
        try:
            get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
        except Exception as e:
            print(f"Method {method} failed: {e}")
        else:
            success_count += 1

    all_work = success_count == len(methods)
    # Do not show a success mark on a partial result such as "3/6".
    marker = "✓" if all_work else "✗"
    print(f"{marker} {success_count}/{len(methods)} methods work for target model")
    return all_work

if __name__ == "__main__":
    print("Final verification after fixing AWQ incompatibility issue\n")
    
    test1 = test_qwen2_5_vl_compatible_methods()
    test2 = test_awq_incompatibility() 
    test3 = test_awq_still_works_for_llama()
    test4 = test_target_model()
    
    print(f"\n{'='*60}")
    if test1 and test2 and test3 and test4:
        print("✅ ALL TESTS PASSED")
        print("\nSOLUTION SUMMARY:")
        print("• AWQ is now properly blocked for Qwen2.5-VL models due to incompatibility")
        print("• All other methods (GPTQ, W4A16, W8A16, W8A8_INT8, W8A8_FP8, FP8) work for Qwen2.5-VL")
        print("• AWQ still works for Llama models as expected")
        print("• Sequential onloading is preserved for memory efficiency")
        print("• Users will get clear error messages when trying incompatible methods")
    else:
        print("❌ SOME TESTS FAILED")
    print(f"{'='*60}")