File size: 5,232 Bytes
d95ff5b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/env python
"""
Final test to confirm the original issue is resolved:
GPTQ quantization is not supported for Qwen2_5_VLForConditionalGeneration architecture
"""

from app import get_quantization_recipe

def test_original_issue_fixed():
    """
    Test to confirm the original error is fixed.

    The original error was:
    GPTQ quantization is not supported for Qwen2_5_VLForConditionalGeneration architecture

    Returns:
        bool: True when GPTQ, the other original methods, and the new
        methods all produce a recipe without raising; False otherwise.
    """
    print("Testing the original issue that was reported...")
    print("Original error: GPTQ quantization is not supported for Qwen2_5_VLForConditionalGeneration architecture")
    print()

    # Test the original problematic case
    try:
        recipe = get_quantization_recipe("GPTQ", "Qwen2_5_VLForConditionalGeneration")
        print("βœ“ GPTQ quantization recipe created successfully for Qwen2_5_VLForConditionalGeneration")
        print(f"  Recipe: {recipe}")
        # Only the first modifier in the recipe is inspected for sequential
        # onloading / ignore settings — presumably the quantization modifier;
        # TODO(review): confirm recipe ordering against app.get_quantization_recipe.
        if hasattr(recipe[0], 'sequential_targets') and recipe[0].sequential_targets:
            print(f"  Uses sequential onloading: {recipe[0].sequential_targets}")
        print(f"  Ignores visual components: {recipe[0].ignore}")
        success_gptq = True
    except Exception as e:
        print(f"βœ— GPTQ still fails: {e}")
        success_gptq = False

    print()

    # Test other methods that were also problematic
    other_methods = ["AWQ", "FP8"]
    success_others = True
    for method in other_methods:
        try:
            recipe = get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
            print(f"βœ“ {method} quantization recipe created successfully for Qwen2_5_VLForConditionalGeneration")
            if hasattr(recipe[0], 'sequential_targets') and recipe[0].sequential_targets:
                print(f"  Uses sequential onloading: {recipe[0].sequential_targets}")
            # No flag update needed on success: success_others starts True and
            # only flips to False on failure (was a redundant `and True` no-op).
        except Exception as e:
            print(f"βœ— {method} still fails: {e}")
            success_others = False

    print()

    # Test new methods for Qwen2.5-VL
    new_methods = ["W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8"]
    success_new = True
    for method in new_methods:
        try:
            recipe = get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
            print(f"βœ“ {method} quantization recipe created successfully for Qwen2_5_VLForConditionalGeneration")
            # Same pattern: the flag only changes on failure.
        except Exception as e:
            print(f"βœ— {method} fails: {e}")
            success_new = False

    print()

    if success_gptq and success_others and success_new:
        print("πŸŽ‰ SUCCESS: The original issue has been completely resolved!")
        print("   - GPTQ now works for Qwen2_5_VLForConditionalGeneration")
        print("   - AWQ now works for Qwen2_5_VLForConditionalGeneration") 
        print("   - FP8 now works for Qwen2_5_VLForConditionalGeneration")
        print("   - New methods (W4A16, W8A16, W8A8_INT8, W8A8_FP8) also work!")
        print("   - Sequential onloading is used for memory efficiency")
        print("   - Visual components are properly ignored during quantization")
        return True
    else:
        print("❌ FAILURE: Some issues remain")
        return False

def test_specific_model():
    """
    Test with the specific model mentioned: huihui-ai/Huihui-Fara-7B-abliterated

    Attempts every supported quantization method against the model's
    architecture and reports per-method success.

    Returns:
        bool: True when every method produced a recipe without raising.
    """
    separator = "=" * 60
    print("\n" + separator)
    print("Testing with the specific model: huihui-ai/Huihui-Fara-7B-abliterated")
    print("(This model has architecture: Qwen2_5_VLForConditionalGeneration)")
    print(separator)

    # All the methods that should now work for this model.
    methods = ["GPTQ", "AWQ", "FP8", "W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8"]

    all_ok = True
    for quant_method in methods:
        try:
            get_quantization_recipe(quant_method, "Qwen2_5_VLForConditionalGeneration")
        except Exception as err:
            print(f"βœ— {quant_method}: FAILED - {err}")
            all_ok = False
        else:
            print(f"βœ“ {quant_method}: OK")

    if not all_ok:
        print("\n❌ Some methods still don't work for the target model.")
    else:
        print(f"\nπŸŽ‰ All {len(methods)} quantization methods now work for the target model!")
        print("Users can now quantize huihui-ai/Huihui-Fara-7B-abliterated with any of these methods.")

    return all_ok

if __name__ == "__main__":
    print("Testing resolution of the original quantization issue...\n")
    
    issue_fixed = test_original_issue_fixed()
    model_specific = test_specific_model()
    
    print("\n" + "="*60)
    if issue_fixed and model_specific:
        print("βœ… ALL TESTS PASSED - The issue is completely resolved!")
        print("\nThe Hugging Face Space now supports:")
        print("  β€’ All original methods: GPTQ, AWQ, FP8")
        print("  β€’ New methods: W4A16, W8A16, W8A8_INT8, W8A8_FP8")
        print("  β€’ Sequential onloading for memory efficiency")
        print("  β€’ Proper handling of Qwen2.5-VL visual components")
        print("  β€’ All methods work with Qwen2_5_VLForConditionalGeneration models")
    else:
        print("❌ SOME TESTS FAILED - Issue may not be completely resolved")
    print("="*60)