#!/usr/bin/env python
"""
Final test to confirm the original issue is resolved:
GPTQ quantization is not supported for Qwen2_5_VLForConditionalGeneration architecture
"""
from app import get_quantization_recipe
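
# NOTE: get_quantization_recipe() is defined in app.py and is not shown here. Based on
# how this script calls it, it is assumed to take a quantization method name and a model
# architecture string and to return a recipe (a list of modifier objects, presumably
# llmcompressor-style), whose first modifier may expose `sequential_targets`
# (layer-by-layer onloading) and `ignore` (modules excluded from quantization,
# e.g. the visual components of Qwen2.5-VL).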


def test_original_issue_fixed():
    """
    Test to confirm the original error is fixed.
    The original error was:
    GPTQ quantization is not supported for Qwen2_5_VLForConditionalGeneration architecture
    """
    print("Testing the original issue that was reported...")
    print("Original error: GPTQ quantization is not supported for Qwen2_5_VLForConditionalGeneration architecture")
    print()
    # Test the original problematic case
    try:
        recipe = get_quantization_recipe("GPTQ", "Qwen2_5_VLForConditionalGeneration")
        print("✓ GPTQ quantization recipe created successfully for Qwen2_5_VLForConditionalGeneration")
        print(f"  Recipe: {recipe}")
        if hasattr(recipe[0], 'sequential_targets') and recipe[0].sequential_targets:
            print(f"  Uses sequential onloading: {recipe[0].sequential_targets}")
            print(f"  Ignores visual components: {recipe[0].ignore}")
        success_gptq = True
    except Exception as e:
        print(f"✗ GPTQ still fails: {e}")
        success_gptq = False
    print()

    # Test other methods that were also problematic
    other_methods = ["AWQ", "FP8"]
    success_others = True
    for method in other_methods:
        try:
            recipe = get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
            print(f"✓ {method} quantization recipe created successfully for Qwen2_5_VLForConditionalGeneration")
            if hasattr(recipe[0], 'sequential_targets') and recipe[0].sequential_targets:
                print(f"  Uses sequential onloading: {recipe[0].sequential_targets}")
        except Exception as e:
            print(f"✗ {method} still fails: {e}")
            success_others = False
    print()
    # Test new methods for Qwen2.5-VL
    new_methods = ["W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8"]
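    # Scheme names presumably follow the usual W<weight-bits>A<activation-bits>
    # convention, e.g. W4A16 = 4-bit weights with 16-bit activations, while the
    # W8A8 variants quantize both weights and activations to INT8 or FP8.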
    success_new = True
    for method in new_methods:
        try:
            recipe = get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
            print(f"✓ {method} quantization recipe created successfully for Qwen2_5_VLForConditionalGeneration")
        except Exception as e:
            print(f"✗ {method} fails: {e}")
            success_new = False
    print()
    if success_gptq and success_others and success_new:
        print("🎉 SUCCESS: The original issue has been completely resolved!")
        print("   - GPTQ now works for Qwen2_5_VLForConditionalGeneration")
        print("   - AWQ now works for Qwen2_5_VLForConditionalGeneration")
        print("   - FP8 now works for Qwen2_5_VLForConditionalGeneration")
        print("   - New methods (W4A16, W8A16, W8A8_INT8, W8A8_FP8) also work!")
        print("   - Sequential onloading is used for memory efficiency")
        print("   - Visual components are properly ignored during quantization")
        return True
    else:
        print("✗ FAILURE: Some issues remain")
        return False


def test_specific_model():
    """
    Test with the specific model mentioned: huihui-ai/Huihui-Fara-7B-abliterated
    """
    print("\n" + "="*60)
    print("Testing with the specific model: huihui-ai/Huihui-Fara-7B-abliterated")
    print("(This model has architecture: Qwen2_5_VLForConditionalGeneration)")
    print("="*60)

    # All the methods that should now work for this model
    methods = ["GPTQ", "AWQ", "FP8", "W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8"]
    success = True
    for method in methods:
        try:
            recipe = get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
            print(f"✓ {method}: OK")
        except Exception as e:
            print(f"✗ {method}: FAILED - {e}")
            success = False

    if success:
        print(f"\n🎉 All {len(methods)} quantization methods now work for the target model!")
        print("Users can now quantize huihui-ai/Huihui-Fara-7B-abliterated with any of these methods.")
    else:
        print("\n✗ Some methods still don't work for the target model.")
    return success


if __name__ == "__main__":
    print("Testing resolution of the original quantization issue...\n")
    issue_fixed = test_original_issue_fixed()
    model_specific = test_specific_model()

    print("\n" + "="*60)
    if issue_fixed and model_specific:
        print("✅ ALL TESTS PASSED - The issue is completely resolved!")
        print("\nThe Hugging Face Space now supports:")
        print("  • All original methods: GPTQ, AWQ, FP8")
        print("  • New methods: W4A16, W8A16, W8A8_INT8, W8A8_FP8")
        print("  • Sequential onloading for memory efficiency")
        print("  • Proper handling of Qwen2.5-VL visual components")
        print("  • All methods work with Qwen2_5_VLForConditionalGeneration models")
    else:
        print("❌ SOME TESTS FAILED - Issue may not be completely resolved")
    print("="*60)