Q8_K_XL quantization notes — llama.cpp llama-quantize recipes for a 40-block model (layers 0-39); replace <...> placeholders with real paths.
# Q8_K_XL (all-Q8_0 variant): quantize using an importance matrix, explicitly
# pinning the output head, token embeddings, and every attention/FFN tensor in
# all 40 blocks (0-39) to Q8_0.
#
# The layer alternation "0|1|...|39" is generated once instead of being
# hand-written seven times; it expands to the exact same regex group as before.
all_layers=$(seq -s'|' 0 39)   # -> 0|1|2|...|39

llama-quantize --imatrix <imatrix_unsloth.dat> \
  --tensor-type output.weight=Q8_0 \
  --tensor-type token_embd.weight=Q8_0 \
  --tensor-type "blk.(${all_layers}).attn_k.weight=Q8_0" \
  --tensor-type "blk.(${all_layers}).attn_output.weight=Q8_0" \
  --tensor-type "blk.(${all_layers}).attn_q.weight=Q8_0" \
  --tensor-type "blk.(${all_layers}).attn_v.weight=Q8_0" \
  --tensor-type "blk.(${all_layers}).ffn_down.weight=Q8_0" \
  --tensor-type "blk.(${all_layers}).ffn_gate.weight=Q8_0" \
  --tensor-type "blk.(${all_layers}).ffn_up.weight=Q8_0" \
  <input.gguf> <output.gguf> Q8_0
# Q8_K_XL (mixed Q8_0/F16 variant): keep the output head, token embeddings,
# and the outermost blocks (0-2 and 38-39) at full F16 precision; quantize the
# interior blocks (3-37) to Q8_0.
#
# Both layer groups are defined once instead of being hand-duplicated seven
# times each; they expand to the exact same regex groups as before.
mid_layers=$(seq -s'|' 3 37)   # -> 3|4|...|37   (interior blocks, Q8_0)
edge_layers='0|1|2|38|39'      # first/last blocks, kept at F16

llama-quantize \
  --tensor-type output.weight=F16 \
  --tensor-type token_embd.weight=F16 \
  --tensor-type "blk.(${mid_layers}).attn_k.weight=Q8_0" \
  --tensor-type "blk.(${edge_layers}).attn_k.weight=F16" \
  --tensor-type "blk.(${mid_layers}).attn_output.weight=Q8_0" \
  --tensor-type "blk.(${edge_layers}).attn_output.weight=F16" \
  --tensor-type "blk.(${mid_layers}).attn_q.weight=Q8_0" \
  --tensor-type "blk.(${edge_layers}).attn_q.weight=F16" \
  --tensor-type "blk.(${mid_layers}).attn_v.weight=Q8_0" \
  --tensor-type "blk.(${edge_layers}).attn_v.weight=F16" \
  --tensor-type "blk.(${mid_layers}).ffn_down.weight=Q8_0" \
  --tensor-type "blk.(${edge_layers}).ffn_down.weight=F16" \
  --tensor-type "blk.(${mid_layers}).ffn_gate.weight=Q8_0" \
  --tensor-type "blk.(${edge_layers}).ffn_gate.weight=F16" \
  --tensor-type "blk.(${mid_layers}).ffn_up.weight=Q8_0" \
  --tensor-type "blk.(${edge_layers}).ffn_up.weight=F16" \
  <input.gguf> <output.gguf> Q8_0