llama : correction of the attn.v.weight quantization for IQ3_XS (ggerganov#6209)

IQ3_XS was not mentioned, while IQ3_S and IQ3_M were each present twice. This PR corrects that in the way that was probably intended initially.
tybalex · Apr 17, 2024 · 2e1c47b · 2e1c47b
1 parent 2e8752f
commit 2e1c47b
Showing 1 changed file with 1 addition and 7 deletions.
diff --git a/llama.cpp b/llama.cpp
@@ -12027,13 +12027,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
  else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
  new_type = qs.model.hparams.n_gqa() >= 4 ? GGML_TYPE_Q4_K : !qs.has_imatrix ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
  }
- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && qs.model.hparams.n_gqa() >= 4) {
- new_type = GGML_TYPE_Q4_K;
- }
- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
- new_type = GGML_TYPE_Q4_K;
- }
- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && qs.model.hparams.n_gqa() >= 4) {
+ else if ((ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S) && qs.model.hparams.n_gqa() >= 4) {
  new_type = GGML_TYPE_Q4_K;
  }
  else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {