llama : fix integer overflow during quantization (ggerganov#6063)
ggerganov authored and hodlen committed Apr 1, 2024
1 parent 0e9969a commit a5e1600
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion llama.cpp
@@ -11977,7 +11977,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
     return new_type;
 }
 
-static int32_t llama_tensor_quantize_internal(enum ggml_type new_type, const float * f32_data, void * new_data, const int chunk_size, int nrows, int n_per_row, const float * imatrix, std::vector<std::thread> & workers, const int nthread) {
+static size_t llama_tensor_quantize_internal(enum ggml_type new_type, const float * f32_data, void * new_data, const int chunk_size, int nrows, int n_per_row, const float * imatrix, std::vector<std::thread> & workers, const int nthread) {
     std::mutex mutex;
     int counter = 0;
     size_t new_size = 0;
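Why this one-word change matters: `new_size` is accumulated as a `size_t` (the total byte count of the quantized tensor), but the old signature narrowed it to `int32_t` on return. For tensors larger than `INT32_MAX` bytes (about 2.1 GB) the returned size wrapped around, corrupting the reported quantized size. A minimal C++ sketch of the failure mode, with made-up tensor dimensions; `quantize_old` and `quantize_fixed` are hypothetical stand-ins for illustration, not llama.cpp functions:

#include <cstdint>
#include <cstdio>

// Stand-in for the old signature: the byte count is computed correctly
// as size_t, but narrowing it to int32_t on return wraps once the
// quantized tensor exceeds INT32_MAX bytes.
static int32_t quantize_old(size_t nrows, size_t row_size_bytes) {
    size_t new_size = nrows * row_size_bytes;
    return (int32_t) new_size; // wraps for new_size > INT32_MAX
}

// Stand-in for the fixed signature: size_t end to end, as in the commit.
static size_t quantize_fixed(size_t nrows, size_t row_size_bytes) {
    return nrows * row_size_bytes;
}

int main() {
    // 3.2e9 bytes: plausible for a large tensor, and above INT32_MAX (2147483647)
    size_t nrows = 32000, row_size = 100000;
    printf("old:   %d\n",  (int) quantize_old(nrows, row_size));   // wrapped, typically negative
    printf("fixed: %zu\n", quantize_fixed(nrows, row_size));       // 3200000000
    return 0;
}

Presumably the caller accumulates these per-tensor results into the total quantized model size, which is why the full-range `size_t` return type is needed.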
