gguf-py : add support for I8, I16 and I32 (#6045)

* Refactor dtype handling to be extensible. This code is equivalent to the previous version, but it is now prepared to easily add more NumPy dtypes. * Add support for I8, I16 and I32. These types are allowed in the GGUF specification. * Add support for I8, I16 and I32 to gguf_writer * Add support for I8, I16, I32 to gguf_reader
ggerganov · Mar 14, 2024 · 3ca2348 · 3ca2348
1 parent 3fe8d7a
commit 3ca2348
Show file tree

Hide file tree

Showing 3 changed files with 27 additions and 4 deletions.
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
@@ -661,6 +661,9 @@ class GGMLQuantizationType(IntEnum):
  IQ3_S = 21
  IQ2_S = 22
  IQ4_XS = 23
+ I8 = 24
+ I16 = 25
+ I32 = 26
 
 
 class GGUFEndian(IntEnum):
@@ -727,6 +730,9 @@ def get_type(val: Any) -> GGUFValueType:
  GGMLQuantizationType.IQ3_S: (256, 2 + QK_K // 4 + QK_K // 8 + QK_K // 32 + 4),
  GGMLQuantizationType.IQ2_S: (256, 2 + QK_K // 4 + QK_K // 16),
  GGMLQuantizationType.IQ4_XS: (256, 2 + 2 + QK_K // 2 + QK_K // 64),
+ GGMLQuantizationType.I8: (1, 1),
+ GGMLQuantizationType.I16: (1, 2),
+ GGMLQuantizationType.I32: (1, 4),
 }
 
 

diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py
@@ -248,6 +248,15 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
  elif ggml_type == GGMLQuantizationType.F16:
  item_count = n_elems
  item_type = np.float16
+ elif ggml_type == GGMLQuantizationType.I8:
+ item_count = n_elems
+ item_type = np.int8
+ elif ggml_type == GGMLQuantizationType.I16:
+ item_count = n_elems
+ item_type = np.int16
+ elif ggml_type == GGMLQuantizationType.I32:
+ item_count = n_elems
+ item_type = np.int32
  else:
  item_count = n_bytes
  item_type = np.uint8

diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
@@ -196,9 +196,6 @@ def add_tensor_info(
  if self.state is not WriterState.EMPTY:
  raise ValueError(f'Expected output file to be empty, got {self.state}')
 
- if raw_dtype is None and tensor_dtype not in (np.float32, np.float16):
- raise ValueError("Only F32 and F16 tensors are supported for now")
-
  encoded_name = name.encode("utf8")
  self.ti_data += self._pack("Q", len(encoded_name))
  self.ti_data += encoded_name
@@ -207,7 +204,18 @@ def add_tensor_info(
  for i in range(n_dims):
  self.ti_data += self._pack("Q", tensor_shape[n_dims - 1 - i])
  if raw_dtype is None:
- dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
+ if tensor_dtype == np.float32:
+ dtype = GGMLQuantizationType.F32
+ elif tensor_dtype == np.float16:
+ dtype = GGMLQuantizationType.F16
+ elif tensor_dtype == np.int8:
+ dtype = GGMLQuantizationType.I8
+ elif tensor_dtype == np.int16:
+ dtype = GGMLQuantizationType.I16
+ elif tensor_dtype == np.int32:
+ dtype = GGMLQuantizationType.I32
+ else:
+ raise ValueError("Only F32, F16, I8, I16, I32 tensors are supported for now")
  else:
  dtype = raw_dtype
  self.ti_data += self._pack("I", dtype)