Skip to content

Commit

Permalink
GGUF parser: support big-endian files (#545)
Browse files Browse the repository at this point in the history
The important snippet is:

```ts
const [littleEndian, version] = (() => {
	/// ggerganov/llama.cpp#3957
	/// Assume this code is always running on little-endian
	/// but wants to be able to parse both endianness
	const version = r.view.getUint32(4, true);
	if (version & 65535) {
		return [true, version];
	} else {
		return [false, r.view.getUint32(4, false)];
	}
})();
```

from ggerganov/llama.cpp#3957 and thanks to
@ggerganov
[comment](https://github.com/huggingface/huggingface.js/pull/540/files#r1521103912)
  • Loading branch information
julien-c committed Mar 13, 2024
1 parent 8ec3643 commit 12d28d9
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 30 deletions.
42 changes: 42 additions & 0 deletions packages/gguf/src/gguf.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,17 @@ const URL_MISTRAL_7B =
"https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q5_K_M.gguf";
const URL_GEMMA_2B = "https://huggingface.co/lmstudio-ai/gemma-2b-it-GGUF/resolve/main/gemma-2b-it-q4_k_m.gguf";

// GGUF fixture stored in big-endian byte order (the spec below expects version 3);
// used to exercise the parser's endianness detection.
const URL_BIG_ENDIAN =
	"https://huggingface.co/ggml-org/models/resolve/main/bert-bge-small/ggml-model-f16-big-endian.gguf";

describe("gguf", () => {
it("should parse a llama2 7b", async () => {
const { metadata, tensorInfos } = await gguf(URL_LLAMA);

/// metadata

expect(metadata).toMatchObject({
// partial list, do not exhaustively list (tokenizer is quite big for instance)
version: 2,
tensor_count: 291n,
kv_count: 19n,
Expand Down Expand Up @@ -48,6 +52,7 @@ describe("gguf", () => {
]);

/// Tensor infos
/// By convention we test the first and last tensor.

expect(tensorInfos.length).toEqual(291);
expect(tensorInfos[0]).toMatchObject({
Expand Down Expand Up @@ -136,4 +141,41 @@ describe("gguf", () => {
dtype: GGMLQuantizationType.F32,
});
});

	it("should parse a big-endian file", async () => {
		// End-to-end check: metadata and tensor infos of a big-endian GGUF
		// fixture must decode to the known contents of the model.
		const { metadata, tensorInfos } = await gguf(URL_BIG_ENDIAN);

		/// metadata

		expect(metadata).toMatchObject({
			version: 3,
			tensor_count: 197n,
			kv_count: 23n,
			"general.architecture": "bert",
			"general.file_type": GGMLQuantizationType.F16,
			"general.name": "bge-small-en-v1.5",
			"bert.attention.causal": false,
			"bert.attention.head_count": 12,
			"bert.attention.layer_norm_epsilon": 9.999999960041972e-13,
			"bert.block_count": 12,
			"bert.context_length": 512,
			"bert.embedding_length": 384,
			"bert.feed_forward_length": 1536,
			"bert.pooling_type": 2,
		});

		/// Tensor infos
		/// By convention we test the first and last tensor.

		expect(tensorInfos.length).toEqual(197);
		expect(tensorInfos[0]).toMatchObject({
			name: "token_embd_norm.bias",
			shape: [384n],
			dtype: GGMLQuantizationType.F32,
		});
		expect(tensorInfos[tensorInfos.length - 1]).toMatchObject({
			name: "blk.9.ffn_down.weight",
			shape: [1536n, 384n],
			dtype: GGMLQuantizationType.F16,
		});
	});
});
91 changes: 61 additions & 30 deletions packages/gguf/src/gguf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[];
type Version = 1 | 2 | 3;
/** Type guard narrowing a raw u32 read from the header to a supported GGUF version. */
const isVersion = (version: number): version is Version => version === 1 || version === 2 || version === 3;

/**
 * Must be `GGUF` at the byte level: `0x47` `0x47` `0x55` `0x46`.
 * A u32 read of these bytes on a little-endian host happens to equal
 * 0x46554747, so a naive `getUint32(0, true)` comparison works by accident;
 * we deliberately compare byte-by-byte instead so the check is explicit and
 * independent of both host and file endianness.
 */
const ggufMagicNumber = new Uint8Array([0x47, 0x47, 0x55, 0x46]); /// "GGUF"

export enum GGMLQuantizationType {
Expand Down Expand Up @@ -112,67 +118,72 @@ class RangeView {
}
}

function readVersionedSize(view: DataView, byteOffset: number, version: Version): bigint {
/**
* Note: A good article about binary data in JS: https://javascript.info/arraybuffer-binary-arrays
*/

function readVersionedSize(view: DataView, byteOffset: number, version: Version, littleEndian: boolean): bigint {
switch (version) {
case 1: {
const n = view.getUint32(byteOffset, true);
const n = view.getUint32(byteOffset, littleEndian);
return BigInt(n);
}
case 2:
case 3: {
return view.getBigUint64(byteOffset, true);
return view.getBigUint64(byteOffset, littleEndian);
}
}
}

function readString(view: DataView, offset: number): { value: string; length: number } {
const length = view.getBigUint64(offset, true);
function readString(view: DataView, offset: number, littleEndian: boolean): { value: string; length: number } {
const length = view.getBigUint64(offset, littleEndian);
const value = new TextDecoder().decode(view.buffer.slice(offset + 8, offset + 8 + Number(length)));
return { value, length: 8 + Number(length) };
}

function readMetadataValue(
view: DataView,
type: GGUFValueType,
offset: number
offset: number,
littleEndian: boolean
): { value: MetadataValue; length: number } {
switch (type) {
case GGUFValueType.UINT8:
return { value: view.getUint8(offset), length: 1 };
case GGUFValueType.INT8:
return { value: view.getInt8(offset), length: 1 };
case GGUFValueType.UINT16:
return { value: view.getUint16(offset, true), length: 2 };
return { value: view.getUint16(offset, littleEndian), length: 2 };
case GGUFValueType.INT16:
return { value: view.getInt16(offset, true), length: 2 };
return { value: view.getInt16(offset, littleEndian), length: 2 };
case GGUFValueType.UINT32:
return { value: view.getUint32(offset, true), length: 4 };
return { value: view.getUint32(offset, littleEndian), length: 4 };
case GGUFValueType.INT32:
return { value: view.getInt32(offset, true), length: 4 };
return { value: view.getInt32(offset, littleEndian), length: 4 };
case GGUFValueType.FLOAT32:
return { value: view.getFloat32(offset, true), length: 4 };
return { value: view.getFloat32(offset, littleEndian), length: 4 };
case GGUFValueType.BOOL:
return { value: view.getUint8(offset) !== 0, length: 1 };
case GGUFValueType.STRING:
return readString(view, offset);
return readString(view, offset, littleEndian);
case GGUFValueType.ARRAY: {
const arrayType = view.getUint32(offset, true);
const arrayLength = view.getBigUint64(offset + 4, true);
const arrayType = view.getUint32(offset, littleEndian);
const arrayLength = view.getBigUint64(offset + 4, littleEndian);
let length = 12;
const arrayValues: MetadataValue[] = [];
for (let i = 0; i < arrayLength; i++) {
const { value, length: _length } = readMetadataValue(view, arrayType, offset + length);
const { value, length: _length } = readMetadataValue(view, arrayType, offset + length, littleEndian);
arrayValues.push(value);
length += _length;
}
return { value: arrayValues, length };
}
case GGUFValueType.UINT64:
return { value: view.getBigUint64(offset, true), length: 8 };
return { value: view.getBigUint64(offset, littleEndian), length: 8 };
case GGUFValueType.INT64:
return { value: view.getBigInt64(offset, true), length: 8 };
return { value: view.getBigInt64(offset, littleEndian), length: 8 };
case GGUFValueType.FLOAT64:
return { value: view.getFloat64(offset, true), length: 8 };
return { value: view.getFloat64(offset, littleEndian), length: 8 };
}
}

Expand Down Expand Up @@ -207,16 +218,36 @@ export async function gguf(
const r = new RangeView(url, params);
await r.fetchChunk();

if (r.view.getUint32(0, true) !== new DataView(ggufMagicNumber.buffer).getUint32(0, true)) {
const checkBuffer = (buffer: Uint8Array, header: Uint8Array) => {
for (let i = 0; i < header.length; i++) {
if (header[i] !== buffer[i]) {
return false;
}
}
return true;
};

if (!checkBuffer(new Uint8Array(r.view.buffer.slice(0, 4)), ggufMagicNumber)) {
throw new Error("not a valid gguf file: not starting with GGUF magic number");
}

const version = r.view.getUint32(4, true);
const [littleEndian, version] = (() => {
/// https://github.com/ggerganov/llama.cpp/issues/3957
/// Assume this code is always running on little-endian
/// but wants to be able to parse both endianness
const version = r.view.getUint32(4, true);
if (version & 65535) {
return [true, version];
} else {
return [false, r.view.getUint32(4, false)];
}
})();

if (!isVersion(version)) {
throw new Error(`not a valid gguf file: unsupported version "${version}"`);
}
const tensorCount = readVersionedSize(r.view, 8, version);
const numKv = readVersionedSize(r.view, 16, version);
const tensorCount = readVersionedSize(r.view, 8, version, littleEndian);
const numKv = readVersionedSize(r.view, 16, version, littleEndian);

const metadata: GGUFMetadata = {
version,
Expand All @@ -230,11 +261,11 @@ export async function gguf(
await r.fetchChunkIfNeeded(offset);

// read key
const keyResult = readString(r.view, offset);
const keyResult = readString(r.view, offset, littleEndian);
offset += keyResult.length;

// read value type
const valueType = r.view.getUint32(offset, true);
const valueType = r.view.getUint32(offset, littleEndian);
offset += 4;

if (!isGGUFValueType(valueType)) {
Expand All @@ -245,7 +276,7 @@ export async function gguf(
while (!valueResult) {
try {
// read value
valueResult = readMetadataValue(r.view, valueType, offset);
valueResult = readMetadataValue(r.view, valueType, offset, littleEndian);
} catch (err) {
if (err instanceof RangeError) {
await r.fetchChunk();
Expand All @@ -264,21 +295,21 @@ export async function gguf(
await r.fetchChunkIfNeeded(offset);

// read tensor name
const keyResult = readString(r.view, offset);
const keyResult = readString(r.view, offset, littleEndian);
offset += keyResult.length;

const nDims = r.view.getUint32(offset, true);
const nDims = r.view.getUint32(offset, littleEndian);
offset += 4;

const shape: bigint[] = [];
for (let dim = 0; dim < nDims; dim++) {
shape.push(r.view.getBigUint64(offset, true));
shape.push(r.view.getBigUint64(offset, littleEndian));
offset += 8;
}

const type = r.view.getUint32(offset, true);
const type = r.view.getUint32(offset, littleEndian);
offset += 4;
const tensorOffset = r.view.getBigUint64(offset, true);
const tensorOffset = r.view.getBigUint64(offset, littleEndian);
offset += 8;

tensorInfos.push({
Expand Down

0 comments on commit 12d28d9

Please sign in to comment.