Skip to content

Commit

Permalink
GGUF parser: support big-endian files (#545)
Browse files Browse the repository at this point in the history
The important snippet is:

```ts
const [littleEndian, version] = (() => {
	/// ggerganov/llama.cpp#3957
	/// Assume this code is always running on little-endian
	/// but wants to be able to parse both endianness
	const version = r.view.getUint32(4, true);
	if (version & 65535) {
		return [true, version];
	} else {
		return [false, r.view.getUint32(4, false)];
	}
})();
```

from ggerganov/llama.cpp#3957 and thanks to
@ggerganov
[comment](https://github.com/huggingface/huggingface.js/pull/540/files#r1521103912)
  • Loading branch information
julien-c committed Mar 13, 2024
1 parent 8ec3643 commit 12d28d9
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 30 deletions.
42 changes: 42 additions & 0 deletions packages/gguf/src/gguf.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,17 @@ const URL_MISTRAL_7B =
"https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q5_K_M.gguf";
const URL_GEMMA_2B = "https://huggingface.co/lmstudio-ai/gemma-2b-it-GGUF/resolve/main/gemma-2b-it-q4_k_m.gguf";

// GGUF fixture stored in big-endian byte order (the spec below expects version 3);
// used to exercise the parser's endianness detection.
const URL_BIG_ENDIAN =
	"https://huggingface.co/ggml-org/models/resolve/main/bert-bge-small/ggml-model-f16-big-endian.gguf";

describe("gguf", () => {
it("should parse a llama2 7b", async () => {
const { metadata, tensorInfos } = await gguf(URL_LLAMA);

/// metadata

expect(metadata).toMatchObject({
// partial list, do not exhaustively list (tokenizer is quite big for instance)
version: 2,
tensor_count: 291n,
kv_count: 19n,
Expand Down Expand Up @@ -48,6 +52,7 @@ describe("gguf", () => {
]);

/// Tensor infos
/// By convention we test the first and last tensor.

expect(tensorInfos.length).toEqual(291);
expect(tensorInfos[0]).toMatchObject({
Expand Down Expand Up @@ -136,4 +141,41 @@ describe("gguf", () => {
dtype: GGMLQuantizationType.F32,
});
});

	it("should parse a big-endian file", async () => {
		// End-to-end check: metadata and tensor infos of a big-endian GGUF
		// fixture must decode to the known contents of the model.
		const { metadata, tensorInfos } = await gguf(URL_BIG_ENDIAN);

		/// metadata

		expect(metadata).toMatchObject({
			version: 3,
			tensor_count: 197n,
			kv_count: 23n,
			"general.architecture": "bert",
			"general.file_type": GGMLQuantizationType.F16,
			"general.name": "bge-small-en-v1.5",
			"bert.attention.causal": false,
			"bert.attention.head_count": 12,
			"bert.attention.layer_norm_epsilon": 9.999999960041972e-13,
			"bert.block_count": 12,
			"bert.context_length": 512,
			"bert.embedding_length": 384,
			"bert.feed_forward_length": 1536,
			"bert.pooling_type": 2,
		});

		/// Tensor infos
		/// By convention we test the first and last tensor.

		expect(tensorInfos.length).toEqual(197);
		expect(tensorInfos[0]).toMatchObject({
			name: "token_embd_norm.bias",
			shape: [384n],
			dtype: GGMLQuantizationType.F32,
		});
		expect(tensorInfos[tensorInfos.length - 1]).toMatchObject({
			name: "blk.9.ffn_down.weight",
			shape: [1536n, 384n],
			dtype: GGMLQuantizationType.F16,
		});
	});
});
91 changes: 61 additions & 30 deletions packages/gguf/src/gguf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[];
type Version = 1 | 2 | 3;
/** Type guard narrowing a raw u32 read from the header to a supported GGUF version. */
const isVersion = (version: number): version is Version => version === 1 || version === 2 || version === 3;

/**
 * Must be `GGUF` at the byte level: `0x47` `0x47` `0x55` `0x46`.
 * A u32 read of these bytes on a little-endian host happens to equal
 * 0x46554747, so a naive `getUint32(0, true)` comparison works by accident;
 * we deliberately compare byte-by-byte instead so the check is explicit and
 * independent of both host and file endianness.
 */
const ggufMagicNumber = new Uint8Array([0x47, 0x47, 0x55, 0x46]); /// "GGUF"

export enum GGMLQuantizationType {
Expand Down Expand Up @@ -112,67 +118,72 @@ class RangeView {
}
}

function readVersionedSize(view: DataView, byteOffset: number, version: Version): bigint {
/**
* Note: A good article about binary data in JS: https://javascript.info/arraybuffer-binary-arrays
*/

function readVersionedSize(view: DataView, byteOffset: number, version: Version, littleEndian: boolean): bigint {
switch (version) {
case 1: {
const n = view.getUint32(byteOffset, true);
const n = view.getUint32(byteOffset, littleEndian);
return BigInt(n);
}
case 2:
case 3: {
return view.getBigUint64(byteOffset, true);
return view.getBigUint64(byteOffset, littleEndian);
}
}
}

function readString(view: DataView, offset: number): { value: string; length: number } {
const length = view.getBigUint64(offset, true);
function readString(view: DataView, offset: number, littleEndian: boolean): { value: string; length: number } {
const length = view.getBigUint64(offset, littleEndian);
const value = new TextDecoder().decode(view.buffer.slice(offset + 8, offset + 8 + Number(length)));
return { value, length: 8 + Number(length) };
}

function readMetadataValue(
view: DataView,
type: GGUFValueType,
offset: number
offset: number,
littleEndian: boolean
): { value: MetadataValue; length: number } {
switch (type) {
case GGUFValueType.UINT8:
return { value: view.getUint8(offset), length: 1 };
case GGUFValueType.INT8:
return { value: view.getInt8(offset), length: 1 };
case GGUFValueType.UINT16:
return { value: view.getUint16(offset, true), length: 2 };
return { value: view.getUint16(offset, littleEndian), length: 2 };
case GGUFValueType.INT16:
return { value: view.getInt16(offset, true), length: 2 };
return { value: view.getInt16(offset, littleEndian), length: 2 };
case GGUFValueType.UINT32:
return { value: view.getUint32(offset, true), length: 4 };
return { value: view.getUint32(offset, littleEndian), length: 4 };
case GGUFValueType.INT32:
return { value: view.getInt32(offset, true), length: 4 };
return { value: view.getInt32(offset, littleEndian), length: 4 };
case GGUFValueType.FLOAT32:
return { value: view.getFloat32(offset, true), length: 4 };
return { value: view.getFloat32(offset, littleEndian), length: 4 };
case GGUFValueType.BOOL:
return { value: view.getUint8(offset) !== 0, length: 1 };
case GGUFValueType.STRING:
return readString(view, offset);
return readString(view, offset, littleEndian);
case GGUFValueType.ARRAY: {
const arrayType = view.getUint32(offset, true);
const arrayLength = view.getBigUint64(offset + 4, true);
const arrayType = view.getUint32(offset, littleEndian);
const arrayLength = view.getBigUint64(offset + 4, littleEndian);
let length = 12;
const arrayValues: MetadataValue[] = [];
for (let i = 0; i < arrayLength; i++) {
const { value, length: _length } = readMetadataValue(view, arrayType, offset + length);
const { value, length: _length } = readMetadataValue(view, arrayType, offset + length, littleEndian);
arrayValues.push(value);
length += _length;
}
return { value: arrayValues, length };
}
case GGUFValueType.UINT64:
return { value: view.getBigUint64(offset, true), length: 8 };
return { value: view.getBigUint64(offset, littleEndian), length: 8 };
case GGUFValueType.INT64:
return { value: view.getBigInt64(offset, true), length: 8 };
return { value: view.getBigInt64(offset, littleEndian), length: 8 };
case GGUFValueType.FLOAT64:
return { value: view.getFloat64(offset, true), length: 8 };
return { value: view.getFloat64(offset, littleEndian), length: 8 };
}
}

Expand Down Expand Up @@ -207,16 +218,36 @@ export async function gguf(
const r = new RangeView(url, params);
await r.fetchChunk();

if (r.view.getUint32(0, true) !== new DataView(ggufMagicNumber.buffer).getUint32(0, true)) {
const checkBuffer = (buffer: Uint8Array, header: Uint8Array) => {
for (let i = 0; i < header.length; i++) {
if (header[i] !== buffer[i]) {
return false;
}
}
return true;
};

if (!checkBuffer(new Uint8Array(r.view.buffer.slice(0, 4)), ggufMagicNumber)) {
throw new Error("not a valid gguf file: not starting with GGUF magic number");
}

const version = r.view.getUint32(4, true);
const [littleEndian, version] = (() => {
/// https://github.com/ggerganov/llama.cpp/issues/3957
/// Assume this code is always running on little-endian
/// but wants to be able to parse both endianness
const version = r.view.getUint32(4, true);
if (version & 65535) {
return [true, version];
} else {
return [false, r.view.getUint32(4, false)];
}
})();

if (!isVersion(version)) {
throw new Error(`not a valid gguf file: unsupported version "${version}"`);
}
const tensorCount = readVersionedSize(r.view, 8, version);
const numKv = readVersionedSize(r.view, 16, version);
const tensorCount = readVersionedSize(r.view, 8, version, littleEndian);
const numKv = readVersionedSize(r.view, 16, version, littleEndian);

const metadata: GGUFMetadata = {
version,
Expand All @@ -230,11 +261,11 @@ export async function gguf(
await r.fetchChunkIfNeeded(offset);

// read key
const keyResult = readString(r.view, offset);
const keyResult = readString(r.view, offset, littleEndian);
offset += keyResult.length;

// read value type
const valueType = r.view.getUint32(offset, true);
const valueType = r.view.getUint32(offset, littleEndian);
offset += 4;

if (!isGGUFValueType(valueType)) {
Expand All @@ -245,7 +276,7 @@ export async function gguf(
while (!valueResult) {
try {
// read value
valueResult = readMetadataValue(r.view, valueType, offset);
valueResult = readMetadataValue(r.view, valueType, offset, littleEndian);
} catch (err) {
if (err instanceof RangeError) {
await r.fetchChunk();
Expand All @@ -264,21 +295,21 @@ export async function gguf(
await r.fetchChunkIfNeeded(offset);

// read tensor name
const keyResult = readString(r.view, offset);
const keyResult = readString(r.view, offset, littleEndian);
offset += keyResult.length;

const nDims = r.view.getUint32(offset, true);
const nDims = r.view.getUint32(offset, littleEndian);
offset += 4;

const shape: bigint[] = [];
for (let dim = 0; dim < nDims; dim++) {
shape.push(r.view.getBigUint64(offset, true));
shape.push(r.view.getBigUint64(offset, littleEndian));
offset += 8;
}

const type = r.view.getUint32(offset, true);
const type = r.view.getUint32(offset, littleEndian);
offset += 4;
const tensorOffset = r.view.getBigUint64(offset, true);
const tensorOffset = r.view.getBigUint64(offset, littleEndian);
offset += 8;

tensorInfos.push({
Expand Down

0 comments on commit 12d28d9

Please sign in to comment.