From fba88a40bcc3e53f06c4fd4506d2963d15404da1 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Fri, 8 Mar 2024 15:02:09 +0100 Subject: [PATCH 01/14] scaffolding from CONTRIBUTING.md --- .github/workflows/gguf-publish.yml | 63 ++++++++++++++++++++++++++++++ packages/gguf/.prettierignore | 4 ++ packages/gguf/README.md | 10 +++++ packages/gguf/package.json | 51 ++++++++++++++++++++++++ packages/gguf/pnpm-lock.yaml | 1 + packages/gguf/src/gguf.spec.ts | 0 packages/gguf/src/gguf.ts | 0 packages/gguf/src/index.ts | 0 packages/gguf/tsconfig.json | 18 +++++++++ packages/gguf/tsup.config.ts | 26 ++++++++++++ pnpm-workspace.yaml | 1 + 11 files changed, 174 insertions(+) create mode 100644 .github/workflows/gguf-publish.yml create mode 100644 packages/gguf/.prettierignore create mode 100644 packages/gguf/README.md create mode 100644 packages/gguf/package.json create mode 100644 packages/gguf/pnpm-lock.yaml create mode 100644 packages/gguf/src/gguf.spec.ts create mode 100644 packages/gguf/src/gguf.ts create mode 100644 packages/gguf/src/index.ts create mode 100644 packages/gguf/tsconfig.json create mode 100644 packages/gguf/tsup.config.ts diff --git a/.github/workflows/gguf-publish.yml b/.github/workflows/gguf-publish.yml new file mode 100644 index 000000000..7091e0dcc --- /dev/null +++ b/.github/workflows/gguf-publish.yml @@ -0,0 +1,63 @@ +name: GGUF - Version and Release + +on: + workflow_dispatch: + inputs: + newversion: + type: choice + description: "Semantic Version Bump Type" + default: patch + options: + - patch + - minor + - major + +concurrency: + group: "push-to-main" + +defaults: + run: + working-directory: packages/gguf + +jobs: + version_and_release: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + # Needed to push the tag and the commit on the main branch, otherwise we get: + # > Run git push --follow-tags + # remote: error: GH006: Protected branch update failed for refs/heads/main. 
+ # remote: error: Changes must be made through a pull request. Required status check "lint" is expected. + token: ${{ secrets.BOT_ACCESS_TOKEN }} + - run: corepack enable + - uses: actions/setup-node@v3 + with: + node-version: "18" + cache: "pnpm" + cache-dependency-path: | + packages/gguf/pnpm-lock.yaml + # setting a registry enables the NODE_AUTH_TOKEN env variable where we can set an npm token. REQUIRED + registry-url: "https://registry.npmjs.org" + - run: pnpm install + - run: git config --global user.name machineuser + - run: git config --global user.email infra+machineuser@huggingface.co + - run: | + PACKAGE_VERSION=$(node -p "require('./package.json').version") + BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')") + # Update package.json with the new version + node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" + git commit . -m "🔖 @huggingface/gguf $BUMPED_VERSION" + git tag "gguf-v$BUMPED_VERSION" + - run: pnpm publish --no-git-checks . + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + - run: git pull --rebase && git push --follow-tags + # hack - reuse actions/setup-node@v3 just to set a new registry + - uses: actions/setup-node@v3 + with: + node-version: "18" + registry-url: "https://npm.pkg.github.com" + - run: pnpm publish --no-git-checks . 
+ env: + NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/packages/gguf/.prettierignore b/packages/gguf/.prettierignore new file mode 100644 index 000000000..cac0c6949 --- /dev/null +++ b/packages/gguf/.prettierignore @@ -0,0 +1,4 @@ +pnpm-lock.yaml +# In order to avoid code samples to have tabs, they don't display well on npm +README.md +dist \ No newline at end of file diff --git a/packages/gguf/README.md b/packages/gguf/README.md new file mode 100644 index 000000000..0ca108483 --- /dev/null +++ b/packages/gguf/README.md @@ -0,0 +1,10 @@ +# `@huggingface/gguf` + +A GGUF parser that works on remotely hosted files. + +## Spec + +https://github.com/ggerganov/ggml/blob/master/docs/gguf.md + +## Acknowledgements + diff --git a/packages/gguf/package.json b/packages/gguf/package.json new file mode 100644 index 000000000..d82b2c6a1 --- /dev/null +++ b/packages/gguf/package.json @@ -0,0 +1,51 @@ +{ + "name": "@huggingface/gguf", + "packageManager": "pnpm@8.10.5", + "version": "0.0.1", + "description": "a GGUF parser that works on remotely hosted files", + "repository": "https://github.com/huggingface/huggingface.js.git", + "publishConfig": { + "access": "public" + }, + "main": "./dist/index.js", + "module": "./dist/index.mjs", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "require": "./dist/index.js", + "import": "./dist/index.mjs" + } + }, + "browser": { + "./dist/index.js": "./dist/browser/index.js", + "./dist/index.mjs": "./dist/browser/index.mjs" + }, + "engines": { + "node": ">=18" + }, + "source": "index.ts", + "scripts": { + "lint": "eslint --quiet --fix --ext .cjs,.ts .", + "lint:check": "eslint --ext .cjs,.ts .", + "format": "prettier --write .", + "format:check": "prettier --check .", + "prepublishOnly": "pnpm run build", + "build": "tsup src/index.ts --format cjs,esm --clean --dts", + "test": "vitest run", + "check": "tsc" + }, + "files": [ + "dist", + "src", + "tsconfig.json" + ], + "keywords": [ + 
"huggingface", + "hub", + "gguf" + ], + "author": "Hugging Face", + "license": "MIT", + "devDependencies": {} +} diff --git a/packages/gguf/pnpm-lock.yaml b/packages/gguf/pnpm-lock.yaml new file mode 100644 index 000000000..7a06cc796 --- /dev/null +++ b/packages/gguf/pnpm-lock.yaml @@ -0,0 +1 @@ +lockfileVersion: '6.0' diff --git a/packages/gguf/src/gguf.spec.ts b/packages/gguf/src/gguf.spec.ts new file mode 100644 index 000000000..e69de29bb diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts new file mode 100644 index 000000000..e69de29bb diff --git a/packages/gguf/src/index.ts b/packages/gguf/src/index.ts new file mode 100644 index 000000000..e69de29bb diff --git a/packages/gguf/tsconfig.json b/packages/gguf/tsconfig.json new file mode 100644 index 000000000..37823efde --- /dev/null +++ b/packages/gguf/tsconfig.json @@ -0,0 +1,18 @@ +{ + "compilerOptions": { + "allowSyntheticDefaultImports": true, + "lib": ["ES2022", "DOM"], + "module": "CommonJS", + "moduleResolution": "node", + "target": "ES2022", + "forceConsistentCasingInFileNames": true, + "strict": true, + "noImplicitAny": true, + "strictNullChecks": true, + "skipLibCheck": true, + "noImplicitOverride": true, + "outDir": "./dist" + }, + "include": ["src"], + "exclude": ["dist"] +} diff --git a/packages/gguf/tsup.config.ts b/packages/gguf/tsup.config.ts new file mode 100644 index 000000000..6203927ca --- /dev/null +++ b/packages/gguf/tsup.config.ts @@ -0,0 +1,26 @@ +import type { Options } from "tsup"; + +const baseConfig: Options = { + entry: ["./index.ts"], + format: ["cjs", "esm"], + outDir: "dist", + clean: true, + dts: { + resolve: true, + }, +}; + +const nodeConfig: Options = { + ...baseConfig, + platform: "node", +}; + +const browserConfig: Options = { + ...baseConfig, + platform: "browser", + target: "es2018", + splitting: true, + outDir: "dist/browser", +}; + +export default [nodeConfig, browserConfig]; diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index c28561b18..fe48f2587 
100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -6,5 +6,6 @@ packages: - "packages/agents" - "packages/languages" - "packages/tasks" + - "packages/gguf" - "packages/jinja" - "packages/widgets" From e624cb046cc8eac42641ef31921ac5f8e561600c Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Fri, 8 Mar 2024 19:05:36 +0100 Subject: [PATCH 02/14] initial import --- packages/gguf/README.md | 2 + packages/gguf/src/gguf.spec.ts | 139 +++++++++++++++++++++ packages/gguf/src/gguf.ts | 221 +++++++++++++++++++++++++++++++++ packages/gguf/src/index.ts | 1 + 4 files changed, 363 insertions(+) diff --git a/packages/gguf/README.md b/packages/gguf/README.md index 0ca108483..cf27353e6 100644 --- a/packages/gguf/README.md +++ b/packages/gguf/README.md @@ -6,5 +6,7 @@ A GGUF parser that works on remotely hosted files. https://github.com/ggerganov/ggml/blob/master/docs/gguf.md +Reference implementation (Python): https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/gguf/gguf_reader.py + ## Acknowledgements diff --git a/packages/gguf/src/gguf.spec.ts b/packages/gguf/src/gguf.spec.ts index e69de29bb..6c19c4a34 100644 --- a/packages/gguf/src/gguf.spec.ts +++ b/packages/gguf/src/gguf.spec.ts @@ -0,0 +1,139 @@ +import { describe, expect, it } from "vitest"; +import { GGMLQuantizationType, gguf } from "./gguf"; + +const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q2_K.gguf"; +const URL_MISTRAL_7B = + "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q5_K_M.gguf"; +const URL_GEMMA_2B = "https://huggingface.co/lmstudio-ai/gemma-2b-it-GGUF/resolve/main/gemma-2b-it-q4_k_m.gguf"; + +describe("gguf", () => { + it("should parse a llama2 7b", async () => { + const { metadata, tensorInfos } = await gguf(URL_LLAMA); + + /// metadata + + expect(metadata).toMatchObject({ + version: 2, + tensor_count: 291n, + kv_count: 19n, + "general.architecture": "llama", + "general.file_type": 
10, + "general.name": "LLaMA v2", + "general.quantization_version": 2, + "llama.attention.head_count": 32, + "llama.attention.head_count_kv": 32, + "llama.attention.layer_norm_rms_epsilon": 9.999999974752427e-7, + "llama.block_count": 32, + "llama.context_length": 4096, + "llama.embedding_length": 4096, + "llama.feed_forward_length": 11008, + "llama.rope.dimension_count": 128, + }); + + const tokens = metadata["tokenizer.ggml.tokens"]; + if (!Array.isArray(tokens)) { + throw new Error(); + } + expect(tokens.slice(0, 10)).toEqual([ + "", + "", + "", + "<0x00>", + "<0x01>", + "<0x02>", + "<0x03>", + "<0x04>", + "<0x05>", + "<0x06>", + ]); + + /// Tensor infos + + expect(tensorInfos.length).toEqual(291); + expect(tensorInfos[0]).toMatchObject({ + name: "token_embd.weight", + shape: [4096n, 32000n], + type: GGMLQuantizationType.Q2_K, + }); + expect(tensorInfos[tensorInfos.length - 1]).toMatchObject({ + name: "output_norm.weight", + shape: [4096n], + type: GGMLQuantizationType.F32, + }); + }); + + it("should parse a mistral 7b", async () => { + const { metadata, tensorInfos } = await gguf(URL_MISTRAL_7B); + + /// metadata + + expect(metadata).toMatchObject({ + version: 3, + tensor_count: 291n, + kv_count: 24n, + "general.architecture": "llama", + "general.file_type": 17, + "general.name": "mistralai_mistral-7b-instruct-v0.2", + "general.quantization_version": 2, + "llama.attention.head_count": 32, + "llama.attention.head_count_kv": 8, + "llama.attention.layer_norm_rms_epsilon": 0.000009999999747378752, + "llama.block_count": 32, + "llama.context_length": 32768, + "llama.embedding_length": 4096, + "llama.feed_forward_length": 14336, + "llama.rope.dimension_count": 128, + }); + + /// Tensor infos + + expect(tensorInfos.length).toEqual(291); + expect(tensorInfos[0]).toMatchObject({ + name: "token_embd.weight", + shape: [4096n, 32000n], + type: GGMLQuantizationType.Q5_K, + }); + expect(tensorInfos[tensorInfos.length - 1]).toMatchObject({ + name: "output.weight", + shape: 
[4096n, 32000n], + type: GGMLQuantizationType.Q6_K, + }); + }); + + it("should parse a gemma 2b", async () => { + const { metadata, tensorInfos } = await gguf(URL_GEMMA_2B); + + /// metadata + + expect(metadata).toMatchObject({ + version: 3, + tensor_count: 164n, + kv_count: 21n, + "general.architecture": "gemma", + "general.file_type": GGMLQuantizationType.Q8_K, // 15 + "general.name": "gemma-2b-it", + "general.quantization_version": 2, + "gemma.attention.head_count": 8, + "gemma.attention.head_count_kv": 1, + "gemma.attention.layer_norm_rms_epsilon": 9.999999974752427e-7, + "gemma.block_count": 18, + "gemma.context_length": 8192, + "gemma.embedding_length": 2048, + "gemma.feed_forward_length": 16384, + }); + + /// Tensor infos + + expect(tensorInfos.length).toEqual(164); + expect(tensorInfos[0]).toMatchObject({ + name: "token_embd.weight", + shape: [2048n, 256128n], + type: GGMLQuantizationType.Q4_K, + }); + expect(tensorInfos[tensorInfos.length - 1]).toMatchObject({ + name: "blk.9.ffn_norm.weight", + shape: [2048n], + type: GGMLQuantizationType.F32, + }); + }); +}); diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index e69de29bb..3b468610e 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -0,0 +1,221 @@ +type MetadataBaseValue = string | number | bigint | boolean; +type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[]; /// recursive as arrays can be nested. 
+ +type Version = 1 | 2 | 3; +const isVersion = (version: number): version is Version => version === 1 || version === 2 || version === 3; + +const ggufMagicNumber = [0x47, 0x47, 0x55, 0x46]; /// "GGUF" + +export enum GGMLQuantizationType { + F32 = 0, + F16 = 1, + Q4_0 = 2, + Q4_1 = 3, + Q5_0 = 6, + Q5_1 = 7, + Q8_0 = 8, + Q8_1 = 9, + Q2_K = 10, + Q3_K = 11, + Q4_K = 12, + Q5_K = 13, + Q6_K = 14, + Q8_K = 15, + IQ2_XXS = 16, + IQ2_XS = 17, + IQ3_XXS = 18, + IQ1_S = 19, + IQ4_NL = 20, + IQ3_S = 21, + IQ2_S = 22, + IQ4_XS = 23, +} + +enum GGUFValueType { + UINT8 = 0, + INT8 = 1, + UINT16 = 2, + INT16 = 3, + UINT32 = 4, + INT32 = 5, + FLOAT32 = 6, + BOOL = 7, + STRING = 8, + ARRAY = 9, + UINT64 = 10, + INT64 = 11, + FLOAT64 = 12, +} +function isGGUFValueType(n: number): n is GGUFValueType { + return typeof GGUFValueType[n] === "string"; +} + +const HTTP_CHUNK_SIZE = 60 * 10 ** 6; + +async function rangeFromUrl(url: string, range: [number, number]): Promise { + const buf = await ( + await fetch(url, { + headers: { + Range: `bytes=${range[0]}-${range[1]}`, + }, + }) + ).arrayBuffer(); + return new DataView(buf); +} + +function readVersionedSize(view: DataView, byteOffset: number, version: Version): bigint { + switch (version) { + case 1: { + const n = view.getUint32(byteOffset, true); + return BigInt(n); + } + case 2: + case 3: { + return view.getBigUint64(byteOffset, true); + } + } +} + +function readString(view: DataView, offset: number): { value: string; newOffset: number } { + const length = view.getBigUint64(offset, true); + const value = new TextDecoder().decode(view.buffer.slice(offset + 8, offset + 8 + Number(length))); + return { value, newOffset: offset + 8 + Number(length) }; +} + +function readMetadataValue( + view: DataView, + type: GGUFValueType, + offset: number +): { value: MetadataValue; newOffset: number } { + switch (type) { + case GGUFValueType.UINT8: + return { value: view.getUint8(offset), newOffset: offset + 1 }; + case GGUFValueType.INT8: + return 
{ value: view.getInt8(offset), newOffset: offset + 1 }; + case GGUFValueType.UINT16: + return { value: view.getUint16(offset, true), newOffset: offset + 2 }; + case GGUFValueType.INT16: + return { value: view.getInt16(offset, true), newOffset: offset + 2 }; + case GGUFValueType.UINT32: + return { value: view.getUint32(offset, true), newOffset: offset + 4 }; + case GGUFValueType.INT32: + return { value: view.getInt32(offset, true), newOffset: offset + 4 }; + case GGUFValueType.FLOAT32: + return { value: view.getFloat32(offset, true), newOffset: offset + 4 }; + case GGUFValueType.BOOL: + return { value: view.getUint8(offset) !== 0, newOffset: offset + 1 }; + case GGUFValueType.STRING: + return readString(view, offset); + case GGUFValueType.ARRAY: { + const arrayType = view.getUint32(offset, true); + const arrayLength = view.getBigUint64(offset + 4, true); + let arrayOffset = offset + 12; + const arrayValues: MetadataValue[] = []; + for (let i = 0; i < arrayLength; i++) { + const { value, newOffset } = readMetadataValue(view, arrayType, arrayOffset); + arrayValues.push(value); + arrayOffset = newOffset; + } + return { value: arrayValues, newOffset: arrayOffset }; + } + case GGUFValueType.UINT64: + return { value: view.getBigUint64(offset, true), newOffset: offset + 8 }; + case GGUFValueType.INT64: + return { value: view.getBigInt64(offset, true), newOffset: offset + 8 }; + case GGUFValueType.FLOAT64: + return { value: view.getFloat64(offset, true), newOffset: offset + 8 }; + } +} + +export type GGUFMetadata = { + version: Version; + tensor_count: bigint; + kv_count: bigint; +} & Record; + +export interface GGUFTensorInfo { + name: string; + n_dims: number; + shape: bigint[]; + type: GGMLQuantizationType; + offset: bigint; +} + +export interface GGUFParseOutput { + metadata: GGUFMetadata; + tensorInfos: GGUFTensorInfo[]; +} + +export async function gguf(url: string): Promise { + const view = await rangeFromUrl(url, [0, HTTP_CHUNK_SIZE - 1]); + if (view.getUint32(0, 
true) !== Buffer.from(ggufMagicNumber).readInt32LE()) { + throw new Error("not a valid gguf file: no gguf magic number"); + } + + const version = view.getUint32(4, true); + if (!isVersion(version)) { + throw new Error("not a valid gguf file: unsupported version"); + } + const tensorCount = readVersionedSize(view, 8, version); + const numKv = readVersionedSize(view, 16, version); + + const metadata: GGUFMetadata = { + version, + tensor_count: tensorCount, + kv_count: numKv, + }; + // initial offset after header + let offset = 24; + + for (let i = 0; i < numKv; i++) { + // read key + const keyResult = readString(view, offset); + offset = keyResult.newOffset; + + // read value type + const valueType = view.getUint32(offset, true); + offset += 4; + + if (!isGGUFValueType(valueType)) { + throw new Error("Unsupported metadata type: " + valueType); + } + + // read value + const valueResult = readMetadataValue(view, valueType, offset); + offset = valueResult.newOffset; + + metadata[keyResult.value] = valueResult.value; + } + + const tensorInfos: GGUFTensorInfo[] = []; + + for (let i = 0; i < tensorCount; i++) { + // read tensor name + const keyResult = readString(view, offset); + offset = keyResult.newOffset; + + const nDims = view.getUint32(offset, true); + offset += 4; + + const shape: bigint[] = []; + for (let dim = 0; dim < nDims; dim++) { + shape.push(view.getBigUint64(offset, true)); + offset += 8; + } + + const type = view.getUint32(offset, true); + offset += 4; + const tensorOffset = view.getBigUint64(offset, true); + offset += 8; + + tensorInfos.push({ + name: keyResult.value, + n_dims: nDims, + shape, + type, + offset: tensorOffset, + }); + } + + return { metadata, tensorInfos }; +} diff --git a/packages/gguf/src/index.ts b/packages/gguf/src/index.ts index e69de29bb..bc0ba0958 100644 --- a/packages/gguf/src/index.ts +++ b/packages/gguf/src/index.ts @@ -0,0 +1 @@ +export * from "./gguf"; From 1877baba139077bf61a8293a09987c37b73adf45 Mon Sep 17 00:00:00 2001 From: 
Julien Chaumond Date: Fri, 8 Mar 2024 20:15:42 +0100 Subject: [PATCH 03/14] fetch ranges of HTTP data when needed --- packages/gguf/src/gguf.ts | 92 +++++++++++++++++++++++++++++---------- 1 file changed, 68 insertions(+), 24 deletions(-) diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index 3b468610e..914b9b413 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -50,17 +50,55 @@ function isGGUFValueType(n: number): n is GGUFValueType { return typeof GGUFValueType[n] === "string"; } -const HTTP_CHUNK_SIZE = 60 * 10 ** 6; - -async function rangeFromUrl(url: string, range: [number, number]): Promise { - const buf = await ( - await fetch(url, { - headers: { - Range: `bytes=${range[0]}-${range[1]}`, - }, - }) - ).arrayBuffer(); - return new DataView(buf); +const HTTP_CHUNK_SIZE = 2 * 10 ** 6; /// 2MB +const HTTP_DATA_LEEWAY = 1 * 10 ** 6; /// 1MB + +/** + * Internal stateful instance to fetch ranges of HTTP data when needed + */ +class RangeView { + private chunk: number; + private buffer: ArrayBuffer; + + readonly view: DataView; + + constructor(public url: string) { + this.chunk = 0; + /// TODO(fix typing) + // eslint-disable-next-line @typescript-eslint/ban-ts-comment + // @ts-ignore + this.buffer = new ArrayBuffer(0, { maxByteLength: 50 * 10 ** 6 }); + this.view = new DataView(this.buffer); + } + /** + * Fetch a new chunk from the server + */ + async fetchChunk() { + const range = [this.chunk * HTTP_CHUNK_SIZE, (this.chunk + 1) * HTTP_CHUNK_SIZE - 1]; + const buf = new Uint8Array( + await ( + await fetch(this.url, { + headers: { + Range: `bytes=${range[0]}-${range[1]}`, + }, + }) + ).arrayBuffer() + ); + /// TODO(fix typing) + // eslint-disable-next-line @typescript-eslint/ban-ts-comment + // @ts-ignore + this.buffer.resize((this.chunk + 1) * HTTP_CHUNK_SIZE); + new Uint8Array(this.buffer).set(buf, this.chunk * HTTP_CHUNK_SIZE); + this.chunk += 1; + } + /** + * Check whether we need to fetch a new chunk + */ + async 
check(offset: number) { + if (this.view.byteLength - offset < HTTP_DATA_LEEWAY) { + await this.fetchChunk(); + } + } } function readVersionedSize(view: DataView, byteOffset: number, version: Version): bigint { @@ -147,17 +185,19 @@ export interface GGUFParseOutput { } export async function gguf(url: string): Promise { - const view = await rangeFromUrl(url, [0, HTTP_CHUNK_SIZE - 1]); - if (view.getUint32(0, true) !== Buffer.from(ggufMagicNumber).readInt32LE()) { + const r = new RangeView(url); + await r.fetchChunk(); + + if (r.view.getUint32(0, true) !== Buffer.from(ggufMagicNumber).readInt32LE()) { throw new Error("not a valid gguf file: no gguf magic number"); } - const version = view.getUint32(4, true); + const version = r.view.getUint32(4, true); if (!isVersion(version)) { throw new Error("not a valid gguf file: unsupported version"); } - const tensorCount = readVersionedSize(view, 8, version); - const numKv = readVersionedSize(view, 16, version); + const tensorCount = readVersionedSize(r.view, 8, version); + const numKv = readVersionedSize(r.view, 16, version); const metadata: GGUFMetadata = { version, @@ -168,12 +208,14 @@ export async function gguf(url: string): Promise { let offset = 24; for (let i = 0; i < numKv; i++) { + await r.check(offset); + // read key - const keyResult = readString(view, offset); + const keyResult = readString(r.view, offset); offset = keyResult.newOffset; // read value type - const valueType = view.getUint32(offset, true); + const valueType = r.view.getUint32(offset, true); offset += 4; if (!isGGUFValueType(valueType)) { @@ -181,7 +223,7 @@ export async function gguf(url: string): Promise { } // read value - const valueResult = readMetadataValue(view, valueType, offset); + const valueResult = readMetadataValue(r.view, valueType, offset); offset = valueResult.newOffset; metadata[keyResult.value] = valueResult.value; @@ -190,22 +232,24 @@ export async function gguf(url: string): Promise { const tensorInfos: GGUFTensorInfo[] = []; for 
(let i = 0; i < tensorCount; i++) { + await r.check(offset); + // read tensor name - const keyResult = readString(view, offset); + const keyResult = readString(r.view, offset); offset = keyResult.newOffset; - const nDims = view.getUint32(offset, true); + const nDims = r.view.getUint32(offset, true); offset += 4; const shape: bigint[] = []; for (let dim = 0; dim < nDims; dim++) { - shape.push(view.getBigUint64(offset, true)); + shape.push(r.view.getBigUint64(offset, true)); offset += 8; } - const type = view.getUint32(offset, true); + const type = r.view.getUint32(offset, true); offset += 4; - const tensorOffset = view.getBigUint64(offset, true); + const tensorOffset = r.view.getBigUint64(offset, true); offset += 8; tensorInfos.push({ From abfe90994043cfab1f599cb095edd9678958291b Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Fri, 8 Mar 2024 20:30:09 +0100 Subject: [PATCH 04/14] Yay! it's working now --- packages/gguf/src/gguf.ts | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index 914b9b413..2d4ea6230 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -51,7 +51,7 @@ function isGGUFValueType(n: number): n is GGUFValueType { } const HTTP_CHUNK_SIZE = 2 * 10 ** 6; /// 2MB -const HTTP_DATA_LEEWAY = 1 * 10 ** 6; /// 1MB +const HTTP_DATA_LEEWAY = 5 * 10 ** 5; /// 500kb /** * Internal stateful instance to fetch ranges of HTTP data when needed @@ -222,10 +222,20 @@ export async function gguf(url: string): Promise { throw new Error("Unsupported metadata type: " + valueType); } - // read value - const valueResult = readMetadataValue(r.view, valueType, offset); + let valueResult: { value: MetadataValue; newOffset: number } | undefined; + while (!valueResult) { + try { + // read value + valueResult = readMetadataValue(r.view, valueType, offset); + } catch (err) { + if (err instanceof RangeError) { + await r.fetchChunk(); + } else { + throw err; + } + } + } 
offset = valueResult.newOffset; - metadata[keyResult.value] = valueResult.value; } From 97f5d256d5122d2db1896a67cfd13ebef6bcfa27 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Fri, 8 Mar 2024 20:34:28 +0100 Subject: [PATCH 05/14] Acknowledgements --- packages/gguf/README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/packages/gguf/README.md b/packages/gguf/README.md index cf27353e6..d2e831107 100644 --- a/packages/gguf/README.md +++ b/packages/gguf/README.md @@ -8,5 +8,10 @@ https://github.com/ggerganov/ggml/blob/master/docs/gguf.md Reference implementation (Python): https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/gguf/gguf_reader.py -## Acknowledgements +## Acknowledgements & Inspirations + +- https://github.com/hyparam/hyllama by @platypii (MIT license) +- https://github.com/ahoylabs/gguf.js by @biw @dkogut1996 @spencekim (MIT license) + +🔥❤️ From 8e289220884447a5cf525e07db960212dc384144 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Fri, 8 Mar 2024 23:03:28 +0100 Subject: [PATCH 06/14] browser compat? 
--- packages/gguf/src/gguf.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index 2d4ea6230..f0b7dcc46 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -4,7 +4,7 @@ type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[]; type Version = 1 | 2 | 3; const isVersion = (version: number): version is Version => version === 1 || version === 2 || version === 3; -const ggufMagicNumber = [0x47, 0x47, 0x55, 0x46]; /// "GGUF" +const ggufMagicNumber = new Uint8Array([0x47, 0x47, 0x55, 0x46]); /// "GGUF" export enum GGMLQuantizationType { F32 = 0, @@ -188,8 +188,8 @@ export async function gguf(url: string): Promise { const r = new RangeView(url); await r.fetchChunk(); - if (r.view.getUint32(0, true) !== Buffer.from(ggufMagicNumber).readInt32LE()) { - throw new Error("not a valid gguf file: no gguf magic number"); + if (r.view.getUint32(0, true) !== new DataView(ggufMagicNumber.buffer).getUint32(0, true)) { + throw new Error("not a valid gguf file: not starting with GGUF magic number"); } const version = r.view.getUint32(4, true); From 312aef0710b29428453013a10d33ac5e3698e357 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Fri, 8 Mar 2024 23:07:23 +0100 Subject: [PATCH 07/14] `ArrayBuffer.resize` requires Node 20 --- .github/workflows/agents-publish.yml | 4 ++-- .github/workflows/gguf-publish.yml | 4 ++-- .github/workflows/hub-publish.yml | 4 ++-- .github/workflows/inference-publish.yml | 4 ++-- .github/workflows/jinja-publish.yml | 4 ++-- .github/workflows/languages-publish.yml | 4 ++-- .github/workflows/lint.yml | 2 +- .github/workflows/tasks-publish.yml | 4 ++-- .github/workflows/test.yml | 2 +- .github/workflows/widgets-publish.yml | 4 ++-- packages/gguf/package.json | 2 +- 11 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/workflows/agents-publish.yml b/.github/workflows/agents-publish.yml index cc29c1ebf..08108b585 100644 
--- a/.github/workflows/agents-publish.yml +++ b/.github/workflows/agents-publish.yml @@ -33,7 +33,7 @@ jobs: - run: corepack enable - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" cache: "pnpm" cache-dependency-path: | packages/agents/pnpm-lock.yaml @@ -59,7 +59,7 @@ jobs: # hack - reuse actions/setup-node@v3 just to set a new registry - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" registry-url: "https://npm.pkg.github.com" - run: pnpm publish --no-git-checks . env: diff --git a/.github/workflows/gguf-publish.yml b/.github/workflows/gguf-publish.yml index 7091e0dcc..dec6cd891 100644 --- a/.github/workflows/gguf-publish.yml +++ b/.github/workflows/gguf-publish.yml @@ -33,7 +33,7 @@ jobs: - run: corepack enable - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" cache: "pnpm" cache-dependency-path: | packages/gguf/pnpm-lock.yaml @@ -56,7 +56,7 @@ jobs: # hack - reuse actions/setup-node@v3 just to set a new registry - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" registry-url: "https://npm.pkg.github.com" - run: pnpm publish --no-git-checks . env: diff --git a/.github/workflows/hub-publish.yml b/.github/workflows/hub-publish.yml index 9229d2309..4a75fe7af 100644 --- a/.github/workflows/hub-publish.yml +++ b/.github/workflows/hub-publish.yml @@ -33,7 +33,7 @@ jobs: - run: corepack enable - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" cache: "pnpm" cache-dependency-path: | packages/hub/pnpm-lock.yaml @@ -59,7 +59,7 @@ jobs: # hack - reuse actions/setup-node@v3 just to set a new registry - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" registry-url: "https://npm.pkg.github.com" - run: pnpm publish --no-git-checks . 
env: diff --git a/.github/workflows/inference-publish.yml b/.github/workflows/inference-publish.yml index 4deb63fc3..bbf0a9c0f 100644 --- a/.github/workflows/inference-publish.yml +++ b/.github/workflows/inference-publish.yml @@ -33,7 +33,7 @@ jobs: - run: corepack enable - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" cache: "pnpm" cache-dependency-path: | packages/inference/pnpm-lock.yaml @@ -59,7 +59,7 @@ jobs: # hack - reuse actions/setup-node@v3 just to set a new registry - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" registry-url: "https://npm.pkg.github.com" - run: pnpm publish --no-git-checks . env: diff --git a/.github/workflows/jinja-publish.yml b/.github/workflows/jinja-publish.yml index d6495ca85..5057d4ed5 100644 --- a/.github/workflows/jinja-publish.yml +++ b/.github/workflows/jinja-publish.yml @@ -33,7 +33,7 @@ jobs: - run: corepack enable - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" cache: "pnpm" cache-dependency-path: | packages/jinja/pnpm-lock.yaml @@ -56,7 +56,7 @@ jobs: # hack - reuse actions/setup-node@v3 just to set a new registry - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" registry-url: "https://npm.pkg.github.com" - run: pnpm publish --no-git-checks . env: diff --git a/.github/workflows/languages-publish.yml b/.github/workflows/languages-publish.yml index a5d0a2d59..5dca90f89 100644 --- a/.github/workflows/languages-publish.yml +++ b/.github/workflows/languages-publish.yml @@ -33,7 +33,7 @@ jobs: - run: corepack enable - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" cache: "pnpm" cache-dependency-path: | packages/languages/pnpm-lock.yaml @@ -56,7 +56,7 @@ jobs: # hack - reuse actions/setup-node@v3 just to set a new registry - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" registry-url: "https://npm.pkg.github.com" - run: pnpm publish --no-git-checks . 
env: diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index e941ec8f4..6b43df9fa 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -29,7 +29,7 @@ jobs: - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" cache: "pnpm" cache-dependency-path: "**/pnpm-lock.yaml" - run: | diff --git a/.github/workflows/tasks-publish.yml b/.github/workflows/tasks-publish.yml index a9eaac92e..4c8b4567e 100644 --- a/.github/workflows/tasks-publish.yml +++ b/.github/workflows/tasks-publish.yml @@ -33,7 +33,7 @@ jobs: - run: corepack enable - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" cache: "pnpm" cache-dependency-path: | packages/tasks/pnpm-lock.yaml @@ -56,7 +56,7 @@ jobs: # hack - reuse actions/setup-node@v3 just to set a new registry - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" registry-url: "https://npm.pkg.github.com" - run: pnpm publish --no-git-checks . env: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 42a019081..70e1e1257 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -29,7 +29,7 @@ jobs: - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" cache: "pnpm" cache-dependency-path: "**/pnpm-lock.yaml" - run: | diff --git a/.github/workflows/widgets-publish.yml b/.github/workflows/widgets-publish.yml index f4652ffe9..7b431ad14 100644 --- a/.github/workflows/widgets-publish.yml +++ b/.github/workflows/widgets-publish.yml @@ -33,7 +33,7 @@ jobs: - run: corepack enable - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" cache: "pnpm" cache-dependency-path: | packages/widgets/pnpm-lock.yaml @@ -168,7 +168,7 @@ jobs: # hack - reuse actions/setup-node@v3 just to set a new registry - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" registry-url: "https://npm.pkg.github.com" - run: pnpm publish --no-git-checks . 
env: diff --git a/packages/gguf/package.json b/packages/gguf/package.json index d82b2c6a1..c4d214c04 100644 --- a/packages/gguf/package.json +++ b/packages/gguf/package.json @@ -22,7 +22,7 @@ "./dist/index.mjs": "./dist/browser/index.mjs" }, "engines": { - "node": ">=18" + "node": ">=20" }, "source": "index.ts", "scripts": { From e3030fd0d2b1fb5b3d395c6f23bc1dbf01e09726 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Mon, 11 Mar 2024 12:47:16 +0100 Subject: [PATCH 08/14] Update packages/gguf/src/gguf.ts Co-authored-by: Mishig --- packages/gguf/src/gguf.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index f0b7dcc46..e1833c752 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -194,7 +194,7 @@ export async function gguf(url: string): Promise { const version = r.view.getUint32(4, true); if (!isVersion(version)) { - throw new Error("not a valid gguf file: unsupported version"); + throw new Error(`not a valid gguf file: unsupported version "${version}"`); } const tensorCount = readVersionedSize(r.view, 8, version); const numKv = readVersionedSize(r.view, 16, version); From 4eac1cde8fecb7abb31bf22620ee1e0b6655c896 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Mon, 11 Mar 2024 12:51:09 +0100 Subject: [PATCH 09/14] set this as a const --- packages/gguf/src/gguf.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index e1833c752..b400a49d3 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -52,6 +52,7 @@ function isGGUFValueType(n: number): n is GGUFValueType { const HTTP_CHUNK_SIZE = 2 * 10 ** 6; /// 2MB const HTTP_DATA_LEEWAY = 5 * 10 ** 5; /// 500kb +const HTTP_TOTAL_MAX_SIZE = 50 * 10 ** 6; /// 50MB /** * Internal stateful instance to fetch ranges of HTTP data when needed @@ -67,7 +68,7 @@ class RangeView { /// TODO(fix typing) // eslint-disable-next-line 
@typescript-eslint/ban-ts-comment // @ts-ignore - this.buffer = new ArrayBuffer(0, { maxByteLength: 50 * 10 ** 6 }); + this.buffer = new ArrayBuffer(0, { maxByteLength: HTTP_TOTAL_MAX_SIZE }); this.view = new DataView(this.buffer); } /** From 9366d4ae27a0ff6e289b7f63280ff56d0c80a715 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Mon, 11 Mar 2024 12:53:10 +0100 Subject: [PATCH 10/14] review from @mishig25 --- packages/gguf/src/gguf.spec.ts | 12 ++++++------ packages/gguf/src/gguf.ts | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/packages/gguf/src/gguf.spec.ts b/packages/gguf/src/gguf.spec.ts index 6c19c4a34..a1fdf2b21 100644 --- a/packages/gguf/src/gguf.spec.ts +++ b/packages/gguf/src/gguf.spec.ts @@ -53,12 +53,12 @@ describe("gguf", () => { expect(tensorInfos[0]).toMatchObject({ name: "token_embd.weight", shape: [4096n, 32000n], - type: GGMLQuantizationType.Q2_K, + dtype: GGMLQuantizationType.Q2_K, }); expect(tensorInfos[tensorInfos.length - 1]).toMatchObject({ name: "output_norm.weight", shape: [4096n], - type: GGMLQuantizationType.F32, + dtype: GGMLQuantizationType.F32, }); }); @@ -91,12 +91,12 @@ describe("gguf", () => { expect(tensorInfos[0]).toMatchObject({ name: "token_embd.weight", shape: [4096n, 32000n], - type: GGMLQuantizationType.Q5_K, + dtype: GGMLQuantizationType.Q5_K, }); expect(tensorInfos[tensorInfos.length - 1]).toMatchObject({ name: "output.weight", shape: [4096n, 32000n], - type: GGMLQuantizationType.Q6_K, + dtype: GGMLQuantizationType.Q6_K, }); }); @@ -128,12 +128,12 @@ describe("gguf", () => { expect(tensorInfos[0]).toMatchObject({ name: "token_embd.weight", shape: [2048n, 256128n], - type: GGMLQuantizationType.Q4_K, + dtype: GGMLQuantizationType.Q4_K, }); expect(tensorInfos[tensorInfos.length - 1]).toMatchObject({ name: "blk.9.ffn_norm.weight", shape: [2048n], - type: GGMLQuantizationType.F32, + dtype: GGMLQuantizationType.F32, }); }); }); diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts 
index b400a49d3..38df510de 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -95,7 +95,7 @@ class RangeView { /** * Check whether we need to fetch a new chunk */ - async check(offset: number) { + async fetchChunkIfNeeded(offset: number) { if (this.view.byteLength - offset < HTTP_DATA_LEEWAY) { await this.fetchChunk(); } @@ -176,7 +176,7 @@ export interface GGUFTensorInfo { name: string; n_dims: number; shape: bigint[]; - type: GGMLQuantizationType; + dtype: GGMLQuantizationType; offset: bigint; } @@ -209,7 +209,7 @@ export async function gguf(url: string): Promise { let offset = 24; for (let i = 0; i < numKv; i++) { - await r.check(offset); + await r.fetchChunkIfNeeded(offset); // read key const keyResult = readString(r.view, offset); @@ -243,7 +243,7 @@ export async function gguf(url: string): Promise { const tensorInfos: GGUFTensorInfo[] = []; for (let i = 0; i < tensorCount; i++) { - await r.check(offset); + await r.fetchChunkIfNeeded(offset); // read tensor name const keyResult = readString(r.view, offset); @@ -267,7 +267,7 @@ export async function gguf(url: string): Promise { name: keyResult.value, n_dims: nDims, shape, - type, + dtype: type, offset: tensorOffset, }); } From 8ec3643c9c03581524f9361d3caf39fe1a797510 Mon Sep 17 00:00:00 2001 From: Mishig Date: Tue, 12 Mar 2024 09:28:54 -0700 Subject: [PATCH 11/14] Gguf updates (#543) 1. [Use length rather than newOffset](https://github.com/huggingface/huggingface.js/pull/543/commits/fcab2c96be2e11c02fcdc7e3301c3c30164b75c7) (discussed [here](https://github.com/huggingface/huggingface.js/pull/540#discussion_r1519588604)) 2. 
[custom fetch fn](https://github.com/huggingface/huggingface.js/pull/543/commits/18f93f37ce07f635dcd5cabb7156de4f7f1dc66f) (discussed [here](https://github.com/huggingface/huggingface.js/pull/540#discussion_r1519586431)) --- packages/gguf/src/gguf.ts | 70 ++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 26 deletions(-) diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index 38df510de..c272e9817 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -60,16 +60,26 @@ const HTTP_TOTAL_MAX_SIZE = 50 * 10 ** 6; /// 50MB class RangeView { private chunk: number; private buffer: ArrayBuffer; + private fetch: typeof fetch; readonly view: DataView; - constructor(public url: string) { + constructor( + public url: string, + params?: { + /** + * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers. + */ + fetch?: typeof fetch; + } + ) { this.chunk = 0; /// TODO(fix typing) // eslint-disable-next-line @typescript-eslint/ban-ts-comment // @ts-ignore this.buffer = new ArrayBuffer(0, { maxByteLength: HTTP_TOTAL_MAX_SIZE }); this.view = new DataView(this.buffer); + this.fetch = params?.fetch ?? 
fetch; } /** * Fetch a new chunk from the server @@ -78,7 +88,7 @@ class RangeView { const range = [this.chunk * HTTP_CHUNK_SIZE, (this.chunk + 1) * HTTP_CHUNK_SIZE - 1]; const buf = new Uint8Array( await ( - await fetch(this.url, { + await this.fetch(this.url, { headers: { Range: `bytes=${range[0]}-${range[1]}`, }, @@ -115,54 +125,54 @@ function readVersionedSize(view: DataView, byteOffset: number, version: Version) } } -function readString(view: DataView, offset: number): { value: string; newOffset: number } { +function readString(view: DataView, offset: number): { value: string; length: number } { const length = view.getBigUint64(offset, true); const value = new TextDecoder().decode(view.buffer.slice(offset + 8, offset + 8 + Number(length))); - return { value, newOffset: offset + 8 + Number(length) }; + return { value, length: 8 + Number(length) }; } function readMetadataValue( view: DataView, type: GGUFValueType, offset: number -): { value: MetadataValue; newOffset: number } { +): { value: MetadataValue; length: number } { switch (type) { case GGUFValueType.UINT8: - return { value: view.getUint8(offset), newOffset: offset + 1 }; + return { value: view.getUint8(offset), length: 1 }; case GGUFValueType.INT8: - return { value: view.getInt8(offset), newOffset: offset + 1 }; + return { value: view.getInt8(offset), length: 1 }; case GGUFValueType.UINT16: - return { value: view.getUint16(offset, true), newOffset: offset + 2 }; + return { value: view.getUint16(offset, true), length: 2 }; case GGUFValueType.INT16: - return { value: view.getInt16(offset, true), newOffset: offset + 2 }; + return { value: view.getInt16(offset, true), length: 2 }; case GGUFValueType.UINT32: - return { value: view.getUint32(offset, true), newOffset: offset + 4 }; + return { value: view.getUint32(offset, true), length: 4 }; case GGUFValueType.INT32: - return { value: view.getInt32(offset, true), newOffset: offset + 4 }; + return { value: view.getInt32(offset, true), length: 4 }; case 
GGUFValueType.FLOAT32: - return { value: view.getFloat32(offset, true), newOffset: offset + 4 }; + return { value: view.getFloat32(offset, true), length: 4 }; case GGUFValueType.BOOL: - return { value: view.getUint8(offset) !== 0, newOffset: offset + 1 }; + return { value: view.getUint8(offset) !== 0, length: 1 }; case GGUFValueType.STRING: return readString(view, offset); case GGUFValueType.ARRAY: { const arrayType = view.getUint32(offset, true); const arrayLength = view.getBigUint64(offset + 4, true); - let arrayOffset = offset + 12; + let length = 12; const arrayValues: MetadataValue[] = []; for (let i = 0; i < arrayLength; i++) { - const { value, newOffset } = readMetadataValue(view, arrayType, arrayOffset); + const { value, length: _length } = readMetadataValue(view, arrayType, offset + length); arrayValues.push(value); - arrayOffset = newOffset; + length += _length; } - return { value: arrayValues, newOffset: arrayOffset }; + return { value: arrayValues, length }; } case GGUFValueType.UINT64: - return { value: view.getBigUint64(offset, true), newOffset: offset + 8 }; + return { value: view.getBigUint64(offset, true), length: 8 }; case GGUFValueType.INT64: - return { value: view.getBigInt64(offset, true), newOffset: offset + 8 }; + return { value: view.getBigInt64(offset, true), length: 8 }; case GGUFValueType.FLOAT64: - return { value: view.getFloat64(offset, true), newOffset: offset + 8 }; + return { value: view.getFloat64(offset, true), length: 8 }; } } @@ -185,8 +195,16 @@ export interface GGUFParseOutput { tensorInfos: GGUFTensorInfo[]; } -export async function gguf(url: string): Promise { - const r = new RangeView(url); +export async function gguf( + url: string, + params?: { + /** + * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers. 
+ */ + fetch?: typeof fetch; + } +): Promise { + const r = new RangeView(url, params); await r.fetchChunk(); if (r.view.getUint32(0, true) !== new DataView(ggufMagicNumber.buffer).getUint32(0, true)) { @@ -213,7 +231,7 @@ export async function gguf(url: string): Promise { // read key const keyResult = readString(r.view, offset); - offset = keyResult.newOffset; + offset += keyResult.length; // read value type const valueType = r.view.getUint32(offset, true); @@ -223,7 +241,7 @@ export async function gguf(url: string): Promise { throw new Error("Unsupported metadata type: " + valueType); } - let valueResult: { value: MetadataValue; newOffset: number } | undefined; + let valueResult: ReturnType | undefined; while (!valueResult) { try { // read value @@ -236,7 +254,7 @@ export async function gguf(url: string): Promise { } } } - offset = valueResult.newOffset; + offset += valueResult.length; metadata[keyResult.value] = valueResult.value; } @@ -247,7 +265,7 @@ export async function gguf(url: string): Promise { // read tensor name const keyResult = readString(r.view, offset); - offset = keyResult.newOffset; + offset += keyResult.length; const nDims = r.view.getUint32(offset, true); offset += 4; From 12d28d947c2c887504871a13ef46f59d28bf2161 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Wed, 13 Mar 2024 09:34:06 +0100 Subject: [PATCH 12/14] GGUF parser: support big-endian files (#545) The important snippet is: ```ts const [littleEndian, version] = (() => { /// https://github.com/ggerganov/llama.cpp/issues/3957 /// Assume this code is always running on little-endian /// but wants to be able to parse both endianness const version = r.view.getUint32(4, true); if (version & 65535) { return [true, version]; } else { return [false, r.view.getUint32(4, false)]; } })(); ``` from https://github.com/ggerganov/llama.cpp/issues/3957 and thanks to @ggerganov [comment](https://github.com/huggingface/huggingface.js/pull/540/files#r1521103912) --- packages/gguf/src/gguf.spec.ts | 42 
++++++++++++++++ packages/gguf/src/gguf.ts | 91 +++++++++++++++++++++++----------- 2 files changed, 103 insertions(+), 30 deletions(-) diff --git a/packages/gguf/src/gguf.spec.ts b/packages/gguf/src/gguf.spec.ts index a1fdf2b21..165009322 100644 --- a/packages/gguf/src/gguf.spec.ts +++ b/packages/gguf/src/gguf.spec.ts @@ -6,6 +6,9 @@ const URL_MISTRAL_7B = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q5_K_M.gguf"; const URL_GEMMA_2B = "https://huggingface.co/lmstudio-ai/gemma-2b-it-GGUF/resolve/main/gemma-2b-it-q4_k_m.gguf"; +const URL_BIG_ENDIAN = + "https://huggingface.co/ggml-org/models/resolve/main/bert-bge-small/ggml-model-f16-big-endian.gguf"; + describe("gguf", () => { it("should parse a llama2 7b", async () => { const { metadata, tensorInfos } = await gguf(URL_LLAMA); @@ -13,6 +16,7 @@ describe("gguf", () => { /// metadata expect(metadata).toMatchObject({ + // partial list, do not exhaustively list (tokenizer is quite big for instance) version: 2, tensor_count: 291n, kv_count: 19n, @@ -48,6 +52,7 @@ describe("gguf", () => { ]); /// Tensor infos + /// By convention we test the first and last tensor. 
expect(tensorInfos.length).toEqual(291); expect(tensorInfos[0]).toMatchObject({ @@ -136,4 +141,41 @@ describe("gguf", () => { dtype: GGMLQuantizationType.F32, }); }); + + it("should parse a big-endian file", async () => { + const { metadata, tensorInfos } = await gguf(URL_BIG_ENDIAN); + + /// metadata + + expect(metadata).toMatchObject({ + version: 3, + tensor_count: 197n, + kv_count: 23n, + "general.architecture": "bert", + "general.file_type": GGMLQuantizationType.F16, + "general.name": "bge-small-en-v1.5", + "bert.attention.causal": false, + "bert.attention.head_count": 12, + "bert.attention.layer_norm_epsilon": 9.999999960041972e-13, + "bert.block_count": 12, + "bert.context_length": 512, + "bert.embedding_length": 384, + "bert.feed_forward_length": 1536, + "bert.pooling_type": 2, + }); + + /// Tensor infos + + expect(tensorInfos.length).toEqual(197); + expect(tensorInfos[0]).toMatchObject({ + name: "token_embd_norm.bias", + shape: [384n], + dtype: GGMLQuantizationType.F32, + }); + expect(tensorInfos[tensorInfos.length - 1]).toMatchObject({ + name: "blk.9.ffn_down.weight", + shape: [1536n, 384n], + dtype: GGMLQuantizationType.F16, + }); + }); }); diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index c272e9817..3398c3223 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -4,6 +4,12 @@ type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[]; type Version = 1 | 2 | 3; const isVersion = (version: number): version is Version => version === 1 || version === 2 || version === 3; +/** + * Must be `GGUF` at the byte level: `0x47` `0x47` `0x55` `0x46`. + * Your executor might do little-endian byte order, so it might be + * check for 0x46554747 and letting the endianness cancel out. + * Consider being *very* explicit about the byte order here. 
+ */ const ggufMagicNumber = new Uint8Array([0x47, 0x47, 0x55, 0x46]); /// "GGUF" export enum GGMLQuantizationType { @@ -112,21 +118,25 @@ class RangeView { } } -function readVersionedSize(view: DataView, byteOffset: number, version: Version): bigint { +/** + * Note: A good article about binary data in JS: https://javascript.info/arraybuffer-binary-arrays + */ + +function readVersionedSize(view: DataView, byteOffset: number, version: Version, littleEndian: boolean): bigint { switch (version) { case 1: { - const n = view.getUint32(byteOffset, true); + const n = view.getUint32(byteOffset, littleEndian); return BigInt(n); } case 2: case 3: { - return view.getBigUint64(byteOffset, true); + return view.getBigUint64(byteOffset, littleEndian); } } } -function readString(view: DataView, offset: number): { value: string; length: number } { - const length = view.getBigUint64(offset, true); +function readString(view: DataView, offset: number, littleEndian: boolean): { value: string; length: number } { + const length = view.getBigUint64(offset, littleEndian); const value = new TextDecoder().decode(view.buffer.slice(offset + 8, offset + 8 + Number(length))); return { value, length: 8 + Number(length) }; } @@ -134,7 +144,8 @@ function readString(view: DataView, offset: number): { value: string; length: nu function readMetadataValue( view: DataView, type: GGUFValueType, - offset: number + offset: number, + littleEndian: boolean ): { value: MetadataValue; length: number } { switch (type) { case GGUFValueType.UINT8: @@ -142,37 +153,37 @@ function readMetadataValue( case GGUFValueType.INT8: return { value: view.getInt8(offset), length: 1 }; case GGUFValueType.UINT16: - return { value: view.getUint16(offset, true), length: 2 }; + return { value: view.getUint16(offset, littleEndian), length: 2 }; case GGUFValueType.INT16: - return { value: view.getInt16(offset, true), length: 2 }; + return { value: view.getInt16(offset, littleEndian), length: 2 }; case GGUFValueType.UINT32: - return { 
value: view.getUint32(offset, true), length: 4 }; + return { value: view.getUint32(offset, littleEndian), length: 4 }; case GGUFValueType.INT32: - return { value: view.getInt32(offset, true), length: 4 }; + return { value: view.getInt32(offset, littleEndian), length: 4 }; case GGUFValueType.FLOAT32: - return { value: view.getFloat32(offset, true), length: 4 }; + return { value: view.getFloat32(offset, littleEndian), length: 4 }; case GGUFValueType.BOOL: return { value: view.getUint8(offset) !== 0, length: 1 }; case GGUFValueType.STRING: - return readString(view, offset); + return readString(view, offset, littleEndian); case GGUFValueType.ARRAY: { - const arrayType = view.getUint32(offset, true); - const arrayLength = view.getBigUint64(offset + 4, true); + const arrayType = view.getUint32(offset, littleEndian); + const arrayLength = view.getBigUint64(offset + 4, littleEndian); let length = 12; const arrayValues: MetadataValue[] = []; for (let i = 0; i < arrayLength; i++) { - const { value, length: _length } = readMetadataValue(view, arrayType, offset + length); + const { value, length: _length } = readMetadataValue(view, arrayType, offset + length, littleEndian); arrayValues.push(value); length += _length; } return { value: arrayValues, length }; } case GGUFValueType.UINT64: - return { value: view.getBigUint64(offset, true), length: 8 }; + return { value: view.getBigUint64(offset, littleEndian), length: 8 }; case GGUFValueType.INT64: - return { value: view.getBigInt64(offset, true), length: 8 }; + return { value: view.getBigInt64(offset, littleEndian), length: 8 }; case GGUFValueType.FLOAT64: - return { value: view.getFloat64(offset, true), length: 8 }; + return { value: view.getFloat64(offset, littleEndian), length: 8 }; } } @@ -207,16 +218,36 @@ export async function gguf( const r = new RangeView(url, params); await r.fetchChunk(); - if (r.view.getUint32(0, true) !== new DataView(ggufMagicNumber.buffer).getUint32(0, true)) { + const checkBuffer = (buffer: 
Uint8Array, header: Uint8Array) => { + for (let i = 0; i < header.length; i++) { + if (header[i] !== buffer[i]) { + return false; + } + } + return true; + }; + + if (!checkBuffer(new Uint8Array(r.view.buffer.slice(0, 4)), ggufMagicNumber)) { throw new Error("not a valid gguf file: not starting with GGUF magic number"); } - const version = r.view.getUint32(4, true); + const [littleEndian, version] = (() => { + /// https://github.com/ggerganov/llama.cpp/issues/3957 + /// Assume this code is always running on little-endian + /// but wants to be able to parse both endianness + const version = r.view.getUint32(4, true); + if (version & 65535) { + return [true, version]; + } else { + return [false, r.view.getUint32(4, false)]; + } + })(); + if (!isVersion(version)) { throw new Error(`not a valid gguf file: unsupported version "${version}"`); } - const tensorCount = readVersionedSize(r.view, 8, version); - const numKv = readVersionedSize(r.view, 16, version); + const tensorCount = readVersionedSize(r.view, 8, version, littleEndian); + const numKv = readVersionedSize(r.view, 16, version, littleEndian); const metadata: GGUFMetadata = { version, @@ -230,11 +261,11 @@ export async function gguf( await r.fetchChunkIfNeeded(offset); // read key - const keyResult = readString(r.view, offset); + const keyResult = readString(r.view, offset, littleEndian); offset += keyResult.length; // read value type - const valueType = r.view.getUint32(offset, true); + const valueType = r.view.getUint32(offset, littleEndian); offset += 4; if (!isGGUFValueType(valueType)) { @@ -245,7 +276,7 @@ export async function gguf( while (!valueResult) { try { // read value - valueResult = readMetadataValue(r.view, valueType, offset); + valueResult = readMetadataValue(r.view, valueType, offset, littleEndian); } catch (err) { if (err instanceof RangeError) { await r.fetchChunk(); @@ -264,21 +295,21 @@ export async function gguf( await r.fetchChunkIfNeeded(offset); // read tensor name - const keyResult = 
readString(r.view, offset); + const keyResult = readString(r.view, offset, littleEndian); offset += keyResult.length; - const nDims = r.view.getUint32(offset, true); + const nDims = r.view.getUint32(offset, littleEndian); offset += 4; const shape: bigint[] = []; for (let dim = 0; dim < nDims; dim++) { - shape.push(r.view.getBigUint64(offset, true)); + shape.push(r.view.getBigUint64(offset, littleEndian)); offset += 8; } - const type = r.view.getUint32(offset, true); + const type = r.view.getUint32(offset, littleEndian); offset += 4; - const tensorOffset = r.view.getBigUint64(offset, true); + const tensorOffset = r.view.getBigUint64(offset, littleEndian); offset += 8; tensorInfos.push({ From 007c451bb94021d6a181b0523988cc9ac1b9b6fa Mon Sep 17 00:00:00 2001 From: Mishig Davaadorj Date: Wed, 13 Mar 2024 09:28:26 +0000 Subject: [PATCH 13/14] use current versions of model weights --- packages/gguf/src/gguf.spec.ts | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/packages/gguf/src/gguf.spec.ts b/packages/gguf/src/gguf.spec.ts index 165009322..b919e2881 100644 --- a/packages/gguf/src/gguf.spec.ts +++ b/packages/gguf/src/gguf.spec.ts @@ -1,13 +1,12 @@ import { describe, expect, it } from "vitest"; import { GGMLQuantizationType, gguf } from "./gguf"; -const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q2_K.gguf"; +const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/191239b/llama-2-7b-chat.Q2_K.gguf"; const URL_MISTRAL_7B = - "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q5_K_M.gguf"; -const URL_GEMMA_2B = "https://huggingface.co/lmstudio-ai/gemma-2b-it-GGUF/resolve/main/gemma-2b-it-q4_k_m.gguf"; - + "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/3a6fbf4/mistral-7b-instruct-v0.2.Q5_K_M.gguf"; +const URL_GEMMA_2B = 
"https://huggingface.co/lmstudio-ai/gemma-2b-it-GGUF/resolve/a0b140b/gemma-2b-it-q4_k_m.gguf"; const URL_BIG_ENDIAN = - "https://huggingface.co/ggml-org/models/resolve/main/bert-bge-small/ggml-model-f16-big-endian.gguf"; + "https://huggingface.co/ggml-org/models/resolve/1213976/bert-bge-small/ggml-model-f16-big-endian.gguf"; describe("gguf", () => { it("should parse a llama2 7b", async () => { From 11b6b8971192226e621ba1e1674305e14ac80959 Mon Sep 17 00:00:00 2001 From: Mishig Date: Wed, 13 Mar 2024 02:29:03 -0700 Subject: [PATCH 14/14] Update packages/gguf/src/gguf.ts --- packages/gguf/src/gguf.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index 3398c3223..80ce89600 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -1,5 +1,5 @@ -type MetadataBaseValue = string | number | bigint | boolean; -type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[]; /// recursive as arrays can be nested. +export type MetadataBaseValue = string | number | bigint | boolean; +export type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[]; /// recursive as arrays can be nested. type Version = 1 | 2 | 3; const isVersion = (version: number): version is Version => version === 1 || version === 2 || version === 3;