From fba88a40bcc3e53f06c4fd4506d2963d15404da1 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Fri, 8 Mar 2024 15:02:09 +0100 Subject: [PATCH 01/14] scaffolding from CONTRIBUTING.md --- .github/workflows/gguf-publish.yml | 63 ++++++++++++++++++++++++++++++ packages/gguf/.prettierignore | 4 ++ packages/gguf/README.md | 10 +++++ packages/gguf/package.json | 51 ++++++++++++++++++++++++ packages/gguf/pnpm-lock.yaml | 1 + packages/gguf/src/gguf.spec.ts | 0 packages/gguf/src/gguf.ts | 0 packages/gguf/src/index.ts | 0 packages/gguf/tsconfig.json | 18 +++++++++ packages/gguf/tsup.config.ts | 26 ++++++++++++ pnpm-workspace.yaml | 1 + 11 files changed, 174 insertions(+) create mode 100644 .github/workflows/gguf-publish.yml create mode 100644 packages/gguf/.prettierignore create mode 100644 packages/gguf/README.md create mode 100644 packages/gguf/package.json create mode 100644 packages/gguf/pnpm-lock.yaml create mode 100644 packages/gguf/src/gguf.spec.ts create mode 100644 packages/gguf/src/gguf.ts create mode 100644 packages/gguf/src/index.ts create mode 100644 packages/gguf/tsconfig.json create mode 100644 packages/gguf/tsup.config.ts diff --git a/.github/workflows/gguf-publish.yml b/.github/workflows/gguf-publish.yml new file mode 100644 index 000000000..7091e0dcc --- /dev/null +++ b/.github/workflows/gguf-publish.yml @@ -0,0 +1,63 @@ +name: GGUF - Version and Release + +on: + workflow_dispatch: + inputs: + newversion: + type: choice + description: "Semantic Version Bump Type" + default: patch + options: + - patch + - minor + - major + +concurrency: + group: "push-to-main" + +defaults: + run: + working-directory: packages/gguf + +jobs: + version_and_release: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + # Needed to push the tag and the commit on the main branch, otherwise we get: + # > Run git push --follow-tags + # remote: error: GH006: Protected branch update failed for refs/heads/main. 
+ # remote: error: Changes must be made through a pull request. Required status check "lint" is expected. + token: ${{ secrets.BOT_ACCESS_TOKEN }} + - run: corepack enable + - uses: actions/setup-node@v3 + with: + node-version: "18" + cache: "pnpm" + cache-dependency-path: | + packages/gguf/pnpm-lock.yaml + # setting a registry enables the NODE_AUTH_TOKEN env variable where we can set an npm token. REQUIRED + registry-url: "https://registry.npmjs.org" + - run: pnpm install + - run: git config --global user.name machineuser + - run: git config --global user.email infra+machineuser@huggingface.co + - run: | + PACKAGE_VERSION=$(node -p "require('./package.json').version") + BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')") + # Update package.json with the new version + node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" + git commit . -m "🔖 @huggingface/gguf $BUMPED_VERSION" + git tag "gguf-v$BUMPED_VERSION" + - run: pnpm publish --no-git-checks . + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + - run: git pull --rebase && git push --follow-tags + # hack - reuse actions/setup-node@v3 just to set a new registry + - uses: actions/setup-node@v3 + with: + node-version: "18" + registry-url: "https://npm.pkg.github.com" + - run: pnpm publish --no-git-checks . 
+ env: + NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/packages/gguf/.prettierignore b/packages/gguf/.prettierignore new file mode 100644 index 000000000..cac0c6949 --- /dev/null +++ b/packages/gguf/.prettierignore @@ -0,0 +1,4 @@ +pnpm-lock.yaml +# In order to avoid code samples to have tabs, they don't display well on npm +README.md +dist \ No newline at end of file diff --git a/packages/gguf/README.md b/packages/gguf/README.md new file mode 100644 index 000000000..0ca108483 --- /dev/null +++ b/packages/gguf/README.md @@ -0,0 +1,10 @@ +# `@huggingface/gguf` + +A GGUF parser that works on remotely hosted files. + +## Spec + +https://github.com/ggerganov/ggml/blob/master/docs/gguf.md + +## Acknowledgements + diff --git a/packages/gguf/package.json b/packages/gguf/package.json new file mode 100644 index 000000000..d82b2c6a1 --- /dev/null +++ b/packages/gguf/package.json @@ -0,0 +1,51 @@ +{ + "name": "@huggingface/gguf", + "packageManager": "pnpm@8.10.5", + "version": "0.0.1", + "description": "a GGUF parser that works on remotely hosted files", + "repository": "https://github.com/huggingface/huggingface.js.git", + "publishConfig": { + "access": "public" + }, + "main": "./dist/index.js", + "module": "./dist/index.mjs", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "require": "./dist/index.js", + "import": "./dist/index.mjs" + } + }, + "browser": { + "./dist/index.js": "./dist/browser/index.js", + "./dist/index.mjs": "./dist/browser/index.mjs" + }, + "engines": { + "node": ">=18" + }, + "source": "index.ts", + "scripts": { + "lint": "eslint --quiet --fix --ext .cjs,.ts .", + "lint:check": "eslint --ext .cjs,.ts .", + "format": "prettier --write .", + "format:check": "prettier --check .", + "prepublishOnly": "pnpm run build", + "build": "tsup src/index.ts --format cjs,esm --clean --dts", + "test": "vitest run", + "check": "tsc" + }, + "files": [ + "dist", + "src", + "tsconfig.json" + ], + "keywords": [ + 
"huggingface", + "hub", + "gguf" + ], + "author": "Hugging Face", + "license": "MIT", + "devDependencies": {} +} diff --git a/packages/gguf/pnpm-lock.yaml b/packages/gguf/pnpm-lock.yaml new file mode 100644 index 000000000..7a06cc796 --- /dev/null +++ b/packages/gguf/pnpm-lock.yaml @@ -0,0 +1 @@ +lockfileVersion: '6.0' diff --git a/packages/gguf/src/gguf.spec.ts b/packages/gguf/src/gguf.spec.ts new file mode 100644 index 000000000..e69de29bb diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts new file mode 100644 index 000000000..e69de29bb diff --git a/packages/gguf/src/index.ts b/packages/gguf/src/index.ts new file mode 100644 index 000000000..e69de29bb diff --git a/packages/gguf/tsconfig.json b/packages/gguf/tsconfig.json new file mode 100644 index 000000000..37823efde --- /dev/null +++ b/packages/gguf/tsconfig.json @@ -0,0 +1,18 @@ +{ + "compilerOptions": { + "allowSyntheticDefaultImports": true, + "lib": ["ES2022", "DOM"], + "module": "CommonJS", + "moduleResolution": "node", + "target": "ES2022", + "forceConsistentCasingInFileNames": true, + "strict": true, + "noImplicitAny": true, + "strictNullChecks": true, + "skipLibCheck": true, + "noImplicitOverride": true, + "outDir": "./dist" + }, + "include": ["src"], + "exclude": ["dist"] +} diff --git a/packages/gguf/tsup.config.ts b/packages/gguf/tsup.config.ts new file mode 100644 index 000000000..6203927ca --- /dev/null +++ b/packages/gguf/tsup.config.ts @@ -0,0 +1,26 @@ +import type { Options } from "tsup"; + +const baseConfig: Options = { + entry: ["./index.ts"], + format: ["cjs", "esm"], + outDir: "dist", + clean: true, + dts: { + resolve: true, + }, +}; + +const nodeConfig: Options = { + ...baseConfig, + platform: "node", +}; + +const browserConfig: Options = { + ...baseConfig, + platform: "browser", + target: "es2018", + splitting: true, + outDir: "dist/browser", +}; + +export default [nodeConfig, browserConfig]; diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index c28561b18..fe48f2587 
100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -6,5 +6,6 @@ packages: - "packages/agents" - "packages/languages" - "packages/tasks" + - "packages/gguf" - "packages/jinja" - "packages/widgets" From e624cb046cc8eac42641ef31921ac5f8e561600c Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Fri, 8 Mar 2024 19:05:36 +0100 Subject: [PATCH 02/14] initial import --- packages/gguf/README.md | 2 + packages/gguf/src/gguf.spec.ts | 139 +++++++++++++++++++++ packages/gguf/src/gguf.ts | 221 +++++++++++++++++++++++++++++++++ packages/gguf/src/index.ts | 1 + 4 files changed, 363 insertions(+) diff --git a/packages/gguf/README.md b/packages/gguf/README.md index 0ca108483..cf27353e6 100644 --- a/packages/gguf/README.md +++ b/packages/gguf/README.md @@ -6,5 +6,7 @@ A GGUF parser that works on remotely hosted files. https://github.com/ggerganov/ggml/blob/master/docs/gguf.md +Reference implementation (Python): https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/gguf/gguf_reader.py + ## Acknowledgements diff --git a/packages/gguf/src/gguf.spec.ts b/packages/gguf/src/gguf.spec.ts index e69de29bb..6c19c4a34 100644 --- a/packages/gguf/src/gguf.spec.ts +++ b/packages/gguf/src/gguf.spec.ts @@ -0,0 +1,139 @@ +import { describe, expect, it } from "vitest"; +import { GGMLQuantizationType, gguf } from "./gguf"; + +const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q2_K.gguf"; +const URL_MISTRAL_7B = + "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q5_K_M.gguf"; +const URL_GEMMA_2B = "https://huggingface.co/lmstudio-ai/gemma-2b-it-GGUF/resolve/main/gemma-2b-it-q4_k_m.gguf"; + +describe("gguf", () => { + it("should parse a llama2 7b", async () => { + const { metadata, tensorInfos } = await gguf(URL_LLAMA); + + /// metadata + + expect(metadata).toMatchObject({ + version: 2, + tensor_count: 291n, + kv_count: 19n, + "general.architecture": "llama", + "general.file_type": 
10, + "general.name": "LLaMA v2", + "general.quantization_version": 2, + "llama.attention.head_count": 32, + "llama.attention.head_count_kv": 32, + "llama.attention.layer_norm_rms_epsilon": 9.999999974752427e-7, + "llama.block_count": 32, + "llama.context_length": 4096, + "llama.embedding_length": 4096, + "llama.feed_forward_length": 11008, + "llama.rope.dimension_count": 128, + }); + + const tokens = metadata["tokenizer.ggml.tokens"]; + if (!Array.isArray(tokens)) { + throw new Error(); + } + expect(tokens.slice(0, 10)).toEqual([ + "", + "", + "", + "<0x00>", + "<0x01>", + "<0x02>", + "<0x03>", + "<0x04>", + "<0x05>", + "<0x06>", + ]); + + /// Tensor infos + + expect(tensorInfos.length).toEqual(291); + expect(tensorInfos[0]).toMatchObject({ + name: "token_embd.weight", + shape: [4096n, 32000n], + type: GGMLQuantizationType.Q2_K, + }); + expect(tensorInfos[tensorInfos.length - 1]).toMatchObject({ + name: "output_norm.weight", + shape: [4096n], + type: GGMLQuantizationType.F32, + }); + }); + + it("should parse a mistral 7b", async () => { + const { metadata, tensorInfos } = await gguf(URL_MISTRAL_7B); + + /// metadata + + expect(metadata).toMatchObject({ + version: 3, + tensor_count: 291n, + kv_count: 24n, + "general.architecture": "llama", + "general.file_type": 17, + "general.name": "mistralai_mistral-7b-instruct-v0.2", + "general.quantization_version": 2, + "llama.attention.head_count": 32, + "llama.attention.head_count_kv": 8, + "llama.attention.layer_norm_rms_epsilon": 0.000009999999747378752, + "llama.block_count": 32, + "llama.context_length": 32768, + "llama.embedding_length": 4096, + "llama.feed_forward_length": 14336, + "llama.rope.dimension_count": 128, + }); + + /// Tensor infos + + expect(tensorInfos.length).toEqual(291); + expect(tensorInfos[0]).toMatchObject({ + name: "token_embd.weight", + shape: [4096n, 32000n], + type: GGMLQuantizationType.Q5_K, + }); + expect(tensorInfos[tensorInfos.length - 1]).toMatchObject({ + name: "output.weight", + shape: 
[4096n, 32000n], + type: GGMLQuantizationType.Q6_K, + }); + }); + + it("should parse a gemma 2b", async () => { + const { metadata, tensorInfos } = await gguf(URL_GEMMA_2B); + + /// metadata + + expect(metadata).toMatchObject({ + version: 3, + tensor_count: 164n, + kv_count: 21n, + "general.architecture": "gemma", + "general.file_type": GGMLQuantizationType.Q8_K, // 15 + "general.name": "gemma-2b-it", + "general.quantization_version": 2, + "gemma.attention.head_count": 8, + "gemma.attention.head_count_kv": 1, + "gemma.attention.layer_norm_rms_epsilon": 9.999999974752427e-7, + "gemma.block_count": 18, + "gemma.context_length": 8192, + "gemma.embedding_length": 2048, + "gemma.feed_forward_length": 16384, + }); + + /// Tensor infos + + expect(tensorInfos.length).toEqual(164); + expect(tensorInfos[0]).toMatchObject({ + name: "token_embd.weight", + shape: [2048n, 256128n], + type: GGMLQuantizationType.Q4_K, + }); + expect(tensorInfos[tensorInfos.length - 1]).toMatchObject({ + name: "blk.9.ffn_norm.weight", + shape: [2048n], + type: GGMLQuantizationType.F32, + }); + }); +}); diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index e69de29bb..3b468610e 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -0,0 +1,221 @@ +type MetadataBaseValue = string | number | bigint | boolean; +type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[]; /// recursive as arrays can be nested. 
+ +type Version = 1 | 2 | 3; +const isVersion = (version: number): version is Version => version === 1 || version === 2 || version === 3; + +const ggufMagicNumber = [0x47, 0x47, 0x55, 0x46]; /// "GGUF" + +export enum GGMLQuantizationType { + F32 = 0, + F16 = 1, + Q4_0 = 2, + Q4_1 = 3, + Q5_0 = 6, + Q5_1 = 7, + Q8_0 = 8, + Q8_1 = 9, + Q2_K = 10, + Q3_K = 11, + Q4_K = 12, + Q5_K = 13, + Q6_K = 14, + Q8_K = 15, + IQ2_XXS = 16, + IQ2_XS = 17, + IQ3_XXS = 18, + IQ1_S = 19, + IQ4_NL = 20, + IQ3_S = 21, + IQ2_S = 22, + IQ4_XS = 23, +} + +enum GGUFValueType { + UINT8 = 0, + INT8 = 1, + UINT16 = 2, + INT16 = 3, + UINT32 = 4, + INT32 = 5, + FLOAT32 = 6, + BOOL = 7, + STRING = 8, + ARRAY = 9, + UINT64 = 10, + INT64 = 11, + FLOAT64 = 12, +} +function isGGUFValueType(n: number): n is GGUFValueType { + return typeof GGUFValueType[n] === "string"; +} + +const HTTP_CHUNK_SIZE = 60 * 10 ** 6; + +async function rangeFromUrl(url: string, range: [number, number]): Promise { + const buf = await ( + await fetch(url, { + headers: { + Range: `bytes=${range[0]}-${range[1]}`, + }, + }) + ).arrayBuffer(); + return new DataView(buf); +} + +function readVersionedSize(view: DataView, byteOffset: number, version: Version): bigint { + switch (version) { + case 1: { + const n = view.getUint32(byteOffset, true); + return BigInt(n); + } + case 2: + case 3: { + return view.getBigUint64(byteOffset, true); + } + } +} + +function readString(view: DataView, offset: number): { value: string; newOffset: number } { + const length = view.getBigUint64(offset, true); + const value = new TextDecoder().decode(view.buffer.slice(offset + 8, offset + 8 + Number(length))); + return { value, newOffset: offset + 8 + Number(length) }; +} + +function readMetadataValue( + view: DataView, + type: GGUFValueType, + offset: number +): { value: MetadataValue; newOffset: number } { + switch (type) { + case GGUFValueType.UINT8: + return { value: view.getUint8(offset), newOffset: offset + 1 }; + case GGUFValueType.INT8: + return 
{ value: view.getInt8(offset), newOffset: offset + 1 }; + case GGUFValueType.UINT16: + return { value: view.getUint16(offset, true), newOffset: offset + 2 }; + case GGUFValueType.INT16: + return { value: view.getInt16(offset, true), newOffset: offset + 2 }; + case GGUFValueType.UINT32: + return { value: view.getUint32(offset, true), newOffset: offset + 4 }; + case GGUFValueType.INT32: + return { value: view.getInt32(offset, true), newOffset: offset + 4 }; + case GGUFValueType.FLOAT32: + return { value: view.getFloat32(offset, true), newOffset: offset + 4 }; + case GGUFValueType.BOOL: + return { value: view.getUint8(offset) !== 0, newOffset: offset + 1 }; + case GGUFValueType.STRING: + return readString(view, offset); + case GGUFValueType.ARRAY: { + const arrayType = view.getUint32(offset, true); + const arrayLength = view.getBigUint64(offset + 4, true); + let arrayOffset = offset + 12; + const arrayValues: MetadataValue[] = []; + for (let i = 0; i < arrayLength; i++) { + const { value, newOffset } = readMetadataValue(view, arrayType, arrayOffset); + arrayValues.push(value); + arrayOffset = newOffset; + } + return { value: arrayValues, newOffset: arrayOffset }; + } + case GGUFValueType.UINT64: + return { value: view.getBigUint64(offset, true), newOffset: offset + 8 }; + case GGUFValueType.INT64: + return { value: view.getBigInt64(offset, true), newOffset: offset + 8 }; + case GGUFValueType.FLOAT64: + return { value: view.getFloat64(offset, true), newOffset: offset + 8 }; + } +} + +export type GGUFMetadata = { + version: Version; + tensor_count: bigint; + kv_count: bigint; +} & Record; + +export interface GGUFTensorInfo { + name: string; + n_dims: number; + shape: bigint[]; + type: GGMLQuantizationType; + offset: bigint; +} + +export interface GGUFParseOutput { + metadata: GGUFMetadata; + tensorInfos: GGUFTensorInfo[]; +} + +export async function gguf(url: string): Promise { + const view = await rangeFromUrl(url, [0, HTTP_CHUNK_SIZE - 1]); + if (view.getUint32(0, 
true) !== Buffer.from(ggufMagicNumber).readInt32LE()) { + throw new Error("not a valid gguf file: no gguf magic number"); + } + + const version = view.getUint32(4, true); + if (!isVersion(version)) { + throw new Error("not a valid gguf file: unsupported version"); + } + const tensorCount = readVersionedSize(view, 8, version); + const numKv = readVersionedSize(view, 16, version); + + const metadata: GGUFMetadata = { + version, + tensor_count: tensorCount, + kv_count: numKv, + }; + // initial offset after header + let offset = 24; + + for (let i = 0; i < numKv; i++) { + // read key + const keyResult = readString(view, offset); + offset = keyResult.newOffset; + + // read value type + const valueType = view.getUint32(offset, true); + offset += 4; + + if (!isGGUFValueType(valueType)) { + throw new Error("Unsupported metadata type: " + valueType); + } + + // read value + const valueResult = readMetadataValue(view, valueType, offset); + offset = valueResult.newOffset; + + metadata[keyResult.value] = valueResult.value; + } + + const tensorInfos: GGUFTensorInfo[] = []; + + for (let i = 0; i < tensorCount; i++) { + // read tensor name + const keyResult = readString(view, offset); + offset = keyResult.newOffset; + + const nDims = view.getUint32(offset, true); + offset += 4; + + const shape: bigint[] = []; + for (let dim = 0; dim < nDims; dim++) { + shape.push(view.getBigUint64(offset, true)); + offset += 8; + } + + const type = view.getUint32(offset, true); + offset += 4; + const tensorOffset = view.getBigUint64(offset, true); + offset += 8; + + tensorInfos.push({ + name: keyResult.value, + n_dims: nDims, + shape, + type, + offset: tensorOffset, + }); + } + + return { metadata, tensorInfos }; +} diff --git a/packages/gguf/src/index.ts b/packages/gguf/src/index.ts index e69de29bb..bc0ba0958 100644 --- a/packages/gguf/src/index.ts +++ b/packages/gguf/src/index.ts @@ -0,0 +1 @@ +export * from "./gguf"; From 1877baba139077bf61a8293a09987c37b73adf45 Mon Sep 17 00:00:00 2001 From: 
Julien Chaumond Date: Fri, 8 Mar 2024 20:15:42 +0100 Subject: [PATCH 03/14] fetch ranges of HTTP data when needed --- packages/gguf/src/gguf.ts | 92 +++++++++++++++++++++++++++++---------- 1 file changed, 68 insertions(+), 24 deletions(-) diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index 3b468610e..914b9b413 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -50,17 +50,55 @@ function isGGUFValueType(n: number): n is GGUFValueType { return typeof GGUFValueType[n] === "string"; } -const HTTP_CHUNK_SIZE = 60 * 10 ** 6; - -async function rangeFromUrl(url: string, range: [number, number]): Promise { - const buf = await ( - await fetch(url, { - headers: { - Range: `bytes=${range[0]}-${range[1]}`, - }, - }) - ).arrayBuffer(); - return new DataView(buf); +const HTTP_CHUNK_SIZE = 2 * 10 ** 6; /// 2MB +const HTTP_DATA_LEEWAY = 1 * 10 ** 6; /// 1MB + +/** + * Internal stateful instance to fetch ranges of HTTP data when needed + */ +class RangeView { + private chunk: number; + private buffer: ArrayBuffer; + + readonly view: DataView; + + constructor(public url: string) { + this.chunk = 0; + /// TODO(fix typing) + // eslint-disable-next-line @typescript-eslint/ban-ts-comment + // @ts-ignore + this.buffer = new ArrayBuffer(0, { maxByteLength: 50 * 10 ** 6 }); + this.view = new DataView(this.buffer); + } + /** + * Fetch a new chunk from the server + */ + async fetchChunk() { + const range = [this.chunk * HTTP_CHUNK_SIZE, (this.chunk + 1) * HTTP_CHUNK_SIZE - 1]; + const buf = new Uint8Array( + await ( + await fetch(this.url, { + headers: { + Range: `bytes=${range[0]}-${range[1]}`, + }, + }) + ).arrayBuffer() + ); + /// TODO(fix typing) + // eslint-disable-next-line @typescript-eslint/ban-ts-comment + // @ts-ignore + this.buffer.resize((this.chunk + 1) * HTTP_CHUNK_SIZE); + new Uint8Array(this.buffer).set(buf, this.chunk * HTTP_CHUNK_SIZE); + this.chunk += 1; + } + /** + * Check whether we need to fetch a new chunk + */ + async 
check(offset: number) { + if (this.view.byteLength - offset < HTTP_DATA_LEEWAY) { + await this.fetchChunk(); + } + } } function readVersionedSize(view: DataView, byteOffset: number, version: Version): bigint { @@ -147,17 +185,19 @@ export interface GGUFParseOutput { } export async function gguf(url: string): Promise { - const view = await rangeFromUrl(url, [0, HTTP_CHUNK_SIZE - 1]); - if (view.getUint32(0, true) !== Buffer.from(ggufMagicNumber).readInt32LE()) { + const r = new RangeView(url); + await r.fetchChunk(); + + if (r.view.getUint32(0, true) !== Buffer.from(ggufMagicNumber).readInt32LE()) { throw new Error("not a valid gguf file: no gguf magic number"); } - const version = view.getUint32(4, true); + const version = r.view.getUint32(4, true); if (!isVersion(version)) { throw new Error("not a valid gguf file: unsupported version"); } - const tensorCount = readVersionedSize(view, 8, version); - const numKv = readVersionedSize(view, 16, version); + const tensorCount = readVersionedSize(r.view, 8, version); + const numKv = readVersionedSize(r.view, 16, version); const metadata: GGUFMetadata = { version, @@ -168,12 +208,14 @@ export async function gguf(url: string): Promise { let offset = 24; for (let i = 0; i < numKv; i++) { + await r.check(offset); + // read key - const keyResult = readString(view, offset); + const keyResult = readString(r.view, offset); offset = keyResult.newOffset; // read value type - const valueType = view.getUint32(offset, true); + const valueType = r.view.getUint32(offset, true); offset += 4; if (!isGGUFValueType(valueType)) { @@ -181,7 +223,7 @@ export async function gguf(url: string): Promise { } // read value - const valueResult = readMetadataValue(view, valueType, offset); + const valueResult = readMetadataValue(r.view, valueType, offset); offset = valueResult.newOffset; metadata[keyResult.value] = valueResult.value; @@ -190,22 +232,24 @@ export async function gguf(url: string): Promise { const tensorInfos: GGUFTensorInfo[] = []; for 
(let i = 0; i < tensorCount; i++) { + await r.check(offset); + // read tensor name - const keyResult = readString(view, offset); + const keyResult = readString(r.view, offset); offset = keyResult.newOffset; - const nDims = view.getUint32(offset, true); + const nDims = r.view.getUint32(offset, true); offset += 4; const shape: bigint[] = []; for (let dim = 0; dim < nDims; dim++) { - shape.push(view.getBigUint64(offset, true)); + shape.push(r.view.getBigUint64(offset, true)); offset += 8; } - const type = view.getUint32(offset, true); + const type = r.view.getUint32(offset, true); offset += 4; - const tensorOffset = view.getBigUint64(offset, true); + const tensorOffset = r.view.getBigUint64(offset, true); offset += 8; tensorInfos.push({ From abfe90994043cfab1f599cb095edd9678958291b Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Fri, 8 Mar 2024 20:30:09 +0100 Subject: [PATCH 04/14] Yay! it's working now --- packages/gguf/src/gguf.ts | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index 914b9b413..2d4ea6230 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -51,7 +51,7 @@ function isGGUFValueType(n: number): n is GGUFValueType { } const HTTP_CHUNK_SIZE = 2 * 10 ** 6; /// 2MB -const HTTP_DATA_LEEWAY = 1 * 10 ** 6; /// 1MB +const HTTP_DATA_LEEWAY = 5 * 10 ** 5; /// 500kb /** * Internal stateful instance to fetch ranges of HTTP data when needed @@ -222,10 +222,20 @@ export async function gguf(url: string): Promise { throw new Error("Unsupported metadata type: " + valueType); } - // read value - const valueResult = readMetadataValue(r.view, valueType, offset); + let valueResult: { value: MetadataValue; newOffset: number } | undefined; + while (!valueResult) { + try { + // read value + valueResult = readMetadataValue(r.view, valueType, offset); + } catch (err) { + if (err instanceof RangeError) { + await r.fetchChunk(); + } else { + throw err; + } + } + } 
offset = valueResult.newOffset; - metadata[keyResult.value] = valueResult.value; } From 97f5d256d5122d2db1896a67cfd13ebef6bcfa27 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Fri, 8 Mar 2024 20:34:28 +0100 Subject: [PATCH 05/14] Acknowledgements --- packages/gguf/README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/packages/gguf/README.md b/packages/gguf/README.md index cf27353e6..d2e831107 100644 --- a/packages/gguf/README.md +++ b/packages/gguf/README.md @@ -8,5 +8,10 @@ https://github.com/ggerganov/ggml/blob/master/docs/gguf.md Reference implementation (Python): https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/gguf/gguf_reader.py -## Acknowledgements +## Acknowledgements & Inspirations + +- https://github.com/hyparam/hyllama by @platypii (MIT license) +- https://github.com/ahoylabs/gguf.js by @biw @dkogut1996 @spencekim (MIT license) + +🔥❤️ From 8e289220884447a5cf525e07db960212dc384144 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Fri, 8 Mar 2024 23:03:28 +0100 Subject: [PATCH 06/14] browser compat? 
--- packages/gguf/src/gguf.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index 2d4ea6230..f0b7dcc46 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -4,7 +4,7 @@ type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[]; type Version = 1 | 2 | 3; const isVersion = (version: number): version is Version => version === 1 || version === 2 || version === 3; -const ggufMagicNumber = [0x47, 0x47, 0x55, 0x46]; /// "GGUF" +const ggufMagicNumber = new Uint8Array([0x47, 0x47, 0x55, 0x46]); /// "GGUF" export enum GGMLQuantizationType { F32 = 0, @@ -188,8 +188,8 @@ export async function gguf(url: string): Promise { const r = new RangeView(url); await r.fetchChunk(); - if (r.view.getUint32(0, true) !== Buffer.from(ggufMagicNumber).readInt32LE()) { - throw new Error("not a valid gguf file: no gguf magic number"); + if (r.view.getUint32(0, true) !== new DataView(ggufMagicNumber.buffer).getUint32(0, true)) { + throw new Error("not a valid gguf file: not starting with GGUF magic number"); } const version = r.view.getUint32(4, true); From 312aef0710b29428453013a10d33ac5e3698e357 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Fri, 8 Mar 2024 23:07:23 +0100 Subject: [PATCH 07/14] `ArrayBuffer.resize` requires Node 20 --- .github/workflows/agents-publish.yml | 4 ++-- .github/workflows/gguf-publish.yml | 4 ++-- .github/workflows/hub-publish.yml | 4 ++-- .github/workflows/inference-publish.yml | 4 ++-- .github/workflows/jinja-publish.yml | 4 ++-- .github/workflows/languages-publish.yml | 4 ++-- .github/workflows/lint.yml | 2 +- .github/workflows/tasks-publish.yml | 4 ++-- .github/workflows/test.yml | 2 +- .github/workflows/widgets-publish.yml | 4 ++-- packages/gguf/package.json | 2 +- 11 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/workflows/agents-publish.yml b/.github/workflows/agents-publish.yml index cc29c1ebf..08108b585 100644 
--- a/.github/workflows/agents-publish.yml +++ b/.github/workflows/agents-publish.yml @@ -33,7 +33,7 @@ jobs: - run: corepack enable - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" cache: "pnpm" cache-dependency-path: | packages/agents/pnpm-lock.yaml @@ -59,7 +59,7 @@ jobs: # hack - reuse actions/setup-node@v3 just to set a new registry - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" registry-url: "https://npm.pkg.github.com" - run: pnpm publish --no-git-checks . env: diff --git a/.github/workflows/gguf-publish.yml b/.github/workflows/gguf-publish.yml index 7091e0dcc..dec6cd891 100644 --- a/.github/workflows/gguf-publish.yml +++ b/.github/workflows/gguf-publish.yml @@ -33,7 +33,7 @@ jobs: - run: corepack enable - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" cache: "pnpm" cache-dependency-path: | packages/gguf/pnpm-lock.yaml @@ -56,7 +56,7 @@ jobs: # hack - reuse actions/setup-node@v3 just to set a new registry - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" registry-url: "https://npm.pkg.github.com" - run: pnpm publish --no-git-checks . env: diff --git a/.github/workflows/hub-publish.yml b/.github/workflows/hub-publish.yml index 9229d2309..4a75fe7af 100644 --- a/.github/workflows/hub-publish.yml +++ b/.github/workflows/hub-publish.yml @@ -33,7 +33,7 @@ jobs: - run: corepack enable - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" cache: "pnpm" cache-dependency-path: | packages/hub/pnpm-lock.yaml @@ -59,7 +59,7 @@ jobs: # hack - reuse actions/setup-node@v3 just to set a new registry - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" registry-url: "https://npm.pkg.github.com" - run: pnpm publish --no-git-checks . 
env: diff --git a/.github/workflows/inference-publish.yml b/.github/workflows/inference-publish.yml index 4deb63fc3..bbf0a9c0f 100644 --- a/.github/workflows/inference-publish.yml +++ b/.github/workflows/inference-publish.yml @@ -33,7 +33,7 @@ jobs: - run: corepack enable - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" cache: "pnpm" cache-dependency-path: | packages/inference/pnpm-lock.yaml @@ -59,7 +59,7 @@ jobs: # hack - reuse actions/setup-node@v3 just to set a new registry - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" registry-url: "https://npm.pkg.github.com" - run: pnpm publish --no-git-checks . env: diff --git a/.github/workflows/jinja-publish.yml b/.github/workflows/jinja-publish.yml index d6495ca85..5057d4ed5 100644 --- a/.github/workflows/jinja-publish.yml +++ b/.github/workflows/jinja-publish.yml @@ -33,7 +33,7 @@ jobs: - run: corepack enable - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" cache: "pnpm" cache-dependency-path: | packages/jinja/pnpm-lock.yaml @@ -56,7 +56,7 @@ jobs: # hack - reuse actions/setup-node@v3 just to set a new registry - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" registry-url: "https://npm.pkg.github.com" - run: pnpm publish --no-git-checks . env: diff --git a/.github/workflows/languages-publish.yml b/.github/workflows/languages-publish.yml index a5d0a2d59..5dca90f89 100644 --- a/.github/workflows/languages-publish.yml +++ b/.github/workflows/languages-publish.yml @@ -33,7 +33,7 @@ jobs: - run: corepack enable - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" cache: "pnpm" cache-dependency-path: | packages/languages/pnpm-lock.yaml @@ -56,7 +56,7 @@ jobs: # hack - reuse actions/setup-node@v3 just to set a new registry - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" registry-url: "https://npm.pkg.github.com" - run: pnpm publish --no-git-checks . 
env: diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index e941ec8f4..6b43df9fa 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -29,7 +29,7 @@ jobs: - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" cache: "pnpm" cache-dependency-path: "**/pnpm-lock.yaml" - run: | diff --git a/.github/workflows/tasks-publish.yml b/.github/workflows/tasks-publish.yml index a9eaac92e..4c8b4567e 100644 --- a/.github/workflows/tasks-publish.yml +++ b/.github/workflows/tasks-publish.yml @@ -33,7 +33,7 @@ jobs: - run: corepack enable - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" cache: "pnpm" cache-dependency-path: | packages/tasks/pnpm-lock.yaml @@ -56,7 +56,7 @@ jobs: # hack - reuse actions/setup-node@v3 just to set a new registry - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" registry-url: "https://npm.pkg.github.com" - run: pnpm publish --no-git-checks . env: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 42a019081..70e1e1257 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -29,7 +29,7 @@ jobs: - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" cache: "pnpm" cache-dependency-path: "**/pnpm-lock.yaml" - run: | diff --git a/.github/workflows/widgets-publish.yml b/.github/workflows/widgets-publish.yml index f4652ffe9..7b431ad14 100644 --- a/.github/workflows/widgets-publish.yml +++ b/.github/workflows/widgets-publish.yml @@ -33,7 +33,7 @@ jobs: - run: corepack enable - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" cache: "pnpm" cache-dependency-path: | packages/widgets/pnpm-lock.yaml @@ -168,7 +168,7 @@ jobs: # hack - reuse actions/setup-node@v3 just to set a new registry - uses: actions/setup-node@v3 with: - node-version: "18" + node-version: "20" registry-url: "https://npm.pkg.github.com" - run: pnpm publish --no-git-checks . 
env: diff --git a/packages/gguf/package.json b/packages/gguf/package.json index d82b2c6a1..c4d214c04 100644 --- a/packages/gguf/package.json +++ b/packages/gguf/package.json @@ -22,7 +22,7 @@ "./dist/index.mjs": "./dist/browser/index.mjs" }, "engines": { - "node": ">=18" + "node": ">=20" }, "source": "index.ts", "scripts": { From e3030fd0d2b1fb5b3d395c6f23bc1dbf01e09726 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Mon, 11 Mar 2024 12:47:16 +0100 Subject: [PATCH 08/14] Update packages/gguf/src/gguf.ts Co-authored-by: Mishig --- packages/gguf/src/gguf.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index f0b7dcc46..e1833c752 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -194,7 +194,7 @@ export async function gguf(url: string): Promise { const version = r.view.getUint32(4, true); if (!isVersion(version)) { - throw new Error("not a valid gguf file: unsupported version"); + throw new Error(`not a valid gguf file: unsupported version "${version}"`); } const tensorCount = readVersionedSize(r.view, 8, version); const numKv = readVersionedSize(r.view, 16, version); From 4eac1cde8fecb7abb31bf22620ee1e0b6655c896 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Mon, 11 Mar 2024 12:51:09 +0100 Subject: [PATCH 09/14] set this as a const --- packages/gguf/src/gguf.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index e1833c752..b400a49d3 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -52,6 +52,7 @@ function isGGUFValueType(n: number): n is GGUFValueType { const HTTP_CHUNK_SIZE = 2 * 10 ** 6; /// 2MB const HTTP_DATA_LEEWAY = 5 * 10 ** 5; /// 500kb +const HTTP_TOTAL_MAX_SIZE = 50 * 10 ** 6; /// 50MB /** * Internal stateful instance to fetch ranges of HTTP data when needed @@ -67,7 +68,7 @@ class RangeView { /// TODO(fix typing) // eslint-disable-next-line 
@typescript-eslint/ban-ts-comment // @ts-ignore - this.buffer = new ArrayBuffer(0, { maxByteLength: 50 * 10 ** 6 }); + this.buffer = new ArrayBuffer(0, { maxByteLength: HTTP_TOTAL_MAX_SIZE }); this.view = new DataView(this.buffer); } /** From 9366d4ae27a0ff6e289b7f63280ff56d0c80a715 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Mon, 11 Mar 2024 12:53:10 +0100 Subject: [PATCH 10/14] review from @mishig25 --- packages/gguf/src/gguf.spec.ts | 12 ++++++------ packages/gguf/src/gguf.ts | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/packages/gguf/src/gguf.spec.ts b/packages/gguf/src/gguf.spec.ts index 6c19c4a34..a1fdf2b21 100644 --- a/packages/gguf/src/gguf.spec.ts +++ b/packages/gguf/src/gguf.spec.ts @@ -53,12 +53,12 @@ describe("gguf", () => { expect(tensorInfos[0]).toMatchObject({ name: "token_embd.weight", shape: [4096n, 32000n], - type: GGMLQuantizationType.Q2_K, + dtype: GGMLQuantizationType.Q2_K, }); expect(tensorInfos[tensorInfos.length - 1]).toMatchObject({ name: "output_norm.weight", shape: [4096n], - type: GGMLQuantizationType.F32, + dtype: GGMLQuantizationType.F32, }); }); @@ -91,12 +91,12 @@ describe("gguf", () => { expect(tensorInfos[0]).toMatchObject({ name: "token_embd.weight", shape: [4096n, 32000n], - type: GGMLQuantizationType.Q5_K, + dtype: GGMLQuantizationType.Q5_K, }); expect(tensorInfos[tensorInfos.length - 1]).toMatchObject({ name: "output.weight", shape: [4096n, 32000n], - type: GGMLQuantizationType.Q6_K, + dtype: GGMLQuantizationType.Q6_K, }); }); @@ -128,12 +128,12 @@ describe("gguf", () => { expect(tensorInfos[0]).toMatchObject({ name: "token_embd.weight", shape: [2048n, 256128n], - type: GGMLQuantizationType.Q4_K, + dtype: GGMLQuantizationType.Q4_K, }); expect(tensorInfos[tensorInfos.length - 1]).toMatchObject({ name: "blk.9.ffn_norm.weight", shape: [2048n], - type: GGMLQuantizationType.F32, + dtype: GGMLQuantizationType.F32, }); }); }); diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts 
index b400a49d3..38df510de 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -95,7 +95,7 @@ class RangeView { /** * Check whether we need to fetch a new chunk */ - async check(offset: number) { + async fetchChunkIfNeeded(offset: number) { if (this.view.byteLength - offset < HTTP_DATA_LEEWAY) { await this.fetchChunk(); } @@ -176,7 +176,7 @@ export interface GGUFTensorInfo { name: string; n_dims: number; shape: bigint[]; - type: GGMLQuantizationType; + dtype: GGMLQuantizationType; offset: bigint; } @@ -209,7 +209,7 @@ export async function gguf(url: string): Promise { let offset = 24; for (let i = 0; i < numKv; i++) { - await r.check(offset); + await r.fetchChunkIfNeeded(offset); // read key const keyResult = readString(r.view, offset); @@ -243,7 +243,7 @@ export async function gguf(url: string): Promise { const tensorInfos: GGUFTensorInfo[] = []; for (let i = 0; i < tensorCount; i++) { - await r.check(offset); + await r.fetchChunkIfNeeded(offset); // read tensor name const keyResult = readString(r.view, offset); @@ -267,7 +267,7 @@ export async function gguf(url: string): Promise { name: keyResult.value, n_dims: nDims, shape, - type, + dtype: type, offset: tensorOffset, }); } From 8ec3643c9c03581524f9361d3caf39fe1a797510 Mon Sep 17 00:00:00 2001 From: Mishig Date: Tue, 12 Mar 2024 09:28:54 -0700 Subject: [PATCH 11/14] Gguf updates (#543) 1. [Use length rather than newOffset](https://github.com/huggingface/huggingface.js/pull/543/commits/fcab2c96be2e11c02fcdc7e3301c3c30164b75c7) (discussed [here](https://github.com/huggingface/huggingface.js/pull/540#discussion_r1519588604)) 2. 
[custom fetch fn](https://github.com/huggingface/huggingface.js/pull/543/commits/18f93f37ce07f635dcd5cabb7156de4f7f1dc66f) (discussed [here](https://github.com/huggingface/huggingface.js/pull/540#discussion_r1519586431)) --- packages/gguf/src/gguf.ts | 70 ++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 26 deletions(-) diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index 38df510de..c272e9817 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -60,16 +60,26 @@ const HTTP_TOTAL_MAX_SIZE = 50 * 10 ** 6; /// 50MB class RangeView { private chunk: number; private buffer: ArrayBuffer; + private fetch: typeof fetch; readonly view: DataView; - constructor(public url: string) { + constructor( + public url: string, + params?: { + /** + * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers. + */ + fetch?: typeof fetch; + } + ) { this.chunk = 0; /// TODO(fix typing) // eslint-disable-next-line @typescript-eslint/ban-ts-comment // @ts-ignore this.buffer = new ArrayBuffer(0, { maxByteLength: HTTP_TOTAL_MAX_SIZE }); this.view = new DataView(this.buffer); + this.fetch = params?.fetch ?? 
fetch; } /** * Fetch a new chunk from the server @@ -78,7 +88,7 @@ class RangeView { const range = [this.chunk * HTTP_CHUNK_SIZE, (this.chunk + 1) * HTTP_CHUNK_SIZE - 1]; const buf = new Uint8Array( await ( - await fetch(this.url, { + await this.fetch(this.url, { headers: { Range: `bytes=${range[0]}-${range[1]}`, }, @@ -115,54 +125,54 @@ function readVersionedSize(view: DataView, byteOffset: number, version: Version) } } -function readString(view: DataView, offset: number): { value: string; newOffset: number } { +function readString(view: DataView, offset: number): { value: string; length: number } { const length = view.getBigUint64(offset, true); const value = new TextDecoder().decode(view.buffer.slice(offset + 8, offset + 8 + Number(length))); - return { value, newOffset: offset + 8 + Number(length) }; + return { value, length: 8 + Number(length) }; } function readMetadataValue( view: DataView, type: GGUFValueType, offset: number -): { value: MetadataValue; newOffset: number } { +): { value: MetadataValue; length: number } { switch (type) { case GGUFValueType.UINT8: - return { value: view.getUint8(offset), newOffset: offset + 1 }; + return { value: view.getUint8(offset), length: 1 }; case GGUFValueType.INT8: - return { value: view.getInt8(offset), newOffset: offset + 1 }; + return { value: view.getInt8(offset), length: 1 }; case GGUFValueType.UINT16: - return { value: view.getUint16(offset, true), newOffset: offset + 2 }; + return { value: view.getUint16(offset, true), length: 2 }; case GGUFValueType.INT16: - return { value: view.getInt16(offset, true), newOffset: offset + 2 }; + return { value: view.getInt16(offset, true), length: 2 }; case GGUFValueType.UINT32: - return { value: view.getUint32(offset, true), newOffset: offset + 4 }; + return { value: view.getUint32(offset, true), length: 4 }; case GGUFValueType.INT32: - return { value: view.getInt32(offset, true), newOffset: offset + 4 }; + return { value: view.getInt32(offset, true), length: 4 }; case 
GGUFValueType.FLOAT32: - return { value: view.getFloat32(offset, true), newOffset: offset + 4 }; + return { value: view.getFloat32(offset, true), length: 4 }; case GGUFValueType.BOOL: - return { value: view.getUint8(offset) !== 0, newOffset: offset + 1 }; + return { value: view.getUint8(offset) !== 0, length: 1 }; case GGUFValueType.STRING: return readString(view, offset); case GGUFValueType.ARRAY: { const arrayType = view.getUint32(offset, true); const arrayLength = view.getBigUint64(offset + 4, true); - let arrayOffset = offset + 12; + let length = 12; const arrayValues: MetadataValue[] = []; for (let i = 0; i < arrayLength; i++) { - const { value, newOffset } = readMetadataValue(view, arrayType, arrayOffset); + const { value, length: _length } = readMetadataValue(view, arrayType, offset + length); arrayValues.push(value); - arrayOffset = newOffset; + length += _length; } - return { value: arrayValues, newOffset: arrayOffset }; + return { value: arrayValues, length }; } case GGUFValueType.UINT64: - return { value: view.getBigUint64(offset, true), newOffset: offset + 8 }; + return { value: view.getBigUint64(offset, true), length: 8 }; case GGUFValueType.INT64: - return { value: view.getBigInt64(offset, true), newOffset: offset + 8 }; + return { value: view.getBigInt64(offset, true), length: 8 }; case GGUFValueType.FLOAT64: - return { value: view.getFloat64(offset, true), newOffset: offset + 8 }; + return { value: view.getFloat64(offset, true), length: 8 }; } } @@ -185,8 +195,16 @@ export interface GGUFParseOutput { tensorInfos: GGUFTensorInfo[]; } -export async function gguf(url: string): Promise { - const r = new RangeView(url); +export async function gguf( + url: string, + params?: { + /** + * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers. 
+ */ + fetch?: typeof fetch; + } +): Promise { + const r = new RangeView(url, params); await r.fetchChunk(); if (r.view.getUint32(0, true) !== new DataView(ggufMagicNumber.buffer).getUint32(0, true)) { @@ -213,7 +231,7 @@ export async function gguf(url: string): Promise { // read key const keyResult = readString(r.view, offset); - offset = keyResult.newOffset; + offset += keyResult.length; // read value type const valueType = r.view.getUint32(offset, true); @@ -223,7 +241,7 @@ export async function gguf(url: string): Promise { throw new Error("Unsupported metadata type: " + valueType); } - let valueResult: { value: MetadataValue; newOffset: number } | undefined; + let valueResult: ReturnType | undefined; while (!valueResult) { try { // read value @@ -236,7 +254,7 @@ export async function gguf(url: string): Promise { } } } - offset = valueResult.newOffset; + offset += valueResult.length; metadata[keyResult.value] = valueResult.value; } @@ -247,7 +265,7 @@ export async function gguf(url: string): Promise { // read tensor name const keyResult = readString(r.view, offset); - offset = keyResult.newOffset; + offset += keyResult.length; const nDims = r.view.getUint32(offset, true); offset += 4; From 12d28d947c2c887504871a13ef46f59d28bf2161 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Wed, 13 Mar 2024 09:34:06 +0100 Subject: [PATCH 12/14] GGUF parser: support big-endian files (#545) The important snippet is: ```ts const [littleEndian, version] = (() => { /// https://github.com/ggerganov/llama.cpp/issues/3957 /// Assume this code is always running on little-endian /// but wants to be able to parse both endianness const version = r.view.getUint32(4, true); if (version & 65535) { return [true, version]; } else { return [false, r.view.getUint32(4, false)]; } })(); ``` from https://github.com/ggerganov/llama.cpp/issues/3957 and thanks to @ggerganov [comment](https://github.com/huggingface/huggingface.js/pull/540/files#r1521103912) --- packages/gguf/src/gguf.spec.ts | 42 
++++++++++++++++ packages/gguf/src/gguf.ts | 91 +++++++++++++++++++++++----------- 2 files changed, 103 insertions(+), 30 deletions(-) diff --git a/packages/gguf/src/gguf.spec.ts b/packages/gguf/src/gguf.spec.ts index a1fdf2b21..165009322 100644 --- a/packages/gguf/src/gguf.spec.ts +++ b/packages/gguf/src/gguf.spec.ts @@ -6,6 +6,9 @@ const URL_MISTRAL_7B = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q5_K_M.gguf"; const URL_GEMMA_2B = "https://huggingface.co/lmstudio-ai/gemma-2b-it-GGUF/resolve/main/gemma-2b-it-q4_k_m.gguf"; +const URL_BIG_ENDIAN = + "https://huggingface.co/ggml-org/models/resolve/main/bert-bge-small/ggml-model-f16-big-endian.gguf"; + describe("gguf", () => { it("should parse a llama2 7b", async () => { const { metadata, tensorInfos } = await gguf(URL_LLAMA); @@ -13,6 +16,7 @@ describe("gguf", () => { /// metadata expect(metadata).toMatchObject({ + // partial list, do not exhaustively list (tokenizer is quite big for instance) version: 2, tensor_count: 291n, kv_count: 19n, @@ -48,6 +52,7 @@ describe("gguf", () => { ]); /// Tensor infos + /// By convention we test the first and last tensor. 
expect(tensorInfos.length).toEqual(291); expect(tensorInfos[0]).toMatchObject({ @@ -136,4 +141,41 @@ describe("gguf", () => { dtype: GGMLQuantizationType.F32, }); }); + + it("should parse a big-endian file", async () => { + const { metadata, tensorInfos } = await gguf(URL_BIG_ENDIAN); + + /// metadata + + expect(metadata).toMatchObject({ + version: 3, + tensor_count: 197n, + kv_count: 23n, + "general.architecture": "bert", + "general.file_type": GGMLQuantizationType.F16, + "general.name": "bge-small-en-v1.5", + "bert.attention.causal": false, + "bert.attention.head_count": 12, + "bert.attention.layer_norm_epsilon": 9.999999960041972e-13, + "bert.block_count": 12, + "bert.context_length": 512, + "bert.embedding_length": 384, + "bert.feed_forward_length": 1536, + "bert.pooling_type": 2, + }); + + /// Tensor infos + + expect(tensorInfos.length).toEqual(197); + expect(tensorInfos[0]).toMatchObject({ + name: "token_embd_norm.bias", + shape: [384n], + dtype: GGMLQuantizationType.F32, + }); + expect(tensorInfos[tensorInfos.length - 1]).toMatchObject({ + name: "blk.9.ffn_down.weight", + shape: [1536n, 384n], + dtype: GGMLQuantizationType.F16, + }); + }); }); diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index c272e9817..3398c3223 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -4,6 +4,12 @@ type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[]; type Version = 1 | 2 | 3; const isVersion = (version: number): version is Version => version === 1 || version === 2 || version === 3; +/** + * Must be `GGUF` at the byte level: `0x47` `0x47` `0x55` `0x46`. + * Your executor might do little-endian byte order, so it might be + * check for 0x46554747 and letting the endianness cancel out. + * Consider being *very* explicit about the byte order here. 
+ */ const ggufMagicNumber = new Uint8Array([0x47, 0x47, 0x55, 0x46]); /// "GGUF" export enum GGMLQuantizationType { @@ -112,21 +118,25 @@ class RangeView { } } -function readVersionedSize(view: DataView, byteOffset: number, version: Version): bigint { +/** + * Note: A good article about binary data in JS: https://javascript.info/arraybuffer-binary-arrays + */ + +function readVersionedSize(view: DataView, byteOffset: number, version: Version, littleEndian: boolean): bigint { switch (version) { case 1: { - const n = view.getUint32(byteOffset, true); + const n = view.getUint32(byteOffset, littleEndian); return BigInt(n); } case 2: case 3: { - return view.getBigUint64(byteOffset, true); + return view.getBigUint64(byteOffset, littleEndian); } } } -function readString(view: DataView, offset: number): { value: string; length: number } { - const length = view.getBigUint64(offset, true); +function readString(view: DataView, offset: number, littleEndian: boolean): { value: string; length: number } { + const length = view.getBigUint64(offset, littleEndian); const value = new TextDecoder().decode(view.buffer.slice(offset + 8, offset + 8 + Number(length))); return { value, length: 8 + Number(length) }; } @@ -134,7 +144,8 @@ function readString(view: DataView, offset: number): { value: string; length: nu function readMetadataValue( view: DataView, type: GGUFValueType, - offset: number + offset: number, + littleEndian: boolean ): { value: MetadataValue; length: number } { switch (type) { case GGUFValueType.UINT8: @@ -142,37 +153,37 @@ function readMetadataValue( case GGUFValueType.INT8: return { value: view.getInt8(offset), length: 1 }; case GGUFValueType.UINT16: - return { value: view.getUint16(offset, true), length: 2 }; + return { value: view.getUint16(offset, littleEndian), length: 2 }; case GGUFValueType.INT16: - return { value: view.getInt16(offset, true), length: 2 }; + return { value: view.getInt16(offset, littleEndian), length: 2 }; case GGUFValueType.UINT32: - return { 
value: view.getUint32(offset, true), length: 4 }; + return { value: view.getUint32(offset, littleEndian), length: 4 }; case GGUFValueType.INT32: - return { value: view.getInt32(offset, true), length: 4 }; + return { value: view.getInt32(offset, littleEndian), length: 4 }; case GGUFValueType.FLOAT32: - return { value: view.getFloat32(offset, true), length: 4 }; + return { value: view.getFloat32(offset, littleEndian), length: 4 }; case GGUFValueType.BOOL: return { value: view.getUint8(offset) !== 0, length: 1 }; case GGUFValueType.STRING: - return readString(view, offset); + return readString(view, offset, littleEndian); case GGUFValueType.ARRAY: { - const arrayType = view.getUint32(offset, true); - const arrayLength = view.getBigUint64(offset + 4, true); + const arrayType = view.getUint32(offset, littleEndian); + const arrayLength = view.getBigUint64(offset + 4, littleEndian); let length = 12; const arrayValues: MetadataValue[] = []; for (let i = 0; i < arrayLength; i++) { - const { value, length: _length } = readMetadataValue(view, arrayType, offset + length); + const { value, length: _length } = readMetadataValue(view, arrayType, offset + length, littleEndian); arrayValues.push(value); length += _length; } return { value: arrayValues, length }; } case GGUFValueType.UINT64: - return { value: view.getBigUint64(offset, true), length: 8 }; + return { value: view.getBigUint64(offset, littleEndian), length: 8 }; case GGUFValueType.INT64: - return { value: view.getBigInt64(offset, true), length: 8 }; + return { value: view.getBigInt64(offset, littleEndian), length: 8 }; case GGUFValueType.FLOAT64: - return { value: view.getFloat64(offset, true), length: 8 }; + return { value: view.getFloat64(offset, littleEndian), length: 8 }; } } @@ -207,16 +218,36 @@ export async function gguf( const r = new RangeView(url, params); await r.fetchChunk(); - if (r.view.getUint32(0, true) !== new DataView(ggufMagicNumber.buffer).getUint32(0, true)) { + const checkBuffer = (buffer: 
Uint8Array, header: Uint8Array) => { + for (let i = 0; i < header.length; i++) { + if (header[i] !== buffer[i]) { + return false; + } + } + return true; + }; + + if (!checkBuffer(new Uint8Array(r.view.buffer.slice(0, 4)), ggufMagicNumber)) { throw new Error("not a valid gguf file: not starting with GGUF magic number"); } - const version = r.view.getUint32(4, true); + const [littleEndian, version] = (() => { + /// https://github.com/ggerganov/llama.cpp/issues/3957 + /// Assume this code is always running on little-endian + /// but wants to be able to parse both endianness + const version = r.view.getUint32(4, true); + if (version & 65535) { + return [true, version]; + } else { + return [false, r.view.getUint32(4, false)]; + } + })(); + if (!isVersion(version)) { throw new Error(`not a valid gguf file: unsupported version "${version}"`); } - const tensorCount = readVersionedSize(r.view, 8, version); - const numKv = readVersionedSize(r.view, 16, version); + const tensorCount = readVersionedSize(r.view, 8, version, littleEndian); + const numKv = readVersionedSize(r.view, 16, version, littleEndian); const metadata: GGUFMetadata = { version, @@ -230,11 +261,11 @@ export async function gguf( await r.fetchChunkIfNeeded(offset); // read key - const keyResult = readString(r.view, offset); + const keyResult = readString(r.view, offset, littleEndian); offset += keyResult.length; // read value type - const valueType = r.view.getUint32(offset, true); + const valueType = r.view.getUint32(offset, littleEndian); offset += 4; if (!isGGUFValueType(valueType)) { @@ -245,7 +276,7 @@ export async function gguf( while (!valueResult) { try { // read value - valueResult = readMetadataValue(r.view, valueType, offset); + valueResult = readMetadataValue(r.view, valueType, offset, littleEndian); } catch (err) { if (err instanceof RangeError) { await r.fetchChunk(); @@ -264,21 +295,21 @@ export async function gguf( await r.fetchChunkIfNeeded(offset); // read tensor name - const keyResult = 
readString(r.view, offset); + const keyResult = readString(r.view, offset, littleEndian); offset += keyResult.length; - const nDims = r.view.getUint32(offset, true); + const nDims = r.view.getUint32(offset, littleEndian); offset += 4; const shape: bigint[] = []; for (let dim = 0; dim < nDims; dim++) { - shape.push(r.view.getBigUint64(offset, true)); + shape.push(r.view.getBigUint64(offset, littleEndian)); offset += 8; } - const type = r.view.getUint32(offset, true); + const type = r.view.getUint32(offset, littleEndian); offset += 4; - const tensorOffset = r.view.getBigUint64(offset, true); + const tensorOffset = r.view.getBigUint64(offset, littleEndian); offset += 8; tensorInfos.push({ From 007c451bb94021d6a181b0523988cc9ac1b9b6fa Mon Sep 17 00:00:00 2001 From: Mishig Davaadorj Date: Wed, 13 Mar 2024 09:28:26 +0000 Subject: [PATCH 13/14] use current versions of model weights --- packages/gguf/src/gguf.spec.ts | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/packages/gguf/src/gguf.spec.ts b/packages/gguf/src/gguf.spec.ts index 165009322..b919e2881 100644 --- a/packages/gguf/src/gguf.spec.ts +++ b/packages/gguf/src/gguf.spec.ts @@ -1,13 +1,12 @@ import { describe, expect, it } from "vitest"; import { GGMLQuantizationType, gguf } from "./gguf"; -const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q2_K.gguf"; +const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/191239b/llama-2-7b-chat.Q2_K.gguf"; const URL_MISTRAL_7B = - "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q5_K_M.gguf"; -const URL_GEMMA_2B = "https://huggingface.co/lmstudio-ai/gemma-2b-it-GGUF/resolve/main/gemma-2b-it-q4_k_m.gguf"; - + "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/3a6fbf4/mistral-7b-instruct-v0.2.Q5_K_M.gguf"; +const URL_GEMMA_2B = 
"https://huggingface.co/lmstudio-ai/gemma-2b-it-GGUF/resolve/a0b140b/gemma-2b-it-q4_k_m.gguf"; const URL_BIG_ENDIAN = - "https://huggingface.co/ggml-org/models/resolve/main/bert-bge-small/ggml-model-f16-big-endian.gguf"; + "https://huggingface.co/ggml-org/models/resolve/1213976/bert-bge-small/ggml-model-f16-big-endian.gguf"; describe("gguf", () => { it("should parse a llama2 7b", async () => { From 11b6b8971192226e621ba1e1674305e14ac80959 Mon Sep 17 00:00:00 2001 From: Mishig Date: Wed, 13 Mar 2024 02:29:03 -0700 Subject: [PATCH 14/14] Update packages/gguf/src/gguf.ts --- packages/gguf/src/gguf.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index 3398c3223..80ce89600 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -1,5 +1,5 @@ -type MetadataBaseValue = string | number | bigint | boolean; -type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[]; /// recursive as arrays can be nested. +export type MetadataBaseValue = string | number | bigint | boolean; +export type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[]; /// recursive as arrays can be nested. type Version = 1 | 2 | 3; const isVersion = (version: number): version is Version => version === 1 || version === 2 || version === 3;