Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[gguf] Add types #562

Merged
merged 3 commits into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
67 changes: 5 additions & 62 deletions packages/gguf/src/gguf.ts
@@ -1,7 +1,9 @@
/** Scalar values that a single metadata entry can hold. */
export type MetadataBaseValue = string | number | bigint | boolean;
/** Any metadata value: a scalar, an array of scalars, or arrays nested to arbitrary depth. */
export type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[]; /// recursive as arrays can be nested.
import type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
import { GGUFValueType } from "./types";

export type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
export { GGUFValueType, GGMLQuantizationType } from "./types";

Comment on lines +4 to +5
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the correct way to re-export?

/** Version numbers that can appear in `GGUFMetadata.version`. */
type Version = 1 | 2 | 3;

/**
 * Type guard that narrows an arbitrary number to a {@link Version}.
 *
 * @param version - Candidate version number.
 * @returns `true` only when `version` is exactly 1, 2 or 3; every other
 *          number (including fractions and `NaN`) yields `false`.
 */
const isVersion = (version: number): version is Version => {
	// `switch` compares with strict equality, same as a chain of `===` checks.
	switch (version) {
		case 1:
		case 2:
		case 3:
			return true;
		default:
			return false;
	}
};

/**
Expand All @@ -12,46 +14,6 @@ const isVersion = (version: number): version is Version => version === 1 || vers
*/
const ggufMagicNumber = new Uint8Array([0x47, 0x47, 0x55, 0x46]); /// "GGUF"

/**
 * Numeric codes identifying the data/quantization format of a tensor; this is
 * the type of `GGUFTensorInfo.dtype`.
 *
 * Every member has an explicit value. The values 4 and 5 have no member.
 * NOTE(review): presumably those codes belonged to formats dropped upstream —
 * confirm against the ggml sources before assigning them.
 */
export enum GGMLQuantizationType {
F32 = 0,
F16 = 1,
Q4_0 = 2,
Q4_1 = 3,
// 4 and 5 are not assigned in this enum.
Q5_0 = 6,
Q5_1 = 7,
Q8_0 = 8,
Q8_1 = 9,
Q2_K = 10,
Q3_K = 11,
Q4_K = 12,
Q5_K = 13,
Q6_K = 14,
Q8_K = 15,
IQ2_XXS = 16,
IQ2_XS = 17,
IQ3_XXS = 18,
IQ1_S = 19,
IQ4_NL = 20,
IQ3_S = 21,
IQ2_S = 22,
IQ4_XS = 23,
}

/**
 * Numeric tags naming the kind of a metadata value: fixed-width integers,
 * floats, booleans, strings and arrays (per the member names).
 *
 * This must stay a regular numeric enum (not a `const enum` or a string enum):
 * `isGGUFValueType` indexes the enum object by number and relies on the
 * reverse value-to-name mapping that only regular numeric enums provide.
 */
enum GGUFValueType {
UINT8 = 0,
INT8 = 1,
UINT16 = 2,
INT16 = 3,
UINT32 = 4,
INT32 = 5,
FLOAT32 = 6,
BOOL = 7,
STRING = 8,
ARRAY = 9,
UINT64 = 10,
INT64 = 11,
FLOAT64 = 12,
}
/**
 * Type guard reporting whether `n` is the numeric value of a `GGUFValueType` member.
 *
 * Uses the reverse mapping of the numeric enum: indexing the enum object with a
 * member's value yields that member's name (a string), while any other number
 * yields `undefined`.
 *
 * @param n - Candidate numeric type tag.
 * @returns `true` when `n` maps back to an enum member name.
 */
function isGGUFValueType(n: number): n is GGUFValueType {
	const memberName: unknown = GGUFValueType[n];
	return typeof memberName === "string";
}
Expand Down Expand Up @@ -185,25 +147,6 @@ function readMetadataValue(
}
}

/**
 * Parsed metadata: three fixed fields (`version`, `tensor_count`, `kv_count`)
 * intersected with an open string-keyed record, so any other key is allowed
 * and is typed as `MetadataValue`.
 */
export type GGUFMetadata = {
version: Version;
tensor_count: bigint;
kv_count: bigint;
} & Record<string, MetadataValue>;

/** Description of a single tensor entry. */
export interface GGUFTensorInfo {
name: string;
// NOTE(review): presumably equal to `shape.length` — confirm against the parser.
n_dims: number;
shape: bigint[];
// Data/quantization format code of the tensor.
dtype: GGMLQuantizationType;
// NOTE(review): presumably the position of the tensor data; what it is relative to is not visible here — confirm.
offset: bigint;
}

/**
 * Pairs the parsed metadata with the list of tensor descriptions.
 * NOTE(review): presumably the value produced by `gguf()` — its full signature is not visible here.
 */
export interface GGUFParseOutput {
metadata: GGUFMetadata;
tensorInfos: GGUFTensorInfo[];
}

export async function gguf(
url: string,
params?: {
Expand Down
141 changes: 141 additions & 0 deletions packages/gguf/src/types.ts
@@ -0,0 +1,141 @@
/** Scalar values that a single metadata entry can hold. */
export type MetadataBaseValue = string | number | bigint | boolean;
/** Any metadata value: a scalar, an array of scalars, or arrays nested to arbitrary depth. */
export type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[]; /// recursive as arrays can be nested.

/** Version numbers that can appear in `GGUFMetadata.version`. */
export type Version = 1 | 2 | 3;

/**
 * Numeric codes identifying the data/quantization format of a tensor; this is
 * the type of `GGUFTensorInfo.dtype`.
 *
 * Every member has an explicit value. The values 4 and 5 have no member.
 * NOTE(review): presumably those codes belonged to formats dropped upstream —
 * confirm against the ggml sources before assigning them.
 */
export enum GGMLQuantizationType {
F32 = 0,
F16 = 1,
Q4_0 = 2,
Q4_1 = 3,
// 4 and 5 are not assigned in this enum.
Q5_0 = 6,
Q5_1 = 7,
Q8_0 = 8,
Q8_1 = 9,
Q2_K = 10,
Q3_K = 11,
Q4_K = 12,
Q5_K = 13,
Q6_K = 14,
Q8_K = 15,
IQ2_XXS = 16,
IQ2_XS = 17,
IQ3_XXS = 18,
IQ1_S = 19,
IQ4_NL = 20,
IQ3_S = 21,
IQ2_S = 22,
IQ4_XS = 23,
}
Comment on lines +6 to +29
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Enums are not, strictly speaking, types, since they also exist as objects at runtime.

End of not super useful pedantic note haha. cc @coyotte508


/**
 * Numeric tags naming the kind of a metadata value: fixed-width integers,
 * floats, booleans, strings and arrays (per the member names).
 *
 * Unlike the type aliases in this module, an enum also exists as an object at
 * runtime, which is why consumers need a regular (non-`type`) import to use it
 * as a value.
 */
export enum GGUFValueType {
UINT8 = 0,
INT8 = 1,
UINT16 = 2,
INT16 = 3,
UINT32 = 4,
INT32 = 5,
FLOAT32 = 6,
BOOL = 7,
STRING = 8,
ARRAY = 9,
UINT64 = 10,
INT64 = 11,
FLOAT64 = 12,
}

export const ARCHITECTURES = [
"llama",
"mpt",
"gptneox",
"gptj",
"gpt2",
"bloom",
"falcon",
"gemma",
"rwkv",
"whisper",
Comment on lines +48 to +57
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"llama",
"mpt",
"gptneox",
"gptj",
"gpt2",
"bloom",
"falcon",
"gemma",
"rwkv",
"whisper",
"llama",
"falcon",
"baichuan",
"gpt2",
"gptj",
"gptneox",
"mpt",
"starcoder",
"persimmon",
"refact",
"bert",
"nomic-bert",
"bloom",
"stablelm",
"qwen",
"qwen2",
"phi2",
"plamo",
"codeshell",
"orion",
"internlm2",
"minicpm",
"gemma",
"starcoder2",
"mamba",

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(optional, but it's the current list from the llama.cpp source of truth IIUC)

] as const;

/**
 * Union of the string literals listed in `ARCHITECTURES`. It is derived from
 * the `as const` array, so the type follows the runtime list automatically.
 */
export type Architecture = (typeof ARCHITECTURES)[number];

/** Metadata keys under the `general.` prefix; they do not depend on the architecture name. */
interface General {
// One of the names listed in `ARCHITECTURES`.
"general.architecture": Architecture;
"general.name": string;
// NOTE(review): numeric code; its meaning is not defined in this file — confirm against the GGUF spec.
"general.file_type": number;
"general.quantization_version": number;
}

/**
 * Attention-related metadata keys of the form `<architecture>.attention.<name>`.
 *
 * The result is a union with one member per key name; each member is an object
 * type holding that numeric key for every architecture in `TArchitecture`.
 */
type Attention<TArchitecture extends Architecture> =
	| Record<`${TArchitecture}.attention.head_count`, number>
	| Record<`${TArchitecture}.attention.head_count_kv`, number>
	| Record<`${TArchitecture}.attention.layer_norm_epsilon`, number>
	| Record<`${TArchitecture}.attention.layer_norm_rms_epsilon`, number>
	| Record<`${TArchitecture}.attention.alibi_bias_max`, number>
	| Record<`${TArchitecture}.attention.clip_kqv`, number>
	| Record<`${TArchitecture}.attention.use_norm`, number>;

/**
 * Metadata keys of the form `<architecture>.rope.<name>`.
 *
 * The result is a union with one member per key name; each member is an object
 * type holding that numeric key for every architecture in `TArchitecture`.
 */
type Rope<TArchitecture extends Architecture> =
	| Record<`${TArchitecture}.rope.dimension_count`, number>
	| Record<`${TArchitecture}.rope.freq_base`, number>
	| Record<`${TArchitecture}.rope.scale`, number>
	| Record<`${TArchitecture}.rope.scale_linear`, number>;

/**
 * Basic model-size metadata keys of the form `<prefix>.<name>`.
 *
 * `TArchitecture` is the key prefix: either a plain architecture name or
 * `encoder.whisper` / `decoder.whisper`. The result is a union with one member
 * per key name; each member is an object type holding that numeric key for
 * every prefix in `TArchitecture`.
 */
type ModelBase<
	TArchitecture extends Architecture | `${"encoder" | "decoder"}.${Extract<Architecture, "whisper">}`
> =
	| Record<`${TArchitecture}.layer_count`, number>
	| Record<`${TArchitecture}.feed_forward_length`, number>
	| Record<`${TArchitecture}.context_length`, number>
	| Record<`${TArchitecture}.embedding_length`, number>
	| Record<`${TArchitecture}.block_count`, number>;

/**
 * Expert-count metadata keys of the form `<architecture>.expert_*`.
 *
 * The result is a union with one member per key name; each member is an object
 * type holding that numeric key for every architecture in `TArchitecture`.
 */
type MOE<TArchitecture extends Architecture> =
	| Record<`${TArchitecture}.expert_count`, number>
	| Record<`${TArchitecture}.expert_used_count`, number>;

/** Metadata keys under the `tokenizer.` prefix. `Model` applies this interface through `Partial`, so every key is optional there. */
interface Tokenizer {
// NOTE(review): typed as `Architecture`; confirm that tokenizer model names are always a subset of the architecture names.
"tokenizer.ggml.model": Architecture;
"tokenizer.ggml.tokens": string[];
"tokenizer.ggml.scores": number[];
"tokenizer.ggml.token_type": number[];
"tokenizer.ggml.bos_token_id": number;
"tokenizer.ggml.eos_token_id": number;
"tokenizer.ggml.add_bos_token": boolean;
"tokenizer.chat_template": string;
}

/** Every architecture name except `rwkv` and `whisper`, which are modelled separately. */
type TransformerLLMArchitecture = Exclude<Architecture, "rwkv" | "whisper">;
/**
 * Keys for those architectures, built by intersecting the four key families.
 * Each family is a union of object types, so a value of this type must satisfy
 * at least one member of every family.
 */
type TransformerLLM = ModelBase<TransformerLLMArchitecture> &
MOE<TransformerLLMArchitecture> &
Attention<TransformerLLMArchitecture> &
Rope<TransformerLLMArchitecture>;

/** `rwkv.*` base keys plus the required numeric `rwkv.architecture_version`. */
export type RWKV = ModelBase<"rwkv"> & { "rwkv.architecture_version": number };
/** Either the keys of a transformer LLM architecture or the RWKV keys. */
export type LLM = TransformerLLM | RWKV;
/** Base keys under both the `encoder.whisper.` and `decoder.whisper.` prefixes. */
export type Whisper = ModelBase<"encoder.whisper"> & ModelBase<"decoder.whisper">;
/** Keys of any supported model, combined with the tokenizer keys (all tokenizer keys optional). */
export type Model = (LLM | Whisper) & Partial<Tokenizer>;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

very neat types (though they make my head hurt a bit, lol)


/**
 * Parsed metadata. It intersects four parts:
 *  - the fixed fields `version`, `tensor_count` and `kv_count` (always present);
 *  - the `general.*` keys, all optional;
 *  - the model and tokenizer keys, all optional;
 *  - an open string-keyed record, so any other key is allowed and is typed as
 *    `MetadataValue`.
 */
export type GGUFMetadata = {
version: Version;
tensor_count: bigint;
kv_count: bigint;
} & Partial<General> &
Partial<Model> &
Record<string, MetadataValue>;

/** Description of a single tensor entry. */
export interface GGUFTensorInfo {
name: string;
// NOTE(review): presumably equal to `shape.length` — confirm against the parser.
n_dims: number;
shape: bigint[];
// Data/quantization format code of the tensor.
dtype: GGMLQuantizationType;
// NOTE(review): presumably the position of the tensor data; what it is relative to is not visible here — confirm.
offset: bigint;
}

/**
 * Pairs the parsed metadata with the list of tensor descriptions.
 * NOTE(review): presumably the value produced by the `gguf()` parser in gguf.ts — confirm.
 */
export interface GGUFParseOutput {
metadata: GGUFMetadata;
tensorInfos: GGUFTensorInfo[];
}