Commit e745ba5
Author: Mishig

[gguf] Add types (#562)

GGUF: add types. Follow-up to #540 (comment). No validation of any kind, just types. Also cc: @biw

1 parent 3bd9297 commit e745ba5

2 files changed: +146 -62 lines changed
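Before the diffs, a minimal consumer-side sketch of what the moved types describe (the helper function and the relative import path are illustrative, not part of this commit):

import type { GGUFParseOutput, GGUFTensorInfo } from "./gguf";
import { GGMLQuantizationType } from "./gguf";

// Count tensors stored in a quantized dtype, i.e. anything other than F32/F16.
function countQuantizedTensors(parsed: GGUFParseOutput): number {
  return parsed.tensorInfos.filter(
    (tensor: GGUFTensorInfo) =>
      tensor.dtype !== GGMLQuantizationType.F32 && tensor.dtype !== GGMLQuantizationType.F16
  ).length;
}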

packages/gguf/src/gguf.ts (5 additions, 62 deletions)
@@ -1,7 +1,9 @@
-export type MetadataBaseValue = string | number | bigint | boolean;
-export type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[]; /// recursive as arrays can be nested.
+import type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
+import { GGUFValueType } from "./types";
+
+export type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
+export { GGUFValueType, GGMLQuantizationType } from "./types";
 
-type Version = 1 | 2 | 3;
 const isVersion = (version: number): version is Version => version === 1 || version === 2 || version === 3;
 
 /**
@@ -12,46 +14,6 @@ const isVersion = (version: number): version is Version => version === 1 || vers
 */
 const ggufMagicNumber = new Uint8Array([0x47, 0x47, 0x55, 0x46]); /// "GGUF"
 
-export enum GGMLQuantizationType {
-  F32 = 0,
-  F16 = 1,
-  Q4_0 = 2,
-  Q4_1 = 3,
-  Q5_0 = 6,
-  Q5_1 = 7,
-  Q8_0 = 8,
-  Q8_1 = 9,
-  Q2_K = 10,
-  Q3_K = 11,
-  Q4_K = 12,
-  Q5_K = 13,
-  Q6_K = 14,
-  Q8_K = 15,
-  IQ2_XXS = 16,
-  IQ2_XS = 17,
-  IQ3_XXS = 18,
-  IQ1_S = 19,
-  IQ4_NL = 20,
-  IQ3_S = 21,
-  IQ2_S = 22,
-  IQ4_XS = 23,
-}
-
-enum GGUFValueType {
-  UINT8 = 0,
-  INT8 = 1,
-  UINT16 = 2,
-  INT16 = 3,
-  UINT32 = 4,
-  INT32 = 5,
-  FLOAT32 = 6,
-  BOOL = 7,
-  STRING = 8,
-  ARRAY = 9,
-  UINT64 = 10,
-  INT64 = 11,
-  FLOAT64 = 12,
-}
 function isGGUFValueType(n: number): n is GGUFValueType {
   return typeof GGUFValueType[n] === "string";
 }
@@ -185,25 +147,6 @@ function readMetadataValue(
   }
 }
 
-export type GGUFMetadata = {
-  version: Version;
-  tensor_count: bigint;
-  kv_count: bigint;
-} & Record<string, MetadataValue>;
-
-export interface GGUFTensorInfo {
-  name: string;
-  n_dims: number;
-  shape: bigint[];
-  dtype: GGMLQuantizationType;
-  offset: bigint;
-}
-
-export interface GGUFParseOutput {
-  metadata: GGUFMetadata;
-  tensorInfos: GGUFTensorInfo[];
-}
-
 export async function gguf(
   url: string,
   params?: {
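Since gguf.ts re-exports these names from ./types, existing imports of the enums and GGUF* types from gguf.ts keep compiling, and GGUFValueType (previously module-private) joins the public surface. A small sketch, with the relative path as an assumption:

// GGMLQuantizationType and the GGUF* types were already exported from gguf.ts;
// GGUFValueType is newly exported via the re-export above.
import { GGMLQuantizationType, GGUFValueType } from "./gguf";
import type { GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./gguf";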

packages/gguf/src/types.ts (141 additions, 0 deletions)
@@ -0,0 +1,141 @@
+export type MetadataBaseValue = string | number | bigint | boolean;
+export type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[]; /// recursive as arrays can be nested.
+
+export type Version = 1 | 2 | 3;
+
+export enum GGMLQuantizationType {
+  F32 = 0,
+  F16 = 1,
+  Q4_0 = 2,
+  Q4_1 = 3,
+  Q5_0 = 6,
+  Q5_1 = 7,
+  Q8_0 = 8,
+  Q8_1 = 9,
+  Q2_K = 10,
+  Q3_K = 11,
+  Q4_K = 12,
+  Q5_K = 13,
+  Q6_K = 14,
+  Q8_K = 15,
+  IQ2_XXS = 16,
+  IQ2_XS = 17,
+  IQ3_XXS = 18,
+  IQ1_S = 19,
+  IQ4_NL = 20,
+  IQ3_S = 21,
+  IQ2_S = 22,
+  IQ4_XS = 23,
+}
+
+export enum GGUFValueType {
+  UINT8 = 0,
+  INT8 = 1,
+  UINT16 = 2,
+  INT16 = 3,
+  UINT32 = 4,
+  INT32 = 5,
+  FLOAT32 = 6,
+  BOOL = 7,
+  STRING = 8,
+  ARRAY = 9,
+  UINT64 = 10,
+  INT64 = 11,
+  FLOAT64 = 12,
+}
+
+export const ARCHITECTURES = [
+  "llama",
+  "mpt",
+  "gptneox",
+  "gptj",
+  "gpt2",
+  "bloom",
+  "falcon",
+  "gemma",
+  "rwkv",
+  "whisper",
+] as const;
+
+export type Architecture = (typeof ARCHITECTURES)[number];
+
+interface General {
+  "general.architecture": Architecture;
+  "general.name": string;
+  "general.file_type": number;
+  "general.quantization_version": number;
+}
+
+type Attention<TArchitecture extends Architecture> =
+  | { [K in `${TArchitecture}.attention.head_count`]: number }
+  | { [K in `${TArchitecture}.attention.head_count_kv`]: number }
+  | { [K in `${TArchitecture}.attention.layer_norm_epsilon`]: number }
+  | { [K in `${TArchitecture}.attention.layer_norm_rms_epsilon`]: number }
+  | { [K in `${TArchitecture}.attention.alibi_bias_max`]: number }
+  | { [K in `${TArchitecture}.attention.clip_kqv`]: number }
+  | { [K in `${TArchitecture}.attention.use_norm`]: number };
+
+type Rope<TArchitecture extends Architecture> =
+  | { [K in `${TArchitecture}.rope.dimension_count`]: number }
+  | { [K in `${TArchitecture}.rope.freq_base`]: number }
+  | { [K in `${TArchitecture}.rope.scale`]: number }
+  | { [K in `${TArchitecture}.rope.scale_linear`]: number };
+
+type ModelBase<
+  TArchitecture extends
+    | Architecture
+    | `encoder.${Extract<Architecture, "whisper">}`
+    | `decoder.${Extract<Architecture, "whisper">}`,
+> =
+  | { [K in `${TArchitecture}.layer_count`]: number }
+  | { [K in `${TArchitecture}.feed_forward_length`]: number }
+  | { [K in `${TArchitecture}.context_length`]: number }
+  | { [K in `${TArchitecture}.embedding_length`]: number }
+  | { [K in `${TArchitecture}.block_count`]: number };
+
+type MOE<TArchitecture extends Architecture> =
+  | { [K in `${TArchitecture}.expert_count`]: number }
+  | { [K in `${TArchitecture}.expert_used_count`]: number };
+
+interface Tokenizer {
+  "tokenizer.ggml.model": Architecture;
+  "tokenizer.ggml.tokens": string[];
+  "tokenizer.ggml.scores": number[];
+  "tokenizer.ggml.token_type": number[];
+  "tokenizer.ggml.bos_token_id": number;
+  "tokenizer.ggml.eos_token_id": number;
+  "tokenizer.ggml.add_bos_token": boolean;
+  "tokenizer.chat_template": string;
+}
+
+type TransformerLLMArchitecture = Exclude<Architecture, "rwkv" | "whisper">;
+type TransformerLLM = ModelBase<TransformerLLMArchitecture> &
+  MOE<TransformerLLMArchitecture> &
+  Attention<TransformerLLMArchitecture> &
+  Rope<TransformerLLMArchitecture>;
+
+export type RWKV = ModelBase<"rwkv"> & { "rwkv.architecture_version": number };
+export type LLM = TransformerLLM | RWKV;
+export type Whisper = ModelBase<"encoder.whisper"> & ModelBase<"decoder.whisper">;
+export type Model = (LLM | Whisper) & Partial<Tokenizer>;
+
+export type GGUFMetadata = {
+  version: Version;
+  tensor_count: bigint;
+  kv_count: bigint;
+} & Partial<General> &
+  Partial<Model> &
+  Record<string, MetadataValue>;
+
+export interface GGUFTensorInfo {
+  name: string;
+  n_dims: number;
+  shape: bigint[];
+  dtype: GGMLQuantizationType;
+  offset: bigint;
+}
+
+export interface GGUFParseOutput {
+  metadata: GGUFMetadata;
+  tensorInfos: GGUFTensorInfo[];
+}
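For illustration, a minimal sketch of how the template-literal key types surface on GGUFMetadata (the helper function and the specific key are illustrative, not part of this commit):

import type { GGUFMetadata } from "./types";

// Attention<"llama"> expands `${TArchitecture}.attention.head_count` into the literal key
// "llama.attention.head_count"; any other string key still falls back to Record<string, MetadataValue>.
function getLlamaHeadCount(metadata: GGUFMetadata): number | undefined {
  const headCount = metadata["llama.attention.head_count"];
  return typeof headCount === "number" ? headCount : undefined;
}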
