|
| 1 | +export type MetadataBaseValue = string | number | bigint | boolean; |
| 2 | +export type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[]; /// recursive as arrays can be nested. |
| 3 | + |
/** GGUF file-format versions this module recognizes. */
export type Version = 1 | 2 | 3;
| 5 | + |
/**
 * ggml tensor data/quantization types, as stored in a GGUF tensor-info
 * `dtype` field.
 *
 * NOTE(review): values 4 and 5 are intentionally skipped — presumably the
 * removed Q4_2/Q4_3 formats; confirm against the ggml headers before ever
 * reusing those numbers.
 */
export enum GGMLQuantizationType {
	F32 = 0,
	F16 = 1,
	Q4_0 = 2,
	Q4_1 = 3,
	Q5_0 = 6,
	Q5_1 = 7,
	Q8_0 = 8,
	Q8_1 = 9,
	Q2_K = 10,
	Q3_K = 11,
	Q4_K = 12,
	Q5_K = 13,
	Q6_K = 14,
	Q8_K = 15,
	IQ2_XXS = 16,
	IQ2_XS = 17,
	IQ3_XXS = 18,
	IQ1_S = 19,
	IQ4_NL = 20,
	IQ3_S = 21,
	IQ2_S = 22,
	IQ4_XS = 23,
}
| 30 | + |
/**
 * Wire types a GGUF metadata value can be encoded as in the file.
 * `ARRAY` elements carry their own nested value type, which is how
 * {@link MetadataValue} ends up recursive.
 */
export enum GGUFValueType {
	UINT8 = 0,
	INT8 = 1,
	UINT16 = 2,
	INT16 = 3,
	UINT32 = 4,
	INT32 = 5,
	FLOAT32 = 6,
	BOOL = 7,
	STRING = 8,
	ARRAY = 9,
	UINT64 = 10,
	INT64 = 11,
	FLOAT64 = 12,
}
| 46 | + |
/**
 * Model architectures this module declares metadata keys for. Declared
 * `as const` so the `Architecture` union below can be derived from the
 * literal values (do not reorder casually — the tuple type is part of the
 * public surface).
 */
export const ARCHITECTURES = [
	"llama",
	"mpt",
	"gptneox",
	"gptj",
	"gpt2",
	"bloom",
	"falcon",
	"gemma",
	"rwkv",
	"whisper",
] as const;

/** Union of the architecture names listed in {@link ARCHITECTURES}. */
export type Architecture = (typeof ARCHITECTURES)[number];
| 61 | + |
/** Architecture-independent `general.*` metadata keys. */
interface General {
	"general.architecture": Architecture;
	"general.name": string;
	"general.file_type": number;
	"general.quantization_version": number;
}
| 68 | + |
/**
 * Attention-related metadata keys, namespaced under the architecture name
 * (e.g. `llama.attention.head_count`).
 *
 * NOTE(review): this is a *union* of single-key object types, not an
 * intersection — any one of these keys satisfies the type on its own.
 * Confirm union (vs. `&`-combined required fields) is intended; the same
 * pattern is used by the other key groups in this file.
 */
type Attention<TArchitecture extends Architecture> =
	| { [K in `${TArchitecture}.attention.head_count`]: number }
	| { [K in `${TArchitecture}.attention.head_count_kv`]: number }
	| { [K in `${TArchitecture}.attention.layer_norm_epsilon`]: number }
	| { [K in `${TArchitecture}.attention.layer_norm_rms_epsilon`]: number }
	| { [K in `${TArchitecture}.attention.alibi_bias_max`]: number }
	| { [K in `${TArchitecture}.attention.clip_kqv`]: number }
	| { [K in `${TArchitecture}.attention.use_norm`]: number };
| 77 | + |
/**
 * RoPE (rotary position embedding) metadata keys, namespaced under the
 * architecture name. Same union-of-single-key-types pattern as
 * {@link Attention}.
 */
type Rope<TArchitecture extends Architecture> =
	| { [K in `${TArchitecture}.rope.dimension_count`]: number }
	| { [K in `${TArchitecture}.rope.freq_base`]: number }
	| { [K in `${TArchitecture}.rope.scale`]: number }
	| { [K in `${TArchitecture}.rope.scale_linear`]: number };
| 83 | + |
/**
 * Base per-model metadata keys, namespaced under the architecture name.
 * For whisper, the namespace may additionally be prefixed with `encoder.`
 * or `decoder.` (see {@link Whisper}), which is why the type parameter
 * admits those two extra template forms.
 *
 * NOTE(review): like the other key groups, this is a union of single-key
 * object types — confirm that union (vs. intersection) is intended.
 */
type ModelBase<
	TArchitecture extends
		| Architecture
		| `encoder.${Extract<Architecture, "whisper">}`
		| `decoder.${Extract<Architecture, "whisper">}`,
> =
	| { [K in `${TArchitecture}.layer_count`]: number }
	| { [K in `${TArchitecture}.feed_forward_length`]: number }
	| { [K in `${TArchitecture}.context_length`]: number }
	| { [K in `${TArchitecture}.embedding_length`]: number }
	| { [K in `${TArchitecture}.block_count`]: number };
| 95 | + |
/** Mixture-of-experts metadata keys, namespaced under the architecture name. */
type MOE<TArchitecture extends Architecture> =
	| { [K in `${TArchitecture}.expert_count`]: number }
	| { [K in `${TArchitecture}.expert_used_count`]: number };
| 99 | + |
/** `tokenizer.*` metadata keys carried in a GGUF file. */
interface Tokenizer {
	// NOTE(review): typed as Architecture, but this key presumably names the
	// tokenizer model (e.g. "llama", "gpt2") — the value sets happen to
	// overlap. Confirm the intended domain.
	"tokenizer.ggml.model": Architecture;
	"tokenizer.ggml.tokens": string[];
	"tokenizer.ggml.scores": number[];
	"tokenizer.ggml.token_type": number[];
	"tokenizer.ggml.bos_token_id": number;
	"tokenizer.ggml.eos_token_id": number;
	"tokenizer.ggml.add_bos_token": boolean;
	// Chat template string shipped with the model (presumably Jinja-style —
	// verify against consumers before relying on the format).
	"tokenizer.chat_template": string;
}
| 110 | + |
/** Architectures that use the transformer key set (all but rwkv and whisper). */
type TransformerLLMArchitecture = Exclude<Architecture, "rwkv" | "whisper">;
/** Metadata keys for transformer LLMs: base + MoE + attention + RoPE groups. */
type TransformerLLM = ModelBase<TransformerLLMArchitecture> &
	MOE<TransformerLLMArchitecture> &
	Attention<TransformerLLMArchitecture> &
	Rope<TransformerLLMArchitecture>;
| 116 | + |
/** RWKV models carry the base keys plus an architecture version number. */
export type RWKV = ModelBase<"rwkv"> & { "rwkv.architecture_version": number };
/** Any large language model: transformer-style or RWKV. */
export type LLM = TransformerLLM | RWKV;
/** Whisper splits its base keys into separate encoder/decoder namespaces. */
export type Whisper = ModelBase<"encoder.whisper"> & ModelBase<"decoder.whisper">;
/** Model-level metadata; the tokenizer keys are all optional. */
export type Model = (LLM | Whisper) & Partial<Tokenizer>;
| 121 | + |
/**
 * Parsed GGUF header metadata. The three header fields are always present;
 * every known key group is optional; the trailing broad index signature is
 * deliberate — GGUF files may carry arbitrary extra key/value pairs.
 */
export type GGUFMetadata = {
	version: Version;
	tensor_count: bigint;
	kv_count: bigint;
} & Partial<General> &
	Partial<Model> &
	Record<string, MetadataValue>;
| 129 | + |
/** One entry from the GGUF tensor-info section. */
export interface GGUFTensorInfo {
	name: string;
	// Number of dimensions; presumably `shape` holds `n_dims` entries — confirm
	// against the parser that fills this in.
	n_dims: number;
	shape: bigint[];
	dtype: GGMLQuantizationType;
	// Offset of this tensor's data; presumably relative to the start of the
	// tensor-data section rather than the file — confirm with the parser.
	offset: bigint;
}
| 137 | + |
/** Result of parsing a GGUF file: header metadata plus all tensor-info entries. */
export interface GGUFParseOutput {
	metadata: GGUFMetadata;
	tensorInfos: GGUFTensorInfo[];
}
0 commit comments