
Add LoRA support #820

Merged 15 commits on Apr 17, 2023
Rebase to master
slaren committed Apr 16, 2023
commit 57627f0e5faa5bb6b8fd456ccbe1aaf83cff695f
ggml.c: 58 changes (31 additions & 27 deletions)
@@ -1420,6 +1420,34 @@ static void dequantize_row_q4_1(const void * restrict vx, float * restrict y, int k)
 #endif
 }
 
+static void ggml_vec_dot_q4_1(const int n, float * restrict s, const void * restrict vx, const void * restrict vy);
+static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void * restrict vx, const void * restrict vy);
+
+static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
+    [GGML_TYPE_Q4_0] = {
+        .dequantize_row_q         = dequantize_row_q4_0,
+        .quantize_row_q           = quantize_row_q4_0,
+        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_0_reference,
+        .quantize_row_q_dot       = quantize_row_q8_0,
+        .vec_dot_q                = ggml_vec_dot_q4_0_q8_0,
+    },
+    [GGML_TYPE_Q4_1] = {
+        .dequantize_row_q         = dequantize_row_q4_1,
+        .quantize_row_q           = quantize_row_q4_1,
+        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference,
+        .quantize_row_q_dot       = quantize_row_q4_1,
+        .vec_dot_q                = ggml_vec_dot_q4_1,
+    },
+    // TODO: GGML_TYPE_Q8_0
+};
+
+// For internal test use
+quantize_fns_t ggml_internal_get_quantize_fn(size_t i) {
+    GGML_ASSERT(i < GGML_TYPE_COUNT);
+    return quantize_fns[i];
+}
+
+
 //
 // simd mappings
 //
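For context: quantize_fns maps each quantized tensor type to its row (de)quantization and dot-product kernels, and this commit moves the table (plus the forward declarations it needs) ahead of ggml_compute_forward_add_q_f32 so that function can dispatch through it; ggml_internal_get_quantize_fn exposes the same table to the test suite. Below is a minimal sketch of the dispatch pattern this enables, using the field names and signatures shown in the diff; the helper add_f32_row_to_q_row itself is hypothetical, not part of the PR:

    // Hypothetical helper sketching dispatch through quantize_fns: dequantize
    // one quantized row to f32, accumulate a float row into it, re-quantize.
    // This mirrors the per-row work ggml_compute_forward_add_q_f32 performs.
    static void add_f32_row_to_q_row(enum ggml_type type, void * q_row,
                                     const float * x, float * tmp, int n) {
        const quantize_fns_t fns = quantize_fns[type];
        fns.dequantize_row_q(q_row, tmp, n);   // q -> f32
        for (int i = 0; i < n; i++) {
            tmp[i] += x[i];                    // accumulate in f32
        }
        fns.quantize_row_q(tmp, q_row, n);     // f32 -> q
    }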
@@ -5910,12 +5938,12 @@ static void ggml_compute_forward_add_q_f32(
     const int64_t ne03 = src0->ne[3];
 
     //const int64_t ne10 = src1->ne[0];
-    const int64_t ne11 = src1->ne[1];
+    //const int64_t ne11 = src1->ne[1];
     const int64_t ne12 = src1->ne[2];
     const int64_t ne13 = src1->ne[3];
 
-    const int64_t ne0 = dst->ne[0];
-    const int64_t ne1 = dst->ne[1];
+    //const int64_t ne0 = dst->ne[0];
+    //const int64_t ne1 = dst->ne[1];
     const int64_t ne2 = dst->ne[2];
     const int64_t ne3 = dst->ne[3];
 
@@ -7307,30 +7335,6 @@ static void ggml_compute_forward_mul_mat_f16_f32(
     //}
 }
 
-static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
-    [GGML_TYPE_Q4_0] = {
-        .dequantize_row_q         = dequantize_row_q4_0,
-        .quantize_row_q           = quantize_row_q4_0,
-        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_0_reference,
-        .quantize_row_q_dot       = quantize_row_q8_0,
-        .vec_dot_q                = ggml_vec_dot_q4_0_q8_0,
-    },
-    [GGML_TYPE_Q4_1] = {
-        .dequantize_row_q         = dequantize_row_q4_1,
-        .quantize_row_q           = quantize_row_q4_1,
-        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference,
-        .quantize_row_q_dot       = quantize_row_q4_1,
-        .vec_dot_q                = ggml_vec_dot_q4_1,
-    },
-    // TODO: GGML_TYPE_Q8_0
-};
-
-// For internal test use
-quantize_fns_t ggml_internal_get_quantize_fn(size_t i) {
-    GGML_ASSERT(i < GGML_TYPE_COUNT);
-    return quantize_fns[i];
-}
-
 static void ggml_compute_forward_mul_mat_q_f32(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
llama.cpp: 4 changes (2 additions & 2 deletions)
@@ -1896,8 +1896,8 @@ int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lora
         ggml_tensor * loraA = lora_tensors[base_name + ".loraA"];
         ggml_tensor * loraB = lora_tensors[base_name + ".loraB"];
 
-        if (tensor->ne[0] != loraA->ne[1]) {
-            fprintf(stderr, "%s: incompatible tensor dimensions (%" PRId64 " and %" PRId64 ");"
+        if (tensor->ne[0] != loraA->ne[1] || tensor->ne[1] != loraB->ne[1]) {
+            fprintf(stderr, "%s: incompatible tensor dimensions (%" PRId64 " and %" PRId64 ");"
                     " are you sure that this adapter is for this model?\n", __func__, tensor->ne[0], loraA->ne[1]);
             return 1;
         }
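The widened condition is the substance of this llama.cpp change: LoRA replaces a weight W with W + BA, and in ggml the product ggml_mul_mat(loraA, loraB) has shape { loraA->ne[1], loraB->ne[1] }, which must match W's { ne[0], ne[1] } in both dimensions; the old check only validated the first. A hypothetical predicate spelling out the invariant (not the PR's code; assumes the adapter matrices carry the shared LoRA rank r in ne[0], as ggml_mul_mat requires):

    // Shape invariant for applying a LoRA delta BA to a base weight w.
    // With w->ne = { n_in, n_out } and rank r: loraA->ne = { r, n_in } and
    // loraB->ne = { r, n_out }, so mul_mat(loraA, loraB) matches w exactly.
    static bool lora_shapes_compatible(const struct ggml_tensor * w,
                                       const struct ggml_tensor * a,
                                       const struct ggml_tensor * b) {
        return w->ne[0] == a->ne[1]    // checked before this commit
            && w->ne[1] == b->ne[1];   // added by this commit
    }

Without the second comparison, an adapter trained for a different model could pass the check and only fail later inside ggml when the delta is actually applied.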