
Commit a17bcdf

ngxsonarthw authored and committed
examples : Fix llama-export-lora example (ggml-org#8607)
* fix export-lora example
* add more logging
* reject merging subset
* better check
* typo
1 parent b7e8bad commit a17bcdf

5 files changed: +351 −409 lines

5 files changed

+351
-409
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
@@ -1322,7 +1322,7 @@ llama-finetune: examples/finetune/finetune.cpp \
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
 llama-export-lora: examples/export-lora/export-lora.cpp \
-	$(OBJ_GGML) common/log.h
+	$(OBJ_ALL)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 

common/common.cpp

Lines changed: 10 additions & 9 deletions
@@ -694,11 +694,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i]));
         return true;
     }
-    if (arg == "--lora-base") {
-        CHECK_ARG
-        params.lora_base = argv[i];
-        return true;
-    }
     if (arg == "--control-vector") {
         CHECK_ARG
         params.control_vectors.push_back({ 1.0f, argv[i], });
@@ -1274,6 +1269,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         CHECK_ARG
         params.out_file = argv[i];
         params.cvector_outfile = argv[i];
+        params.lora_outfile = argv[i];
         return true;
     }
     if (arg == "-ofreq" || arg == "--output-frequency") {
@@ -1583,9 +1579,8 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
     options.push_back({ "*", " --override-kv KEY=TYPE:VALUE",
                         "advanced option to override model metadata by key. may be specified multiple times.\n"
                         "types: int, float, bool, str. example: --override-kv tokenizer.ggml.add_bos_token=bool:false" });
-    options.push_back({ "*", " --lora FNAME", "apply LoRA adapter (implies --no-mmap)" });
-    options.push_back({ "*", " --lora-scaled FNAME S", "apply LoRA adapter with user defined scaling S (implies --no-mmap)" });
-    options.push_back({ "*", " --lora-base FNAME", "optional model to use as a base for the layers modified by the LoRA adapter" });
+    options.push_back({ "*", " --lora FNAME", "apply LoRA adapter (can be repeated to use multiple adapters)" });
+    options.push_back({ "*", " --lora-scaled FNAME S", "apply LoRA adapter with user defined scaling S (can be repeated to use multiple adapters)" });
     options.push_back({ "*", " --control-vector FNAME", "add a control vector\n"
                         "note: this argument can be repeated to add multiple control vectors" });
     options.push_back({ "*", " --control-vector-scaled FNAME SCALE",
@@ -1676,6 +1671,13 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
     options.push_back({ "cvector", " --pca-iter N", "number of iterations used for PCA (default: %d)", params.n_pca_iterations });
     options.push_back({ "cvector", " --method {pca,mean}", "dimensionality reduction method to be used (default: pca)" });
 
+    options.push_back({ "export-lora" });
+    options.push_back({ "export-lora", "-m, --model", "model path from which to load base model (default '%s')", params.model.c_str() });
+    options.push_back({ "export-lora", " --lora FNAME", "path to LoRA adapter (can be repeated to use multiple adapters)" });
+    options.push_back({ "export-lora", " --lora-scaled FNAME S", "path to LoRA adapter with user defined scaling S (can be repeated to use multiple adapters)" });
+    options.push_back({ "*", "-t, --threads N", "number of threads to use during computation (default: %d)", params.n_threads });
+    options.push_back({ "export-lora", "-o, --output FNAME", "output file (default: '%s')", params.lora_outfile.c_str() });
+
     printf("usage: %s [options]\n", argv[0]);
 
     for (const auto & o : options) {
@@ -3166,7 +3168,6 @@ void yaml_dump_non_result_info(FILE * stream, const gpt_params & params, const l
         }
         fprintf(stream, " - %s: %f\n", std::get<0>(la).c_str(), std::get<1>(la));
     }
-    fprintf(stream, "lora_base: %s\n", params.lora_base.c_str());
     fprintf(stream, "main_gpu: %d # default: 0\n", params.main_gpu);
     fprintf(stream, "min_keep: %d # default: 0 (disabled)\n", sparams.min_keep);
     fprintf(stream, "mirostat: %d # default: 0 (disabled)\n", sparams.mirostat);

common/common.h

Lines changed: 2 additions & 1 deletion
@@ -128,7 +128,6 @@ struct gpt_params {
 
     // TODO: avoid tuple, use struct
     std::vector<std::tuple<std::string, float>> lora_adapter; // lora adapter path with user defined scale
-    std::string lora_base = ""; // base model path for the lora adapter
 
     std::vector<llama_control_vector_load_info> control_vectors; // control vector with user defined scale
 
@@ -255,6 +254,8 @@ struct gpt_params {
     std::string cvector_negative_file = "examples/cvector-generator/negative.txt";
 
     bool spm_infill = false; // suffix/prefix/middle pattern for infill
+
+    std::string lora_outfile = "ggml-lora-merged-f16.gguf";
 };
 
 void gpt_params_handle_hf_token(gpt_params & params);

examples/export-lora/README.md

Lines changed: 7 additions & 8 deletions
@@ -6,12 +6,11 @@ Apply LORA adapters to base model and export the resulting model.
 usage: llama-export-lora [options]
 
 options:
-  -h, --help                        show this help message and exit
-  -m FNAME, --model-base FNAME      model path from which to load base model (default '')
-  -o FNAME, --model-out FNAME       path to save exported model (default '')
-  -l FNAME, --lora FNAME            apply LoRA adapter
-  -s FNAME S, --lora-scaled FNAME S apply LoRA adapter with user defined scaling S
-  -t N, --threads N                 number of threads to use during computation (default: 4)
+  -m, --model                model path from which to load base model (default '')
+      --lora FNAME           path to LoRA adapter (can be repeated to use multiple adapters)
+      --lora-scaled FNAME S  path to LoRA adapter with user defined scaling S (can be repeated to use multiple adapters)
+  -t, --threads N            number of threads to use during computation (default: 4)
+  -o, --output FNAME         output file (default: 'ggml-lora-merged-f16.gguf')
 ```
 
 For example:
@@ -20,7 +19,7 @@ For example:
 ./bin/llama-export-lora \
     -m open-llama-3b-v2-q8_0.gguf \
     -o open-llama-3b-v2-q8_0-english2tokipona-chat.gguf \
-    -l lora-open-llama-3b-v2-q8_0-english2tokipona-chat-LATEST.bin
+    --lora lora-open-llama-3b-v2-q8_0-english2tokipona-chat-LATEST.bin
 ```
 
-Multiple LORA adapters can be applied by passing multiple `-l FN` or `-s FN S` command line parameters.
+Multiple LORA adapters can be applied by passing multiple `--lora FNAME` or `--lora-scaled FNAME S` command line parameters.
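As a quick illustration of the updated flags, merging two adapters into a single model could look like the sketch below. The model and adapter filenames are hypothetical placeholders; only the flags (`-m`, `-o`, `--lora`, `--lora-scaled`) come from the README changes above.

```
# hypothetical filenames; flags as documented in the updated README
./bin/llama-export-lora \
    -m base-model-f16.gguf \
    -o merged-model-f16.gguf \
    --lora adapter-a.gguf \
    --lora-scaled adapter-b.gguf 0.5
```

Here `--lora-scaled` applies the second adapter at a user-defined weight of 0.5, while the plain `--lora` adapter is applied at full strength.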
