diff --git a/CMakeLists.txt b/CMakeLists.txt index 8b6dd1a4972c5..4263b4700b5b7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,7 +46,7 @@ set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kern set(LLAMA_CUDA_DMMV_Y "1" CACHE STRING "llama: y block size for dmmv CUDA kernels") option(LLAMA_CUDA_DMMV_F16 "llama: use 16 bit floats for dmmv CUDA kernels" OFF) set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") -option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) +option(LLAMA_HIPBLAS "llama: use hipBLAS" ON) option(LLAMA_K_QUANTS "llama: use k-quants" ON) @@ -339,4 +339,3 @@ set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_cublas") set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON) target_link_libraries(${TARGET} PUBLIC ggml ggml_v1 ggml_v2 common2 gpttype_adapter ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) - diff --git a/Makefile b/Makefile index 12d63ec40eeee..6a4b998208810 100644 --- a/Makefile +++ b/Makefile @@ -42,7 +42,7 @@ endif # keep standard at C11 and C++11 CFLAGS = -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c11 -fPIC -DGGML_USE_K_QUANTS -CXXFLAGS = -I. -I./examples -I./include -I./include/CL -I./otherarch -I./otherarch/tools -O3 -DNDEBUG -std=c++11 -fPIC +CXXFLAGS = -I. -I./examples -I./include -I./include/CL -I./otherarch -I./otherarch/tools -O3 -DNDEBUG -std=c++11 -fPIC -DGGML_USE_K_QUANTS LDFLAGS = # these are used on windows, to build some libraries with extra old device compatibility @@ -53,7 +53,11 @@ NONECFLAGS = OPENBLAS_FLAGS = -DGGML_USE_OPENBLAS -I/usr/local/include/openblas CLBLAST_FLAGS = -DGGML_USE_CLBLAST FAILSAFE_FLAGS = -DUSE_FAILSAFE -CUBLAS_FLAGS = -DGGML_USE_CUBLAS +ifdef LLAMA_CUBLAS + CUBLAS_FLAGS = -DGGML_USE_CUBLAS +else + CUBLAS_FLAGS = +endif CUBLASLD_FLAGS = CUBLAS_OBJS = diff --git a/examples/common.cpp b/examples/common.cpp index 5addd10a13fe9..3278a064346b4 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -110,7 +110,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { invalid_param = true; break; } - params.seed = std::stoi(argv[i]); + params.seed = std::stoul(argv[i]); } else if (arg == "-t" || arg == "--threads") { if (++i >= argc) { invalid_param = true; diff --git a/examples/common.h b/examples/common.h index 9d213d6d04149..66e5672917996 100644 --- a/examples/common.h +++ b/examples/common.h @@ -22,7 +22,7 @@ int32_t get_num_physical_cores(); struct gpt_params { - int32_t seed = -1; // RNG seed + uint32_t seed = -1; // RNG seed int32_t n_threads = get_num_physical_cores(); int32_t n_predict = -1; // new tokens to predict int32_t n_ctx = 512; // context size diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp index 3cd5bb794957c..2b7eb39c51ff5 100644 --- a/examples/embedding/embedding.cpp +++ b/examples/embedding/embedding.cpp @@ -24,11 +24,11 @@ int main(int argc, char ** argv) { fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); - if (params.seed < 0) { + if (params.seed == LLAMA_DEFAULT_SEED) { params.seed = time(NULL); } - fprintf(stderr, "%s: seed = %d\n", __func__, params.seed); + fprintf(stderr, "%s: seed = %u\n", __func__, params.seed); std::mt19937 rng(params.seed); if (params.random_prompt) { diff --git a/examples/main/README.md b/examples/main/README.md index 9ba1eb3849b2e..37538613042b0 100644 --- a/examples/main/README.md +++ b/examples/main/README.md @@ -242,7 +242,7 @@ Example usage: `--logit-bias 29905-inf` ### RNG Seed -- `-s SEED, --seed SEED`: Set the random number generator (RNG) seed (default: -1, < 0 = random seed). +- `-s SEED, --seed SEED`: Set the random number generator (RNG) seed (default: -1, -1 = random seed). The RNG seed is used to initialize the random number generator that influences the text generation process. By setting a specific seed value, you can obtain consistent and reproducible results across multiple runs with the same input and settings. This can be helpful for testing, debugging, or comparing the effects of different options on the generated text to see when they diverge. If the seed is set to a value less than 0, a random seed will be used, which will result in different outputs on each run. diff --git a/examples/main/main.cpp b/examples/main/main.cpp index bcdc98d611250..3a171925ba510 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -94,11 +94,11 @@ int main(int argc, char ** argv) { fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); - if (params.seed < 0) { + if (params.seed == LLAMA_DEFAULT_SEED) { params.seed = time(NULL); } - fprintf(stderr, "%s: seed = %d\n", __func__, params.seed); + fprintf(stderr, "%s: seed = %u\n", __func__, params.seed); std::mt19937 rng(params.seed); if (params.random_prompt) { diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp index f8a6cb516d067..dd54ed3c4bd6c 100644 --- a/examples/perplexity/perplexity.cpp +++ b/examples/perplexity/perplexity.cpp @@ -136,11 +136,11 @@ int main(int argc, char ** argv) { fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); - if (params.seed < 0) { + if (params.seed == LLAMA_DEFAULT_SEED) { params.seed = time(NULL); } - fprintf(stderr, "%s: seed = %d\n", __func__, params.seed); + fprintf(stderr, "%s: seed = %u\n", __func__, params.seed); std::mt19937 rng(params.seed); if (params.random_prompt) { diff --git a/examples/server/README.md b/examples/server/README.md index fa95c00441bc2..ba4b2fec9d1df 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -152,7 +152,7 @@ node . `mirostat_eta`: Set the Mirostat learning rate, parameter eta (default: 0.1). - `seed`: Set the random number generator (RNG) seed (default: -1, < 0 = random seed). + `seed`: Set the random number generator (RNG) seed (default: -1, -1 = random seed). `ignore_eos`: Ignore end of stream token and continue generating (default: false). diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp index a05881d1640e7..05bfa80167968 100644 --- a/examples/train-text-from-scratch/train-text-from-scratch.cpp +++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp @@ -2768,7 +2768,7 @@ void train_print_usage(int /*argc*/, char ** argv, const struct train_params * p fprintf(stderr, " --checkpoint-in FNAME path from which to load training checkpoint (default '%s')\n", params->fn_checkpoint_in); fprintf(stderr, " --checkpoint-out FNAME path to save training checkpoint (default '%s')\n", params->fn_checkpoint_out); fprintf(stderr, " --model-out FNAME path to save ggml model (default '%s')\n", params->fn_model_out); - fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0)\n"); + fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1, use random seed for -1)\n"); fprintf(stderr, " -c N, --ctx N Context size used during training (default %d)\n", params->n_ctx); fprintf(stderr, " --embd N Embedding size used for new models (default %d)\n", params->n_embd); fprintf(stderr, " --mult N Mult size used for new models, influences feedforward size. (default %d)\n", params->n_mult); @@ -3034,10 +3034,10 @@ int main(int argc, char ** argv) { return 1; } - if (params.seed < 0) { + if (params.seed == LLAMA_DEFAULT_SEED) { params.seed = time(NULL); } - printf("%s: seed: %d\n", __func__, params.seed); + printf("%s: seed: %u\n", __func__, params.seed); srand(params.seed); struct llama_context_params llama_params = llama_context_default_params(); diff --git a/klite.embd b/klite.embd index ba58cd79c3ee2..a00d2d94dc2b8 100644 --- a/klite.embd +++ b/klite.embd @@ -1,6 +1,6 @@