diff --git a/common/common.h b/common/common.h index a10f65aac4d3b..9809d2bf1d41a 100644 --- a/common/common.h +++ b/common/common.h @@ -80,6 +80,10 @@ struct gpt_params { int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 float mirostat_tau = 5.00f; // target entropy float mirostat_eta = 0.10f; // learning rate + + // DynaTemp! + float dynatemp_range = 0.0f; // enables DynaTemp if greater than 0. dynatemp_min = temperature - dt_range, dynatemp_max = temperature + dt_range + // // sampling parameters struct llama_sampling_params sparams; diff --git a/common/sampling.h b/common/sampling.h index f16ef97e34a10..f3898a33c72c2 100644 --- a/common/sampling.h +++ b/common/sampling.h @@ -25,6 +25,7 @@ typedef struct llama_sampling_params { int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 float mirostat_tau = 5.00f; // target entropy float mirostat_eta = 0.10f; // learning rate + float dynatemp_range = 0.00f; // dynamic temperature range bool penalize_nl = true; // consider newlines as a repeatable token std::string samplers_sequence = "kfypmt"; // top_k, tail_free, typical_p, top_p, min_p, temp diff --git a/expose.h b/expose.h index e97d47e080840..55d815e890a31 100644 --- a/expose.h +++ b/expose.h @@ -81,7 +81,9 @@ struct generation_inputs const char * grammar; const bool grammar_retain_state; const bool quiet = false; + const float dynatemp_range = 0.0f; const logit_bias logit_biases[logit_bias_max]; + }; struct generation_outputs { diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index 12db20b201be4..b6660c2408dd6 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -482,7 +482,7 @@ void sample_grammar(FileFormat file_format, int32_t n_vocab, llama_token_data_ar } int SampleLogits(const float * logits, int n_ctx, int n_vocab, int rep_pen_range, float rep_pen, float presence_penalty, float top_k, float top_a, float top_p, float min_p, float typical_p, float tfs, float temp, std::mt19937 & rng, -int mirostat, float 
mirostat_tau, float mirostat_eta, const std::vector & sampler_order, llama_grammar * grammar) +int mirostat, float mirostat_tau, float mirostat_eta, const std::vector & sampler_order, llama_grammar * grammar, float dynatemp_range) { int id = 0; std::vector candidates; @@ -541,7 +541,19 @@ int mirostat, float mirostat_tau, float mirostat_eta, const std::vector0) + { + float dynatemp_min = temp - dynatemp_range; + float dynatemp_max = temp + dynatemp_range; + //do not allow negative values + dynatemp_min = dynatemp_min<0?0:dynatemp_min; + dynatemp_max = dynatemp_max<0?0:dynatemp_max; + llama_sample_entropy(nullptr, &candidates_p, temp, dynatemp_min, dynatemp_max); + } + else + { + sample_temperature(&candidates_p, temp); + } break; case KCPP_SAMPLER_REP_PEN: sample_rep_pen(n_ctx, rep_pen_range, rep_pen, presence_penalty, &candidates_p); @@ -1480,6 +1492,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o } std::string addedmemory = inputs.memory; + kcpp_params->prompt = inputs.prompt; kcpp_params->seed = inputs.seed; kcpp_params->n_predict = inputs.max_length; @@ -1495,10 +1508,12 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o kcpp_params->mirostat = inputs.mirostat; kcpp_params->mirostat_eta = inputs.mirostat_eta; kcpp_params->mirostat_tau = inputs.mirostat_tau; + kcpp_params->dynatemp_range = inputs.dynatemp_range; kcpp_params->n_ctx = inputs.max_context_length; kcpp_params->n_batch = n_batch; kcpp_params->n_threads = n_threads; kcpp_params->n_threads_batch = n_blasthreads; + bool stream_sse = inputs.stream_sse; bool allow_regular_prints = (debugmode!=-1 && !inputs.quiet) || debugmode >= 1; @@ -1889,6 +1904,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o const float presence_penalty = kcpp_params->presence_penalty; const float typical_p = kcpp_params->typical_p; const float tfs_z = kcpp_params->tfs_z; + const float dynatemp_range = 
kcpp_params->dynatemp_range; if (!startedsampling) { @@ -1944,7 +1960,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o id = SampleLogits(logitsPtr, nctx, n_vocab, last_n_size, repeat_penalty, presence_penalty, top_k, top_a, top_p, min_p, typical_p, tfs_z, temp, rng, - kcpp_params->mirostat, kcpp_params->mirostat_tau, kcpp_params->mirostat_eta, sampler_order, grammar); + kcpp_params->mirostat, kcpp_params->mirostat_tau, kcpp_params->mirostat_eta, sampler_order, grammar, dynatemp_range); if (grammar != nullptr) { grammar_accept_token(file_format, n_vocab, grammar, id); diff --git a/kcpp_docs.embd b/kcpp_docs.embd index 1e5ca85d26ff9..e6034f0d76001 100644 --- a/kcpp_docs.embd +++ b/kcpp_docs.embd @@ -139,6 +139,12 @@ "description": "If true, prevents the EOS token from being generated (Ban EOS). For unbantokens, set this to false.", "type": "boolean" }, + "dynatemp_range": { + "default": 0, + "description": "If greater than 0, uses dynamic temperature. Dynamic temperature range will be between Temp+Range and Temp-Range. If less or equal to 0 , uses static temperature.", + "exclusiveMinimum": 0, + "type": "number" + }, "mirostat": { "description": "KoboldCpp ONLY. Sets the mirostat mode, 0=disabled, 1=mirostat_v1, 2=mirostat_v2", "minimum": 0, @@ -876,4 +882,4 @@ - \ No newline at end of file + diff --git a/klite.embd b/klite.embd index 195fe0f0c0b54..954fbf3d0ad4c 100644 --- a/klite.embd +++ b/klite.embd @@ -6,7 +6,7 @@ It requires no dependencies, installation or setup. Just copy this single static HTML file anywhere and open it in a browser, or from a webserver. Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite. Kobold Lite is under the AGPL v3.0 License unless otherwise exempted. Please do not remove this line. 
-Current version: 103 +Current version: 104 -Concedo --> @@ -3404,6 +3404,7 @@ Current version: 103 rep_pen_range: 320, rep_pen_slope: 0.7, temperature: 0.7, + dynatemp_range: 0.0, top_p: 0.92, min_p: 0.00, presence_penalty: 0.00, @@ -3426,6 +3427,7 @@ Current version: 103 preset: "[Default]", description: "Known Working Settings.", temp: defaultsettings.temperature, + dynatemp_range: defaultsettings.dynatemp_range, genamt: defaultsettings.max_length, top_k: defaultsettings.top_k, top_p: defaultsettings.top_p, @@ -3443,6 +3445,7 @@ Current version: 103 preset: "Inverted Mirror", description: "Good defaults with a different sampler order.", temp: defaultsettings.temperature, + dynatemp_range: defaultsettings.dynatemp_range, genamt: defaultsettings.max_length, top_k: defaultsettings.top_k, top_p: defaultsettings.top_p, @@ -3456,7 +3459,7 @@ Current version: 103 rep_pen_slope: defaultsettings.rep_pen_slope, sampler_order: [0, 1, 2, 3, 4, 5, 6] }, - {"preset":"Godlike","description":"Makes AI give a descriptive and sensual output.","temp":0.7,"genamt":120,"top_k":0,"top_p":0.5,"min_p":0.0,"presence_penalty":0.0,"top_a":0.75,"typical":0.19,"tfs":0.97,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,5,4,3,2,1,0]},{"preset":"Mayday","description":"Wacky plot, creativity from AI, crazy stories you want AI to weird out.","temp":1.05,"genamt":120,"top_k":0,"top_p":0.95,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,0,1,2,3,4,5]},{"preset":"Good Winds","description":"Let AI direct the plot, but still stay logical.","temp":0.7,"genamt":120,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.9,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,0,1,2,3,4,5]},{"preset":"Liminal Drift","description":"Drives coherent dialogue, responses, and behavior, sometimes surreal situations arise based on information 
already present in the story.","temp":0.66,"genamt":120,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0.96,"typical":0.6,"tfs":1,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,4,5,1,0,2,3]},{"preset":"TavernAI","description":"Preset used in TavernAI.","temp":0.79,"genamt":120,"top_k":0,"top_p":0.9,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.95,"rep_pen":1.19,"rep_pen_range":1024,"rep_pen_slope":0.9,"sampler_order":[6,0,1,2,3,4,5]},{"preset":"Storywriter 6B","description":"Optimized settings for relevant output.","genamt":120,"rep_pen":1.1,"rep_pen_range":2048,"rep_pen_slope":0.2,"sampler_order":[6,5,0,2,3,1,4],"temp":0.72,"tfs":1,"top_a":0,"top_k":0,"top_p":0.73,"min_p":0.0,"presence_penalty":0.0,"typical":1},{"preset":"Coherent Creativity 6B","description":"A good balance between coherence, creativity, and quality of prose.","genamt":120,"rep_pen":1.2,"rep_pen_range":2048,"rep_pen_slope":0,"sampler_order":[6,5,0,2,3,1,4],"temp":0.51,"tfs":0.99,"top_a":0,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"typical":1},{"preset":"Luna Moth 6B","description":"A great degree of creativity without losing coherency.","temp":1.5,"genamt":120,"top_k":85,"top_p":0.24,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.1,"rep_pen_range":2048,"rep_pen_slope":0,"sampler_order":[6,5,0,2,3,1,4]},{"preset":"Pleasing Results 6B","description":"Expectable output with alternative context settings.","temp":0.44,"genamt":120,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.9,"rep_pen":1.15,"rep_pen_range":2048,"rep_pen_slope":6.8,"sampler_order":[6,5,0,2,3,1,4]},{"preset":"Genesis 13B","description":"Stable and logical, but with scattered 
creativity.","temp":0.63,"genamt":120,"top_k":0,"top_p":0.98,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.98,"rep_pen":1.05,"rep_pen_range":2048,"rep_pen_slope":0.1,"sampler_order":[6,2,0,3,5,1,4]},{"preset":"Basic Coherence 13B","description":"Keep things on track.","temp":0.59,"genamt":120,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.87,"rep_pen":1.1,"rep_pen_range":2048,"rep_pen_slope":0.3,"sampler_order":[6,5,0,2,3,1,4]},{"preset":"Ouroboros 13B","description":"Versatile, conforms well to poems, lists, chat, etc.","temp":1.07,"genamt":120,"top_k":100,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.93,"rep_pen":1.05,"rep_pen_range":404,"rep_pen_slope":0.8,"sampler_order":[6,0,5,3,2,1,4]},{"preset":"Ace of Spades 13B","description":"Expressive, while still staying focused.","temp":1.15,"genamt":120,"top_k":0,"top_p":0.95,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.8,"rep_pen":1.05,"rep_pen_range":2048,"rep_pen_slope":7,"sampler_order":[6,3,2,0,5,1,4]},{"preset":"Low Rider 13B","description":"Reliable, aimed at story development.","temp":0.94,"genamt":120,"top_k":12,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.94,"rep_pen":1.05,"rep_pen_range":2048,"rep_pen_slope":0.2,"sampler_order":[6,5,0,2,3,1,4]},{"preset":"Pro Writer 13B","description":"Optimal setting for readability, based on AI-powered mass statistical analysis of Euterpe output.","temp":1.35,"genamt":120,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.69,"rep_pen":1.15,"rep_pen_range":2048,"rep_pen_slope":0.1,"sampler_order":[6,3,2,5,0,1,4]},{"preset":"Default 20B","description":"Good starting settings for NeoX 
20B.","temp":0.6,"genamt":120,"top_k":0,"top_p":0.9,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.04,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,0,1,2,3,4,5]},{"preset":"Min-P","description":"A good default for Min-P, only works on backends with min-p.","temp":1.25,"genamt":120,"top_k":0,"top_p":1,"min_p":0.1,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.03,"rep_pen_range":320,"rep_pen_slope":0.7,"sampler_order":[6,5,0,1,3,4,2]} + {"preset":"Godlike","description":"Makes AI give a descriptive and sensual output.","temp":0.7,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":0.5,"min_p":0.0,"presence_penalty":0.0,"top_a":0.75,"typical":0.19,"tfs":0.97,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,5,4,3,2,1,0]},{"preset":"Mayday","description":"Wacky plot, creativity from AI, crazy stories you want AI to weird out.","temp":1.05,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":0.95,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,0,1,2,3,4,5]},{"preset":"Good Winds","description":"Let AI direct the plot, but still stay logical.","temp":0.7,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.9,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,0,1,2,3,4,5]},{"preset":"Liminal Drift","description":"Drives coherent dialogue, responses, and behavior, sometimes surreal situations arise based on information already present in the story.","temp":0.66,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0.96,"typical":0.6,"tfs":1,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,4,5,1,0,2,3]},{"preset":"TavernAI","description":"Preset used in 
TavernAI.","temp":0.79,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":0.9,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.95,"rep_pen":1.19,"rep_pen_range":1024,"rep_pen_slope":0.9,"sampler_order":[6,0,1,2,3,4,5]},{"preset":"Storywriter 6B","description":"Optimized settings for relevant output.","genamt":120,"rep_pen":1.1,"rep_pen_range":2048,"rep_pen_slope":0.2,"sampler_order":[6,5,0,2,3,1,4],"temp":0.72,"dynatemp_range":0.0,"tfs":1,"top_a":0,"top_k":0,"top_p":0.73,"min_p":0.0,"presence_penalty":0.0,"typical":1},{"preset":"Coherent Creativity 6B","description":"A good balance between coherence, creativity, and quality of prose.","genamt":120,"rep_pen":1.2,"rep_pen_range":2048,"rep_pen_slope":0,"sampler_order":[6,5,0,2,3,1,4],"temp":0.51,"dynatemp_range":0.0,"tfs":0.99,"top_a":0,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"typical":1},{"preset":"Luna Moth 6B","description":"A great degree of creativity without losing coherency.","temp":1.5,"dynatemp_range":0.0,"genamt":120,"top_k":85,"top_p":0.24,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.1,"rep_pen_range":2048,"rep_pen_slope":0,"sampler_order":[6,5,0,2,3,1,4]},{"preset":"Pleasing Results 6B","description":"Expectable output with alternative context settings.","temp":0.44,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.9,"rep_pen":1.15,"rep_pen_range":2048,"rep_pen_slope":6.8,"sampler_order":[6,5,0,2,3,1,4]},{"preset":"Genesis 13B","description":"Stable and logical, but with scattered creativity.","temp":0.63,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":0.98,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.98,"rep_pen":1.05,"rep_pen_range":2048,"rep_pen_slope":0.1,"sampler_order":[6,2,0,3,5,1,4]},{"preset":"Basic Coherence 13B","description":"Keep things on 
track.","temp":0.59,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.87,"rep_pen":1.1,"rep_pen_range":2048,"rep_pen_slope":0.3,"sampler_order":[6,5,0,2,3,1,4]},{"preset":"Ouroboros 13B","description":"Versatile, conforms well to poems, lists, chat, etc.","temp":1.07,"dynatemp_range":0.0,"genamt":120,"top_k":100,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.93,"rep_pen":1.05,"rep_pen_range":404,"rep_pen_slope":0.8,"sampler_order":[6,0,5,3,2,1,4]},{"preset":"Ace of Spades 13B","description":"Expressive, while still staying focused.","temp":1.15,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":0.95,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.8,"rep_pen":1.05,"rep_pen_range":2048,"rep_pen_slope":7,"sampler_order":[6,3,2,0,5,1,4]},{"preset":"Low Rider 13B","description":"Reliable, aimed at story development.","temp":0.94,"dynatemp_range":0.0,"genamt":120,"top_k":12,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.94,"rep_pen":1.05,"rep_pen_range":2048,"rep_pen_slope":0.2,"sampler_order":[6,5,0,2,3,1,4]},{"preset":"Pro Writer 13B","description":"Optimal setting for readability, based on AI-powered mass statistical analysis of Euterpe output.","temp":1.35,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.69,"rep_pen":1.15,"rep_pen_range":2048,"rep_pen_slope":0.1,"sampler_order":[6,3,2,5,0,1,4]},{"preset":"Default 20B","description":"Good starting settings for NeoX 20B.","temp":0.6,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":0.9,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.04,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,0,1,2,3,4,5]},{"preset":"Min-P","description":"A good default for Min-P, only works on backends with 
min-p.","temp":1.25,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":1,"min_p":0.1,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.03,"rep_pen_range":320,"rep_pen_slope":0.7,"sampler_order":[6,5,0,1,3,4,2]} ]; function polyfills() @@ -7253,6 +7256,7 @@ Current version: 103 document.getElementById("instruct_starttag").value = localsettings.instruct_starttag; document.getElementById("instruct_endtag").value = localsettings.instruct_endtag; document.getElementById("min_p").value = localsettings.min_p; + document.getElementById("dynatemp_range").value = localsettings.dynatemp_range; document.getElementById("presence_penalty").value = localsettings.presence_penalty; document.getElementById("sampler_seed").value = localsettings.sampler_seed; document.getElementById("top_k").value = localsettings.top_k; @@ -7371,6 +7375,7 @@ Current version: 103 document.getElementById("max_length").value = document.getElementById("max_length_slide").value = found.genamt; document.getElementById("presence_penalty").value = found.presence_penalty; document.getElementById("min_p").value = found.min_p; + document.getElementById("dynatemp_range").value = found.dynatemp_range; document.getElementById("top_k").value = found.top_k; document.getElementById("top_p").value = document.getElementById("top_p_slide").value = found.top_p; document.getElementById("top_a").value = found.top_a; @@ -7505,6 +7510,7 @@ Current version: 103 } localsettings.sampler_seed = document.getElementById("sampler_seed").value; localsettings.min_p = document.getElementById("min_p").value; + localsettings.dynatemp_range = document.getElementById("dynatemp_range").value; localsettings.presence_penalty = document.getElementById("presence_penalty").value; localsettings.top_k = document.getElementById("top_k").value; localsettings.top_a = document.getElementById("top_a").value; @@ -7569,6 +7575,7 @@ Current version: 103 localsettings.rep_pen_slope = cleannum(localsettings.rep_pen_slope, 0, 20); 
localsettings.top_p = cleannum(localsettings.top_p, 0.002, 1); localsettings.min_p = cleannum(localsettings.min_p, 0.0, 1); + localsettings.dynatemp_range = cleannum(localsettings.dynatemp_range, 0.0, 2.0); localsettings.presence_penalty = cleannum(localsettings.presence_penalty, -2, 2); localsettings.top_k = cleannum(Math.floor(localsettings.top_k), 0, 300); localsettings.top_a = cleannum(localsettings.top_a, 0, 1); @@ -8800,6 +8807,7 @@ Current version: 103 //also supports min_p, in that it wont crash, so add it on. it will be ignored if not found submit_payload.params.min_p = localsettings.min_p; + submit_payload.params.dynatemp_range = localsettings.dynatemp_range; } //presence pen and logit bias for OAI and newer kcpp if((custom_kobold_endpoint != "" && is_using_kcpp_with_mirostat()) || custom_oai_endpoint!="") @@ -12491,6 +12499,7 @@ Current version: 103 Seed Min-P PrPen. + DyTmp.R @@ -12500,6 +12509,8 @@ Current version: 103 id="min_p"> + diff --git a/koboldcpp.py b/koboldcpp.py index 55e15415aac24..891c89774036b 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -78,6 +78,7 @@ class generation_inputs(ctypes.Structure): ("grammar", ctypes.c_char_p), ("grammar_retain_state", ctypes.c_bool), ("quiet", ctypes.c_bool), + ("dynatemp_range", ctypes.c_float), ("logit_biases", logit_bias * logit_bias_max)] class generation_outputs(ctypes.Structure): @@ -310,7 +311,7 @@ def load_model(model_filename): ret = handle.load_model(inputs) return ret -def generate(prompt, memory="", max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.1, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, logit_biases={}): +def generate(prompt, memory="", max_length=32, max_context_length=512, 
temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.1, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, logit_biases={}): global maxctx, args, currentusergenkey, totalgens inputs = generation_inputs() outputs = ctypes.create_unicode_buffer(ctypes.sizeof(generation_outputs)) @@ -338,6 +339,7 @@ def generate(prompt, memory="", max_length=32, max_context_length=512, temperatu inputs.presence_penalty = presence_penalty inputs.stream_sse = stream_sse inputs.quiet = quiet + inputs.dynatemp_range = dynatemp_range inputs.grammar = grammar.encode("UTF-8") inputs.grammar_retain_state = grammar_retain_state inputs.unban_tokens_rt = not use_default_badwordsids @@ -547,7 +549,9 @@ def run_blocking(): #api format 1=basic,2=kai,3=oai,4=oai-chat genkey=genparams.get('genkey', ''), trimstop=genparams.get('trim_stop', False), quiet=is_quiet, - logit_biases=genparams.get('logit_bias', {})) + dynatemp_range=genparams.get('dynatemp_range', 0.0), + logit_biases=genparams.get('logit_bias', {}) + ) recvtxt = "" if stream_flag: diff --git a/llama.cpp b/llama.cpp index a57437dd52934..cae57c409525e 100644 --- a/llama.cpp +++ b/llama.cpp @@ -8510,10 +8510,81 @@ void llama_sample_temp(struct llama_context * ctx, llama_token_data_array * cand } } + void llama_sample_temperature(struct llama_context * ctx, llama_token_data_array * candidates_p, float temp) { llama_sample_temp(ctx, candidates_p, temp); } +void llama_sample_entropy(struct llama_context * ctx, llama_token_data_array * candidates_p, float temp, float min_temp = 0, float max_temp = 2.0f) { + const int64_t t_start_sample_us = ggml_time_us(); + + llama_sample_softmax(ctx, candidates_p); + + float exponent_val = 1.0f; + + // Calculate entropy of the 
softmax probabilities + float entropy = 0.0f; + for (size_t i = 0; i < candidates_p->size; ++i) { + float prob = candidates_p->data[i].p; + if (prob > 0.0f) { // Ensure no log(0) + entropy -= prob * logf(prob); + } + } + + // Calculate maximum possible entropy + float max_entropy = -logf(1.0f / candidates_p->size); + + // Guard against division by zero + if (max_entropy == 0.0f) { + max_entropy = 1.0f; // This ensures that normalized_entropy will be 0 when entropy is 0 + } + + // Normalize the entropy + float normalized_entropy = entropy / max_entropy; + + // Map the normalized entropy to the desired temperature range using the power function + float dyn_temp = min_temp + (max_temp - min_temp) * powf(normalized_entropy, exponent_val); + + //todo: Ensure to hide print statements unless debugging! + printf("Your text maxtemp value is: %f\n", max_temp); + // Print the variables + printf("Entropy: %f\n", entropy); + printf("Max Possible Entropy: %f\n", max_entropy); + printf("Normalized Entropy: %f\n", normalized_entropy); + printf("Exponent: %f\n", exponent_val); + printf("Dynamic Temperature (dyn_temp): %f\n", dyn_temp); + + // Apply the dynamically calculated temperature scaling + for (size_t i = 0; i < candidates_p->size; ++i) { + candidates_p->data[i].logit /= dyn_temp; + } + + // Re-compute softmax probabilities after scaling logits with dynamic temperature + double max_l_double = candidates_p->data[0].logit; + double cum_sum_double = 0.0; + for (size_t i = 0; i < candidates_p->size; ++i) { + double p = exp(candidates_p->data[i].logit - max_l_double); + candidates_p->data[i].p = p; // Store the scaled probability + cum_sum_double += p; + } + for (size_t i = 0; i < candidates_p->size; ++i) { + candidates_p->data[i].p /= cum_sum_double; // Re-normalize the probabilities + } + + //todo: Ensure to hide print statements unless debugging! 
+ // Print the updated top 25 probabilities after temperature scaling + printf("\nUpdated Top 25 Probabilities After Dynamic Temperature Scaling (in percentages):\n"); + for (size_t i = 0; i < 25 && i < candidates_p->size; ++i) { + printf("Token %zu: %f%%\n", i + 1, candidates_p->data[i].p * 100.0f); + } + + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } +} + +// The llama.cpp repetition penalty code goes unused in kobold's API + void llama_sample_repetition_penalties( struct llama_context * ctx, llama_token_data_array * candidates, diff --git a/llama.h b/llama.h index 0d6a25ba0cdc0..aedc748282c66 100644 --- a/llama.h +++ b/llama.h @@ -723,6 +723,14 @@ extern "C" { float p, size_t min_keep); + /// @details DYNATEMP! #TODO KALO + LLAMA_API void llama_sample_entropy( + struct llama_context* ctx, + llama_token_data_array* candidates, + float temp, + float min_temp, + float max_temp); + /// @details Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/. LLAMA_API void llama_sample_tail_free( struct llama_context * ctx,