Skip to content

Commit

Permalink
Full DynaTemp implementation + UI (#600)
Browse files Browse the repository at this point in the history
* move Dynatemp changes to new branch

* fix float header

* Properly reintroduce variable expert count

Controllable through experts.txt

* first pass at DynaTemp UI

Checkbox partial implemented, Min and Max Temp implemented

* DynaTemp UI Checkbox

Trigger DynaTemp on checkbox

* DynaTemp UI checkbox edition

Hell Yeah! DynaTemp!

* Remove greedy dynatemp

* Fix race condition caused by debug print

* Fixed broken presets and miro

Fixes broken presets and mirostat

* Remove debug function + HHI temp

Also removed unnecessary softmax double precision

* Fix whitespace (?) for generate function

* epic upstream renaming scheme fix

* fix stupid indents

* Other cleanup

Reintroduce unused rep pen function, move temp functions first before entropy dynamic temp

* Slight indent fix

* revert batch pyinstaller maker to mainline

and also delete experts.txt since adjustable routing is also being removed for the PR

* compact dynatemp into a single value dynatemp_range. This is a float which represents the allowed deviation from the min and max temperature when using dynatemp. Thus, if we want a value of dynatemp_min=0.3, dynatemp_max=0.5, then we would simply set temperature=0.4 and dynatemp_range=0.1. Functionally dynatemp would operate the same, but it would simplify usage and make it a single easy to adjust value.

---------

Co-authored-by: Alexander Abushady <aabushady214@gmail.com>
Co-authored-by: Concedo <39025047+LostRuins@users.noreply.github.com>
  • Loading branch information
3 people authored Jan 6, 2024
1 parent 427ba21 commit 123bff9
Show file tree
Hide file tree
Showing 9 changed files with 132 additions and 8 deletions.
4 changes: 4 additions & 0 deletions common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ struct gpt_params {
int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
float mirostat_tau = 5.00f; // target entropy
float mirostat_eta = 0.10f; // learning rate

// DynaTemp!
float dynatemp_range = 0.0f; // enables DynaTemp if greater than 0. dynatemp_min = temperature - dt_range, dynatemp_max = temperature + dt_range

// // sampling parameters
struct llama_sampling_params sparams;

Expand Down
1 change: 1 addition & 0 deletions common/sampling.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ typedef struct llama_sampling_params {
int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
float mirostat_tau = 5.00f; // target entropy
float mirostat_eta = 0.10f; // learning rate
float dynatemp_range = 0.00f; // dynamic temperature range
bool penalize_nl = true; // consider newlines as a repeatable token
std::string samplers_sequence = "kfypmt"; // top_k, tail_free, typical_p, top_p, min_p, temp

Expand Down
2 changes: 2 additions & 0 deletions expose.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,9 @@ struct generation_inputs
const char * grammar;
const bool grammar_retain_state;
const bool quiet = false;
const float dynatemp_range = 0.0f;
const logit_bias logit_biases[logit_bias_max];

};
struct generation_outputs
{
Expand Down
22 changes: 19 additions & 3 deletions gpttype_adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,7 @@ void sample_grammar(FileFormat file_format, int32_t n_vocab, llama_token_data_ar
}

int SampleLogits(const float * logits, int n_ctx, int n_vocab, int rep_pen_range, float rep_pen, float presence_penalty, float top_k, float top_a, float top_p, float min_p, float typical_p, float tfs, float temp, std::mt19937 & rng,
int mirostat, float mirostat_tau, float mirostat_eta, const std::vector<samplers> & sampler_order, llama_grammar * grammar)
int mirostat, float mirostat_tau, float mirostat_eta, const std::vector<samplers> & sampler_order, llama_grammar * grammar, float dynatemp_range)
{
int id = 0;
std::vector<llama_token_data> candidates;
Expand Down Expand Up @@ -541,7 +541,19 @@ int mirostat, float mirostat_tau, float mirostat_eta, const std::vector<samplers
llama_sample_typical(nullptr, &candidates_p, typical_p,1);
break;
case KCPP_SAMPLER_TEMP:
sample_temperature(&candidates_p, temp);
if (dynatemp_range>0)
{
float dynatemp_min = temp - dynatemp_range;
float dynatemp_max = temp + dynatemp_range;
//do not allow negative values
dynatemp_min = dynatemp_min<0?0:dynatemp_min;
dynatemp_max = dynatemp_max<0?0:dynatemp_max;
llama_sample_entropy(nullptr, &candidates_p, temp, dynatemp_min, dynatemp_max);
}
else
{
sample_temperature(&candidates_p, temp);
}
break;
case KCPP_SAMPLER_REP_PEN:
sample_rep_pen(n_ctx, rep_pen_range, rep_pen, presence_penalty, &candidates_p);
Expand Down Expand Up @@ -1480,6 +1492,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
}

std::string addedmemory = inputs.memory;

kcpp_params->prompt = inputs.prompt;
kcpp_params->seed = inputs.seed;
kcpp_params->n_predict = inputs.max_length;
Expand All @@ -1495,10 +1508,12 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
kcpp_params->mirostat = inputs.mirostat;
kcpp_params->mirostat_eta = inputs.mirostat_eta;
kcpp_params->mirostat_tau = inputs.mirostat_tau;
kcpp_params->dynatemp_range = inputs.dynatemp_range;
kcpp_params->n_ctx = inputs.max_context_length;
kcpp_params->n_batch = n_batch;
kcpp_params->n_threads = n_threads;
kcpp_params->n_threads_batch = n_blasthreads;

bool stream_sse = inputs.stream_sse;

bool allow_regular_prints = (debugmode!=-1 && !inputs.quiet) || debugmode >= 1;
Expand Down Expand Up @@ -1889,6 +1904,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
const float presence_penalty = kcpp_params->presence_penalty;
const float typical_p = kcpp_params->typical_p;
const float tfs_z = kcpp_params->tfs_z;
const float dynatemp_range = kcpp_params->dynatemp_range;

if (!startedsampling)
{
Expand Down Expand Up @@ -1944,7 +1960,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o

id = SampleLogits(logitsPtr, nctx, n_vocab, last_n_size, repeat_penalty, presence_penalty,
top_k, top_a, top_p, min_p, typical_p, tfs_z, temp, rng,
kcpp_params->mirostat, kcpp_params->mirostat_tau, kcpp_params->mirostat_eta, sampler_order, grammar);
kcpp_params->mirostat, kcpp_params->mirostat_tau, kcpp_params->mirostat_eta, sampler_order, grammar, dynatemp_range);

if (grammar != nullptr) {
grammar_accept_token(file_format, n_vocab, grammar, id);
Expand Down
8 changes: 7 additions & 1 deletion kcpp_docs.embd
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,12 @@
"description": "If true, prevents the EOS token from being generated (Ban EOS). For unbantokens, set this to false.",
"type": "boolean"
},
"dynatemp_range": {
"default": 0,
"description": "If greater than 0, uses dynamic temperature. Dynamic temperature range will be between Temp+Range and Temp-Range. If less than or equal to 0, uses static temperature.",
"exclusiveMinimum": 0,
"type": "number"
},
"mirostat": {
"description": "KoboldCpp ONLY. Sets the mirostat mode, 0=disabled, 1=mirostat_v1, 2=mirostat_v2",
"minimum": 0,
Expand Down Expand Up @@ -876,4 +882,4 @@

</body>

</html>
</html>
Loading

0 comments on commit 123bff9

Please sign in to comment.