4 changes: 0 additions & 4 deletions DynaTemp.txt

This file was deleted.

3 changes: 2 additions & 1 deletion class.py
@@ -300,7 +300,8 @@ def _raw_generate(
genresult = koboldcpp.generate(decoded_prompt,max_new,utils.koboldai_vars.max_length,
gen_settings.temp,int(gen_settings.top_k),gen_settings.top_a,gen_settings.top_p,
gen_settings.typical,gen_settings.tfs,gen_settings.rep_pen,gen_settings.rep_pen_range,
sampler_order=gen_settings.sampler_order,use_default_badwordsids=utils.koboldai_vars.use_default_badwordsids)
sampler_order=gen_settings.sampler_order,use_default_badwordsids=utils.koboldai_vars.use_default_badwordsids,
min_temp=gen_settings.min_temp, max_temp=gen_settings.max_temp, k=gen_settings.k, scp=gen_settings.scp, exponent_val=gen_settings.exponent_val)

outputs = [genresult]
return GenerationResult(
6 changes: 6 additions & 0 deletions common/common.h
@@ -68,6 +68,12 @@ struct gpt_params {
int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
float mirostat_tau = 5.00f; // target entropy
float mirostat_eta = 0.10f; // learning rate
// DynaTemp!
float min_temp = 0.00f; // minimum temperature
float max_temp = 2.00f; // maximum temperature
float k = 25.0f; //
float scp = 0.75f; // sigmoid center point
float exponent_val = 2.00f; // exponent value
// // sampling parameters
struct llama_sampling_params sampling_params;

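The five new gpt_params fields above define the DynaTemp controls: a temperature range (min_temp, max_temp), a sigmoid centre scp, what is presumably a sigmoid steepness k (the commit leaves its comment blank), and a shaping exponent exponent_val. The diff only declares and plumbs them; as a rough illustration of how such parameters could combine, here is a minimal Python sketch, assuming the dynamic temperature is obtained by passing a per-token confidence signal (here the top candidate's probability) through a sigmoid and scaling the result into the configured range. The function name and the choice of confidence signal are illustrative assumptions, not the PR's exact formula.

import math

def dynamic_temperature(top_prob, min_temp=0.0, max_temp=2.0, k=25.0,
                        scp=0.75, exponent_val=2.0):
    """Illustrative DynaTemp-style mapping (assumed form, not taken from this PR).

    top_prob: probability of the most likely candidate token, in [0, 1].
    Returns a temperature in [min_temp, max_temp]: confident steps fall toward
    min_temp, uncertain steps rise toward max_temp.
    """
    # Sigmoid centred at scp with steepness k; high confidence gives a value near 0.
    sig = 1.0 / (1.0 + math.exp(k * (top_prob - scp)))
    # Shape with the exponent, then rescale into [min_temp, max_temp].
    return min_temp + (max_temp - min_temp) * (sig ** exponent_val)

# With the defaults above, a confident step (top_prob = 0.95) maps to a
# temperature near 0, while an uncertain step (top_prob = 0.30) maps to ~2.0.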
5 changes: 5 additions & 0 deletions common/sampling.h
@@ -22,6 +22,11 @@ typedef struct llama_sampling_params {
int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
float mirostat_tau = 5.00f; // target entropy
float mirostat_eta = 0.10f; // learning rate
float min_temp = 0.00f; // minimum temperature
float max_temp = 2.00f; // maximum temperature
float k = 25.00f;
float scp = 0.75f; // sigmoid center point
float exponent_val = 2.00f; // exponent value

bool penalize_nl = true; // consider newlines as a repeatable token

5 changes: 5 additions & 0 deletions expose.h
@@ -73,6 +73,11 @@ struct generation_inputs
const bool stream_sse;
const char * grammar;
const bool grammar_retain_state;
const float min_temp;
const float max_temp;
const float k;
const float scp;
const float exponent_val;
};
struct generation_outputs
{
19 changes: 12 additions & 7 deletions gpttype_adapter.cpp
@@ -374,18 +374,18 @@ void sample_rep_pen(int n_ctx, int rep_pen_range, float rep_pen, llama_token_dat
last_n_repeat, rep_pen);
}

void sample_temperature(llama_token_data_array * candidates_p, float temp)
void sample_temperature(llama_token_data_array * candidates_p, float temp, float minTemp, float maxTemp, float k, float sigmoidCenterPoint, float exponentVal)
{
if (temp <= 0)
{
// Imitate greedy sampling
temp = 0.00390625f; //cannot be zero else div0, this is 1/256
llama_sample_temperature(nullptr, candidates_p, temp);
llama_sample_temperature(nullptr, candidates_p, temp, minTemp, maxTemp, k, sigmoidCenterPoint, exponentVal);
llama_sample_top_k(nullptr, candidates_p, 1, 1); //only want first candidate
}
else
{
llama_sample_temperature(nullptr, candidates_p, temp);
llama_sample_temperature(nullptr, candidates_p, temp, minTemp, maxTemp, k, sigmoidCenterPoint, exponentVal);
}
}

@@ -429,7 +429,7 @@ void sample_grammar(FileFormat file_format, int32_t n_vocab, llama_token_data_ar
}

int SampleLogits(const float * logits, int n_ctx, int n_vocab, int rep_pen_range, float rep_pen, float top_k, float top_a, float top_p, float typical_p, float tfs, float temp, std::mt19937 & rng,
int mirostat, float mirostat_tau, float mirostat_eta, const std::vector<samplers> & sampler_order, llama_grammar * grammar)
int mirostat, float mirostat_tau, float mirostat_eta, const std::vector<samplers> & sampler_order, llama_grammar * grammar, float minTemp, float maxTemp, float k, float sigmoidCenterPoint, float exponentVal)
{
int id = 0;
std::vector<llama_token_data> candidates;
@@ -449,7 +449,7 @@ int mirostat, float mirostat_tau, float mirostat_eta, const std::vector<samplers
static float mirostat_mu = 2.0f * mirostat_tau;
const int mirostat_m = 100;
sample_rep_pen(n_ctx, rep_pen_range, rep_pen, &candidates_p);
sample_temperature(&candidates_p, temp);
sample_temperature(&candidates_p, temp, minTemp, maxTemp, k, sigmoidCenterPoint, exponentVal);
if (mirostat == 1)
{
id = sample_token_mirostat(n_vocab, &candidates_p, rng, mirostat_tau, mirostat_eta, mirostat_m, &mirostat_mu);
@@ -481,7 +481,7 @@ int mirostat, float mirostat_tau, float mirostat_eta, const std::vector<samplers
llama_sample_typical(nullptr, &candidates_p, typical_p,1);
break;
case KCPP_SAMPLER_TEMP:
sample_temperature(&candidates_p, temp);
sample_temperature(&candidates_p, temp, minTemp, maxTemp, k, sigmoidCenterPoint, exponentVal);
break;
case KCPP_SAMPLER_REP_PEN:
sample_rep_pen(n_ctx, rep_pen_range, rep_pen, &candidates_p);
@@ -1281,6 +1281,11 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
params.mirostat = inputs.mirostat;
params.mirostat_eta = inputs.mirostat_eta;
params.mirostat_tau = inputs.mirostat_tau;
params.min_temp = inputs.min_temp;
params.max_temp = inputs.max_temp;
params.k = inputs.k;
params.scp = inputs.scp;
params.exponent_val = inputs.exponent_val;
params.n_ctx = inputs.max_context_length;
params.n_batch = n_batch;
params.n_threads = n_threads;
@@ -1677,7 +1682,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o

id = SampleLogits(logitsPtr, nctx, n_vocab, last_n_size, repeat_penalty,
top_k, top_a, top_p, typical_p, tfs_z, temp, rng,
params.mirostat, params.mirostat_tau, params.mirostat_eta, sampler_order, grammar);
params.mirostat, params.mirostat_tau, params.mirostat_eta, sampler_order, grammar, params.min_temp, params.max_temp, params.k, params.scp, params.exponent_val);

if (grammar != nullptr) {
grammar_accept_token(file_format, n_vocab, grammar, id);
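To make the control flow of sample_temperature() above concrete, the following Python sketch mirrors it: a non-positive temp is clamped to 1/256 and the distribution collapsed to the single best candidate (imitating greedy sampling, as in the C++ above); otherwise the logits are divided by the temperature and renormalised. Under DynaTemp, that temperature would be the per-step value produced by a mapping like the dynamic_temperature() sketch earlier; the body of llama_sample_temperature itself is not part of this diff, so the normal path shown here is only the conventional temperature scaling.

import math

def _softmax(xs):
    m = max(xs)
    exps = [math.exp(x - m) for x in xs]
    total = sum(exps)
    return [e / total for e in exps]

def sample_temperature_sketch(logits, temp):
    """Python analogue of sample_temperature() in gpttype_adapter.cpp."""
    if temp <= 0:
        # Imitate greedy sampling: clamp to 1/256 (cannot be zero, else div0),
        # then keep only the highest-scoring candidate, like top-k with k = 1.
        temp = 0.00390625
        probs = _softmax([l / temp for l in logits])
        best = max(range(len(probs)), key=lambda i: probs[i])
        return [(best, 1.0)]
    # Normal path: temperature-scale the logits; with DynaTemp, temp would be
    # chosen per step from the model's confidence.
    probs = _softmax([l / temp for l in logits])
    return list(enumerate(probs))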
61 changes: 59 additions & 2 deletions klite.embd
@@ -3301,6 +3301,13 @@ Current version: 84
miro_tau: 5.0,
miro_eta: 0.1,
sampler_order: [6, 0, 1, 3, 4, 2, 5],

// DynaTemp
min_temp: 0,
max_temp: 2,
k: 25,
scp: 0.75,
exponent_val: 2.0
};

var defaultsettings = JSON.parse(JSON.stringify(localsettings));
@@ -6698,6 +6705,11 @@ Current version: 84
document.getElementById("instruct_starttag").value = localsettings.instruct_starttag;
document.getElementById("instruct_endtag").value = localsettings.instruct_endtag;
document.getElementById("top_k").value = localsettings.top_k;
document.getElementById("min_temp").value = localsettings.min_temp;
document.getElementById("max_temp").value = localsettings.max_temp;
document.getElementById("k").value = localsettings.k;
document.getElementById("scp").value = localsettings.scp;
document.getElementById("exponent_val").value = localsettings.exponent_val;
document.getElementById("top_a").value = localsettings.top_a;
document.getElementById("typ_s").value = localsettings.typ_s;
document.getElementById("tfs_s").value = localsettings.tfs_s;
@@ -6947,6 +6959,11 @@ Current version: 84
localsettings.miro_type = document.getElementById("miro_type").value;
localsettings.miro_tau = document.getElementById("miro_tau").value;
localsettings.miro_eta = document.getElementById("miro_eta").value;
localsettings.min_temp = document.getElementById("min_temp").value;
localsettings.max_temp = document.getElementById("max_temp").value;
localsettings.k = document.getElementById("k").value;
localsettings.scp = document.getElementById("scp").value;
localsettings.exponent_val = document.getElementById("exponent_val").value;

localsettings.speech_synth = document.getElementById("ttsselect").value;
localsettings.beep_on = (document.getElementById("beep_on").checked?true:false);
@@ -6993,6 +7010,11 @@ Current version: 84
localsettings.top_k = cleannum(Math.floor(localsettings.top_k), 0, 300);
localsettings.top_a = cleannum(localsettings.top_a, 0, 1);
localsettings.typ_s = cleannum(localsettings.typ_s, 0, 1);
localsettings.min_temp = cleannum(localsettings.min_temp, 0, 2);
localsettings.max_temp = cleannum(localsettings.max_temp, 1.95, 2);
localsettings.k = cleannum(localsettings.k, 0, 50);
localsettings.scp = cleannum(localsettings.scp, 0, 1);
localsettings.exponent_val = cleannum(localsettings.exponent_val, 0, 10);
localsettings.tfs_s = cleannum(localsettings.tfs_s, 0, 1);
localsettings.miro_type = cleannum(localsettings.miro_type, 0, 2);
localsettings.miro_tau = cleannum(localsettings.miro_tau, 0, 30);
@@ -7214,7 +7236,12 @@ Current version: 84
"tfs": localsettings.tfs_s,
"rep_pen_range": localsettings.rep_pen_range,
"rep_pen_slope": localsettings.rep_pen_slope,
"sampler_order": localsettings.sampler_order
"sampler_order": localsettings.sampler_order,
"min_temp": localsettings.min_temp,
"max_temp": localsettings.max_temp,
"k": localsettings.k,
"scp": localsettings.scp,
"exponent_val": localsettings.exponent_val
},
"models": selected_models.map((m) => { return m.name }),
};
@@ -7959,7 +7986,12 @@ Current version: 84
"tfs": localsettings.tfs_s,
"rep_pen_range": localsettings.rep_pen_range,
"rep_pen_slope": localsettings.rep_pen_slope,
"sampler_order": localsettings.sampler_order
"sampler_order": localsettings.sampler_order,
"min_temp": localsettings.min_temp,
"max_temp": localsettings.max_temp,
"k": localsettings.k,
"scp": localsettings.scp,
"exponent_val": localsettings.exponent_val
},
"models": selected_models.map((m) => { return m.name }),
};
@@ -11604,6 +11636,31 @@ Current version: 84
</div>
</div>

<div class="settingitem">
<div class="settinglabel">
<div class="justifyleft settingsmall">DynaTemp <span class="helpicon">?<span class="helptext">Experimental DynaTemp Configuration.</span></span></div>
<table class="settingsmall text-center" style="border-spacing: 4px 2px;
border-collapse: separate;">
<tbody>
<tr>
<th title="Minimum Dynamic Temperature Value. Default to 0">MinTemp</th>
<th title="Maximum Dynamic Temperature Value. Default to 2.0">MaxTemp</th>
<th title="K value. Default to 25.0">K</th>
<th title="Sigmoid Center Point. Defaults to 0.75">SCP</th>
<th title="Exponent Value. Defaults to 2.0">ExponentVal</th>
</tr>
<tr>
<td><input class="" type="text" inputmode="decimal" placeholder="0" value="0" id="min_temp"></td>
<td><input class="" type="text" inputmode="decimal" placeholder="2.0" value="2.0" id="max_temp"></td>
<td><input class="" type="text" inputmode="decimal" placeholder="25.0" value="25.0" id="k"></td>
<td><input class="" type="text" inputmode="decimal" placeholder="0.75" value="0.75" id="scp"></td>
<td><input class="" type="text" inputmode="decimal" placeholder="2.0" value="2.0" id="exponent_val"></td>
</tr>
</tbody>
</table>
</div>
</div>

</div>

</div>
23 changes: 20 additions & 3 deletions koboldcpp.py
@@ -67,7 +67,12 @@ class generation_inputs(ctypes.Structure):
("stop_sequence", ctypes.c_char_p * stop_token_max),
("stream_sse", ctypes.c_bool),
("grammar", ctypes.c_char_p),
("grammar_retain_state", ctypes.c_bool)]
("grammar_retain_state", ctypes.c_bool),
("min_temp", ctypes.c_float),
("max_temp", ctypes.c_float),
("k", ctypes.c_float),
("scp", ctypes.c_float),
("exponent_val", ctypes.c_float)]

class generation_outputs(ctypes.Structure):
_fields_ = [("status", ctypes.c_int),
@@ -284,7 +289,9 @@ def load_model(model_filename):
ret = handle.load_model(inputs)
return ret

def generate(prompt,max_length=20, max_context_length=512, temperature=0.8, top_k=120, top_a=0.0, top_p=0.85, typical_p=1.0, tfs=1.0, rep_pen=1.1, rep_pen_range=128, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey=''):
def generate(prompt,max_length=20, max_context_length=512, temperature=0.8, top_k=120, top_a=0.0, top_p=0.85, typical_p=1.0, tfs=1.0, rep_pen=1.1, rep_pen_range=128,
mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False,
grammar='', grammar_retain_state=False, genkey='', min_temp=0.0, max_temp=2.0, k=25.0, scp=0.75, exponent_val=2.0):
global maxctx, args, currentusergenkey, totalgens
inputs = generation_inputs()
outputs = ctypes.create_unicode_buffer(ctypes.sizeof(generation_outputs))
@@ -301,6 +308,11 @@ def generate(prompt,max_length=20, max_context_length=512, temperature=0.8, top_
inputs.top_k = top_k
inputs.top_a = top_a
inputs.top_p = top_p
inputs.min_temp = min_temp
inputs.max_temp = max_temp
inputs.k = k
inputs.scp = scp
inputs.exponent_val = exponent_val
inputs.typical_p = typical_p
inputs.tfs = tfs
inputs.rep_pen = rep_pen
@@ -479,7 +491,12 @@ def run_blocking():
stream_sse=stream_flag,
grammar=genparams.get('grammar', ''),
grammar_retain_state = genparams.get('grammar_retain_state', False),
genkey=genparams.get('genkey', ''))
genkey=genparams.get('genkey', ''),
min_temp=genparams.get('min_temp', 0.0),
max_temp=genparams.get('max_temp', 2.0),
k=genparams.get('k', 25.0),
scp=genparams.get('scp', 0.75),
exponent_val=genparams.get('exponent_val', 2.0))

recvtxt = ""
if stream_flag:
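For reference, a short usage sketch of the extended Python entry points: the first call uses the new keyword arguments of koboldcpp.generate() exactly as declared in the diff, and the second shows the JSON body a client could send so that run_blocking() picks the values up via genparams.get(...) with the same defaults. The /api/v1/generate route, port, and response shape are the usual KoboldCpp ones and are assumed here rather than shown in this diff.

import json
from urllib import request

import koboldcpp  # the module patched above; assumes a model was loaded via load_model()

# Direct call using the new DynaTemp keyword arguments (defaults from the diff).
text = koboldcpp.generate(
    "Once upon a time",
    max_length=64,
    temperature=0.8,
    min_temp=0.0,
    max_temp=2.0,
    k=25.0,
    scp=0.75,
    exponent_val=2.0,
)

# Equivalent HTTP request; run_blocking() reads these keys via genparams.get(...).
payload = {
    "prompt": "Once upon a time",
    "max_length": 64,
    "min_temp": 0.0,
    "max_temp": 2.0,
    "k": 25.0,
    "scp": 0.75,
    "exponent_val": 2.0,
}
req = request.Request(
    "http://localhost:5001/api/v1/generate",   # default KoboldCpp port (assumed)
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with request.urlopen(req) as resp:
    print(json.loads(resp.read())["results"][0]["text"])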