diff --git a/llama2_q4.cu b/llama2_q4.cu index 5acc939..67921f6 100644 --- a/llama2_q4.cu +++ b/llama2_q4.cu @@ -125,7 +125,6 @@ void malloc_weights(TransformerWeights* w, Config* p) { } cudaMalloc((void**)&w->rms_final_weight, p->dim * sizeof(half)); - int head_size = p->dim / p->n_heads; cudaMalloc((void**)&w->wcls, p->vocab_size * p->dim * sizeof(half)); // ensure all mallocs went fine @@ -437,7 +436,7 @@ void free_transformer(Transformer* t) { // ---------------------------------------------------------------------------- // generation loop void generate(Transformer* transformer, Tokenizer* tokenizer, Sampler* sampler, char* prompt, int steps) { - char* empty_prompt = ""; + char empty_prompt[] = ""; if (prompt == NULL) { prompt = empty_prompt; } // encode the (string) prompt into tokens sequence @@ -625,7 +624,8 @@ int main(int argc, char *argv[]) { // default parameters char* checkpoint_path = NULL; // e.g. out/model.bin - char* tokenizer_path = "tokenizer.bin"; + char default_tokenizer_path[] = "tokenizer.bin"; + char* tokenizer_path = default_tokenizer_path; char* dataset_path = NULL; int steps = 0; // number of steps to run for char* prompt = nullptr; // prompt string @@ -633,7 +633,8 @@ int main(int argc, char *argv[]) { float temperature = 0.5f; // 0.0 = greedy deterministic. 1.0 = original. don't set higher float topp = 0.6f; // top-p in nucleus sampling. 1.0 = off. 0.9 works well, but slower unsigned long long rng_seed = 0; // seed rng with time by default - char* mode = "generate"; // generate|chat + char default_mode[] = "generate"; + char* mode = default_mode; // generate|chat char* system_prompt = NULL; // the (optional) system prompt to use in chat mode // poor man's C argparse diff --git a/tokenizer.h b/tokenizer.h index 1302401..d28e5b2 100644 --- a/tokenizer.h +++ b/tokenizer.h @@ -130,7 +130,8 @@ void encode(Tokenizer* t, char* text, int8_t bos, int8_t eos, int* tokens, int* // TODO: pretty sure this isn't correct in the general case but I don't have the // energy to read more of the sentencepiece code to figure out what it's doing if (text[0] != '\0') { - int dummy_prefix = str_lookup(" ", t->sorted_vocab, t->vocab_size); + char blank[] = " "; + int dummy_prefix = str_lookup(blank, t->sorted_vocab, t->vocab_size); tokens[(*n_tokens)++] = dummy_prefix; }