Skip to content

Commit

Permalink
updated kobold lite, work on rwkv, added exe path to model load param…
Browse files Browse the repository at this point in the history
…s, added launch parameter
  • Loading branch information
LostRuins committed Apr 18, 2023
1 parent 8e923dc commit c200b67
Show file tree
Hide file tree
Showing 11 changed files with 100,587 additions and 16 deletions.
3 changes: 3 additions & 0 deletions expose.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
#include "expose.h"
#include "model_adapter.cpp"

std::string executable_path = "";

extern "C"
{

Expand All @@ -46,6 +48,7 @@ extern "C"
deviceenv = "KCPP_CLBLAST_DEVICES="+std::to_string(devices);
putenv((char*)platformenv.c_str());
putenv((char*)deviceenv.c_str());
executable_path = inputs.executable_path;

if(file_format==FileFormat::GPTJ_1 || file_format==FileFormat::GPTJ_2 || file_format==FileFormat::GPTJ_3)
{
Expand Down
2 changes: 2 additions & 0 deletions expose.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ struct load_model_inputs
const int max_context_length;
const int batch_size;
const bool f16_kv;
const char *executable_path;
const char *model_filename;
const int n_parts_overwrite = -1;
const bool use_mmap;
Expand All @@ -33,3 +34,4 @@ struct generation_outputs
char text[16384]; //16kb should be enough for any response
};

extern std::string executable_path;
11 changes: 8 additions & 3 deletions gpttype_adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,16 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
n_batch = 1;

std::string word;
for (int i = 0; i < 20; i++) {
read_rwkv_vocab();
int vocabsiz = rwkv_vocab.size();
for (int i = 0; i < vocabsiz; i++) {
uint32_t len;
word = ('a'+i);
word = rwkv_vocab[i];
vocab.token_to_id[word] = i;
vocab.id_to_token[i] = word;
}
printf("\nRWKV Vocab: %u\n",vocabsiz);

int vocabsiz = vocab.token_to_id.size();
bool testeval = rwkv_eval(rwkv_context_v1, 0, rwkv_context_v1->state_in, rwkv_context_v1->state_out, rwkv_context_v1->logits_out);
if(!testeval)
{
Expand Down Expand Up @@ -230,6 +232,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o

// tokenize the prompt
std::vector<gpt_vocab::id> embd_inp = ::gpt_tokenize(vocab, params.prompt);
print_tok_vec(embd_inp);

//truncate to front of the prompt if its too long
int32_t nctx = params.n_ctx;
Expand Down Expand Up @@ -330,7 +333,9 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o

if(file_format==FileFormat::RWKV_1)
{
printf("\nsiz:%d val:%d\n",embd.size(),embd[0]);
evalres = rwkv_eval(rwkv_context_v1, embd[0], rwkv_context_v1->state_in, rwkv_context_v1->state_out, rwkv_context_v1->logits_out);
memcpy(logits.data(), rwkv_context_v1->logits_out, sizeof(float)*rwkv_vocab.size());
}
else if(file_format==FileFormat::GPT2_1)
{
Expand Down
17 changes: 8 additions & 9 deletions klite.embd

Large diffs are not rendered by default.

12 changes: 10 additions & 2 deletions koboldcpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class load_model_inputs(ctypes.Structure):
("max_context_length", ctypes.c_int),
("batch_size", ctypes.c_int),
("f16_kv", ctypes.c_bool),
("executable_path", ctypes.c_char_p),
("model_filename", ctypes.c_char_p),
("n_parts_overwrite", ctypes.c_int),
("use_mmap", ctypes.c_bool),
Expand Down Expand Up @@ -77,14 +78,15 @@ def load_model(model_filename,batch_size=8,max_context_length=512,n_parts_overwr
inputs.max_context_length = max_context_length #initial value to use for ctx, can be overwritten
inputs.threads = threads
inputs.n_parts_overwrite = n_parts_overwrite
inputs.f16_kv = True
inputs.f16_kv = True
inputs.use_mmap = use_mmap
inputs.use_smartcontext = use_smartcontext
inputs.blasbatchsize = blasbatchsize
clblastids = 0
if args.useclblast:
clblastids = 100 + int(args.useclblast[0])*10 + int(args.useclblast[1])
inputs.clblast_info = clblastids
inputs.executable_path = (os.path.dirname(os.path.realpath(__file__))+"/").encode("UTF-8")
ret = handle.load_model(inputs)
return ret

Expand Down Expand Up @@ -437,7 +439,12 @@ def main(args):
else:
epurl = f"http://{args.host}:{args.port}" + ("?streaming=1" if args.stream else "")


if args.launch:
try:
import webbrowser as wb
wb.open(epurl)
except:
print("--launch was set, but could not launch web browser automatically.")
print(f"Please connect to custom endpoint at {epurl}")
RunServerMultiThreaded(args.host, args.port, embedded_kailite)

Expand All @@ -451,6 +458,7 @@ def main(args):
portgroup.add_argument("--port", help="Port to listen on", default=defaultport, type=int, action='store')
portgroup.add_argument("port_param", help="Port to listen on (positional)", default=defaultport, nargs="?", type=int, action='store')
parser.add_argument("--host", help="Host IP to listen on. If empty, all routable interfaces are accepted.", default="")
parser.add_argument("--launch", help="Launches a web browser when load is completed.", action='store_true')

#os.environ["OMP_NUM_THREADS"] = '12'
# psutil.cpu_count(logical=False)
Expand Down
2 changes: 1 addition & 1 deletion make_pyinstaller.bat
Original file line number Diff line number Diff line change
@@ -1 +1 @@
pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_openblas_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." "./koboldcpp.py" -n "koboldcpp.exe"
pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_openblas_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." --add-data "./rwkv_vocab.embd;." "./koboldcpp.py" -n "koboldcpp.exe"
3 changes: 2 additions & 1 deletion otherarch/rwkv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@
#include <iostream>
#include <unordered_map>


#include "model_adapter.h"

#include "rwkv_vocab.cpp"

// --- Utilities ---

#define FP32_SIZE 4
Expand Down
30 changes: 30 additions & 0 deletions otherarch/rwkv_vocab.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#include <vector>
#include <string>
#include <fstream>
#include <iostream>

#include "expose.h"

// RWKV vocabulary table, one entry per line of "rwkv_vocab.embd".
// The index of an entry is its line number in that file (0-based); it is
// filled by read_rwkv_vocab() below.
std::vector<std::string> rwkv_vocab;

/// Loads the RWKV vocabulary file into the global rwkv_vocab vector.
///
/// The file name is formed by appending "rwkv_vocab.embd" directly to the
/// global executable_path, so executable_path must already end with a path
/// separator. Each line of the file becomes one entry, in file order.
///
/// Any previously loaded entries are discarded first, so calling this more
/// than once yields a single copy of the vocabulary rather than appending
/// duplicates. If the file cannot be opened, a message is printed and
/// rwkv_vocab is left empty.
void read_rwkv_vocab()
{
    auto filepath = executable_path + "rwkv_vocab.embd";
    printf("Reading vocab from %s", filepath.c_str());

    // Start from a clean table: the entry count is used by callers as the
    // vocabulary size, so stale entries from an earlier load must not linger.
    rwkv_vocab.clear();

    std::ifstream myfile(filepath);
    if (!myfile.is_open())
    {
        std::cout << "Unable to open RWKV vocab file";
        return;
    }

    // Test the result of getline itself rather than good() before the read:
    // checking good() first lets a final failed read (empty file, or a file
    // ending in a newline) push one spurious empty entry.
    std::string line;
    while (std::getline(myfile, line))
    {
        rwkv_vocab.push_back(line);
    }
    // myfile is closed by its destructor.
}
Loading

0 comments on commit c200b67

Please sign in to comment.