@@ -3,8 +3,11 @@
 #include "nitro_utils.h"
 #include <chrono>
 #include <cstring>
-#include <thread>
+#include <drogon/HttpResponse.h>
 #include <regex>
+#include <thread>
+
+using namespace inferences;
 
 std::string create_return_json(const std::string &id, const std::string &model,
                                const std::string &content,
@@ -35,7 +38,7 @@ std::string create_return_json(const std::string &id, const std::string &model,
   return Json::writeString(writer, root);
 }
 
-void llamaCPP::asyncHandleHttpRequest(
+void llamaCPP::chatCompletion(
     const HttpRequestPtr &req,
     std::function<void(const HttpResponsePtr &)> &&callback) {
   const auto &jsonBody = req->getJsonObject();
@@ -196,3 +199,29 @@ void llamaCPP::asyncHandleHttpRequest(
                                       "chat_completions.txt");
   callback(resp);
 }
+
+void llamaCPP::embedding(
+    const HttpRequestPtr &req,
+    std::function<void(const HttpResponsePtr &)> &&callback) {
+  auto lock = llama.lock();
+
+  const auto &jsonBody = req->getJsonObject();
+
+  llama.rewind();
+  llama_reset_timings(llama.ctx);
+  if (jsonBody && jsonBody->isMember("content")) {
+    llama.prompt = (*jsonBody)["content"].asString();
+  } else {
+    llama.prompt = "";
+  }
+  llama.params.n_predict = 0;
+  llama.loadPrompt();
+  llama.beginCompletion();
+  llama.doCompletion();
+
+  const json data = format_embedding_response(llama);
+  auto resp = drogon::HttpResponse::newHttpResponse();
+  resp->setBody(data.dump());
+  resp->setContentTypeString("application/json");
+  callback(resp);
+}
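
A note for readers following along: Drogon controllers declare their HTTP routes in the class header, which this diff does not touch, so renaming `asyncHandleHttpRequest` to `chatCompletion` and adding `embedding` implies a matching header change. Below is a minimal sketch of what that declaration could look like, assuming Drogon's standard `METHOD_LIST_BEGIN`/`ADD_METHOD_TO` registration macros; the route paths here are invented placeholders, not the project's actual endpoints.

```cpp
// Hypothetical sketch of the matching controller header (llamaCPP.h is not
// part of this diff). The `inferences` namespace is taken from the `using`
// directive above; route paths are placeholders, and members such as the
// shared `llama` context are omitted for brevity.
#pragma once

#include <drogon/HttpController.h>
#include <functional>

namespace inferences {

class llamaCPP : public drogon::HttpController<llamaCPP> {
 public:
  METHOD_LIST_BEGIN
  // ADD_METHOD_TO binds a member function to a URL pattern and HTTP verb.
  ADD_METHOD_TO(llamaCPP::chatCompletion, "/llama/chat_completion", drogon::Post);
  ADD_METHOD_TO(llamaCPP::embedding, "/llama/embedding", drogon::Post);
  METHOD_LIST_END

  void chatCompletion(
      const drogon::HttpRequestPtr &req,
      std::function<void(const drogon::HttpResponsePtr &)> &&callback);
  void embedding(
      const drogon::HttpRequestPtr &req,
      std::function<void(const drogon::HttpResponsePtr &)> &&callback);
};

}  // namespace inferences
```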
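Assuming a registration along those lines, a client would POST a JSON body such as `{"content": "text to embed"}` to the embedding route; if `content` is absent (or the body is not valid JSON, given the null check above) the handler embeds the empty string. Setting `llama.params.n_predict = 0` makes `doCompletion()` evaluate the prompt without sampling any new tokens, after which `format_embedding_response` (presumably carried over from llama.cpp's server example) serializes the resulting embedding vector into the JSON payload handed to `callback`. The `llama.lock()` guard at the top serializes requests against the single shared model context, so concurrent embedding calls queue rather than interleave.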