|
3 | 3 | #include "build-info.h" |
4 | 4 | #include "grammar-parser.h" |
5 | 5 |
|
6 | | -// #define SERVER_MULTIMODAL_SUPPORT |
| 6 | +#define SERVER_MULTIMODAL_SUPPORT |
7 | 7 |
|
8 | 8 | #ifdef SERVER_MULTIMODAL_SUPPORT |
9 | 9 | #include "../llava/clip.h" |
@@ -78,7 +78,7 @@ std::vector<uint8_t> base64_decode(std::string const& encoded_string) { |
78 | 78 | int i = 0; |
79 | 79 | int j = 0; |
80 | 80 | int in_ = 0; |
81 | | - BYTE char_array_4[4], char_array_3[3]; |
| 81 | + unsigned char char_array_4[4], char_array_3[3]; |
82 | 82 | std::vector<uint8_t> ret; |
83 | 83 | while (in_len-- && ( encoded_string[in_] != '=') && is_base64(encoded_string[in_])) { |
84 | 84 | char_array_4[i++] = encoded_string[in_]; in_++; |
@@ -884,9 +884,10 @@ struct llama_server_context |
884 | 884 |
|
885 | 885 | // append prefix of next image |
886 | 886 | batch.n_tokens = 0; |
887 | | - std::vector<llama_token> append_tokens = tokenize( |
888 | | - image_idx >= slot.images.size() ? slot.params.input_suffix : // no more images, then process suffix prompt |
889 | | - slot.images[image_idx].prefix_prompt, true); // has next image |
| 887 | + const auto json_prompt = (image_idx >= slot.images.size()) ? |
| 888 | + slot.params.input_suffix : // no more images, then process suffix prompt |
| 889 | + (json)(slot.images[image_idx].prefix_prompt); |
| 890 | + std::vector<llama_token> append_tokens = tokenize(json_prompt, true); // has next image |
890 | 891 | for (int i = 0; i < append_tokens.size(); ++i) { |
891 | 892 | batch.token [batch.n_tokens] = append_tokens[i]; |
892 | 893 | batch.pos [batch.n_tokens] = slot.n_past; |
|
0 commit comments