@@ -156,6 +156,91 @@ std::string base64_encode(const unsigned char* data, size_t length) {
156156
157157 return encoded;
158158}
159+
/// Result of splitting a byte buffer into emit-now text and a deferred tail.
struct Utf8SplitResult {
    /// Sanitized text: well-formed UTF-8 sequences copied verbatim, with one
    /// '?' substituted for every byte that cannot start a well-formed sequence.
    std::string valid;
    /// Trailing bytes that form a well-formed but still incomplete UTF-8
    /// sequence. Empty when the input ends on a sequence boundary.
    std::string remainder;
};

/// Splits `input` into sanitized UTF-8 text and an incomplete trailing sequence.
///
/// Bytes are scanned left to right:
///   - ASCII bytes are copied unchanged.
///   - A well-formed multi-byte sequence (2-4 bytes; overlong forms, UTF-16
///     surrogates and code points above U+10FFFF are rejected through the
///     second-byte range) is copied unchanged.
///   - A byte that cannot start a well-formed sequence is replaced with '?'
///     and scanning resumes at the following byte.
///   - A sequence cut off by the end of `input` is moved to `remainder` only
///     when every byte present so far is acceptable, i.e. more input could
///     still complete it. If the bytes already present prove the sequence is
///     malformed, the lead byte becomes '?' right away, so text that follows it
///     (such as a trailing newline) is not withheld from the caller.
///
/// @param input Raw bytes, possibly ending in the middle of a UTF-8 sequence.
/// @return The sanitized text plus any deferred incomplete tail.
Utf8SplitResult extract_complete_utf8(const std::string& input) {
    Utf8SplitResult result;
    result.valid.reserve(input.size());

    const std::size_t size = input.size();
    std::size_t i = 0;
    while (i < size) {
        const unsigned char lead = static_cast<unsigned char>(input[i]);
        if (lead < 0x80) {
            result.valid.push_back(static_cast<char>(lead));
            ++i;
            continue;
        }

        // Sequence length implied by the lead byte. 0 marks a byte that can
        // never start a sequence: a stray continuation byte (0x80-0xBF), an
        // overlong 2-byte lead (0xC0/0xC1), or anything above 0xF4.
        std::size_t expected = 0;
        if (lead >= 0xC2 && lead <= 0xDF) {
            expected = 2;
        } else if (lead >= 0xE0 && lead <= 0xEF) {
            expected = 3;
        } else if (lead >= 0xF0 && lead <= 0xF4) {
            expected = 4;
        }

        // Number of bytes of this sequence that are actually present.
        const std::size_t remaining = size - i;
        const std::size_t available = expected < remaining ? expected : remaining;

        // Validate every trailing byte that is present, even when the sequence
        // is truncated, so a malformed prefix is never deferred.
        bool well_formed = expected != 0;
        for (std::size_t j = 1; well_formed && j < available; ++j) {
            const unsigned char trail = static_cast<unsigned char>(input[i + j]);
            unsigned char lowest = 0x80;
            unsigned char highest = 0xBF;
            if (j == 1) {
                // The second byte carries the extra range restrictions.
                if (lead == 0xE0) {
                    lowest = 0xA0;   // below this is an overlong 3-byte form
                } else if (lead == 0xED) {
                    highest = 0x9F;  // above this encodes a UTF-16 surrogate
                } else if (lead == 0xF0) {
                    lowest = 0x90;   // below this is an overlong 4-byte form
                } else if (lead == 0xF4) {
                    highest = 0x8F;  // above this exceeds U+10FFFF
                }
            }
            well_formed = trail >= lowest && trail <= highest;
        }

        if (!well_formed) {
            result.valid.push_back('?');
            ++i;
            continue;
        }

        if (available < expected) {
            // Well-formed so far but cut off by the end of the input: hand the
            // tail back so it can be prepended to the next chunk.
            result.remainder = input.substr(i);
            break;
        }

        result.valid.append(input, i, expected);
        i += expected;
    }

    return result;
}
243+
159244struct CLIOptions {
160245 std::string model_path;
161246 int port = 8000 ;
@@ -361,6 +446,7 @@ struct ServerState {
361446 CtxConfig ctx_config;
362447 CtxConfig default_config;
363448 LogCollector* active_collector = nullptr ;
449+ std::string pending_log_fragment;
364450 bool verbose = false ;
365451};
366452
@@ -1622,13 +1708,28 @@ void sd_server_log_callback(sd_log_level_t level, const char* text, void* user_d
16221708 }
16231709
16241710 ServerState* state = static_cast <ServerState*>(user_data);
1625- std::string message (text);
1626- while (!message.empty () && (message.back () == ' \n ' || message.back () == ' \r ' )) {
1627- message.pop_back ();
1628- }
1711+ std::string message;
1712+ bool only_partial = false ;
16291713
16301714 {
16311715 std::lock_guard<std::mutex> guard (state->log_mutex );
1716+
1717+ std::string combined = state->pending_log_fragment ;
1718+ combined.append (text);
1719+
1720+ Utf8SplitResult sanitized = extract_complete_utf8 (combined);
1721+ state->pending_log_fragment = std::move (sanitized.remainder );
1722+
1723+ message = std::move (sanitized.valid );
1724+ while (!message.empty () && (message.back () == ' \n ' || message.back () == ' \r ' )) {
1725+ message.pop_back ();
1726+ }
1727+
1728+ only_partial = message.empty () && !state->pending_log_fragment .empty ();
1729+ if (only_partial) {
1730+ return ;
1731+ }
1732+
16321733 if (state->active_collector != nullptr ) {
16331734 state->active_collector ->add (level, message);
16341735 return ;
0 commit comments