@@ -376,7 +376,6 @@ struct llama_client_slot
376376
377377    int32_t  num_prompt_tokens           = 0 ;
378378    int32_t  num_prompt_tokens_processed = 0 ;
379-     int32_t  multibyte_pending           = 0 ;
380379
381380    json prompt;
382381    std::string generated_text;
@@ -425,7 +424,6 @@ struct llama_client_slot
425424        stopped_word           = false ;
426425        stopped_limit          = false ;
427426        stopping_word          = " " 
428-         multibyte_pending      = 0 ;
429427        n_past                 = 0 ;
430428        sent_count             = 0 ;
431429        sent_token_probs_index = 0 ;
@@ -992,35 +990,36 @@ struct llama_server_context
992990        slot.generated_text  += token_str;
993991        slot.has_next_token  = true ;
994992
995-         if  (slot.multibyte_pending  > 0 )
993+         //  check if there is incomplete UTF-8 character at the end
994+         bool  incomplete = false ;
995+         for  (unsigned  i = 1 ; i < 5  && i <= slot.generated_text .size (); ++i)
996996        {
997-             slot. multibyte_pending  -= token_str. size ();
998-         } 
999-         else   if  (token_str. size () ==  1 ) 
1000-         { 
1001-             const   char  c = token_str[ 0 ] ;
1002-             //  2-byte characters: 110xxxxx 10xxxxxx 
997+             unsigned   char  c = slot. generated_text [slot. generated_text . size () - i] ;
998+              if  ((c &  0xC0 ) ==  0x80 ) 
999+             { 
1000+                  //  continuation byte: 10xxxxxx 
1001+                  continue ;
1002+             } 
10031003            if  ((c & 0xE0 ) == 0xC0 )
10041004            {
1005-                 slot. multibyte_pending  =  1 ; 
1006-                 //  3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx 
1005+                 //  2-byte character: 110xxxxx ... 
1006+                 incomplete = i <  2 ; 
10071007            }
10081008            else  if  ((c & 0xF0 ) == 0xE0 )
10091009            {
1010-                 slot. multibyte_pending  =  2 ; 
1011-                 //  4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 
1010+                 //  3-byte character: 1110xxxx ... 
1011+                 incomplete = i <  3 ; 
10121012            }
10131013            else  if  ((c & 0xF8 ) == 0xF0 )
10141014            {
1015-                 slot.multibyte_pending  = 3 ;
1016-             }
1017-             else 
1018-             {
1019-                 slot.multibyte_pending  = 0 ;
1015+                 //  4-byte character: 11110xxx ...
1016+                 incomplete = i < 4 ;
10201017            }
1018+             //  else 1-byte character or invalid byte
1019+             break ;
10211020        }
10221021
1023-         if  (slot. multibyte_pending  ==  0 )
1022+         if  (!incomplete )
10241023        {
10251024            size_t  pos = std::min (slot.sent_count , slot.generated_text .size ());
10261025            const  std::string str_test = slot.generated_text .substr (pos);
@@ -1055,7 +1054,7 @@ struct llama_server_context
10551054            }
10561055        }
10571056
1058-         if  (slot. multibyte_pending  >  0  && !slot. has_next_token )
1057+         if  (incomplete )
10591058        {
10601059            slot.has_next_token  = true ;
10611060        }
0 commit comments