@@ -182,21 +182,23 @@ def append_output(self, output) -> None:
182182 self .num_cached_tokens = output .num_cached_tokens or 0
183183 self .num_output_tokens += len (output .outputs [0 ].token_ids or [])
184184
185- if len (self .input_messages ) == 0 :
186- output_prompt = output .prompt or ""
187- output_prompt_token_ids = output .prompt_token_ids or []
188- self .input_messages .append (
185+ # Only store input/output messages when enable_response_messages is True, to save memory.
186+ if self .request .enable_response_messages :
187+ if len (self .input_messages ) == 0 :
188+ output_prompt = output .prompt or ""
189+ output_prompt_token_ids = output .prompt_token_ids or []
190+ self .input_messages .append (
191+ ResponseRawMessageAndToken (
192+ message = output_prompt ,
193+ tokens = output_prompt_token_ids ,
194+ )
195+ )
196+ self .output_messages .append (
189197 ResponseRawMessageAndToken (
190- message = output_prompt ,
191- tokens = output_prompt_token_ids ,
198+ message = output . outputs [ 0 ]. text ,
199+ tokens = output . outputs [ 0 ]. token_ids ,
192200 )
193201 )
194- self .output_messages .append (
195- ResponseRawMessageAndToken (
196- message = output .outputs [0 ].text ,
197- tokens = output .outputs [0 ].token_ids ,
198- )
199- )
200202
201203 def append_tool_output (self , output ) -> None :
202204 raise NotImplementedError ("Should not be called." )
@@ -274,30 +276,31 @@ def append_output(self, output: RequestOutput) -> None:
274276 self .num_cached_tokens = output .num_cached_tokens or 0
275277 self .num_output_tokens += len (output .outputs [0 ].token_ids or [])
276278 self .parser .process (output .outputs [0 ])
277- output_prompt = output .prompt or ""
278- output_prompt_token_ids = output .prompt_token_ids or []
279- if len (self .input_messages ) == 0 :
280- self .input_messages .append (
281- ResponseRawMessageAndToken (
282- message = output_prompt ,
283- tokens = output_prompt_token_ids ,
279+
280+ # Only store input/output messages when enable_response_messages is True, to save memory.
281+ if self .request .enable_response_messages :
282+ output_prompt = output .prompt or ""
283+ output_prompt_token_ids = output .prompt_token_ids or []
284+ if len (self .input_messages ) == 0 :
285+ self .input_messages .append (
286+ ResponseRawMessageAndToken (
287+ message = output_prompt ,
288+ tokens = output_prompt_token_ids ,
289+ )
290+ )
291+ else :
292+ self .output_messages .append (
293+ ResponseRawMessageAndToken (
294+ message = output_prompt ,
295+ tokens = output_prompt_token_ids ,
296+ )
284297 )
285- )
286- else :
287- # TODO: merge them in properly together
288- # TODO: responsesParser doesn't parse kimi k2 sentences correctly
289298 self .output_messages .append (
290299 ResponseRawMessageAndToken (
291- message = output_prompt ,
292- tokens = output_prompt_token_ids ,
300+ message = output . outputs [ 0 ]. text ,
301+ tokens = output . outputs [ 0 ]. token_ids ,
293302 )
294303 )
295- self .output_messages .append (
296- ResponseRawMessageAndToken (
297- message = output .outputs [0 ].text ,
298- tokens = output .outputs [0 ].token_ids ,
299- )
300- )
301304
302305 def append_tool_output (self , output : list [ResponseInputOutputItem ]) -> None :
303306 self .parser .response_messages .extend (output )
0 commit comments