Skip to content

Commit

Permalink
debug
Browse files (browse the repository at this point in the history)
  • Loading branch information
Vivicai1005 committed Feb 6, 2024
1 parent 2ed9537 commit 150e390
Showing 1 changed file with 3 additions and 0 deletions.
3 changes: 3 additions & 0 deletions other_infer/exllamav2_hf_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,11 +257,14 @@ def main(
)
inputs = {k: v.to(device) for k, v in inputs.items()}
print('=' * 100)
answer = ""
for text in generate_stream(model, tokenizer, inputs['input_ids'], inputs['attention_mask'],
generation_config=generation_config):
print(text, end='', flush=True)
answer += text
print('')
toc = time.perf_counter()
num_tok = len(tokenizer.encode(answer))
print(
f"\n[time: {res_time:0.4f} sec, speed: {num_tok / res_time:0.4f} tok/sec]"
)
Expand Down

0 comments on commit 150e390

Please sign in to comment.