Skip to content

Commit a095658

Browse files
authored
Fix: workspace allocation bugs (NVIDIA#746, NVIDIA#747)
1 parent 0b34777 commit a095658

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

FasterTransformer/v3.0/fastertransformer/decoding_beamsearch.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -114,8 +114,8 @@ class DecodingBeamsearch
114 114   int finished_buf_size = args_.batch_size_ * args_.beam_width_; //type bool
115 115   int finished_count_size = (int)(ceil(1 / 32.)) * 32; // type int
116 116
117     - int topk_ids_buf_size = args_.batch_size_ * args_.beam_width_ * (ceil)((args_.beam_width_ * args_.vocab_size_ * 1.0) / 1024.0); // type int
118     - int topk_val_buf_size = args_.batch_size_ * args_.beam_width_ * args_.beam_width_; // type float
    117 + int topk_ids_buf_size = args_.batch_size_ * args_.beam_width_ * (ceil)((args_.beam_width_ * args_.vocab_size_ * 1.0) / 1024.0) * 8; // type int
    118 + int topk_val_buf_size = args_.batch_size_ * args_.beam_width_ * args_.beam_width_ * 8; // type float
119 119   int storage_size_per_beam = 2 * args_.beam_width_ + SMALL_TOP_K_SOFTMAX_MAX_VOC_PARTS * (2 * MAX_K + 2);
120 120   args_.temp_storage_size_ = args_.batch_size_ * args_.beam_width_ * storage_size_per_beam; // type float
121 121

0 commit comments

Comments
 (0)