We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 6fbe229 commit 61d50cf Copy full SHA for 61d50cf
colossalai/shardformer/policies/gpt2.py
@@ -40,10 +40,8 @@ def preprocess(self):
40
self.model.resize_token_embeddings(new_vocab_size)
41
else:
42
# Make vocab_size divisible by `make_vocab_size_divisible_by` to select a faster CUDA kernel operator.
43
- new_vocab_size = vocab_size
44
multiple = self.shard_config.make_vocab_size_divisible_by
45
- while (new_vocab_size % multiple) != 0:
46
- new_vocab_size += 1
+ new_vocab_size = (vocab_size // multiple + 1) * multiple
47
48
return self.model
49
0 commit comments