Skip to content

Commit

Permalink
Update prepare_starcoder.py
Browse files Browse the repository at this point in the history
  • Loading branch information
ChaosCodes authored Nov 6, 2023
1 parent 454a7ae commit 2fd7bbd
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions scripts/prepare_starcoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ def prepare_full(
text_ids = tokenizer.encode(text)
builder.add_array(np.array(text_ids, dtype=builder.dtype))

builder.write_reminder()
# We deliberately skip writing the final remainder, so that no meaningless corpus filled with bos_ids is produced; see https://github.com/jzhang38/TinyLlama/issues/83 for more details.
# builder.write_reminder()


def prepare(
Expand Down Expand Up @@ -102,4 +103,4 @@ def prepare(

if __name__ == "__main__":
from jsonargparse import CLI
CLI(prepare)
CLI(prepare)

0 comments on commit 2fd7bbd

Please sign in to comment.