diff --git a/tensor2tensor/data_generators/enwik8.py b/tensor2tensor/data_generators/enwik8.py
index 837e981ad..6f26b86e0 100644
--- a/tensor2tensor/data_generators/enwik8.py
+++ b/tensor2tensor/data_generators/enwik8.py
@@ -131,10 +131,12 @@ def generate_encoded_samples(self, data_dir, tmp_dir, dataset_split):
 
 @registry.register_problem
 class Enwik8L2k(Enwik8L65k):
-  """Enwiki8, with examples up to 2048 characters long. Reads the input
-  byte-wise and chunks it into fragments of maximum length of 2048. Does not
-  shift byte indices (we do not assume cls or pad are used),
-  unlike the base class!"""
+  """Enwiki8, with examples up to 2048 characters long.
+
+  Reads the input byte-wise and chunks it into fragments of maximum
+  length of 2048. Does not shift byte indices (we do not assume cls or
+  pad are used), unlike the base class!
+  """
 
   READ_MODE = "rb"
 