Skip to content

Commit

Permalink
Internal
Browse files (browse the repository at this point in the history)
PiperOrigin-RevId: 417830373
  • Loading branch information
tomvdw authored and copybara-github committed Dec 22, 2021
1 parent 43ae886 commit ac907cc
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 10 deletions.
4 changes: 2 additions & 2 deletions tensorflow_datasets/core/dataset_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,7 @@ def _build_single_dataset(
shuffle_files,
batch_size,
decoders: Optional[TreeDict[decode.partial_decode.DecoderArg]],
read_config,
read_config: read_config_lib.ReadConfig,
as_supervised,
):
"""as_dataset for a single split."""
Expand Down Expand Up @@ -810,7 +810,7 @@ def _as_dataset(
self,
split,
decoders: Optional[TreeDict[decode.partial_decode.DecoderArg]] = None,
read_config=None,
read_config: Optional[read_config_lib.ReadConfig] = None,
shuffle_files=False,
):
"""Constructs a `tf.data.Dataset`.
Expand Down
5 changes: 3 additions & 2 deletions tensorflow_datasets/core/tfrecords_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def make_file_instructions(

# TODO(epot): Should try to merge the instructions together as well as
# performing additional validation. For example, should raise an error
# if there is overlapp between splits (`train[:50]+train[:25]`)
# if there is overlap between splits (`train[:50]+train[:25]`)
# If there is a single shard, `train[:25]+train[50:75]` could be optimized
# into a single `ds.take(25).skip(50-25).take(75-50)`

Expand Down Expand Up @@ -471,8 +471,9 @@ def read_files(
raise ValueError(msg)

# Prepend path to filename
path = self._path
file_instructions = [
f.replace(filename=os.path.join(self._path, f.filename))
f.replace(filename=os.path.join(path, f.filename))
for f in file_instructions
]

Expand Down
11 changes: 5 additions & 6 deletions tensorflow_datasets/core/utils/shard_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,20 @@
sharding needs.
"""

from typing import Any, List, Sequence

import dataclasses
from typing import Any, List, Sequence


@dataclasses.dataclass(eq=True, frozen=True)
class FileInstruction(object):
"""Instruction to read a single shard/file.
Attributes:
filename: The filenames contains the relative path, not absolute.
filename: The filename containing the relative path, not absolute.
skip: Indicates which example read in the shard (`ds.skip().take()`). `0` if
no skipping
take: Indicates how many examples to read (`-1` to read all)
num_examples: `int`, The total number of examples
no skipping.
take: Indicates how many examples to read (`-1` to read all).
num_examples: `int`, The total number of examples.
"""
filename: str
skip: int
Expand Down

0 comments on commit ac907cc

Please sign in to comment.