internal merge of PR #7
PiperOrigin-RevId: 226349799
brettkoonce authored and Copybara-Service committed Dec 20, 2018
1 parent 21181c9 commit 7e1f230
Showing 15 changed files with 25 additions and 25 deletions.
2 changes: 1 addition & 1 deletion docs/api_docs/python/tfds/core/NamedSplit.md
@@ -129,7 +129,7 @@ s1, s2, s3, s4 = split.subsplit(weighted=[2, 2, 1, 1]) # 33%, 33%, 16%, 18%
split.subsplit([1, 1, 2]) # weighted=[1, 1, 2]
```
* <b>`k`</b>: `int` If set, subdivide the split into `k` equal parts.
-* <b>`percent`</b>: `tfds.percent slice`, return a single subplit corresponding to
+* <b>`percent`</b>: `tfds.percent slice`, return a single subsplit corresponding to
a slice of the original split. For example:
`split.subsplit(tfds.percent[-20:]) # Last 20% of the dataset`.
* <b>`weighted`</b>: `list[int]`, return a list of subsplits whose proportions match
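
For orientation, a minimal sketch of how the three `subsplit` arguments above combine in practice, assuming the split API of this era (the weights and variable names are illustrative):

```python
import tensorflow_datasets as tfds

# weighted: three subsplits at roughly 50% / 25% / 25%.
train, validation, test = tfds.Split.TRAIN.subsplit(weighted=[2, 1, 1])

# k: four equal parts.
quarters = tfds.Split.TRAIN.subsplit(k=4)

# percent: the last 20% of the split, as in the docs above.
tail = tfds.Split.TRAIN.subsplit(tfds.percent[-20:])
```

Each result is itself a split that can then be loaded like any other.
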
2 changes: 1 addition & 1 deletion docs/api_docs/python/tfds/features/FeaturesDict.md
@@ -26,7 +26,7 @@ Defined in [`core/features/feature.py`](https://github.com/tensorflow/datasets/t

Main feature connector orchestrator.

-The encode/decode method of the spec feature will recursivelly encode/decode
+The encode/decode method of the spec feature will recursively encode/decode
every sub-connector given on the constructor.
Other features can inherit from this class and call super() in order to get
nested container.
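
Concretely, the nesting described here looks something like the following (feature names and shapes are illustrative, not from the source):

```python
import tensorflow_datasets as tfds

# encode/decode recurses into each sub-connector below.
features = tfds.features.FeaturesDict({
    "image": tfds.features.Image(shape=(28, 28, 1)),
    "label": tfds.features.ClassLabel(num_classes=10),
})
```
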
@@ -100,7 +100,7 @@ Builds a `SubwordTextEncoder` based on the `corpus_generator`.
* <b>`corpus_generator`</b>: generator yielding `str`, from which subwords will be
constructed.
* <b>`target_vocab_size`</b>: `int`, approximate size of the vocabulary to create.
-* <b>`max_subword_length`</b>: `int`, maxmimum length of a subword. Note that memory
+* <b>`max_subword_length`</b>: `int`, maximum length of a subword. Note that memory
and compute scale quadratically in the length of the longest token.
* <b>`max_corpus_chars`</b>: `int`, the maximum number of characters to consume from
`corpus_generator` for the purposes of building the subword vocabulary.
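
A hedged sketch of calling `build_from_corpus` with the arguments documented above (the corpus and sizes are illustrative):

```python
import tensorflow_datasets as tfds

corpus = ["the quick brown fox", "jumps over the lazy dog"]

encoder = tfds.features.text.SubwordTextEncoder.build_from_corpus(
    corpus_generator=(line for line in corpus),
    target_vocab_size=2**13,
    max_subword_length=20,  # memory/compute scale quadratically in this
)
ids = encoder.encode("the lazy fox")
text = encoder.decode(ids)
```
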
@@ -32,7 +32,7 @@ Constraints on generators:
__init__(example_reading_spec)
```

-Construcs a TFRecordExampleAdapter.
+Constructs a TFRecordExampleAdapter.

#### Args:

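
For context, the reading spec passed to the constructor is a feature dict in the style of `tf.parse_single_example`; a sketch, assuming the adapter lives in `tensorflow_datasets.core.file_format_adapter` (module path and spec style are both assumptions):

```python
import tensorflow as tf
from tensorflow_datasets.core import file_format_adapter

adapter = file_format_adapter.TFRecordExampleAdapter({
    "image": tf.FixedLenFeature(shape=(), dtype=tf.string),
    "label": tf.FixedLenFeature(shape=(), dtype=tf.int64),
})
```
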
4 changes: 2 additions & 2 deletions tensorflow_datasets/core/dataset_builder.py
@@ -480,7 +480,7 @@ def _info(self):
def _download_and_prepare(self, dl_manager):
"""Downloads and prepares dataset for reading.
-This is the internal implementation to overwritte called when user call
+This is the internal implementation to overwrite called when user calls
`download_and_prepare`. It should download all required data and generate
the pre-processed datasets files.
@@ -494,7 +494,7 @@ def _download_and_prepare(self, dl_manager):
def _as_dataset(self, split, shuffle_files=None):
"""Constructs a `tf.data.Dataset`.
-This is the internal implementation to overwritte called when user call
+This is the internal implementation to overwrite called when user calls
`as_dataset`. It should read the pre-processed datasets files and generate
the `tf.data.Dataset` object.
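
Most datasets do not override these two hooks directly; they subclass `GeneratorBasedBuilder`, which implements both. A minimal sketch, assuming the builder API of this era (the dataset, URL, and `DatasetInfo` arguments are placeholders):

```python
import tensorflow_datasets as tfds

class MyDataset(tfds.core.GeneratorBasedBuilder):
  """Hypothetical builder, for illustration only."""

  def _info(self):
    # The exact DatasetInfo signature of this era is assumed here.
    return tfds.core.DatasetInfo(
        features=tfds.features.FeaturesDict({"text": tfds.features.Text()}),
    )

  def _split_generators(self, dl_manager):
    path = dl_manager.download_and_extract("https://example.com/data.zip")
    return [
        tfds.core.SplitGenerator(
            name=tfds.Split.TRAIN,
            num_shards=1,
            gen_kwargs={"path": path},
        ),
    ]

  def _generate_examples(self, path):
    with open(path) as f:
      for line in f:
        yield {"text": line.strip()}
```
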
2 changes: 1 addition & 1 deletion tensorflow_datasets/core/dataset_info.py
@@ -248,7 +248,7 @@ def _compute_dynamic_properties(self, builder):
"%s's info() property specifies split %s, but it "
"doesn't seem to have been generated. Please ensure "
"that the data was downloaded for this split and re-run "
"download_and_prepare"), self.name, split_name)
"download_and_prepare."), self.name, split_name)
raise

# Set splits to trigger proto update in setter
2 changes: 1 addition & 1 deletion tensorflow_datasets/core/download/util.py
@@ -54,7 +54,7 @@ class GenerateMode(enum.Enum):


def build_synchronize_decorator():
"""Returns a decorator which prevent concurents calls to functions.
"""Returns a decorator which prevents concurrent calls to functions.
Usage:
synchronized = build_synchronize_decorator()
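
The `Usage:` block is cut off in this hunk; a self-contained sketch of what such a decorator does (not the library's exact code):

```python
import functools
import threading

def build_synchronize_decorator():
  """Every function wrapped by the returned decorator shares one lock."""
  lock = threading.Lock()

  def decorator(fn):
    @functools.wraps(fn)
    def synchronized_fn(*args, **kwargs):
      with lock:  # mutual exclusion across all wrapped functions
        return fn(*args, **kwargs)
    return synchronized_fn

  return decorator

synchronized = build_synchronize_decorator()

@synchronized
def write_shard():
  ...  # at most one thread runs any @synchronized function at a time
```
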
2 changes: 1 addition & 1 deletion tensorflow_datasets/core/features/feature.py
@@ -330,7 +330,7 @@ def load_metadata(self, data_dir, feature_name):
class FeaturesDict(FeatureConnector):
"""Main feature connector orchestrator.
-The encode/decode method of the spec feature will recursivelly encode/decode
+The encode/decode method of the spec feature will recursively encode/decode
every sub-connector given on the constructor.
Other features can inherit from this class and call super() in order to get
nested container.
@@ -268,7 +268,7 @@ def build_from_corpus(cls,
corpus_generator: generator yielding `str`, from which subwords will be
constructed.
target_vocab_size: `int`, approximate size of the vocabulary to create.
-max_subword_length: `int`, maxmimum length of a subword. Note that memory
+max_subword_length: `int`, maximum length of a subword. Note that memory
and compute scale quadratically in the length of the longest token.
max_corpus_chars: `int`, the maximum number of characters to consume from
`corpus_generator` for the purposes of building the subword vocabulary.
2 changes: 1 addition & 1 deletion tensorflow_datasets/core/proto/README.md
@@ -1,6 +1,6 @@
# Protobuf

-This page describe how to update the protobuff generated python file. By
+This page describe how to update the protobuf generated python file. By
default, the protobuf is already compiled into python file so you won't have to
do anything. Those steps are required only if you update the `.proto` file. The
instruction are for linux.
6 changes: 3 additions & 3 deletions tensorflow_datasets/core/splits.py
@@ -133,7 +133,7 @@ def subsplit(self, arg=None, k=None, percent=None, weighted=None): # pylint: d
split.subsplit([1, 1, 2]) # weighted=[1, 1, 2]
```
k: `int` If set, subdivide the split into `k` equal parts.
-percent: `tfds.percent slice`, return a single subplit corresponding to
+percent: `tfds.percent slice`, return a single subsplit corresponding to
a slice of the original split. For example:
`split.subsplit(tfds.percent[-20:]) # Last 20% of the dataset`.
weighted: `list[int]`, return a list of subsplits whose proportions match
@@ -346,7 +346,7 @@ class SplitReadInstruction(object):
Similarly to SplitDescriptor nodes, this object can be composed with itself,
but the resolution happens instantaneously, instead of keeping track of the
-tree, such as all instuctions are compiled and flattened in a single
+tree, such as all instructions are compiled and flattened in a single
SplitReadInstruction object containing the list of files and slice to use.
Once resolved, the instructions can be accessed with:
@@ -373,7 +373,7 @@ def add(self, sliced_split):
def __add__(self, other):
"""Merging split together."""
# Will raise error if a split has already be added (NonMutableDict)
-# TODO(epot): If a split is already added but there is no overlapp between
+# TODO(epot): If a split is already added but there is no overlap between
# the slices, should merge the slices (ex: [:10] + [80:])
split_instruction = SplitReadInstruction()
split_instruction._splits.update(self._splits) # pylint: disable=protected-access
10 changes: 5 additions & 5 deletions tensorflow_datasets/core/utils/py_utils.py
@@ -68,15 +68,15 @@ def zip_dict(*dicts):
class NonMutableDict(dict):
"""Dict where keys can only be added but not modified.
-Will raise an error if the user try to overwritte one key. The error message
+Will raise an error if the user try to overwrite one key. The error message
can be customized during construction. It will be formatted using {key} for
-the overwritted key.
+the overwritten key.
"""

def __init__(self, *args, **kwargs):
self._error_msg = kwargs.pop(
"error_msg",
"Try to overwritte existing key: {key}",
"Try to overwrite existing key: {key}",
)
if kwargs:
raise ValueError("NonMutableDict cannot be initialized with kwargs.")
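
The enforcement itself sits outside this hunk; it presumably lives in `__setitem__`, roughly as follows (a behavioral sketch, not the exact source):

```python
class NonMutableDictSketch(dict):
  """Dict that refuses to overwrite existing keys."""

  def __init__(self, *args, **kwargs):
    self._error_msg = kwargs.pop(
        "error_msg", "Try to overwrite existing key: {key}")
    if kwargs:
      raise ValueError("NonMutableDict cannot be initialized with kwargs.")
    super(NonMutableDictSketch, self).__init__(*args, **kwargs)

  def __setitem__(self, key, value):
    if key in self:
      raise ValueError(self._error_msg.format(key=key))
    super(NonMutableDictSketch, self).__setitem__(key, value)

d = NonMutableDictSketch()
d["train"] = 1
# d["train"] = 2  # ValueError: Try to overwrite existing key: train
```
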
@@ -118,7 +118,7 @@ def __get__(self, obj, objtype=None):


def map_nested(function, data_struct, dict_only=False, map_tuple=False):
"""Apply a function recursivelly to each element of a nested data struct."""
"""Apply a function recursively to each element of a nested data struct."""

# Could add support for more exotic data_struct, like OrderedDict
if isinstance(data_struct, dict):
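
A usage sketch for `map_nested` (the data is illustrative):

```python
from tensorflow_datasets.core.utils import py_utils

nested = {"a": {"b": 1}, "c": 2}
doubled = py_utils.map_nested(lambda x: x * 2, nested)
# doubled == {"a": {"b": 2}, "c": 4}
```
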
@@ -163,7 +163,7 @@ def as_proto_cls(proto_cls):
"""Simulate proto inheritance.
By default, protobuf do not support direct inheritance, so this decorator
-simulate inheriance to the class to which it is applied.
+simulates inheritance to the class to which it is applied.
Example:
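
Because generated protobuf classes cannot be subclassed directly, a decorator like this typically wraps a proto instance and delegates attribute access to it; a sketch under that assumption (not the library's actual implementation):

```python
def as_proto_cls_sketch(proto_cls):
  """Make instances of `cls` behave like `proto_cls` by delegation."""
  def decorator(cls):
    class ProtoWrapper(cls):
      def __init__(self, *args, **kwargs):
        # Bypass any custom attribute machinery while storing the proto.
        object.__setattr__(self, "_proto", proto_cls(*args, **kwargs))

      def __getattr__(self, name):
        # Unknown attributes fall through to the underlying proto.
        return getattr(object.__getattribute__(self, "_proto"), name)

    ProtoWrapper.__name__ = cls.__name__
    return ProtoWrapper
  return decorator
```
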
8 changes: 4 additions & 4 deletions tensorflow_datasets/core/utils/tf_utils.py
@@ -51,9 +51,9 @@ class TFGraphRunner(object):
Limitations:
* Currently the graph runner only support function with single input
and output. Support for more complex function could be added and should be
-relativelly straighforward.
-* A different graph is created for each input shape, so isn't really addapted
-for dynamic batch size.
+relatively straightforward.
+* A different graph is created for each input shape, so it isn't really
+adapted for dynamic batch size.
Usage:
graph_runner = TFGraphRunner()
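
The usage snippet is truncated here; presumably it continues along these lines (the `run` signature is an assumption, not taken from the source):

```python
import tensorflow as tf
from tensorflow_datasets.core.utils import tf_utils

graph_runner = tf_utils.TFGraphRunner()
# Assumed: run(fct, input_) takes one input and returns one output.
output = graph_runner.run(tf.nn.softmax, [1.0, 2.0, 3.0])
```
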
@@ -136,7 +136,7 @@ def assert_shape_match(shape1, shape2):
Args:
shape1 (tuple): Static shape
-shape2 (tuple): Dyncamic shape (can contains None)
+shape2 (tuple): Dynamic shape (can contain None)
"""
shape1 = tf.TensorShape(shape1)
shape2 = tf.TensorShape(shape2)
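
A usage sketch of the shape check (behavior with `None` follows `tf.TensorShape` compatibility rules):

```python
from tensorflow_datasets.core.utils import tf_utils

tf_utils.assert_shape_match((28, 28, 3), (None, 28, 3))  # compatible, passes
tf_utils.assert_shape_match((28, 28, 3), (None, 28, 1))  # raises ValueError
```
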
2 changes: 1 addition & 1 deletion tensorflow_datasets/scripts/download_and_prepare.py
@@ -16,7 +16,7 @@
r"""Script to call download_and_prepare on DatasetBuilder.
This is just a standalone script to generate a specific dataset. This can be
-used if you don't want to separate download/generation of dataset from acual
+used if you don't want to separate download/generation of dataset from actual
usage.
By default, the dataset is generated in the default location
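
The same effect can be had from Python, assuming the public API of this era (`"mnist"` is an illustrative dataset name):

```python
import tensorflow_datasets as tfds

builder = tfds.builder("mnist")
builder.download_and_prepare()  # generates into the default data_dir
```
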
@@ -1,5 +1,5 @@
Files in this directory have been created manually.
-They exibit the following properties seen on original dataset:
+They exhibit the following properties seen on original dataset:

- a: notch
1. none
