Initial renames for TF 2.0 compatibility

PiperOrigin-RevId: 230017293
Ryan Sepassi authored and Copybara-Service committed Jan 19, 2019
1 parent 54f5b66 commit 107d343
Showing 56 changed files with 283 additions and 258 deletions.
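The hunks below apply one consistent set of renames. A minimal sketch of the pattern, assembled from the changes in this commit (the path is a placeholder used only for illustration):

```python
import os

import tensorflow as tf
from absl import logging

data_dir = "/tmp/tfds_example"  # placeholder path, illustration only

# Filesystem access: tf.gfile.* becomes tf.io.gfile.* (lowercase method names).
if not tf.io.gfile.exists(data_dir):           # was: tf.gfile.Exists
  tf.io.gfile.makedirs(data_dir)               # was: tf.gfile.MakeDirs
for fname in tf.io.gfile.listdir(data_dir):    # was: tf.gfile.ListDirectory
  logging.info("found %s", fname)              # was: tf.logging.info

# File handles: tf.gfile.Open becomes tf.io.gfile.GFile.
with tf.io.gfile.GFile(os.path.join(data_dir, "note.txt"), "w") as f:
  f.write("hello")
```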
7 changes: 0 additions & 7 deletions .travis.yml
@@ -10,14 +10,7 @@ python:
- "2.7"
- "3.6"
env:
global:
- TF_LATEST="1.10.*"
matrix:
# We test against recent versions of TensorFlow and tf-nightly.
# If updating, also update TF_LATEST above
# TODO(rsepassi): Enable recent versions on release
# - TF_VERSION="1.9.*"
# - TF_VERSION="1.10.*"
- TF_VERSION="tf-nightly"
install:
- ./oss_scripts/oss_pip_install.sh
11 changes: 6 additions & 5 deletions docs/add_dataset.md
@@ -12,7 +12,7 @@ then this document is for you.
* [Manual download / extraction](#manual-download-extraction)
* [Specifying how the data should be split](#specifying-how-the-data-should-be-split)
* [Reading downloaded data and generating serialized dataset](#reading-downloaded-data-and-generating-serialized-dataset)
* [File access and tf.gfile](#file-access-and-tfgfile)
* [File access and tf.io.gfile](#file-access-and-tfgfile)
* [Dataset configuration](#dataset-configuration)
* [Create your own FeatureConnector](#create-your-own-featureconnector)
* [Adding the dataset to `tensorflow/datasets`](#adding-the-dataset-to-tensorflowdatasets)
@@ -230,11 +230,12 @@ jpeg content into the TFRecord file automatically.

If you've implemented the test harness, your builder test should now pass.

### File access and `tf.gfile`
### File access and `tf.io.gfile`

In order to support Cloud storage systems, all file access must use `tf.gfile`
or other TensorFlow file APIs (for example, `tf.python_io`). Python built-ins
for file operations (e.g. `open`, `os.rename`, `gzip`, etc.) must be avoided.
In order to support Cloud storage systems, all file access must use
`tf.io.gfile` or other TensorFlow file APIs (for example, `tf.python_io`).
Python built-ins for file operations (e.g. `open`, `os.rename`, `gzip`, etc.)
must be avoided.
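
As a rough sketch of the intent (the path below is a placeholder), this is the same write/read pattern the `dataset_info.py` hunks later in this commit use:

```python
import os

import tensorflow as tf

path = os.path.join("/tmp", "example.txt")  # placeholder, illustration only

# tf.io.gfile.GFile works for local paths as well as Cloud storage paths
# (e.g. gs://...), unlike the Python built-in open().
with tf.io.gfile.GFile(path, "w") as f:
  f.write("hello")

with tf.io.gfile.GFile(path, "r") as f:
  print(f.read())
```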

## Dataset configuration

15 changes: 11 additions & 4 deletions oss_scripts/oss_pip_install.sh
@@ -6,13 +6,20 @@ set -e # fail and exit on any command erroring
: "${TF_VERSION:?}"

# Install ffmpeg for Audio FeatureConnector tests
sudo add-apt-repository -y ppa:mc3man/trusty-media
sudo apt-get -qq update
sudo apt-get install -y ffmpeg
FFMPEG=$(command -v ffmpeg)
if [[ -z "$FFMPEG" ]]
then
sudo add-apt-repository -y ppa:mc3man/trusty-media
sudo apt-get -qq update
sudo apt-get install -y ffmpeg
fi

if [[ "$TF_VERSION" == "tf-nightly" ]]
then
pip install tf-nightly;
pip install -q tf-nightly;
elif [[ "$TF_VERSION" == "tf2" ]]
then
pip install -q "tf-nightly-2.0-preview"
else
pip install -q "tensorflow==$TF_VERSION"
fi
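
Not part of this commit, but a quick sanity check one might run after this script, assuming the install succeeded:

```python
import tensorflow as tf

# Confirm which build was installed (tf-nightly, the 2.0 preview, or a pinned 1.x).
print("TensorFlow version:", tf.__version__)

# The tf.io.gfile module relied on throughout this commit should be importable;
# getattr guards against very old 1.x builds that predate tf.io.
print("tf.io.gfile available:", hasattr(getattr(tf, "io", None), "gfile"))
```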
4 changes: 2 additions & 2 deletions setup.py
@@ -32,6 +32,7 @@
DOCLINES = __doc__.split('\n')

REQUIRED_PKGS = [
'absl-py',
'future',
'promise',
'protobuf>=3.6.1',
@@ -47,7 +48,6 @@
]

TESTS_REQUIRE = [
'absl-py',
'jupyter',
'pytest',
]
@@ -77,7 +77,7 @@
]

DATASET_EXTRAS = {
'librispeech': ['pydub'],
'librispeech': ['pydub'], # and ffmpeg installed
}

all_dataset_extras = []
4 changes: 2 additions & 2 deletions tensorflow_datasets/audio/librispeech.py
@@ -234,7 +234,7 @@ def _generate_examples(self, dirs):
def _walk_librispeech_dir(directory):
"""Walk a Librispeech directory and yield examples."""
directory = os.path.join(directory, "LibriSpeech")
for path, _, files in tf.gfile.Walk(directory):
for path, _, files in tf.io.gfile.walk(directory):
if not files:
continue

@@ -244,7 +244,7 @@ def _walk_librispeech_dir(directory):
assert len(transcript_file) == 1
transcript_file, = transcript_file
transcripts = {}
with tf.gfile.Open(os.path.join(path, transcript_file)) as f:
with tf.io.gfile.GFile(os.path.join(path, transcript_file)) as f:
for line in f:
line = line.strip()
key, transcript = line.split(" ", 1)
39 changes: 20 additions & 19 deletions tensorflow_datasets/core/dataset_builder.py
@@ -23,6 +23,7 @@
import os
import sys

from absl import logging
import six
import tensorflow as tf

Expand Down Expand Up @@ -108,7 +109,7 @@ class DatasetBuilder(object):
# And then the rest of your input pipeline
train_dataset = train_dataset.repeat().shuffle(1024).batch(128)
train_dataset = train_dataset.prefetch(2)
features = train_dataset.make_one_shot_iterator().get_next()
features = tf.compat.v1.data.make_one_shot_iterator(train_dataset).get_next()
image, label = features['image'], features['label']
```
"""
@@ -160,7 +161,7 @@ def __init__(self, data_dir=None, config=None):
self._data_dir = self._build_data_dir()

# Use data version (restored from disk)
if tf.gfile.Exists(self._data_dir):
if tf.io.gfile.exists(self._data_dir):
# Overwrite the current dataset info with the restored data version.
self.info.read_from_directory(self._data_dir)

@@ -197,10 +198,10 @@ def download_and_prepare(self, download_dir=None, download_config=None):
"""

download_config = download_config or download.DownloadConfig()
data_exists = tf.gfile.Exists(self._data_dir)
data_exists = tf.io.gfile.exists(self._data_dir)
if (data_exists and
download_config.download_mode == REUSE_DATASET_IF_EXISTS):
tf.logging.info("Reusing dataset %s (%s)", self.name, self._data_dir)
logging.info("Reusing dataset %s (%s)", self.name, self._data_dir)
return

dl_manager = self._make_download_manager(
@@ -216,7 +217,7 @@ def download_and_prepare(self, download_dir=None, download_config=None):
"the same version {} already exists. If the dataset has changed, "
"please update the version number.".format(self.name, self._data_dir,
self.info.version))
tf.logging.info("Generating dataset %s (%s)", self.name, self._data_dir)
logging.info("Generating dataset %s (%s)", self.name, self._data_dir)
self._log_download_bytes()

# Create a tmp dir and rename to self._data_dir on successful exit.
@@ -237,8 +238,8 @@ def download_and_prepare(self, download_dir=None, download_config=None):
if download_config.compute_stats:
already_has_stats = bool(self.info.num_examples)
if already_has_stats:
tf.logging.info("Skipping computing stats because they are already "
"populated.")
logging.info("Skipping computing stats because they are already "
"populated.")
else:
self.info.compute_dynamic_properties()

@@ -286,7 +287,7 @@ def as_dataset(self,
If `batch_size` is -1, will return feature dictionaries containing
the entire dataset in `tf.Tensor`s instead of a `tf.data.Dataset`.
"""
if not tf.gfile.Exists(self._data_dir):
if not tf.io.gfile.exists(self._data_dir):
raise AssertionError(
("Dataset %s: could not find data in %s. Please make sure to call "
"dataset_builder.download_and_prepare(), or pass download=True to "
@@ -365,11 +366,11 @@ def _build_data_dir(self):

def _other_versions_on_disk():
"""Returns previous versions on disk."""
if not tf.gfile.Exists(builder_data_dir):
if not tf.io.gfile.exists(builder_data_dir):
return []

version_dirnames = []
for dir_name in tf.gfile.ListDirectory(builder_data_dir):
for dir_name in tf.io.gfile.listdir(builder_data_dir):
try:
version_dirnames.append((utils.Version(dir_name), dir_name))
except ValueError: # Invalid version (ex: incomplete data dir)
@@ -382,14 +383,15 @@ def _other_versions_on_disk():
if version_dirs:
other_version = version_dirs[0][0]
if other_version != self._version:
tf.logging.warn(
warn_msg = (
"Found a different version {other_version} of dataset {name} in "
"data_dir {data_dir}. Using currently defined version "
"{cur_version}.".format(
other_version=str(other_version),
name=self.name,
data_dir=self._data_dir_root,
cur_version=str(self._version)))
logging.warn(warn_msg)

return version_data_dir

@@ -479,8 +481,8 @@ def _create_builder_config(self, builder_config):
"""Create and validate BuilderConfig object."""
if builder_config is None and self.BUILDER_CONFIGS:
builder_config = self.BUILDER_CONFIGS[0]
tf.logging.info("No config specified, defaulting to first: %s/%s",
self.name, builder_config.name)
logging.info("No config specified, defaulting to first: %s/%s", self.name,
builder_config.name)
if not builder_config:
return
if isinstance(builder_config, six.string_types):
@@ -494,7 +496,7 @@ def _create_builder_config(self, builder_config):
raise ValueError("BuilderConfig must have a name, got %s" % name)
is_custom = name not in self.builder_configs
if is_custom:
tf.logging.warning("Using custom data configuration %s", name)
logging.warning("Using custom data configuration %s", name)
else:
if builder_config is not self.builder_configs[name]:
raise ValueError(
@@ -639,10 +641,9 @@ def _generate_examples(self, **kwargs):

def _download_and_prepare(self, dl_manager, max_examples_per_split=None):
if max_examples_per_split is not None:
tf.logging.warn("Splits capped at %s examples max.",
max_examples_per_split)
if not tf.gfile.Exists(self._data_dir):
tf.gfile.MakeDirs(self._data_dir)
logging.warn("Splits capped at %s examples max.", max_examples_per_split)
if not tf.io.gfile.exists(self._data_dir):
tf.io.gfile.makedirs(self._data_dir)

# Generate the filenames and write the example on disk
def make_generator_fn(**kwargs):
@@ -671,7 +672,7 @@ def generator_fn():
"._split_generator()."
)

tf.logging.info("Generating split %s", s.name)
logging.info("Generating split %s", s.name)
split_dict.add(s)

output_files = self._build_split_filenames(
21 changes: 11 additions & 10 deletions tensorflow_datasets/core/dataset_builder_test.py
@@ -21,6 +21,7 @@

import os

from absl import logging
from absl.testing import parameterized
import tensorflow as tf
from tensorflow_datasets.core import dataset_builder
@@ -103,7 +104,7 @@ def test_shared_generator(self):

written_filepaths = [
os.path.join(builder._data_dir, fname)
for fname in tf.gfile.ListDirectory(builder._data_dir)
for fname in tf.io.gfile.listdir(builder._data_dir)
]
# The data_dir contains the cached directory by default
expected_filepaths = builder._build_split_filenames(
@@ -154,10 +155,10 @@ def test_build_data_dir(self):
version_dir = os.path.join(builder_data_dir, "1.0.0")

# The dataset folder contains multiple other versions
tf.gfile.MakeDirs(os.path.join(builder_data_dir, "14.0.0.invalid"))
tf.gfile.MakeDirs(os.path.join(builder_data_dir, "10.0.0"))
tf.gfile.MakeDirs(os.path.join(builder_data_dir, "9.0.0"))
tf.gfile.MakeDirs(os.path.join(builder_data_dir, "0.1.0"))
tf.io.gfile.makedirs(os.path.join(builder_data_dir, "14.0.0.invalid"))
tf.io.gfile.makedirs(os.path.join(builder_data_dir, "10.0.0"))
tf.io.gfile.makedirs(os.path.join(builder_data_dir, "9.0.0"))
tf.io.gfile.makedirs(os.path.join(builder_data_dir, "0.1.0"))

# The builder's version dir is chosen
self.assertEqual(builder._build_data_dir(), version_dir)
@@ -170,7 +171,7 @@ def test_get_data_dir_with_config(self):
builder_data_dir = os.path.join(tmp_dir, builder.name, config_name)
version_data_dir = os.path.join(builder_data_dir, "0.0.1")

tf.gfile.MakeDirs(version_data_dir)
tf.io.gfile.makedirs(version_data_dir)
self.assertEqual(builder._build_data_dir(), version_data_dir)

def test_config_construction(self):
@@ -201,11 +202,11 @@ def test_with_configs(self):
data_dir1 = os.path.join(tmp_dir, builder1.name, "plus1", "0.0.1")
data_dir2 = os.path.join(tmp_dir, builder2.name, "plus2", "0.0.2")
# Test that subdirectories were created per config
self.assertTrue(tf.gfile.Exists(data_dir1))
self.assertTrue(tf.gfile.Exists(data_dir2))
self.assertTrue(tf.io.gfile.exists(data_dir1))
self.assertTrue(tf.io.gfile.exists(data_dir2))
# 2 train shards, 1 test shard, plus metadata files
self.assertGreater(len(tf.gfile.ListDirectory(data_dir1)), 3)
self.assertGreater(len(tf.gfile.ListDirectory(data_dir2)), 3)
self.assertGreater(len(tf.io.gfile.listdir(data_dir1)), 3)
self.assertGreater(len(tf.io.gfile.listdir(data_dir2)), 3)

# Test that the config was used and they didn't collide.
splits_list = [splits_lib.Split.TRAIN, splits_lib.Split.TEST]
23 changes: 12 additions & 11 deletions tensorflow_datasets/core/dataset_info.py
@@ -38,6 +38,7 @@
import os
import pprint

from absl import logging
import numpy as np
import tensorflow as tf

@@ -178,8 +179,8 @@ def supervised_keys(self):
def splits(self):
if not self._fully_initialized:
# TODO(epot): Consider raising an error here instead?
tf.logging.info("`splits` hasn't been fully initialized, statistics maybe"
" missing.")
logging.info("`splits` hasn't been fully initialized, statistics maybe"
" missing.")
return self._splits.copy()

@splits.setter
@@ -246,11 +247,10 @@ def _compute_dynamic_properties(self, builder):
except tf.errors.InvalidArgumentError:
# This means there is no such split, even though it was specified in the
# info, the least we can do is to log this.
tf.logging.error((
"%s's info() property specifies split %s, but it "
"doesn't seem to have been generated. Please ensure "
"that the data was downloaded for this split and re-run "
"download_and_prepare."), self.name, split_name)
logging.error(("%s's info() property specifies split %s, but it "
"doesn't seem to have been generated. Please ensure "
"that the data was downloaded for this split and re-run "
"download_and_prepare."), self.name, split_name)
raise

# Set splits to trigger proto update in setter
@@ -275,7 +275,8 @@ def write_to_directory(self, dataset_info_dir):
if self.features:
self.features.save_metadata(dataset_info_dir)

with tf.gfile.Open(self._dataset_info_filename(dataset_info_dir), "w") as f:
with tf.io.gfile.GFile(self._dataset_info_filename(dataset_info_dir),
"w") as f:
f.write(self.as_json)

def read_from_directory(self, dataset_info_dir, from_packaged_data=False):
@@ -302,10 +303,10 @@ def read_from_directory(self, dataset_info_dir, from_packaged_data=False):
json_filename = self._dataset_info_filename(dataset_info_dir)

# Load the metadata from disk
if not tf.gfile.Exists(json_filename):
if not tf.io.gfile.exists(json_filename):
return False

with tf.gfile.Open(json_filename, "r") as f:
with tf.io.gfile.GFile(json_filename, "r") as f:
dataset_info_json_str = f.read()

# Parse it back into a proto.
@@ -486,7 +487,7 @@ def get_dataset_feature_statistics(builder, split):
# proto has no support for it.
maybe_feature_shape = output_shapes_dict[feature_name]
if not isinstance(maybe_feature_shape, tf.TensorShape):
tf.logging.error(
logging.error(
"Statistics generation doesn't work for nested structures yet")
continue

4 changes: 2 additions & 2 deletions tensorflow_datasets/core/dataset_info_test.py
@@ -113,15 +113,15 @@ def test_writing(self):
info.read_from_directory(_INFO_DIR)

# Read the json file into a string.
with tf.gfile.Open(info._dataset_info_filename(_INFO_DIR)) as f:
with tf.io.gfile.GFile(info._dataset_info_filename(_INFO_DIR)) as f:
existing_json = json.load(f)

# Now write to a temp directory.
with test_utils.tmp_dir(self.get_temp_dir()) as tmp_dir:
info.write_to_directory(tmp_dir)

# Read the newly written json file into a string.
with tf.gfile.Open(info._dataset_info_filename(tmp_dir)) as f:
with tf.io.gfile.GFile(info._dataset_info_filename(tmp_dir)) as f:
new_json = json.load(f)

# Assert what was read and then written and read again is the same.