Update TF documentation (ray-project#5918)

jbn · Oct 16, 2019 · d52a498 · d52a498
1 parent 9f23620
commit d52a498
Show file tree

Hide file tree

Showing 4 changed files with 59 additions and 12 deletions.
diff --git a/ci/jenkins_tests/run_multi_node_tests.sh b/ci/jenkins_tests/run_multi_node_tests.sh
@@ -26,6 +26,12 @@ $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE}
 $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
     python /ray/doc/examples/plot_hyperparameter.py
 
+$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
+    python /ray/doc/examples/doc_code/torch_example.py
+
+$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
+    python /ray/doc/examples/doc_code/tf_example.py
+
 ######################## RLLIB TESTS #################################
 
 source $ROOT_DIR/run_rllib_tests.sh
@@ -56,12 +62,6 @@ $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE}
 $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
     python /ray/python/ray/experimental/sgd/examples/tune_example.py --num-replicas=2
 
-$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
-    python /ray/doc/examples/doc_code/torch_example.py
-
-$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
-    python /ray/doc/examples/doc_code/tf_example.py
-
 $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
     python /ray/python/ray/experimental/sgd/examples/tensorflow_train_example.py
 

diff --git a/doc/examples/doc_code/tf_example.py b/doc/examples/doc_code/tf_example.py
@@ -9,11 +9,11 @@
 
 # yapf: disable
 # __tf_model_start__
-import tensorflow as tf
 from tensorflow.keras import layers
 
 
 def create_keras_model():
+    import tensorflow as tf
     model = tf.keras.Sequential()
     # Adds a densely-connected layer with 64 units to the model:
     model.add(layers.Dense(64, activation="relu", input_shape=(32, )))
@@ -23,7 +23,7 @@ def create_keras_model():
     model.add(layers.Dense(10, activation="softmax"))
 
     model.compile(
-        optimizer=tf.train.RMSPropOptimizer(0.01),
+        optimizer=tf.keras.optimizers.RMSprop(0.01),
         loss=tf.keras.losses.categorical_crossentropy,
         metrics=[tf.keras.metrics.categorical_accuracy])
     return model

diff --git a/doc/source/tf_distributed_training.rst b/doc/source/tf_distributed_training.rst
@@ -1,9 +1,9 @@
-TensorFlow Distributed Training API (Experimental)
-==================================================
+TF Distributed Training
+=======================
 
 Ray's ``TFTrainer`` simplifies distributed model training for TensorFlow. The ``TFTrainer`` is a wrapper around ``MultiWorkerMirroredStrategy`` with a Python API that makes it easy to incorporate distributed training into a larger Python application, as opposed to writing custom logic for setting up environments and starting separate processes.
 
-.. important:: This API has only been tested with TensorFlow2.0rc.
+.. important:: This API has only been tested with TensorFlow 2.0rc and is still highly experimental. Please file a bug report if you run into any issues - thanks!
 
 ----------
 

diff --git a/doc/source/using-ray-with-tensorflow.rst b/doc/source/using-ray-with-tensorflow.rst
@@ -1,7 +1,54 @@
 Best Practices: Ray with Tensorflow
 ===================================
 
-This document describes best practices for using Ray with TensorFlow. Feel free to contribute if you think this document is missing anything.
+This document describes best practices for using the Ray core APIs with TensorFlow. Ray also provides higher-level utilities for working with TensorFlow, such as distributed training APIs (`training tensorflow example`_), Tune for hyperparameter search (`Tune tensorflow example`_), and RLlib for reinforcement learning (`RLlib tensorflow example`_).
+
+.. _`training tensorflow example`: tf_distributed_training.html
+.. _`Tune tensorflow example`: https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tf_mnist_example.py
+.. _`RLlib tensorflow example`: rllib-models.html#tensorflow-models
+
+Feel free to contribute if you think this document is missing anything.
+
+
+Common Issues: Pickling
+-----------------------
+
+One common issue with TensorFlow 2.0 is a pickling error like the following:
+
+.. code-block:: text
+
+    File "/home/***/venv/lib/python3.6/site-packages/ray/actor.py", line 322, in remote
+      return self._remote(args=args, kwargs=kwargs)
+    File "/home/***/venv/lib/python3.6/site-packages/ray/actor.py", line 405, in _remote
+      self._modified_class, self._actor_method_names)
+    File "/home/***/venv/lib/python3.6/site-packages/ray/function_manager.py", line 578, in export_actor_class
+      "class": pickle.dumps(Class),
+    File "/home/***/venv/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py", line 1123, in dumps
+      cp.dump(obj)
+    File "/home/***/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py", line 482, in dump
+      return Pickler.dump(self, obj)
+    File "/usr/lib/python3.6/pickle.py", line 409, in dump
+      self.save(obj)
+    File "/usr/lib/python3.6/pickle.py", line 476, in save
+      f(self, obj) # Call unbound method with explicit self
+    File "/usr/lib/python3.6/pickle.py", line 751, in save_tuple
+      save(element)
+    File "/usr/lib/python3.6/pickle.py", line 808, in _batch_appends
+      save(tmp[0])
+    File "/usr/lib/python3.6/pickle.py", line 496, in save
+      rv = reduce(self.proto)
+    TypeError: can't pickle _LazyLoader objects
+
+To resolve this, you should move all instances of ``import tensorflow`` into the Ray actor or function, as follows:
+
+.. code-block:: python
+
+    def create_model():
+        import tensorflow as tf
+        ...
+
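+This issue is caused by side effects of importing TensorFlow at module level: the import sets global state that cannot be pickled (note the ``_LazyLoader`` object named in the error above).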
+
 
 Use Actors for Parallel Models
 ------------------------------