From b59833c3fd91511b33255369016868e4ae6cda2e Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Thu, 24 May 2018 19:12:26 -0700
Subject: [PATCH] Merge changes from github. Revert #18413. Too many internal
 test failures due to the name scope change caused by this change. Revert
 #18192. Cannot use re2::StringPiece internally. Need alternative for set
 call. Will pull and clean this up in a separate change.

PiperOrigin-RevId: 197991247
---
 CONTRIBUTING.md                               |  11 +
 README.md                                     |  39 +-
 RELEASE.md                                    |   7 +-
 SECURITY.md                                   |   2 +-
 configure.py                                  |   3 +
 .../jit/encapsulate_subgraphs_pass.cc         |   2 +-
 tensorflow/compiler/xla/README.md             |   8 +-
 .../xla/service/conditional_simplifier.cc     |   2 +-
 .../compiler/xla/service/copy_insertion.cc    |   2 +-
 .../compiler/xla/service/cpu/ir_function.h    |   4 +-
 .../xla/service/cpu/shape_partition.h         |   2 +-
 .../compiler/xla/service/despecializer.h      |   2 +-
 .../xla/service/gpu/ir_emitter_unnested.h     |   2 +-
 .../compiler/xla/service/hlo_evaluator.cc     |   1 +
 .../xla/service/interpreter/README.md         |   2 +-
 .../compiler/xla/service/layout_assignment.h  |   4 +-
 .../xla/service/reduce_precision_insertion.cc |   2 +-
 .../compiler/xla/service/source_map_util.h    |   2 +-
 tensorflow/compiler/xla/shape_util.h          |   2 +-
 .../compiler/xla/tests/dot_operation_test.cc  |  18 +-
 tensorflow/compiler/xla/tests/tuple_test.cc   |   2 +-
 tensorflow/compiler/xla/xlalogo.png           | Bin 0 -> 46785 bytes
 tensorflow/contrib/autograph/impl/config.py   |   2 +-
 .../autograph/operators/control_flow.py       |   2 +-
 .../python/training/functions/gbdt_batch.py   |   2 +-
 tensorflow/contrib/cmake/CMakeLists.txt       |  29 +-
 tensorflow/contrib/cmake/external/zlib.cmake  |   3 +-
 tensorflow/contrib/cmake/tf_tests.cmake       |   2 +
 .../contrib/data/python/kernel_tests/BUILD    |   4 +
 .../data/python/kernel_tests/resample_test.py | 109 +++-
 tensorflow/contrib/data/python/ops/BUILD      |   2 +
 .../contrib/data/python/ops/resampling.py     | 265 ++++++---
 .../ops/bijectors/cholesky_outer_product.py   |   2 +-
 tensorflow/contrib/eager/README.md            |   2 +-
 tensorflow/contrib/ffmpeg/ffmpeg_lib.h        |   2 +-
 .../python/ops/critical_section_ops.py        |   2 +-
 .../estimator/python/gan_estimator_impl.py    |   7 +-
 .../estimator/python/gan_estimator_test.py    |  11 +
 .../gan/python/estimator/python/head_impl.py  |  45 +-
 .../gan/python/estimator/python/head_test.py  |   7 +-
 .../features/python/conditioning_utils.py     |   2 +-
 tensorflow/contrib/graph_editor/transform.py  |   2 +-
 .../hvx_ops_support_checker_main.cc           |   2 +-
 tensorflow/contrib/image/__init__.py          |   2 +-
 tensorflow/contrib/kfac/examples/convnet.py   |   2 +-
 .../contrib/kfac/python/ops/optimizer.py      |   6 +-
 .../contrib/kfac/python/ops/placement.py      |   2 +-
 .../contrib/layers/python/layers/layers.py    | 142 ++++-
 .../layers/python/layers/layers_test.py       |  15 +-
 .../learn/utils/saved_model_export_utils.py   |   3 +-
 tensorflow/contrib/lite/BUILD                 |   2 -
 tensorflow/contrib/lite/Makefile              |  19 +-
 .../contrib/lite/examples/minimal/minimal.cc  |  71 +++
 tensorflow/contrib/lite/g3doc/rpi.md          |   2 +-
 .../internal/optimized/optimized_ops.h        |   2 +-
 .../internal/reference/reference_ops.h        |   4 +-
 tensorflow/contrib/lite/schema/schema.fbs     |   2 +-
 tensorflow/contrib/lite/schema/schema_v0.fbs  |   2 +-
 tensorflow/contrib/lite/schema/schema_v1.fbs  |   2 +-
 tensorflow/contrib/lite/schema/schema_v2.fbs  |   2 +-
 tensorflow/contrib/lite/schema/schema_v3.fbs  |   4 +-
 .../contrib/lite/testing/generate_examples.py |   4 +-
 .../contrib/lite/testing/tflite_driver.cc     |   4 +-
 .../lite/toco/g3doc/cmdline_examples.md       |   4 +-
 .../contrib/lite/toco/tflite/operator.h       |   4 +-
 tensorflow/contrib/lite/toco/toco_flags.proto |   2 +-
 .../elastic_average_optimizer_test.py         |   2 +-
 .../training/model_average_optimizer_test.py  |   4 +-
 .../contrib/signal/python/ops/window_ops.py   |   4 +-
 .../python/slim/data/tfexample_decoder.py     |   2 +-
 .../contrib/slim/python/slim/learning.py      |   2 +-
 .../tensorboard/db/summary_db_writer.cc       |  22 +-
 .../tensorboard/db/summary_db_writer_test.cc  |  50 ++
 tensorflow/contrib/tensorrt/BUILD             |  55 +-
 .../contrib/tensorrt/convert/convert_graph.cc | 123 +++--
 .../contrib/tensorrt/convert/convert_graph.h  |  10 +
 .../contrib/tensorrt/convert/convert_nodes.cc | 501 ++++++++----------
 .../contrib/tensorrt/convert/convert_nodes.h  |  14 +-
 .../tensorrt/convert/trt_optimization_pass.cc | 246 +++++++++
 .../tensorrt/convert/trt_optimization_pass.h  |  73 +++
 .../tensorrt/custom_plugin_examples/BUILD     | 118 +++++
 .../custom_plugin_examples/__init__.py        |  24 +
 .../tensorrt/custom_plugin_examples/inc_op.py |  32 ++
 .../inc_op_kernel.cu.cc                       |  84 +++
 .../custom_plugin_examples/inc_op_kernel.h    |  35 ++
 .../custom_plugin_examples/inc_op_plugin.cc   |  86 +++
 .../custom_plugin_examples/inc_op_plugin.h    | 102 ++++
 .../custom_plugin_examples/ops/inc_op.cc      |  36 ++
 .../custom_plugin_examples/plugin_test.py     |  95 ++++
 .../contrib/tensorrt/kernels/trt_engine_op.cc |  54 +-
 .../contrib/tensorrt/kernels/trt_engine_op.h  |  11 +-
 tensorflow/contrib/tensorrt/log/trt_logger.h  |   2 +-
 .../contrib/tensorrt/plugin/trt_plugin.cc     | 106 ++++
 .../contrib/tensorrt/plugin/trt_plugin.h      |  74 +++
 .../tensorrt/plugin/trt_plugin_factory.cc     |  78 +++
 .../tensorrt/plugin/trt_plugin_factory.h      | 102 ++++
 .../plugin/trt_plugin_factory_test.cc         | 125 +++++
 .../tensorrt/plugin/trt_plugin_utils.cc       |  42 ++
 .../tensorrt/plugin/trt_plugin_utils.h        |  46 ++
 .../tensorrt/resources/trt_allocator.cc       |  62 +++
 .../tensorrt/resources/trt_allocator.h        |  68 +++
 .../tensorrt/resources/trt_resources.h        |  44 +-
 .../contrib/tensorrt/segment/segment.cc       | 379 +++++++++++--
 tensorflow/contrib/tensorrt/segment/segment.h |  18 +-
 .../contrib/tensorrt/segment/segment_test.cc  |  16 +-
 .../contrib/tensorrt/shape_fn/trt_shfn.cc     |   4 +-
 .../contrib/tensorrt/test/test_tftrt.py       |  64 ++-
 .../tensorrt/test/tf_trt_integration_test.py  |  19 +-
 .../contrib/tpu/python/tpu/tpu_context.py     |   2 +-
 tensorflow/contrib/verbs/README.md            |   2 +-
 tensorflow/core/BUILD                         |   7 +-
 .../base_api/api_def_RegexFullMatch.pbtxt     |  30 ++
 .../python_api/api_def_RegexFullMatch.pbtxt   |   4 +
 tensorflow/core/common_runtime/broadcaster.cc |   4 +-
 .../core/common_runtime/buf_rendezvous.h      |   2 +-
 .../core/common_runtime/ring_reducer.cc       |   2 +-
 .../common_runtime/scoped_allocator_mgr.cc    |   2 +-
 tensorflow/core/debug/debug_io_utils.cc       |   2 +-
 .../rpc/grpc_worker_cache.cc                  |   2 +-
 tensorflow/core/example/example.proto         |   2 +-
 .../example_parser_configuration.proto        |   1 +
 tensorflow/core/example/feature.proto         |   2 +-
 .../framework/allocation_description.proto    |   1 +
 tensorflow/core/framework/api_def.proto       |   1 +
 tensorflow/core/framework/attr_value.proto    |   2 +-
 tensorflow/core/framework/cost_graph.proto    |   2 +-
 .../core/framework/device_attributes.proto    |   1 +
 tensorflow/core/framework/function.proto      |   2 +-
 tensorflow/core/framework/graph.proto         |   2 +-
 .../core/framework/graph_transfer_info.proto  |   2 +-
 tensorflow/core/framework/iterator.proto      |   1 +
 tensorflow/core/framework/kernel_def.proto    |   2 +-
 tensorflow/core/framework/log_memory.proto    |   2 +-
 tensorflow/core/framework/node_def.proto      |   2 +-
 tensorflow/core/framework/op_def.proto        |   2 +-
 tensorflow/core/framework/op_gen_lib.h        |   4 +-
 tensorflow/core/framework/op_kernel.h         |   2 +-
 tensorflow/core/framework/reader_base.proto   |   1 +
 .../remote_fused_graph_execute_info.proto     |   2 +-
 .../core/framework/resource_handle.proto      |   1 +
 tensorflow/core/framework/step_stats.proto    |   2 +-
 tensorflow/core/framework/summary.proto       |   2 +-
 tensorflow/core/framework/tensor.proto        |   2 +-
 .../core/framework/tensor_description.proto   |   2 +-
 tensorflow/core/framework/tensor_shape.proto  |   1 +
 tensorflow/core/framework/tensor_slice.proto  |   1 +
 tensorflow/core/framework/types.proto         |   1 +
 tensorflow/core/framework/variable.proto      |   1 +
 tensorflow/core/framework/versions.proto      |   1 +
 tensorflow/core/graph/mkl_layout_pass_test.cc |  27 +
 tensorflow/core/graph/while_context.h         |   2 +-
 .../core/grappler/costs/graph_properties.cc   |   2 +-
 .../core/grappler/costs/virtual_scheduler.h   |   2 +-
 .../grappler/optimizers/layout_optimizer.cc   |   2 +-
 tensorflow/core/kernels/BUILD                 |   8 +
 .../core/kernels/batch_matmul_op_impl.h       | 106 +++-
 .../core/kernels/batch_matmul_op_real.cc      |   4 +
 .../adaptive_shared_batch_scheduler.h         |   2 +-
 tensorflow/core/kernels/conv_grad_ops_3d.cc   |   4 +-
 tensorflow/core/kernels/conv_ops_gpu_3.cu.cc  |   2 +-
 tensorflow/core/kernels/nth_element_op.cc     |   2 +-
 .../core/kernels/regex_full_match_op.cc       |  59 +++
 tensorflow/core/kernels/roll_op.cc            |   2 +-
 .../core/kernels/segment_reduction_ops.cc     |   4 +-
 .../core/kernels/segment_reduction_ops.h      |   2 +-
 tensorflow/core/lib/core/error_codes.proto    |   1 +
 tensorflow/core/ops/image_ops.cc              |  19 +
 tensorflow/core/ops/image_ops_test.cc         |  19 +
 tensorflow/core/ops/math_ops.cc               |   2 +-
 tensorflow/core/ops/nn_ops.cc                 |   3 +-
 tensorflow/core/ops/random_ops.cc             |  10 +-
 tensorflow/core/ops/string_ops.cc             |  11 +
 .../core/platform/cloud/gcs_file_system.cc    |   2 +-
 tensorflow/core/platform/cloud/gcs_throttle.h |   2 +-
 .../core/profiler/g3doc/command_line.md       |   2 +-
 tensorflow/core/protobuf/cluster.proto        |   1 +
 tensorflow/core/protobuf/config.proto         |   2 +-
 tensorflow/core/protobuf/control_flow.proto   |   1 +
 .../core/protobuf/critical_section.proto      |   1 +
 tensorflow/core/protobuf/debug.proto          |   1 +
 .../core/protobuf/device_properties.proto     |   1 +
 tensorflow/core/protobuf/master.proto         |   2 +-
 tensorflow/core/protobuf/master_service.proto |   2 +-
 tensorflow/core/protobuf/meta_graph.proto     |   2 +-
 tensorflow/core/protobuf/named_tensor.proto   |   2 +-
 tensorflow/core/protobuf/queue_runner.proto   |   2 +-
 .../core/protobuf/rewriter_config.proto       |   3 +-
 tensorflow/core/protobuf/saved_model.proto    |   2 +-
 tensorflow/core/protobuf/saver.proto          |   1 +
 tensorflow/core/protobuf/tensor_bundle.proto  |   2 +-
 .../core/protobuf/tensorflow_server.proto     |   2 +-
 tensorflow/core/protobuf/worker.proto         |   2 +-
 tensorflow/core/protobuf/worker_service.proto |   2 +-
 tensorflow/core/public/version.h              |   2 +-
 tensorflow/core/util/cuda_device_functions.h  |   2 +-
 tensorflow/core/util/mkl_util.h               |   2 +-
 tensorflow/core/util/tensor_format.h          |   2 +-
 .../api_guides/python/reading_data.md         |   2 +-
 tensorflow/docs_src/community/benchmarks.md   |  18 +-
 tensorflow/docs_src/community/swift.md        |   2 +-
 tensorflow/docs_src/deploy/s3.md              |   2 +-
 tensorflow/docs_src/extend/adding_an_op.md    |  63 ++-
 tensorflow/docs_src/extend/architecture.md    |  14 +-
 tensorflow/docs_src/install/install_c.md      |   2 +-
 tensorflow/docs_src/install/install_go.md     |   2 +-
 tensorflow/docs_src/install/install_java.md   |  22 +-
 tensorflow/docs_src/install/install_linux.md  |  18 +-
 tensorflow/docs_src/install/install_mac.md    |  10 +-
 .../docs_src/install/install_sources.md       |   4 +-
 tensorflow/docs_src/mobile/mobile_intro.md    |   2 +-
 tensorflow/docs_src/mobile/tflite/index.md    |   2 +-
 tensorflow/docs_src/programmers_guide/faq.md  |  17 +-
 .../docs_src/programmers_guide/tensors.md     |   6 +-
 .../docs_src/programmers_guide/variables.md   |   2 +-
 tensorflow/docs_src/tutorials/layers.md       |   1 -
 .../examples/learn/text_classification_cnn.py |   2 +-
 tensorflow/go/op/wrappers.go                  |   2 +-
 tensorflow/python/data/util/nest.py           |   2 +-
 tensorflow/python/debug/cli/curses_ui.py      |  36 +-
 tensorflow/python/estimator/estimator.py      |   2 +-
 .../inputs/queues/feeding_functions.py        |   2 +-
 tensorflow/python/estimator/keras.py          |   2 +-
 tensorflow/python/estimator/training.py       |   2 +-
 .../python/feature_column/feature_column.py   |   6 +-
 .../python/framework/fast_tensor_util.pyx     |  12 +
 tensorflow/python/framework/ops.py            |  26 +-
 tensorflow/python/framework/tensor_util.py    |  12 +-
 tensorflow/python/framework/test_util.py      |   2 +-
 tensorflow/python/keras/utils/__init__.py     |   1 +
 tensorflow/python/kernel_tests/BUILD          |  12 +
 tensorflow/python/kernel_tests/conv1d_test.py |   4 +-
 .../kernel_tests/conv3d_transpose_test.py     |  17 +
 .../kernel_tests/distributions/util_test.py   |   2 +-
 .../python/kernel_tests/manip_ops_test.py     |   2 +-
 .../kernel_tests/regex_full_match_op_test.py  |  54 ++
 .../segment_reduction_ops_test.py             |  10 +-
 tensorflow/python/layers/base.py              |  14 +-
 tensorflow/python/layers/base_test.py         |  16 +
 tensorflow/python/ops/math_ops.py             |   2 +-
 tensorflow/python/ops/string_ops.py           |   2 +
 .../python/profiler/model_analyzer_test.py    |   7 +-
 tensorflow/python/saved_model/builder_impl.py |   5 +-
 tensorflow/python/training/distribute.py      |   2 +-
 tensorflow/python/training/saver.py           |   2 +-
 tensorflow/python/util/tf_inspect.py          |   2 +-
 tensorflow/python/util/util.cc                |   2 +-
 tensorflow/python/util/util.h                 |   2 +-
 tensorflow/stream_executor/blas.h             |  14 +
 tensorflow/stream_executor/cuda/cuda_blas.cc  | 106 +++-
 tensorflow/stream_executor/cuda/cuda_blas.h   |   6 +-
 tensorflow/stream_executor/stream.cc          |  34 ++
 tensorflow/stream_executor/stream.h           |  14 +
 tensorflow/tensorflow.bzl                     |   4 +-
 tensorflow/tools/api/generator/BUILD          |   1 +
 tensorflow/tools/api/golden/tensorflow.pbtxt  |   4 +
 .../tools/api/golden/tensorflow.strings.pbtxt |   7 +
 .../ci_build/install/install_pip_packages.sh  |  11 +-
 tensorflow/tools/docker/Dockerfile.devel      |   2 +-
 .../tools/docker/Dockerfile.devel-cpu-mkl     |   4 +-
 tensorflow/tools/docker/Dockerfile.devel-gpu  |   2 +-
 tensorflow/tools/graph_transforms/README.md   |   2 +-
 .../tools/pip_package/build_pip_package.sh    |   2 +-
 tensorflow/tools/pip_package/setup.py         |   4 +-
 third_party/examples/eager/spinn/README.md    |   2 +-
 third_party/gpus/cuda_configure.bzl           |   2 +-
 third_party/mkl/BUILD                         |   2 +
 266 files changed, 4146 insertions(+), 940 deletions(-)
 create mode 100644 tensorflow/compiler/xla/xlalogo.png
 create mode 100644 tensorflow/contrib/lite/examples/minimal/minimal.cc
 create mode 100644 tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc
 create mode 100644 tensorflow/contrib/tensorrt/convert/trt_optimization_pass.h
 create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/BUILD
 create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/__init__.py
 create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op.py
 create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.cu.cc
 create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.h
 create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.cc
 create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.h
 create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/ops/inc_op.cc
 create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/plugin_test.py
 create mode 100644 tensorflow/contrib/tensorrt/plugin/trt_plugin.cc
 create mode 100644 tensorflow/contrib/tensorrt/plugin/trt_plugin.h
 create mode 100644 tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.cc
 create mode 100644 tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h
 create mode 100644 tensorflow/contrib/tensorrt/plugin/trt_plugin_factory_test.cc
 create mode 100644 tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.cc
 create mode 100644 tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.h
 create mode 100644 tensorflow/contrib/tensorrt/resources/trt_allocator.cc
 create mode 100644 tensorflow/contrib/tensorrt/resources/trt_allocator.h
 create mode 100644 tensorflow/core/api_def/base_api/api_def_RegexFullMatch.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_RegexFullMatch.pbtxt
 create mode 100644 tensorflow/core/kernels/regex_full_match_op.cc
 create mode 100644 tensorflow/python/kernel_tests/regex_full_match_op_test.py
 create mode 100644 tensorflow/tools/api/golden/tensorflow.strings.pbtxt

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 3dad41a88c8212..8669c25c452b53 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,5 +1,16 @@
 # Contributing guidelines
 
+## Pull Request Checklist
+
+Before sending your pull requests, make sure you followed this list.
+
+- Read [contributing guidelines](CONTRIBUTING.md).
+- Read [Code of Conduct](CODE_OF_CONDUCT.md).
+- Ensure you have signed the [Contributor License Agreement (CLA)](https://cla.developers.google.com/).
+- Check that your changes are consistent with the [guidelines](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md#general-guidelines-and-philosophy-for-contribution).
+- Check that your changes are consistent with the [Coding Style](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md#c-coding-style).
+- Run [Unit Tests](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md#running-unit-tests).
+
 ## How to become a contributor and submit your own code
 
 ### Contributor License Agreements
diff --git a/README.md b/README.md
index e1a50c87e26d49..6fb4486d0de9ff 100644
--- a/README.md
+++ b/README.md
@@ -5,9 +5,9 @@
 -----------------
 
 
-| **`Documentation`** | **`Linux CPU`** | **`Linux GPU`** | **`Mac OS CPU`** | **`Windows CPU`** | **`Android`** |
-|-----------------|---------------------|------------------|-------------------|---------------|---------------|
-| [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://www.tensorflow.org/api_docs/) | ![Build Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-cc.png) | ![Build Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-gpu-cc.png) | ![Build Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/macos-py2-cc.png) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-win-cmake-py)](https://ci.tensorflow.org/job/tensorflow-master-win-cmake-py) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-android)](https://ci.tensorflow.org/job/tensorflow-master-android) [ ![Download](https://api.bintray.com/packages/google/tensorflow/tensorflow/images/download.svg) ](https://bintray.com/google/tensorflow/tensorflow/_latestVersion)
+| **`Documentation`** |
+|-----------------|
+| [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://www.tensorflow.org/api_docs/) |
 
 **TensorFlow** is an open source software library for numerical computation using
 data flow graphs.  The graph nodes represent mathematical operations, while
@@ -40,15 +40,6 @@ environment to install the nightly TensorFlow build. We support CPU and GPU
 packages on Linux, Mac, and Windows.
 
 
-**Individual whl files**
-* Linux CPU-only: [Python 2](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/)) / [Python 3.4](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=cpu-slave/)) / [Python 3.6](http://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.6,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp36-cp36m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.6,label=cpu-slave/))
-* Linux GPU: [Python 2](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/42/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/)) / [Python 3.6](http://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.6,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp36-cp36m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.6,label=gpu-linux/))
-* Mac CPU-only: [Python 2](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/)) / [Python 3](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/))
-* Windows CPU-only: [Python 3.5 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=35/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly-1.head-cp35-cp35m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=35/)) / [Python 3.6 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=36/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly-1.head-cp36-cp36m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=36/))
-* Windows GPU: [Python 3.5 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=35/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly_gpu-1.head-cp35-cp35m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=35/)) / [Python 3.6 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=36/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly_gpu-1.head-cp36-cp36m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=36/))
-* Android: [demo APK](https://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/tensorflow_demo.apk), [native libs](https://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/native/)
-([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-android/))
-
 #### *Try your first TensorFlow program*
 ```shell
 $ python
@@ -82,6 +73,30 @@ The TensorFlow project strives to abide by generally accepted best practices in
 
 [![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/1486/badge)](https://bestpractices.coreinfrastructure.org/projects/1486)
 
+
+## Continuous build status
+
+### Official Builds
+
+| Build Type      | Status | Artifacts |
+| ---             | ---    | ---       |
+| **Linux CPU**   | ![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-cc.png) | [pypi](https://pypi.org/project/tf-nightly/) |
+| **Linux GPU**   | ![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-gpu-cc.png) | [pypi](https://pypi.org/project/tf-nightly-gpu/) |
+| **Linux XLA**   | TBA | TBA |
+| **MacOS**       | ![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/macos-py2-cc.png) | [pypi](https://pypi.org/project/tf-nightly/) |
+| **Windows CPU** | [![Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-win-cmake-py)](https://ci.tensorflow.org/job/tensorflow-master-win-cmake-py) | [pypi](https://pypi.org/project/tf-nightly/) |
+| **Windows GPU** | [![Status](http://ci.tensorflow.org/job/tf-master-win-gpu-cmake/badge/icon)](http://ci.tensorflow.org/job/tf-master-win-gpu-cmake/) | [pypi](https://pypi.org/project/tf-nightly-gpu/) |
+| **Android**     | [![Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-android)](https://ci.tensorflow.org/job/tensorflow-master-android) | [![Download](https://api.bintray.com/packages/google/tensorflow/tensorflow/images/download.svg)](https://bintray.com/google/tensorflow/tensorflow/_latestVersion) [demo APK](https://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/tensorflow_demo.apk), [native libs](https://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/native/) [build history](https://ci.tensorflow.org/view/Nightly/job/nightly-android/) |
+
+
+### Community Supported Builds
+
+| Build Type      | Status | Artifacts |
+| ---             | ---    | ---       |
+| **IBM s390x**       | [![Build Status](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/badge/icon)](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/) | TBA |
+| **IBM ppc64le CPU** | [![Build Status](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/badge/icon)](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/) | TBA |
+
+
 ## For more information
 
 * [TensorFlow Website](https://www.tensorflow.org)
diff --git a/RELEASE.md b/RELEASE.md
index 2717c75740aeea..84d9d52868ecd5 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -6,7 +6,7 @@
 * Added Gradient Boosted Trees as pre-made Estimators: BoostedTreesClassifier, BoostedTreesRegressor.
 * Add 3rd generation pipeline config for Cloud TPUs which improves performance and usability.
 * `tf.contrib.bayesflow` is moving out to it's own repo.
-* Added `tf.contrib.{proto,rpc}` to allow generic proto parsing and RPC communication.
+* Added `tf.contrib.{proto,rpc}` to allow generic proto parsing and RPC communication<sup>[1](#rpc-issue)</sup>.
 
 ## Bug Fixes and Other Changes
 * `tf.data`:
@@ -49,13 +49,14 @@
   * Fix non-uniformity of orthogonal matrices.
   * Fix bug where multi-image Estimator eval summaries were not displayed correctly.
 
+<a name="rpc-issue"><sup>1</sup></a> The cancellation logic of the RPC op contains a concurrency error. A fix has been submitted to master and will be part of the next release.
+
 ## Thanks to our Contributors
 
 This release contains contributions from many people at Google, as well as:
 
 4d55397500, Aghasy, Alan Du, Alan Lee, Alan Yee, Alex Wiltschko, Animesh Karnewar, Ankit Gupta, Anton Matosov, Aris L, Ben Barsdell, Brent Yi, Brett Koonce, Carl Thomé, cbockman, Chikanaga Tomoyuki, Chris Tava, CéDric Deltheil, Dahan Gong, Dalmo Cirne, Daniel Erenrich, David Norman, DavidNorman, Edd Wilder-James, Fanjin Zeng, Felix Abecassis, fo40225, George Sterpu, Giovanni Terlingen, Gor Baghdasaryan, Guillaume Klein, Hanchen Li, Ilya Polenov, Jakub Kolodziejczyk, Jason Sadler, Jayaram Bobba, Jerry Liu, jinghuangintel, Jiongyan Zhang (张炯衍), Joel Shor, Jong Wook Kim, Julian Eisenschlos, Karl Lessard, Krish Ravindranath, Loo Rong Jie, Lukas Geiger, Luke Iwanski, Mahmoud Abuzaina, ManHyuk, Marvin Richter, Maximilian Mitchell, Mohammad Ashraf Bhuiyan, msofka, Mustafa Kasap, Nathan Burnham, Nathan Luehr, Naveen Marri, ngc92, nio1814, Oleg Zabluda, Ou Changkun, Panos Ipeirotis, Paul Van Eck, Peter Lee, Piotr Czapla, qjivy, Rholais Lii, Rodrigo Formigone, Russell Klopfer, ryantimjohn, Sang Han, SebastiáN RamíRez, shengfuintel, Siby Jose Plathottam, Silver Chan, Stanislaw Antol, Taehoon Lee, Tarang Chugh, Ted Chang, Thomas Bastiani, Xian Xu, Xiaoming (Jason) Cui, Yan Facai (颜发才), yaox12, Yashal Shakti Kanungo, Yong Tang, Yuan (Terry) Tang, Yuxin Wu, Ziyue(Louis) Lu
 
-
 # Release 1.7.0
 
 ## Major Features And Improvements
@@ -235,7 +236,7 @@ Yoni Tsafir, yordun, Yuan (Terry) Tang, Yuxin Wu, zhengdi, Zhengsheng Wei, 田
   * Add `complex64` support to XLA compiler.
   * `bfloat` support is now added to XLA infrastructure.
   * Make `ClusterSpec` propagation work with XLA devices.
-  * Use a determinisitic executor to generate XLA graph.
+  * Use a deterministic executor to generate XLA graph.
 * `tf.contrib`:
   * `tf.contrib.distributions`:
     * Add `tf.contrib.distributions.Autoregressive`.
diff --git a/SECURITY.md b/SECURITY.md
index a5ce3a62ee202f..01886b613e5d93 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -173,7 +173,7 @@ the progress being made towards a fix and announcement.
 In addition, please include the following information along with your report:
 
 * Your name and affiliation (if any).
-* A description the technical details of the vulnerabilities. It is very
+* A description of the technical details of the vulnerabilities. It is very
   important to let us know how we can reproduce your findings.
 * An explanation who can exploit this vulnerability, and what they gain when
   doing so -- write an attack scenario. This will help us evaluate your report
diff --git a/configure.py b/configure.py
index 3a7f7b3de28fe7..b6c32543cf7079 100644
--- a/configure.py
+++ b/configure.py
@@ -1222,6 +1222,9 @@ def set_tf_cuda_compute_capabilities(environ_cp):
         ask_cuda_compute_capabilities, default_cuda_compute_capabilities)
     # Check whether all capabilities from the input is valid
     all_valid = True
+    # Remove any whitespace the user may have inserted by accident before
+    # splitting the string, as it would otherwise result in an error.
+    tf_cuda_compute_capabilities = ''.join(tf_cuda_compute_capabilities.split())
     for compute_capability in tf_cuda_compute_capabilities.split(','):
       m = re.match('[0-9]+.[0-9]+', compute_capability)
       if not m:
diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
index f06debaf316c01..6d1e3325ebd35b 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
@@ -240,7 +240,7 @@ class Encapsulator {
   // Once edges between compiled and outside_compilation clusters have been
   // replaced by send/recv ops, some dependencies may no longer be apparent.
   // A clustering pass finds all the dependencies between HC nodes that are only
-  // present as a result of edges between nodes in outside_compilaton clusters.
+  // present as a result of edges between nodes in outside_compilation clusters.
   // Suppose there is a path from outside_compilation cluster C in subgraph S
   // to outside_compilation cluster D in subgraph T. If S != T then a control
   // edge is added from the call node for S to the call node for T, which
diff --git a/tensorflow/compiler/xla/README.md b/tensorflow/compiler/xla/README.md
index c93c39e180655e..39f8caaa961dc7 100644
--- a/tensorflow/compiler/xla/README.md
+++ b/tensorflow/compiler/xla/README.md
@@ -1 +1,7 @@
-This is the home of XLA.
+<p align="center">
+  <img width="200" src="xlalogo.png"/>
+</p>
+
+XLA (Accelerated Linear Algebra) is a domain-specific compiler for linear
+algebra that optimizes TensorFlow computations. See the
+[documentation](https://www.tensorflow.org/performance/xla/) for more details.
diff --git a/tensorflow/compiler/xla/service/conditional_simplifier.cc b/tensorflow/compiler/xla/service/conditional_simplifier.cc
index e560abc87f8456..e9ec796121fff2 100644
--- a/tensorflow/compiler/xla/service/conditional_simplifier.cc
+++ b/tensorflow/compiler/xla/service/conditional_simplifier.cc
@@ -35,7 +35,7 @@ namespace xla {
 
 // Tries to replace a conditional with a call operation of the corresponding
 // computation. If the given conditional has a constant predicate, tries to
-// replace it with a call to its true/false computation as appropirate and then
+// replace it with a call to its true/false computation as appropriate and then
 // inline that computation.
 //
 // Returns true if it made a change to the graph.
diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc
index dce201456479c1..33d8338809d4e8 100644
--- a/tensorflow/compiler/xla/service/copy_insertion.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion.cc
@@ -64,7 +64,7 @@ struct SpecialCaseCopyPolicy {
   // output tuple.
   bool copy_root_replicated_buffers = false;
   // If true, insert a copy if a buffer coming from a constant or a parameter
-  // is found wihtin the output tuple.
+  // is found within the output tuple.
   bool copy_parameters_and_constants = false;
 };
 
diff --git a/tensorflow/compiler/xla/service/cpu/ir_function.h b/tensorflow/compiler/xla/service/cpu/ir_function.h
index 557aa4a6bfc2ef..2e55181eed867a 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_function.h
+++ b/tensorflow/compiler/xla/service/cpu/ir_function.h
@@ -33,8 +33,8 @@ namespace cpu {
 // emitters for function and function argument access.
 // The llvm::Function is created with the standard function signature
 // used in the XLA CPU backend (see ir_function.cc for argument details).
-// In addtion IrFunction saves the callers IR insert point during contruction,
-// and restores it after desctruction.
+// In addition, IrFunction saves the caller's IR insert point during
+// construction, and restores it after destruction.
 //
 // Example usage:
 //
diff --git a/tensorflow/compiler/xla/service/cpu/shape_partition.h b/tensorflow/compiler/xla/service/cpu/shape_partition.h
index 33d02b70e61e33..db2cda2936c834 100644
--- a/tensorflow/compiler/xla/service/cpu/shape_partition.h
+++ b/tensorflow/compiler/xla/service/cpu/shape_partition.h
@@ -38,7 +38,7 @@ namespace cpu {
 //
 //     [0, 1), [1, 2), [2, 3), [3, 4), [4, 5) [5, 8)
 //
-//   Note that the last partition has residule because the dimension size is
+//   Note that the last partition has residual because the dimension size is
 //   not a multiple of the partition count.
 //
 //
diff --git a/tensorflow/compiler/xla/service/despecializer.h b/tensorflow/compiler/xla/service/despecializer.h
index af48f4ab6e506d..cc1695b7f86380 100644
--- a/tensorflow/compiler/xla/service/despecializer.h
+++ b/tensorflow/compiler/xla/service/despecializer.h
@@ -25,7 +25,7 @@ namespace xla {
 
 // Creates an HloPassPipeline containing multiple HloPasses that can
 // despecialize an optimized HloModule. This is useful to run an HloModule
-// optimized for one specfic platform on a different platform (undoing platform
+// optimized for one specific platform on a different platform (undoing platform
 // specific passes) with matching numerics for comparison.
 //
 // Current despecialization passes are Defuser, ImplicitBroadcastRemover,
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
index a1d4dca5e0fc52..b41eaa303b0aad 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
@@ -38,7 +38,7 @@ namespace gpu {
 //
 // Examples of things that are not unnested computations:
 //
-//  - The reducer of a kReduce HLO.  This is emited using IrEmitterNested.
+//  - The reducer of a kReduce HLO.  This is emitted using IrEmitterNested.
 //  - The body of a fusion node.  IrEmitterUnenested emits the relevant code
 //    within a kernel function using FusedIrEmitter.  (FusedIrEmitter is not
 //    really an IrEmitter, but is more an "IR generator generator".)
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index 2beac3227e4537..fa59a5fb2030b2 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -135,6 +135,7 @@ StatusOr<std::unique_ptr<Literal>> Compare<complex64>(
 
 }  // namespace
 
+
 HloEvaluator::HloEvaluator(int64 max_loop_iterations)
     : max_loop_iterations_(max_loop_iterations) {
   typed_visitors_[PRED] = MakeUnique<HloEvaluatorTypedVisitor<bool>>(this);
diff --git a/tensorflow/compiler/xla/service/interpreter/README.md b/tensorflow/compiler/xla/service/interpreter/README.md
index 4c19a1b916d421..0b21b251c3f663 100644
--- a/tensorflow/compiler/xla/service/interpreter/README.md
+++ b/tensorflow/compiler/xla/service/interpreter/README.md
@@ -5,7 +5,7 @@ evaluating the result of the HLO graph directly with HloEvaluator, without
 lowering it further (to LLVM IR for example) before execution as other backends
 (CPU and GPU for example) do.
 
-Its key componenets are:
+Its key components are:
 
 *   [`InterpreterCompiler`] despite the inherited naming of "compiler", all
     `InterpreterCompiler` really does is the following:
diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h
index 8b4e07995afffa..c287cca0c54ba1 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.h
+++ b/tensorflow/compiler/xla/service/layout_assignment.h
@@ -282,8 +282,8 @@ class LayoutAssignment : public HloPassInterface {
   // the case that no particular layout is requested.
   //
   // channel_constraints is both an input and output. Any sends or recvs that
-  // are present in channel_constraints will be layed out as constrained. Any
-  // unconstrained sends or recvs will be layed out as locally optimal and their
+  // are present in channel_constraints will be laid out as constrained. Any
+  // unconstrained sends or recvs will be laid out as locally optimal and their
   // layout will be added as a constraint to channel_constraints.
   //
   // If channel_constraints is nullptr, no kSend or kRecvs must be contained
diff --git a/tensorflow/compiler/xla/service/reduce_precision_insertion.cc b/tensorflow/compiler/xla/service/reduce_precision_insertion.cc
index e2c07e38271df8..688cceff0cd10d 100644
--- a/tensorflow/compiler/xla/service/reduce_precision_insertion.cc
+++ b/tensorflow/compiler/xla/service/reduce_precision_insertion.cc
@@ -75,7 +75,7 @@ StatusOr<bool> ReducePrecisionInsertion::insert_after(
     return false;
   }
 
-  // Check that we haven't already inserted an equivalant reduce-precision
+  // Check that we haven't already inserted an equivalent reduce-precision
   // operation after this instruction.  (The zero-user case occurs when this is
   // the root instruction.)
   if (instruction->user_count() > 0) {
diff --git a/tensorflow/compiler/xla/service/source_map_util.h b/tensorflow/compiler/xla/service/source_map_util.h
index a776d745f4e56c..18e2651abb1600 100644
--- a/tensorflow/compiler/xla/service/source_map_util.h
+++ b/tensorflow/compiler/xla/service/source_map_util.h
@@ -23,7 +23,7 @@ limitations under the License.
 namespace xla {
 namespace source_map_util {
 
-// Creates an INVALID_ARUGMENT status with the given format string.
+// Creates an INVALID_ARGUMENT status with the given format string.
 //
 // Also, attempts to extract the OpMetadata for parameter_number on executable
 // and append it to the status message for source mapping to user code.
diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h
index 73e014805f563b..6f5765849ad314 100644
--- a/tensorflow/compiler/xla/shape_util.h
+++ b/tensorflow/compiler/xla/shape_util.h
@@ -234,7 +234,7 @@ class ShapeUtil {
   }
 
   // Returns the higher-precision element type if a and b are both floating
-  // point types; otherwise, checks that that they have the same element type
+  // point types; otherwise, checks that they have the same element type
   // and returns it.
   static PrimitiveType HigherPrecisionElementType(const Shape& a,
                                                   const Shape& b) {
diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc
index efa5aed2d1af8e..0fd846cef8095a 100644
--- a/tensorflow/compiler/xla/tests/dot_operation_test.cc
+++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc
@@ -61,7 +61,7 @@ using TypesF16F32F64CF64 = ::testing::Types<Eigen::half, float>;
 #endif
 
 // Check that we can safely pass an input tuple's elements to a dot operation.
-TEST_F(DotOperationTest, DotOfInputTupleElem) {
+XLA_TEST_F(DotOperationTest, DotOfInputTupleElem) {
   XlaBuilder builder(TestName());
 
   XlaOp param;
@@ -798,7 +798,7 @@ XLA_TYPED_TEST(DotOperationTest_F16F32F64,
       this->error_spec_);
 }
 
-TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstRHSClassicMM) {
+XLA_TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstRHSClassicMM) {
   std::unique_ptr<Array2D<float>> constant_lhs_array(new Array2D<float>(
       {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}}));
   std::unique_ptr<Array2D<float>> constant_rhs_array(
@@ -826,7 +826,7 @@ TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstRHSClassicMM) {
   ComputeAndCompareR2<float>(&builder, expected, {}, error_spec_);
 }
 
-TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstLHSClassicMM) {
+XLA_TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstLHSClassicMM) {
   std::unique_ptr<Array2D<float>> constant_lhs_array(new Array2D<float>(
       {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}}));
   std::unique_ptr<Array2D<float>> constant_rhs_array(
@@ -855,7 +855,7 @@ TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstLHSClassicMM) {
 }
 
 // TODO (b/69062148) Enable when Dot implements general contracting dimensions.
-TEST_F(DotOperationTest,
+XLA_TEST_F(DotOperationTest,
        DISABLED_ON_CPU(DISABLED_ON_GPU(DISABLED_ON_INTERPRETER(
            DotOfGatherOptimizationWithConstRHSReverseMM)))) {
   std::unique_ptr<Array2D<float>> constant_lhs_array(
@@ -886,7 +886,7 @@ TEST_F(DotOperationTest,
 }
 
 // TODO (b/69062148) Enable when Dot implements general contracting dimensions.
-TEST_F(DotOperationTest,
+XLA_TEST_F(DotOperationTest,
        DISABLED_ON_CPU(DISABLED_ON_GPU(DISABLED_ON_INTERPRETER(
            DotOfGatherOptimizationWithConstLHSReverseMM)))) {
   std::unique_ptr<Array2D<float>> constant_lhs_array(
@@ -917,7 +917,7 @@ TEST_F(DotOperationTest,
 }
 
 // TODO (b/69062148) Enable when Dot implements general contracting dimensions.
-TEST_F(DotOperationTest,
+XLA_TEST_F(DotOperationTest,
        DISABLED_ON_CPU(DISABLED_ON_GPU(
            DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstRHSRows)))) {
   std::unique_ptr<Array2D<float>> constant_lhs_array(
@@ -953,7 +953,7 @@ TEST_F(DotOperationTest,
 }
 
 // TODO (b/69062148) Enable when Dot implements general contracting dimensions.
-TEST_F(DotOperationTest,
+XLA_TEST_F(DotOperationTest,
        DISABLED_ON_CPU(DISABLED_ON_GPU(
            DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstLHSRows)))) {
   std::unique_ptr<Array2D<float>> constant_lhs_array(
@@ -989,7 +989,7 @@ TEST_F(DotOperationTest,
 }
 
 // TODO (b/69062148) Enable when Dot implements general contracting dimensions.
-TEST_F(DotOperationTest,
+XLA_TEST_F(DotOperationTest,
        DISABLED_ON_CPU(DISABLED_ON_GPU(
            DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstRHSCols)))) {
   std::unique_ptr<Array2D<float>> constant_lhs_array(new Array2D<float>(
@@ -1017,7 +1017,7 @@ TEST_F(DotOperationTest,
 }
 
 // TODO (b/69062148) Enable when Dot implements general contracting dimensions.
-TEST_F(DotOperationTest,
+XLA_TEST_F(DotOperationTest,
        DISABLED_ON_CPU(DISABLED_ON_GPU(
            DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstLHSCols)))) {
   std::unique_ptr<Array2D<float>> constant_lhs_array(new Array2D<float>(
diff --git a/tensorflow/compiler/xla/tests/tuple_test.cc b/tensorflow/compiler/xla/tests/tuple_test.cc
index 098443824e4ed6..41189231b90e84 100644
--- a/tensorflow/compiler/xla/tests/tuple_test.cc
+++ b/tensorflow/compiler/xla/tests/tuple_test.cc
@@ -514,7 +514,7 @@ XLA_TEST_F(TupleTest, ComplexTuples) {
 class TupleHloTest : public HloTestBase {};
 
 // Disabled on the interpreter because bitcast doesn't exist on the interpreter.
-TEST_F(TupleHloTest, DISABLED_ON_INTERPRETER(BitcastAfterGTE)) {
+XLA_TEST_F(TupleHloTest, DISABLED_ON_INTERPRETER(BitcastAfterGTE)) {
   const char* testcase = R"(
     HloModule m
 
diff --git a/tensorflow/compiler/xla/xlalogo.png b/tensorflow/compiler/xla/xlalogo.png
new file mode 100644
index 0000000000000000000000000000000000000000..7a0a295953d0c47b23718197dcbab1677b337455
GIT binary patch
literal 46785
zcmY&<1yEaE^k?wk?hd7RvEuGf+$j|I;_hxm1I68?xVux_t+>0p+lFudyE{8fhRMrJ
zUf#X;ob!`&!W88tkr4<G00028w3L`K001!neyIat!B>d-)m6d&pv{EkgaLqR5aNpg
z4EQ~nk(9C=0O0W%0PqU}0G_~?{0;yBXJ!E4NFM;;Nd^FL?b2J6_`n~)8p=qD0p36U
zvfGQ|z*peyq`o@>06!!?UJzLEqTsIvz&T0FiNkF{VgunB%+w9<z~2av786!=TRO>b
zb^9?_f45$*kn$y}g+V*DddDWPTR;4e@di3gaheRx!-IS%DJ>|?VMfZq*1<7u5)<Pa
zS$pWDEK55f6ztINR>q;9GxE#F&)cKGW&`QOLgLp<GoeI@Jn!-J#pAN}`R6Yp=*(<5
zp?^x^rKkemh}%<s8GQS4lYJG1CS(G$85H*^(-Y-XCnK0dX)Ogk3qCZ(gPmWWgM&cR
z&M(LW+e7m)5YMd%G^sS=r+YB9gAX?~OiiOzt%LDLv$3Og5-PQ4TQLW25+@>04>;@O
zd;pjJIz*}Wy~VI0j|PcJToRIun9!E*jmGAEnnyLbxt&SrKxsFjL6e9B{VB|A9u<iQ
z+rY-e?T8M5SIiZ!<PC`_Q>B5Dxgt8Cy*!I6_!1X_OPPRgW}e+X*$ci`aWIJ=a<-Wc
zoDbTSoOYeZ?tG`~!ByV8T-reHg*|VlR2urPH_o&7#rdi&%b^BAozOdELInPjHWo5y
zkB%4TmzyPpIuaAS`Fq-7VdyHFo31M$n~?Uew6D@RKksI#g;>SB#ol&P*<5}+`uZ-{
z)SL9nS}oL7eB8Vj>*jhMCxTKssC<gfc~0B1GE(~=>N)pY?~=lom(TXuxs&^!JEh=1
zxmt<NN8oO!nXQFpXDzFwh=Ayr>iV}H6j{SaB`Lb$8f~9{wr_iNNUFu+1VCmEW&KbV
z17<<grPFO{v^$<4=`+h6)-TU05WOZmFcBQXohbV8H@7p3+h=24fh0>xs;V7{UlQ4`
z?PeZ465<(WJm$Ci9)#QKeLkbR)7X+h1_4Fy)yOw|flTGHs_!?xQ%T=&J&fa`byNbM
zQ3QYcw42`6a5ucxf7?R<;HhGvZ4{IO0{m68{MKNh^VWt);Qc7FX%SKlkq<+C?jKg$
zQ_n$CpPv!=>uCADbq5$}GoSz-oE&TK^xFAx*}u6f`F#(H`t&CSTCn=UZQ+gC-NXR^
z0Kx$Jl;-N-B`4fSX7)#pexZ6py#|vk=1*Z{g`FPrTxj|Fj=BGt)v(O}v1b$l86zB2
zhnA1+5#ug~MyBE?47s_8)V@JT`Y4JHKqn{_S@G>1_7!x#!|0XID=e#grwvaESdC8;
zJ2?3fN@}yXqIJ$iioje7$VS_2(jYCQ)O(?p84yhf_e<k{3gF3&(%7Eq)D5bifLP|l
z^-ztI&}N7hrkFI4q|u-XkuUvyB}>{`ugSw!LKONdh0%7ELfA~=2`2lT5u#`^aEJ{e
zO59sa*Xe%X38lqz_p;7=Qsx*|1q&0V`D)3~J}4g=hhZAT5s8hy2Q|0ZP9;(wn~O-&
z5>?{)ueNbm9fF2n19d|5Wuw%7!2<oI(>J@kx?(af6$?WoyEBh?{lu-bps%44g;mnQ
zAO;|otVByC>sD%|hL%l*$4f%>DYVVmC>uz2vi;yc_RoZx(LLYcf|L-=HX7GMQ^hSN
z0D1rkUHAOxuR^1Biq1Y4>4i-54A(E{M5hj+bB`-s(-E2*?qNYK08lL>)YbP==ix{?
zh;_x)Dx%Nc&jQkM|8Xs0=!a&vS5Sw{WmMuAiJGa~7tc>2k35sl;eNXXyifpvC?)Kj
zT|F*Jdp=_OrUT;CVUnUjLHEuh@YDabCQRm?@J%*I$2HZhDzZ+9G1^{{JkL%<uB^Gy
z?4t;Rd=OEZt@FZ2daZ(i5;aGRPoinlR<e8d952wbUiZ-t-2Ooj?*MvUCpQ%%hQnz~
z;Ku-D4ITJKGSAZUIiCdh#ehn+j?+s88(+#T0=<co*S^+%#G}y;KUd+eiw7OwuCdkS
z{eRycJN)83<>96D$M2k-#wk0Esn}ExuH$`GNjE?Yy)WBLkg?IdNqN><qB)Z6<bRFq
zk(?@er+nYM`-BtS-8F~5dzK=pq@X8Y@UY2(Oh5!IJ021fY|t~8|EiDa@>t10y!SDq
z++q=Rem%Y7UMjM&s8<9;%ZBzuKU2FxnUEM*#t?gz$K8t@zGwKeWj<mu{&)_GXF&8S
zxr)?I&m;TciT}3EBy%~rDx_fh@e6L~5DuC!|N0`K5NVup?LXaDv!cags6|5&`VCY-
zJ`nX!#s&Gy?ff~31wcFB$wlxaz2Age*RC&Jd`HSJOw=eEBz13J!uWKNROt)Xcz^e$
zEcWWfTP~ce=cb#4=e=7S4e#V`Y3pYNR}oB9tzh(SRz*)EsibyQT`;oTWeVPj`GA?8
zcR}@7LnS<m#`!UYDge9z5VZVB{3XzeoO%29{*i~(MjCzyf}~AWiS9}!bG~quTpa8P
z{PMA!SA~w?6I4x+(bGarit1xyei7#!?HFQ<)kzI3Uav;CWv_FPycWU8&F+AClq?1`
zVb8JgTs%_hip&t*Am3nlsBY&n;)IfRQKxFxkviGFPktR}rWh=XHXYX3FpUZfhlGKd
zCBp^;`_G~TEQtW;-3Rah04xX$$V<_!e{rOpCW{_vEe57f2>fNVd_rq`$-?9pc_S6M
zq_?M<KNxpC?^D=O;^JIZzOnP`ti`ziAs$48RXMzfvL0`(US8Sj5)Muln4tgp9BI8+
zUH3OHb{!C&<^TZ+$`lM#NZ|bs0671edZr;yx%pR34+(s9>kl}*!`>&l{V}Jn2WO}<
z9Zh>Bq&r;&E@tyc$T`PR^F5>rNZt+o_VOe$r&Q=UF8YMls2bNt4FzK1p;n@RMvnw{
zf)@kKEWvB^bca7W?N8|O)31(}4AGUSH`|Z6S*YFyb{8|?RUALCjroCP%+IBx&24J}
zg3f;isww~#%z&tUQfdG(EZcI&e|4Ev56<c{e0d!X!`x=!(o{^>BYtQP)7<;hu20V>
ze||fR#!+m1<5lN#p{}>mJmt3t)lP?k&coe=TIa=a^??FPfD0qbR5?x4$-(x4@3RY$
z66NmmfDw&6fG%Gwc-zh9JQnbPB(a_s^cO`5dTj5??F2f=sp8ihmxlLk!1y6lJF%3z
z)w^@}3)^7|xGY4k-#8iF?#W@3N`Dg6qs-XuFIb=y`MM~HkKyLBHU(|{l{e8OsCAwo
zAa`!FfB8M?-kbmX>qJ(P*xf?<t%43GmtrGdMoWd(aPNS&`lpTlU~=*<v0&%%3AO-$
z5t<!E(4Z`5+6_YZbH73*=wc6lVBD*se#)sQw&c%=eYmvIG~0j=x!q05c@7gY3K34i
z_+rQF;FA;VxZ9nAW}OSR%pv={G$57qB3PlP&1gMTf%uc%;GbZUDFKU!zSz%@#2(2W
z7OeIXs>3t)`vhbF1JKei`VWhNF3ceRG3Yx*J7~Y`aV?_)$HlRErwQN3a>U7`o9Uh>
ztVdJj$_nr~;3wqV*cnh)A8#m&5FNdJzJ&{$jE{3cx^=U*XFpEIoJFf>hShsJhIlyl
z%`KlaT7nZ5$u0ty0G)Ys+lJFpk<pyjjG5X7he<wj`XY3pWqf`RCK8B|u>=zM?EYp@
z{x8(5eg(5*w-?IXI_x9;$J53U5P1dzGHywpZYDQsisOrpG111cD2R?-PW|><TmTi0
z(In}CPYs|=Pg3Nqqj&lxz#>k>cCsvNGTXeb^5^R};I*=KfQ?0fvd6SK3gBf_icjId
z@?Q52ZxWB4+<g;uozv~_)j3na*`Eqx6<Lh929p__2qVXRo-&eyN&(J{ClDQNl6<6B
z23nPzDw0)iN$>zAaRtSdmUDn2Akn%wtr>P*_l>}&4aPgHTLfgqd6G)d=5~bdv|p=0
z%rnu@p+vhY0P~(FZcHjop()ZOb2}0MumD)7kg;avBsDPoO(uhcCO?_y<}$zKzqv`h
z{j+QnW~5)$IVqZ!JDME73|{(`F#-jC(I1(zMB22wf1Yb6>ga9eqjB4p>qVykdc_#n
zw^{x5u47&M?^;faMtnE}`jWizPXl`Ns9(!RD3!<44|uE7%hRDy!8_jThJ2uN+MWp#
zNI*xvI3(rfa3+`{|3<-BnC@7ZZ1<Upi`B2gnYiho0Qcus!|NuQ<F44g3;95Qxa9do
zVI7p+vescYCCj#-%w<X-R@?qI_=GWAS95t_!8naxZe5E8Qmadw^@*(!?V<s4l_?Iy
zjlhf}Wa=46J|&qM=K}3b^s^#S!}~ap(2&<-IhL8v39@hy5tHPj*xUeNoL;22a*enC
zexp@L|JB~Vig&hT&&#)1FAD6!#R97Sh_)@`LrQwXvz{su0U`J;yPFi}5x<|`vd@?;
z-LV_&9T=Y?Jtixia_M(D;>XJ)wIUw)NOavTbRQ$+^)R9O&QolQ=vUbK<ne<OJxh-4
zLpl`%Qkms|q@K3U`Uw{<=;Tab%DB}<e{+d?!BL0T&&pn2A2xSJAgc2{A5vpQ@c=!C
z5l6B-J{u<D#<fUgU;Uf=X#NV32|9p6nc{>ia|8@ACrobyh4B`S7{SjDJSzLDI*7Xd
z7PbaO;&Ul`Bqxi#FUw*tdoVZqAG4TL8lX9sE0C{<xoQe4kzb+g)#0_dGo44F5JH9Q
z*!Z68INSVi=5Ee=brOJL5sK!vTT^O3lPGsx$UVL*yeEA=ePSe$5Kjhc1%i>vz+31)
z@JZL#T;p*O2N;k}=wj3gIvlihO8$G(;53z&<}FMn`&=hMK@epKKIESwWoN=dE^mwD
z9Y42*Po6GShb<AB=B~!+@L~Zb0e>&&&CUi4_Rosn2Hvs%a+{elMDu!hUhEuO_GA$d
zJXw`*r}LAPc2$c2Q~{sSe~SnOHvXo8AZr#C0zu_yO*o$d(6_GB()UrHdVagW1zHgl
zY!I@GSF;)Xmr^0fg=A1Q*5l-}aAF-6^G&0A(MqDQ#(fVs+-fZ0Vi?Ea86@OJ<{p-B
z3BJ5%1t8@~D%ou*4TK**KPApdxo^6?s0>78vcoF!n<HgID#GHef`G=#Yjor2&_Uim
zcuTuGj*i%LD@jkzyCM(S5W%H%*biQ&mM&Mx>hK3#!zjL_8LJ+MwQ-j|lyIsZD)VGX
zct=2UTw>;B-|i$l`l#qVvV&Ornj;ARGUY&b-hWI~)Y;5zS`Q4VC%s0_RRK7DLAbS&
z4xR#ZQ9)91aJ7=zCLRe^AY#t@U8%vCTlOj1^*P$R2>79o!<{Ua9f+!=AXhXI&P<w_
zRd#7zqr<!L-6He|%E_0pXD+KR&$qZwe0^d$%O;|4(|G+IwcxjzLS2RPY!O-OeKDJ7
zg=HwD#?sU^keqv?{6FA?B3tQD@v=#krp;;HPAZCzn&C(LIT&90j6zLqVWAnN#kvo~
z99gNsI&jr-y?<Ll%)L)!F|dJX-Mf$KTjLSs-@y)&d&*KTEv>fRoq5>2@8GulW8lTB
z_3K`c6UTHx3~#t)&&?tn3)&bKSWKq+w|(xKrS*P66MO58kmT((?p)V!!D3d{1f&v~
z_B#IYm{I_zF8`&llu=KL(a;@U%h4P*yOyL@7aD?WIQEBL(mq#pV!ybGRHyxPa8aRG
zru~H(4J1k$@~R4Om(E%9?P_0+;6Zs&_W0t7WmiDVOG1t0p}2%TsW~gTBkt<kcD~W7
zP9n2h<fT!xfO_|Q#1d9z52nmj?lYsX1CRQC&31%7z0)Wlms7yX7SR|dVr+}TW?$!{
z{V9Lk(|!Xjf(_TBYD;OOnRG@xE?hptLnPK90TkCoAq&9P$Vywh%F0%JpsbG4N59tj
zaVV5eo!f04_HP<E&;7%q8ulsosqfpmV9C4ROS`o_J7~{eJH61I9Acq9bt}!mCf1F2
zLBieVszv+bi#?G3pml4hkZr5-9Q6K2lOg&v<M`r-t->7K?;)mnDfg572m(n*gbm#V
z3vzwq2*)-^2sE-!u4pf8{)$nJHt4HcMBx)QReGtnIf3&$6y2t#-<DYV78Wao63KfA
zbuu1?{xQ}b5$y*;&_<A9G*3r^X&E*<|8YC*siL>v9*7L>k%7dw7=HhCKIlCRyx6>0
zFm)&9yDFlAMXJ(|XI|z6k`cZ^<>@*H-8=ZmNCjEduju}f0|+Y65ags9_5Zjvd~F`J
zXJDUsnA<J>yEne89T*o+?QnIqJ$6tcUAHk`2N<F&Mjk9dD7c&sa_<*+P@-A4Ag%L?
z*Lgb`mhb)+`#|Cv^BpXddt4f!pSyqfbjm~g&~E<q0m0A#FMp*fU_EqibtfcsOjKca
z?Dt82bBCJ8;wZWeyE=!nu$^rrQ}nRi3x=dM*rsXIXWV1=>dr`kBd3l2RPn12D$v}Y
z{-O^KwYS?W@3c6qI?*zskEPdM=o<IH%y@cVP0D@N_^Bv5DV+u2K8Ys;kzIh#PY<KK
z7Vq+qnmdjy*lxId(#YWj0re+0HM5WgqTji%u5XKLfz_W)0fERc6*s6LT^suN5AU_h
zJj$K%JPH|@?kGrRZ`B@n2n5Us@RKtw(?e(SpL;tU|6@cXB;K#dju&q(mjqCZ7RO_J
z-e`t#5oC!((SLS?oHl~1q!ao(0mqPLqN}Rr=zXzG{dtP#qY%ONW6#`_(6v3Zzjrg8
zwR>JdE<b4n$E)?Z=@l|7uD6mf4WMq7-<FG!jF5yxIOfkF=$R8oYdsm(V~DR`zZ2Zc
zj7RoeKHOQm)zwm7O^IzSX$^3<Cu0{NtdMbkg8nr1Hx(Y28ybKib%!b92;uuz8(kTl
zj2ul5qc*eUsW9&!?<}%W371*866;>_KQM&iKm3}j_w?g}Y{bh~bNF9whMmiA7q_ba
z((>hF4xB)++C+olChhaB!)kayLmmiw{p4J*aLN7)O(8{kJNj=7r2&7Ka}$t&;UHtz
zo!U-wCMX;rB9K|F-{ReOm6uscaBwVS^>M0b@Ew1sl$Kr|gfiu6le!<oG&j7?{fdwK
zH|Rn!_>K2My;t_GtWK21&xliq2E&%`O6V4!G{Kwx9Pe624m2S)(SmaGIfOud!ni}1
zaaUNpIkN>3@+>$L3z7ppyugluRPio$LHME(GUuo5DoHUUS+1sKMs4A3P+w*3?>&6v
z1eI}am6JG@zIt$skP;Gzy=XKB<9z=IC!&lq`q}E{qbTWIOj_?ORuJ3Rqhuf98jMd3
z<hipf30#-5w2NsooGn!|3TU=6r+6q7#8cxMp^vWtjcX+dZA5LDa{Pr~BTwmJkb1SX
z5ygb0_QN%!9OpE?9Ha|W<w4QTXz%NXU$PW#aFIb*@SeXqdHb$+t0`B|1XHo=2i0wd
zc`n9HqjWKjE8P+tbX!&%iL$5SL>|t=1qsa~cXoj|%d+Nn7BpZ}UJJhV?O5$XD)(G0
ztg%4={bDtdZz?Ve>+&ebKs=t+m%6!+9!6px2w;IaPyPk0Zmcxr;Z|0jmo@Rlwlzrb
z4B!S54x3LE1f~)bUe2zV9DwAnZS13s`m)WJFEWD_)FhK~Y<DZ~A4|)txdhT>xB7hF
z+1yrz-Up8Jy=?ttY{S(y&bU$N5U^zR*=8_=0Co&y8Cb<Xx+Y)13apP3G?sX_iPDOu
zd#@nVs>D*MR8e8LW8ngCUMA^NP*;jsmZ%;+k){?^hAVC^)s#<coJ-kFRb4Om(Q^(+
z2eRfFL`0X1BZnI45#Ej;Fb^BnG7FrQeQ3x`hF~PR+85~a#TEh=DPs~`>Y$dh?z8Je
z0S1B-dP_gOo62{iFBoxAv+JpJ4-Dj#{zlS$(EH1y#4$II=evVYa6bMg<*rJLWq9IJ
z#VgHgiV>US02fI!UR@%*BYFPhyLn@o-r>96Hm?`fXKLta2KVpgw3u5k;ptmcykx?3
zcz4Clmj`n3p{E!RI0!#tvN?rcqh=31|2_7q1l1)}+Ns%iv(F(9;VX002cP3-D{0Qr
z0IIfo!g#xwUknz<n<Xlf_sbg>5bwJ;TS``NNfz(tc=Y3&r$;oDzPW-FtigG=5;(az
z5?_iTS|m}xSGuE)<kEfI;2>bQkmhe)3QYu(=7okZAmtp-Fx7Lu9IBi}xogZb>5o4@
z!tylD>aGUk*7X(?2461|wNh(6BI63$-B#;h;6VEmGx6h=;CCMNoKO|KIzaYMbeg65
zg=&MPwK|YA1yWsSux(wXb=cy#8u^|ScMv9{B|SBm#8Gdg*4*>U9q66^A3kA`L=wl7
zaezjZlJjRPo%U{!x4-#=I*Rzo^Q!!al_DZexSCa-APOlUMqQW4JY6CWvXuQ2Pq~4p
z<Nxd<X1)Hsentg+GDqvJ_1C=C|D=otV>5sl7O?|$z>F1jdH7I28&rLY4l!a>1MAT0
z5|moHo3Q@05u$0U!26pdu2GhSXN?6+VcwjHA2uQ;G$!g!WdcU09GW7*nqirzzr6c2
z7#uWmg_6zE+bx-)h|W3Hc^@kD3ZCI0sAoEUS?#c1hCq&Xv``$bnI7M-k&Ie&94W1+
zgTnKGKky#YF5CfR(?%k6^1bLpL?ZgTcW_7Dsort**_^O*wS3Qm<T=E$w`X-1367^u
zkl6z?>F|zw-=O?lyeQ#pb=g_cf-is%cVV0sHTXIX0r*hqtPXn6eV=r8^heql7<S9H
zI6d)rbI|(%x~-63nd7mHmcRMS_IN_D7S~QIk~2NdQG1}c?ex_an-#}%MieF^-SO;;
z=P6m8{s^`Hqxx!*a^tL~{DrP`)*m=!Oj;m)QF|Tz*}Tsxi(mRz*_FbQHQG-&3U(%S
zQg4BltY_l4?XJMTa>qRKVas<(J)_+Y>Ad21v7Z8{f{Ltm0gXaba|&qeLE(6wbOHz(
zciO~PwO*#ZA(EMI-$3@~GI9r5H3?}HBP4+z0*L_mMITH={JHhg#TkC|bv0V8t_>+k
zuzyJAt(!%mWC1aNOtp(ejk&Q~>J2e`sqr?t(ugJ@)#cmg>oGc7R0i?IadV{<@=XG$
z;tO?1DuC3$>5@{l-zFlqhuEXIy|>2pb=t{3uJ2on3t=E1)oC)t0QkmV3HtWNo$<c1
z&d9SyR;o#JS2CQJgrRZ#&Nsz)`P;K?@EpK<E-+=I7f%YtzRN!Wu|a#S?{SsyO0Pb8
z?{G!j;|ap1^^qVF=zGx^49E~PnteilesZ7`DtbN$Eo1Cr`i>qw8wl-;jIE3$@UTEn
zh|##lhZ!ge7W|j6#Jg;|?YS(|c@-Yd635On&bTySQtBOEM^9$!^L#$T(<5u*cd@R#
zFH^qRrk%YLvpv`EdN^?7kXd&P3FL3TY4cPZX?^pP6u=X@dvAsVxb9p2J)6&@y#1R;
z(|kEzot*WYweBUy@7L#=J32FBUuoaiJMreLs1%Z<)h=?TiOCDhSrn=Y=dXt-ZTgv*
zSV*^s&TDfvvYAbOY?-H`1J3pQT<T(Z3)9Ug(H37xE%Eds5-nIHP+*05aKzynuj3Fw
zl00bm_Sfl%y3N^p#!z}^vB;4wXDk$cdnC<dyhK)Pi@xWl2F6)ztXyWKy_L46P<6X+
z>(w>F-1olEr!0E`wN5T5;|!NBej)VdRpr@sC-}Jh*G1gbvm5vw7&i*UsL(<%G2|jz
zz~XIql;&%$L)4{_d-o#G@@hV4C<%lrB9UFb5Hxq5cnUW(0L8Rw_smul*qnJ@Xa}1!
z_aEkr1EahW-$|X;64;;;hSqW@ep*<XpZF!^9;CjPkdA{sEz;dHj@#Fo`4-sr9I1eQ
z_=fH}K^g5^<QUu9yLP+Z>g8*!KG=4By>_|3cs_)#cW!~i>)l*=AAXk^g&~ZQQ{cK?
zt<<jsK|3U4Y4u^0gw^G8rD$a_I#mxKl(%OZP{gn{t7S#Dw+ZyVF6XsGtX>1W0i&NL
zTiK!X$|0EQ&-4K$0aOEHY=VH--19ceoD?4lG)Yelvz$;4afn4)t|hUBrDl}(1iYV_
zdtA71$uf%)j#<opU(ta=!C*^a;4EJaF0<#@j6}qci#ZaB?2!Qb4kKOiEzQ*z<@||l
z?fTy3>2Y~og#qf^{RCxdvQaug%;-Ewr-vCpPIZJDUen~nAxd1`^o{q`b^=W(;O?n*
z#rAxj>IS7#)^OcF#r&LUODbd=U~C0dw3GvE?#yGP#ez^GOvHWNGxC0K+XyWZ_^xzo
z>~01FhC-Vg3*e<j_Cw*5^32^XcMVNJWxOw&`V=4yEy(7CW@@V^Iij)l#h29@1?&t8
z<t;gX9#^nSobDF3<@=)Gxd-|rECnwj=266G2MbMqv>krEO~KsS(>c(9{rpQXZ*UoL
zJhPt+A5V(M?-1aE7LFqk%mENyy97}HEP}V-MytLES`6h?uKF$~zNMJ;O*QA#e#l`$
zNb4koVhe2*Q0wb;<ZByXSBy2u#Q$e>>~|wFPt0sKyQf;;++a%$Y1;Pf;ZZ<9;PPV+
zoQ=q}KB|S?{d(ILkx_3uP90{E{e~YMc``Y$RWN2XZ)rsIWLsGLVgWOR*5Eq^1jnWZ
zu&_Aru!;0t1jL!O-&e#^65utCJ1#|}5CKBt7d0H_9_R&jjS<in{O66aMD7~w1<tZg
z5JaJW-@d3eZFNaMudX1rO5?g~?z~{JB*CUb4_KsBKzFBblnNv>3*x#t!@)>R`l$v%
zi*F$+VgLuB348*GBa&0)T}<{s0PbOFcD76Y@$4U2CUvq~7gs{zj}UVHQaKsFkiGb+
zLG4p#ZD|R1S((X9zT0x{)<{5fb&eoy)K?IXMm|}edUcC7D$mM7k(rMIjrk?nOfvaX
z#RiKZJ8IJwaS>zZTL5OVA*alcR>J;46hH`_F7=<WE^Ye+1*MV0etb54IVrnh5nnrd
zR2K6j-JdxgsX0ZUEQ79Gr&xdKqjxjylAv%b*Kg-&E1mjR5E~cT{WEgC@F)CtvV(>d
zgMq-NEJiaAV+tE%=!c_ff3Q=?+KIeCJYZidkzoCBc^8$fM{S-;YN6VDr+Fs@2;g~6
zrOO!c_2_F@JS7wW`Nub^V;0&e0kU}r`d%b8w)PX$pw4AJzZmRcdX1X6i%S9E$%S@s
z(oYV_!hrQq%jVkosTz%pRLaBGKEgGB(>b%^vmSaI|B$oS#W_(nN+u3vmO8LzOc?mZ
zAp9h1^Fo2!lN3)`hdY%Rj$vjDV<w!uaKXwyAt^OD*5`%KGR)-~4!a6O1E3$#L#8)9
zH{)t}zvSoFlPhtWAZ@Tf1L|Cz4Kqq0ZQD#=W=NaN6n{|<*W(!(|9g<P9O&MxATBj&
zb~(MEa=Ca!T?=@K9`l_2`jnExMw!8Hwi^T;11YSdKGEHK3^aKnIY%BbHI@zeV*Q|h
ze&?t|dC-Cq_|t&Lq<y4D@`XS}N6cp<QRW%-b%{Sz<zDFf9)e06RPVW441R)jt)0&D
z39?T3)%B+-#b!BNj5FOlNY^c2>R-@%Rv*#%hViZNL>`7{px=P$Jx#&mP(b+};{iys
zyp{eH=u(~)IAWClkZ{G*<lP0B4uR&y5Ki_Sg@+fKgo{xRi^sfNYKOvt2=d}H+H`sz
z#+DS+U|*Q`*K}+Ho0AkEB)EaQ3VBs-l!rfR<H(zXoln6JX-cMxA|XJI*w!qL@cm<?
z^vlsKizz}-UV1(;l7!m1htGN-XC4RtP4p3e2ELCeo)}HiaW^bEBk<|Z%%WiBw4*pz
zVjnXDJgN5Nnt|kZ6`+q!FzwG+`(a1R&5VoENb{J9rv>RhD!))Po-kCVQJAV{wEQ8-
zR+d6Mqct10+2vZv$^Y&-EP`A&U?tgk7eNRVUy*MoA5mAe)TdDzkkX(2wtJaBJBmc+
z%4slrvc*z}aQ{H11@N);<gl5=C=OF@fN)G*D`WFZEm!8J;G6<-gJTnwxOOl0KoAsU
z`Htdc{mn=8xK8q<3se4ehvmn8BPU<k8eE-or|d7rBuV$^OD!|c(6^$$Dt-S8O_{{b
z3i@*#AQEJx=bfC`{(u@ESSQL#+3<$#bDq~(I7tJN^w)0xiAk=r#m^*%hNR>Fo{o1c
zCp=*i6|#%7`LGxy@f__E4hRr{(EcObPix6J#GhLl8Sck-RMyw;8Mb%KPR^ac?-8_W
z+5+B<7lnLC{&^qT+n@>8=NPooQw%+MM?b4Zm=N31N79@nsm+&A8&Nq0jUgG{*&8x-
zz8A#IcJI~XRNm*592QE(m9$9ZpiR>#Cq-m9kmV=3w0gfO^2Jq4)%-vJ=uKjx2>mO+
z-`DxBFIlU>N|>|X)6@@txQwu`UhBCZd)-jUJj2?DPgM_;H(Del6nng&zqB+ooCkD6
zpc@ECn|)b?DXv3f{)J}P?{?<xI30bu`7^$zt~VAXvCW~g?Ws~QpN@^Ku!4}=^DCux
zavodK4|`wOAgAE*9Qu8SY{o6_Ly4vMK|Bm$i!RALkb_u7TXz+ERa{VoCe{7gIcm=-
zoGvcB-QIeoE8kljDpKl%l~wX@mw0YZq-bz&22fqi!0wdriaKFx4AJ_SOwduAHUt{3
zOOeA8>2x5dLI$p$mt*Ts^T;G=hJ>o&A|@2@-gXo8dUwoazMZOc%6=A(&^3g!S4_j$
z2!!iGgQqEymE1{BMk~0zJAJ>Dk)UAA;o_iR9$1NzMTZW>#^Vs)B`@yLcEu@K#^MGm
ziLayF^Q0}~IRbbo9U`cf;nK#HZ24LEvHv){Y~KRLj#Y2>rE}kuS3nAg5b%-imG+!c
zU=QL2<C|&mWGK7o0*Czyo9|YhuCFZ3bG0qr=$d}Kmc2XybP>y7j-yug!J0OX3?WAJ
zDbD6E`*`%GahCW5m!S3rHAG>dg@Gu?qD2iIuj!*D{|=P8(O)0dnk<E92Gvq4kJ3KZ
zh9pU%;4f9a1?s<~GUf5>;~s%W+e?-!#2-fgD;rL^PG7;N)0k27m6k*%w+CER3;gF>
zEjl)^(m+SPu7(&CNwLA}2>Yx}nMvPMO{{TIS&jTo0u8pj)cpJ^dX$g!!FmMj#h*xX
zKZVwth1o-~P@1rt_`1Aw_W|t#XPt~<E9+iw>P}1j`ik{<wA@lBkpTJ!a1>y&Mq%ds
zfo^pHQiiz-URmnaUDmT;Z=OU6HN;7O`}Laq^IEh57`m_Sa4>sV&_EzSj)eH;elqf>
zb!#5J1V>`HbQ*;;G_^E?zVo#kxth5BREmJ4Et{`Lz-*Xjp!@mknOaX%`(C6Gty30J
z34;`#zLm4kjB)aFczWITAnyBjjA@i1jO(SW#B-l5U4r@!M^a$ZNGZlyjWdf;fc}#j
z3InKGcmF;f69tgWL22~uU^+-31T22#0LeBm?Pj&|t<R--zXmI9?)NHuH~s+}y&X$4
zRWJdW3=5bwfJ2sV;~uJXzEI=o)fEYyUk^bb?a)eXH^?Nax9NYxT=BK~K!}G>kH;4{
z8wr{Be=KT6XNY<(e_E2$C|ckC_;Hm*2TIO)jteH>OPPJzkb-8h$1e1V%07lLRzxn1
zPyGG)SN8Zf(crcnjjRLyjZNI~{LGxK4&ih|ugD{vD5<ceN9zY!nKq-mlC>C$Z@AZA
z*}i`_LZR@d%3zvq&1?#F7rejtjCrWZwN>DI6;`>@sqsE*p9OXDBV8qOUKAF(bZMg7
z(I_8wZi{H=lc5$yKOT7FgMq*&V3|6l30%9&1jK(f6c3lF@6P0-FR1Np%x2W;KYLe}
zId4hrobW)>`fK>x{QTg#s+z69AS*6x@;;6ctss;X0z@d#U{5-Z(0p2WH9E?zMXCT?
zN}TU`-e&pVr5#&aKO0xtYhVFb!5ITEiQqj+X$SZuCB$DEOj)FGpXvbh+h1%6!U_%@
zFZlp7Ce9_9qh>l<gw)<gX{AB^@=<R5TBf5Zf3=$|t^eaV;@wh}vrx`1g=8@am_KWm
zJY7R8DX`cG?A1u<{hKnW$u6tgPEY9E$&GhO#!qiJ;%v*m7AF7|+Sftn6MY${EyQlX
z0@hHC=G3N1yIJqS>L+6)jsc}Gk3Mk=wM<9TD9it~gz4h8kUTAeeJ-&a_iFd4>Vz7b
zF~j)?NV2?4c5pzljM-0K9(;S88$QV6#oApnq~A4b@{H;-K4Cu_|5ZUOPIVf?fUftB
zk@b8)jOFmTF;msQwe1KXg)MC#4I?6<vf=59_?+6lpLHbVV!cI#=#$!(%c2GWDS(}*
z91#guK-@C?3wCGdT#&+>W6Dr%DVc7Zma_kATD`0DR!skAKHs%x3?}m@MtJfyhC*c@
zHf^G<terCZWQx<JoC4T~cGT73+Ln3DD}Ai9k+V}AQTy>K-=kbQluWMjL7Vj}(SGBF
z2Kv=xtBC<W3B}uAVA%s~ff6O_?*6!3PqMOEtvgjmGHG+`OQ1H&<Ss1;G&*aJW3n;V
zY?o8Akwa9%x@1XW$GBS^bfchcs34AZvb#tPwO?}d&GR~iz)?{#hJvQdx8GR7J63Di
z54W=#7$ab}h#C@fZu@05k0MzvJPejgvwJ3FqSMTL086|6aeFB$i$*ETdm9UDb77on
z*lw@v+X9{MoPnP*>6{t>Z5izHce>kWSPJtfu!>WSDNJ4)pTB;3rdDmKW^yuxPBy@)
zqOGA{C8Lq#B(pOw-%nJ5(70f-rnbaP%IZ8+fo~zX0oz5{)y1H}h_g4%bz#?XzSI`D
z2qRB}@K>WB=Z3#N@Y==hzwgKMrNa{{VqqfkqRnMWU8|O(oVGH&XN<*lf!0ve0@rZ5
z4R=G*j6I8l{^)YCh%AzY!d1twL_E~6b9yy$#{E(33=fSPX+nqn9bjh+Qn56)21+-<
z<u9sL!xzEB7_|f_kTdxj5z|do;etXxd*BB&8Mg0I5IkHVLV2&%`_+j%?K+27JztD-
z5QpzzRpxdgA8xHVRs%2sr}w27*sS*pCR*ieKsf-omrAkU%ryWTg9H6qhkuf#ve{e~
z3z3fm&xI1Oeb7?fc|f;u|ERCXh_vi@Pjz7tSwfa|9Xx|cJ=sGkHZd80{__vp8NMt8
z+E44=j8Njm=hVX07&Jo}^1E#+3FD|5KWvCk%mWEv=xrHl_~{}uq47Fk%#vDshD+;(
zGTia%Fh3sFf0>TUtsm*?haerGu#Z^|KI>}^ZgGZcm!#qijq-Gw4zGwn#m{obiWV35
z=enE@6k~>SozUICO=eTz4DL(noh+!X_NrQm=30mt!%wg%&DF)nBN-?yX0~wy=>(1=
zmiIC0-9?4h=+C*&gKshP;|4pGTm#VL+bSE{Bu08hU1=QlA<*0$0Wfs|8o~6ZRpP|c
z>N$c@kCGbr2@vit;i7CKVK^2oA5eq!*%{^@`DXjD-(taMcc3gV^GAKbqJ8uw)a~Q2
z_9wp}G%|{yf|X96C`<_S9*N(xvXBZ4d?XEme20yQNMy`S8I_N(oc0SXxC3*4-E#2{
z)7ci~V)2C_p#{ox)xD~suvR@S@(iT&Sur3|{5!k6A+Un=rI8Cwgm4eruwk&FpoP#C
zvHbBJP4Q4yFDLg~uwU`>mBH3V!JyCj8oQc$?%iUMy{DYqdvIXjx8z4LQb%1ILN>8M
zpnpvK#qKt0G^DW5YqZ>rx+pzQD(-KzI8VBKqs@mRp8?!;-#DN&VY1V4;8YHC?_fjS
zm0T{lC>Wm=fC4qf#q~|a+~iuwshHy(gc^c@R~k7joWE8e>m$n{@6fzT8izH4E$KD%
z&^P|tX8M}UQ7e4YIu1RW<ebzy$dO}_xbxgQ16P6U5`;k83g@><))otYYesnHMtK*f
zQIjZ0n_H{R>jI)6KkHmWeL{6cK9oZ{HFa<9OJ=<K<)z2C`#0*b!$rl(_I{nAs85v8
zz%FB&4TrZsTeFA7T&;Z(+EG%382G)ygF{_KSOXdU6o61QyhLPVX_`LA?|a7&(!jr{
zVcH5h-rGvY#77~qDZ?Z(QSi-HucO80*1fKK<LlFZiUnmbkY(=%zKTNgYUgNKwGvVm
z5GRigE3rk*kak9(o_RDELmEY=t0-v!7vieA(jmGtNwzjT!Pu<N^(9Vj{Ik3>!-M#x
zu{l_NE{CNZNSlt10huwF*HmiS&pN6>c@u+s?l6o+Oyj}8V3r5vp*msQSK~iami2kY
zb<Tg;Si@wzlyF=V$ip%2eIoyJu}Vumq)xO(Y`F#3t#8{^sVYXT<y@$O_R1J*Uz03f
z4dQhapXgU8KlLhLrEIF282Xy#fs`>768c3TS#?|5B2>>rEYR!UfV_OBh*Q%vFV!gx
z_vaHAPaO?vI_>2PkdeZHUimTS=O9hN@Aow+WrKW*sw01R*>Wx6;zuWgCRVgcveus`
z%U70a$t&f#jKn86#5q1sx+Xy6LtM1cgvzJrTCMh61?)1jh6J4@Utl)@&1|8;$+8zd
zPwIUAGxGl>Riz)K>f~5x_+}$mt4G${>W?Fl*F(<pV^8YRerxc9nwD(a#;|`YsKA7J
z)lkB0Bhrkz!!nf8n&@d5%55I;ovd66!S1fVLN1otF#4-7a{%45#$V0KTlaB6Y``==
zd;>AGKK5yQEQOz+USRnw#pzT9F*bzz?DvSM>tB1v^#))fm57E3?s#qGxoi{yytH-}
zk={P~vWwmHh#_{*P{pvLhz1pQhn+5U1L$0>w(j}u*p5`7#536U6jRV+Fx1u8ss)=q
z5LsKMsN^g<_9ODNvAz~kURS{T)Yd#OwGz_eJAA8KgZQ<CqFH(M^y47oN1NU5*u9Zt
z&4F1tZH?>$uNqj(OUepSsIRNDRh}hO5labsdRY2?C_!r+#qFTO8|`&T%7?DQyTEF9
zrP8_GyYA6K_V#C+DyIHU=G;tW2MMSil?6p_m=CK4#?PbNX<UR5bZccSp=4wSDqUKP
z$}SyGf3Z+Y06jjLopp09`Q^J67h|}{`ld1;#NH4~_CZT>T}ZGbPYJ2*Ew!zq<mZgx
z2oG5opBY8ahBUc8I{9ei^kXtQWP>y|)bhRd5_t-XQ7`?=pLL&%RJhhZ@f^!E&x1`O
zP4=rRTz0#>N3BLb?0>3M&t>nxXk46=Z5X4`V1Ub)VtjgC?`3|SC^i6s4P3smzQ4VS
z&U4SgIeuGGx{)0=DPf}o6Z<R%t+sX@HQ&rnN2}}YpLv%Nq?vNcSO&zLPZe23H9M}7
z_-v20#c5X;a63*sIoApYEzpb^db(><rHcz(U!EgIm;S;pOOT<q6Hp^SqFunpoQ+iw
z2l!Qi2>0Y`E88CO=jy>t*XAkc|J6*mxEh%?&PmW-_%|!}TkMXeLYL##$k7|Xb<T6#
zDOIgjGxK%+Qmh<Ki<4rl5oKm-E7I$fh0*v^zLVWXYUnPz@1wvR-r?^B%N6VZ&u){N
zU~VJ(@oFI{bA%hzv8{_~t<Apn{yN&&AuizX*B7ea;KKt=mP290f<+RrSN%lPH%4MK
z#y-CEaEYip_f=HBv@+dcoYwI3>0N7HDQ476+T~4a5^8()VdJMDH_NvBmSzzY{^iS@
zPeU%J;MH&;DXj~HfCkI@!Kf>dhk^W5_;`-ylwiT8fh%+qXLEAvCq7RHi-OBuUtiV6
zItTioQDuLaaeiGd?U*PYr<l-SsG?(i+to7Z%EI)_a&*2@OO_E6^B+qU_UUeE-zR=O
zRlWBs7x89Bs(!QW*yB6&WHHdV*M`gK17=jcB^)nKwEJ77*GX!E2yOT7tPZ>se6Zv?
z)*_%g^58@dJ&<Is9gi@U1uuG+41nowlfZ320||U$=lWJo-RffgUw@vUNvu6N^8{y6
zcjj*wES9(@7E5sRe`yIq+hvP==Y;P)+0=|tf=e>x9YMYF7=a_2!IM5}#{C9kUZM*#
z-tF?*79vSoLau4cq$D#r4IqGFGcuR92HclR6Kx@M5ks&kPEE9IkS_<9YSg2#Gaco<
zIS*`M3P956qRX(Jfv5ccY_@_Ku+1iR?!?!X-^m7U;9oY~e6MeCwswke{GIpe#<n(2
z5J;5?-o#&aE1LPl5(s7j9@I>07eA$Oz%2EL%Fc~IDutO)6#*0x9~i^!w_8EkMc6Gk
zparQpao8Ju&?L1g^@zFxcN=sebTCt|MT!1S%5D7CNnzQ!$`8)R%AdU(4#BliRGjK*
zzHNA3ZQe@vXR0IAD58Xlsu4Ol6|~nVFwx^5S0w#uG0{SZ8Y0M7(jwG2jN4YAFYDz$
z8B}xHGpaQU=6-zCEp&^BP)I~6yMuE)7Lx!bVT6hqmLV-xig&UcDIlC7LPiMP4+=TN
zL{uC@m!3}@T1e~?M(z#~&euS?Rqn%cqY=1r_+A1KDYtI&KHTKDHD5x8CvP~j6!bCv
zhewmvbs+M|&Te1E=|~`-*rNtC;d_A-eZHWo?r;C_P}6@Y;d1DuF-<nOh)u*}&6D8K
z|CJ;J<~=bOuxR285A6&wZ5dPZnyF1Y@s0PV87G&9j^p0(ttdD^GX*}TA`Ee_Fg`Lg
zm?K@3g7r29@zTr_?5(pK0f9qPb;Nz9(Dr_QG^hWIYdw#aHl+wd+h1%29t(V9wd8BJ
zD)xoH(JZXG&VfOm#-giyq&G5z0?uY~Ly!f}g$$-gN;yp_0@}s1K^TE?*;RYTeMki>
z9n@9+J!z~!ntAol?_br@Rg8B|Dd)j?B4H-AYBwB|DwFA9C{w#lv=4W5hn0a}fr)@g
zu8crZvhDW^P3}%9<X2XP>sR6HC@vp~Ix~LXtF_9h0P9ne`uQe$&!6F=01UDV^xGeo
zZyd^XtR1`yh+x_}3_<vrBKPK=;68{BSU78tLi-TEW!cJ}f(8&5Qo)jq;LIm|THS8E
z|Jl(=mU6;$Gg&zJd;WXlw{N4Zo77E)S~JJmSi5^zQzZ1tk?7y^v_inN{=dl$HbxJl
z1R~l~{gEy(!B0wR5Y$!%*ASEW!a-P|X#(h@d<28(BLd*V4ncvmN<7U>M-!SX>Q-u}
zI|b0awCyyx@4;p3;d43Xg#G(-Qf)qXjJ?P?iQ4|Z0h~>7*$Ur!L()TvSu~N@7fWdL
z9_2)ne_Ti(s8^M;`+uWe+0n?vilhgX&SzLT(j!{h4_N`WO(%W3OGg>o?5ks63R_T8
zO&F0fD~1I;t87rP&OiJF3Lu9ivn&*k3EzHe2{4B2FN$R?$@`HGwbgG!wn%WtK`z;O
zlA#|ycDeP#Ub=f45B9B@bZ&QSn13we>3veM+=}L6y=7=B$=~o$-)_eS=lENPqlz6q
zM>*J&Jv6xSpPysp2-*|53gtqq7lB<2&-%j9(B5>}a}*{Pnb$gMD;31xR*v!!1+6PL
zllMHGUz`IUU8OPmqv&?E1BciN2cEkoZ%)>yiFrtQ>|I{juo@Q`Xpmw-Qq5*jy=|-Y
z+f%^<M!NggO?#Ds*>sdDiP$Bcf{0&2O8mythP)J`n33Rn<G~tuuQ47@YdaESh()WO
zcEfYM2~=WlmG!-6!?pm?<J~(kIJ0t{yb0dxyWBq{uL_MW{yKT{WGZc`yKbUBB3eM9
zOi5H6L+T0N4wLD+)tQT7sRegLmRGfOlwxJAns@RY_XN&sQ4BF6t8|nG7m^Jvwt(gQ
zx(0)TD#~ZU2T<o4p20TPs(tF1)jzypb!LxXFrQipwMlT~><G6I<%Q;pXWwJ~B!rl&
zEa*c3!ADfKBD5P)y!mcQ`On5b;y%M-+D7l>&6@!IRYHIfw=Cp6%Nz3abbpb_yl5T&
zvgPXw9ht*eUx5j^Dr~@eHMlV2nIcc82p|0HR~e`Pi(en!SIGpUt?$#*y6x&$BQPqk
zLvVp_0C(o`$MrMj*j-#&Yj^VX+IN^keS#3<$035ym|5ILtU1$EwM@z6^#Knx#Pq*S
zkal4YaG)&zMTTp!ieGtLw|)28aUcS{;9uKJEI#fpYV2m{9iLXZ@6WGOJ)GTWJrmZ!
z>xT&cF)sX8ORv@^iVi6>t^X;sp?BZPUwMne-6$A;aKYGWA;;@%@IBdH)Y7qsVsm(-
zkOm3n^=59|N_+y<CxBG%F2~)hK*d#E%DoC9{zqs3YaawNS&(H<#;R?U)Oj7FJ^+er
zfm$I+ElWo%RS!!om=5*bEa^PzVRO*;oYpP+#RJsMwOq;)YyZaGRA8&trvB*giR?y%
z)Kfs(6mKXJKEG9n6<(o?7UT!O<IsJZr+>TG^WJu@{P*t_jenImm?Y7R_F(RR20F15
zmW+sPy^RyC-(7>_eQdoyt$6=aZQz+79@flNPAqur&XfvdR>j@8@Fj`7=%H~RV+{r}
z%S!n6kSwei`CL>8+PWQLnrXbYbSQJHtF&KQR1r(~DnpV<m6Eo!Ll~^mULYm-vf^C!
z6A#}G_!jM1X7O;RJnhCoIF+3<+*I*N3u~M|!9FQrm-loLR*sO0m2wc9e}{ukHuKE+
z#vUpjl#e!<j6Mz)XXDn*n>PV?7?8u<EYayJswJz*cZjBBu`bN<iT7{MDQ~2{ueeON
zS+!QF=*YC^R~Gq6o{)WZwGzt^6v7ZOC?msNmB!ih6|1QlB;f#f{|9oma_Y(si$#yn
zmZE%OnA{D=*zrg^OG2Nv3qNaO@Dzn^B?ArXkL9oZ%Q}*GyPR4!`NDQu^juE;NXmQ-
z1K_Cp#0qPBcG&217h79ckh0jk>!0%s`};~@=F2++kB0xr4A3VE?WT1)I={f(8Xe-l
z9><RCp$@}7r3nq>8)q^OaM=XHphklSk<IOhE{5TDIO*s}7jtMjNv%U4l&SYgk4GA8
zhtUUzHK$u2q7n&l{30v5?lW=jIAMkyDL9*yf2&GRq}ZRmjDnW49xvMo6_w@!^w#e^
z&I!}X(|*IcCOP`WAmz)#qN80jXL-s&yu2~XiWw{flNh{R{*P}qH1wZuhG{tLS`WbD
z<AM~D)SxagEZ1~?S`1{eSQ>*T=NWECEeT3v$M|u58e41!l`kIT(5|EpphbJwAi(Eo
zhz&1+qy`zwhDFlqt<PGoIqc8lEX=Q75FOED+da4LXn?yQOI!yZGYk^Wn^|hK=KSO*
zCCZ4Py&=OVz*5aGznF3CYJ&ZvO5I8Bc3(-jX;b%cs6N>#8rVV&b;Cu%%FWg(^K&?i
zAw{598K{{i;NCdqvxo!`x**6GRZwHoG*P`F%cMC2h#90BJnm1wPqBc5t{WK@ZTv}j
z`cPqEp6Pk;)%Og4_F`Cj^?>kTK;f*e%Er5>*~&o2RVBeAR}WWhxA4{R6ZkACf=6y}
zc`XQGAjq)sRRWB2Pg;JTAc_OXWWlpniT}-B6?8k%(UlZNLR?8rjW&Zf&rN;CzuwlE
z<mUODh{B_vZl-Jq7ThsJcdhHGhS2mJBY)St8w6gz@NEKaH^uskA3tNLtflhh9&fY>
zXI%O1M_TRn1^*$vGZ0{K?H_Qzbf4RJx<6-MA4h)oS<0-V-IOBBo*}Wmdf^ei)}TCv
zHb{C_o+bg0kFcxrkT&n8o;_AJbdt7*U8TZ>XXNrM8zdInSoh+Zj-M<QmvDXxQ{LsW
zE)4byB79jpL>@d9H?0{L-W35$^t6NB_kX2s-5IC`4S6H$$Y-G7pUp-p`7LJV*0tMb
zCO$attTwFQ*kMAY=^26SwM>JGKo;x>7$%2Qw1{(gilCw09P8KP(3Tb#mb7JqtH^ay
zgI3aL^eT48>2II9hQEmX3=@i~Y${;xc$BAfQ|YeeQ{Linom0rYKEc-sc7RRQTQ-d(
z%t@7_Vpr*g#g{e#QZxv&*Jcho3$VhwZ0n#fF$s!ThquRrh`{yLi=L-Z(3x)gvLnII
zk_}YVtwvN^Laz4(Iwf>0>c_8ik02<vn87C9##p~DUTz3l1pQi+nNgu@{dp8M0Q4wd
z_M*>T`Z=SHnF{GBt%am6O^b}2i9}UBawFsSrCRklpG+)6?dCG--F(8J#sb3U+?9M{
z+RuaT2}B<APIQg@byJ2~k&)!j9#5xPy0@f`&l+BD!TbW1?WLHh2!Pjrq?zB|ESRZC
z8R0BK4O*v>J0&LX4IG330Y2FIp0$>tcHSC<dmKv|x?a=t|Bqnu2~WX%_~x>XL@&41
z30ID4YNZ~*4{|L?0gwP=iLdq~5zlhXQoIaktilQ2BSt5s_ntLE>}Hrrvb>$0+w03D
zZ&SV<DEfAl6Z2RiCUyAp?|Zk5#|?{|1xO?ey8v>0DSRaImG=IrY+64-v=C_}V?LVG
z593WJ)8(-G&rBGKQrsbUJp^c4Hh|zN8}ajJ^_^(ar_8C_Wekxv^`oq?9|lo9kzII}
z|BI}*V9V-#!ge1*O1itHmG16RIs~Mo8$`NW8l*uw1*N+~0qJh(?(Wz({{HW~kA3Vf
z0MA<Mj+tw&Ift=NbZ$kC8^ZToV4A{8I7MzFOHP!Siy`8=O69&ksySoJyzPiPy1oYw
zq^3z{(E8`5%?DXvBq`p9TaL$LAlde1Mb_gGD8^xq=vqk6U3SoK@c(iUferLT;MD9d
zX?@5>dEN3{lFR2si^r5CAco44lOwjj`&s{^!eUTEvVrqep!zoJ0ou-w3fCC74@50c
zrbPo?WvZ6=<oJ``p!OC^sv?-e*{BuwOKI-rVm8xbBS=y2>9)vQ5gq+!E2@vHvCWbd
z(O_B}K|?<FOHZ4SShiOjLCa(&Q_12OA}r{>uPtM0oAL0UD|AUA>6Ih(=*fFJEQ1++
zq_inHD9;D>HV*Xrs=50RVf&*j^pXM0!PHL#`nFH@0-$(JW#*cdvG7Y2jWB$R<!xv+
zg{F(O+|uQgUDB*@>)nR(gPw;bttBJ2z=d$gpW(%+JGG;jW^SXzMc%h*$n?6j-%j?q
z?>PK%i=?^{I~jiS|9YVQf3r_C*W)17=bnoO?2B0xmfRd0+*U4+!|pedH(FoK%O?r~
z1#duCgGz#Po@lv|A!^3t+c1rZq=mc{87T&;gU3L}vnJuD(?Q3lrEQ=~f{*m=LSJXJ
zqLuw-U#FWHi=d%4J`wL-TI`?O!rSgy3*+~5*^`tuZo}MS|4T$?K_a@?Mn$spH!1~4
zld`!*X2s9nnjiH#wC7u<6y?()-fCHX?N(w5+r`mpCLif1_0slvsJfX7s65UWx3jPJ
zGcnGsr;=Im>BURkPX>PpDn6`o{BU>po2;KpNoPN>%GJ6KuQO%6=VGgvR!MzIJA}Ie
z5gC!bn=DY)gwoH#gp)Q%bf@U#Ikt+}jqyC<)<7oJ^_o?w@G=0M3YB!b%LtHkP(B7C
z?NPK)sa<5j-(>8MMzRA4t{%L~P~9H_tGMOf&BdBRlPSFtr&T!n@wv+xpT0}>67{|C
z-La4N=l(Ee0shiyi=$nMJ7=QysR0<~LLc_z&8;7pEO5Q=ksZ-MX5<F}DS%9wc%O=(
zOIbpK^Td#?eQE3F!1Ny00||>ww0Aj8N)B2SL}`oXdQ}|l*kqD(_#DAOI#@#1lX^^|
zJQO_!6YKlJ8yt%eL@}D23Li;i_XIt{?}yYeT+}KAH)VI3J9j-ttu9ntHtgLG+ED3A
zI`jU_3(cigs%2h|eamlIGc}|exg5gm85L-uK`GCVg(<9)ZilVqB>SIZYv(~aZ<t2=
z#~f}!WshuD5%H%>DR2_&S(%G0UHMp(9AC9r+dAy$!iX5LN;{_^g#PfcboEcSfn8`B
zh(Je&REv@AlNvw}(XRt(N_n{7G|!ccRuYKQ7n*fBuZ5G{@2-@+7PuXTV6J($wEB5Z
zs7tf5)a%0eC+MKsUXb8VSk$WIS-8!`urQJD<K1E3N57CnXOa#uLG38ySz|b+S4a2<
z-6;4?G=}zYd7^)A2^5ZhNo!x&$0%lgKqVL#zH{)jc0Wr4S!*|Stnz@%09Cj>%^Edc
zQ#BQg|1A3YGB2yS_Y1uNLSEXxGQ4dMkLg5Ycej(#&{+0qc?^N}ChU(#(N?uC)-|?r
zruAj`Cb`a2!s@DsLN*_BlPG;f$RX0-FN>;yx%lks+GYxwueZ~3{LLmXZ+FDMwYNUn
zg@szy)T{KyLw<Bn`VtbZJ{LQWcK%KT^m5RdT7<xmzed>oKF-Th*jc5^{Z@xE-C@ef
zmeuz5%r3h3!_}E)g|ODYeKsMD!=k@)zpMy@cJ(T~4O?eeI#?&L3*vu!okw)nS%&c$
zO`ac`F#PsQ^Y&r%CCTTWE@{<a1BXGFd<EI`cVz6HAbDN!m(<}`drn?Se%EesRfakk
ze2wgU{!OO-f+b(Y3x~<e2qd=*;8Y0yz{qZ0vI||T%OBk>k3%}KE^kS|rYOGQKlHdA
zeAJvytD|Q7WW>r@!}fYwc(M@l9B~NfW>BJSXg?;&P2APyR<@qm8ymDFmBp1$`A%T7
zBj)85l!DDZ0XF*z{6-xk1KA-~A6eMXkK?^?#qIbHsy*VYa{NpXg0{|9N9~y<e7Y!6
z9N!4~*uLJRsUPwWVVOeX4R@`BkXps<rJ_)}>%$HB(*)row^B2i#dP|w`>b1;a`6ea
zZ&#Pb)Z59vbV|S$z=u0}GUPJ!Zh5Xm{6d_leC`eMi&q1E>A)lMrMA{^<4+-KYn~N1
z8e&E1dkwWu^4}QP@7)?BnQ1KGyKwH$C~AlW8nb0OogQPq#&YE~7>%@^bUwLO8?6K>
zLJ6>}_!NTsTliX4*xh5QFvFATtR%bogDP@2gdon4xAe0S)3cPm-0yS%`kbkr{g>MR
za?s4g?IYin4#Al}S?m5(5Kx=ZShzxa6tQ^-|0{N5qWy(38S`=p6ri!gtGi%3zTD2V
z2}rpIZBEX7iTEQ^kS<N;<92*iawL>K?VL}0U?&L;9d0_f&gfIU6Z!LDh*>3=sfa^}
z_OB97D$9SBU5xjXVG;0EpbK%W-Vtq(iB>!BI|nelBgnDwfST?q>tFbVI<~WzC6^xZ
z(W;<D$Eq(eL*N`g{lNDgS(pEl$IsPk3u5gL@78N{u9>2`WK6zg+jvb43!gmn)V^-I
z*P2ln&U<J9ANahG+Shh1bGXo(KRHnmXAGa*MvjL~CoryO_SG_$(gW$i929v`RoVBk
zhWkQI>$JZOJ8b8La-S@j8RRaJrQYXU=H=SkSetH1skczb#xqRB$DAp?`0s6K&&5J@
zq>`?-H>JrAs&*?y<h={cuD+iaL_1$_8Lm@=jT74O885^rE>1m})NidZ6p!{&?OXQu
z#ATXKu-%R$2dcimuK_uN^mnPg^To=XVhDyXnf(OQ+3Iepj(;W(D^Q<Szdhs_n^k~E
zflcWc7Pfj~z`ot{MZdza9JZkZ%T%5sp9IynnjtHFcA&#`rt0x~;-HMldM(O)1kvNx
z5E^Rh6Jyy&bNlhUr@zm)TQCkw;l935{I!{n%H#ZiqXQ~?3>$4X_`25<pa%_3fd1gv
z>TM0ijb9q`fxi1+JJhtV&ik~lye>IR1GVSv&sJXv+7-_Y$4ShewdAmk33I+**1i>!
z<sW++bC%2IDi>$=ph{3mG$1l|Gr0BW@;7eUdmHtw-8Y`+st7E4RPFLqTievb6z*O@
zUPsGKvm(2-X5-4Vxmdb=x0u2l)X%8|XV-nwcNikQJ>z&BvzkA6c7uw6vFl%ps>UCc
zv5pF`%p;=$Xi0q}yeSBEYs8fg|6FCv%REhpd7R*g%^5<;gm6qH^(>d2xYZ)`R02Wa
zY;*2nM@QfDVPL>#sRn9~+iBzAs1Dc%Sc`oJtJb*>4?L`;J8#5W^GJAif+-hT+Qf27
ze1-9YIt=mk_UE$MazuspjA&^UDfm9jvVygW&-{u6XfnuNrn-Vt-&_n=wRGE@WWI2p
zUZj+V#^6HFd2_I4n4ba}66BZLU-(c-^tQBif~~N&dRr;(k{E@aW=d$uPMF`?A#ZpQ
zgGj6><&rJKwZZFnL*Z;nLdtm~m*<rYn{^1r$cqWeWMyD)`Tu8`Ugd2h7HEeZ3b2y)
z^hd(#Dyt(NhpX~Lgwxil@jByTZ{0l?YC;`Y(|bK-`H|5F<{)=TJy{`YW!+Nvi4ttD
zGd&J3*3+AJ#5^YZ#oY!8rB!fn>-QsWJV&3beZG5)G*B5S*t*(3s6L04)8R&#8ZXch
zqzuC4@&p+UiV%8D%wBU1!QgIlK9?7x0zRgW_q+o!As0Q;@f_h==3~2HfS3U5Y-@=r
z>2oJLlFO4LTRsV9sZTLaerUiMgWz5!gKk>=z@^Y$cYo`Rt64t<<EYvZ^NcGn3Y{cv
zeJvj&8jo&CY%rqhDw~O;JCI31PPD~PU_IczbAlSw53fE5IAFDa2ZWs|LdNvyg-`FU
z?ll($$oRjSuug6zRPq{${;O&<*3J@5G4_E~D;~%BD&k!6b^K=|{cBpSxmZJgoM`*w
z2!cG^E^TeU-ek<s>a%0v%>BEw&9=<J00M3ioUi5j22!dK!jEw%b~dwyOQ~G5-y+(K
zOvCNZp`QyO->StK)H6P4sOc_E-pK3xlsEl-h_ZB@?MH9XgHQh%<xk)!5YvOh<j&h}
zk$fr&oX;={SozzV%_-B6w`8y=s9^n7a0m6)N<kkd1#~6Bp@bof%(a?1*|*=Z`8B`e
zS=Mo+*@9IRe711c`thE&jE1L4k1g`P>L3X+0T>UJHpcJv5skvu0?U4X(+8zCg_F?z
zn_gsUHe}Q3{6hRYSq7vILz3WFUn$&XQ~xp~U*8n?q4G?vlZX60rt=PtX9&;N>>qEE
zO8<$ZD;ZSLb0`1VS<WQNis}!NKRiu{#KLtG$Gtj`VHbT<n%J>MB*^#|@?odjVC<$J
z#b@DmnPf=-cYGJA?pq^cs$5Y7*P4z?YlVeZ-Se6EOG2-dAlz^cxlLgttU8tx3IWd3
zS0f;P18{A~0UIm0VSJyFS!gF=F8lF+4$fENS}&v_MW9Z;7aMZDyPIm}9#~KHjw72a
zbOPaN;-j!z9K`_(lrh8S%h%Bzc$0!UfkylhYErhAOYxwy$Y+7?^PlM@k?HPE?3Mk{
z=cDgAiR)=NY3;bXjXTXO5M;GWr20z)BS~%t?nOyeUy*X8=Ps^iZb<L=8$m87zEMoh
z1W=}nNL+2L>=6Zmk~jMI_Dg2ZS2uyfSN#+e8uM83oGG)=c=}LaefLYq@dxJ<^>BPL
zmyJ9w_F-6t3juq*H2Qv8XIQUd^%IBUOI&xECJKb2qFdNQ_vJ_uJ8LdGIpg)qP}lyk
zg22~)S&H$$R7~0n#ZJtcoLztLgdiZ?p#Q^(Y>5-;xC5LBM;QA!Gnd@*7%9Ip{pe|W
z*;;A=fmIz{-#dCrN1v_0kH?B6%vi4uSVUL~U+^Ah;Evs!ROt$uzJ0EOm9;En+-^)A
zDjFX)ml2Wd@dI)HuQ^B#yz>29J)#ozZ!e-~ovGLDuBP$ZKjEkR@F^A^ettO^BIHU?
zY0dNpBiK?w`lQaMV*el`dI11366Ifs{G3Y0KwK~Sr^x&}me3F?KKbO6z+nssaDqdS
zCdVoSHbw<@VmuAw8_vlO9|rcAA(^JarPhN0eYgD!alA+3$eL%{aM^S3!4&6S#qFR|
z=oWv3KJ{3oS93Q6c^WZ9ndfQZWOe#QwaDwSTGaV~))LY5gkU7MKL`JepGgW?i`kXm
zZfWBy1;vF-CBRkn(ELJY>ih2SR|&7R!BpbrgG@rYQKyuLgMUG0;n#p8ONP+=>OY;;
z)WKsz<I$}gu4rcomuYDc{_@<Hk{s0bbB0ydIY%~L`5x^z>T|?PlGfVD;~%fXlz?;Y
z(-^8oN$jZsyTL#Ir#5DlQ03>2uGpa?yB?s)^zf_Hj(-4$;Vv=!eQ($4t;}z6FR^=v
zW)E8cL%^bV4-4L_iBF^R3}g%{;o>>QL_fC1h%zQmiT(bwWivAKaV3t(vg9$Mu}EXv
z@E{JX;O6Q@=<-)*$0a*WA!GNQ?<1jr2}r`WtXoB58llyI6S?Q;&nj_ScM<A|N1OA|
z2<xbPQkV@)-^A%Izfx@n24u>~iE?f(_)d}6W*JAfjh;ec>wQ(hf=ZsA#PhFd9*e@p
za<zCJk~kT6Y-tP&{GH<TTYes7`w#rWE~V%26FI|#j<8;V!MC`cn_KB5Y0fisdB|Md
zr+3d0<~|qB8x?TNlS81|$nvHs4?0Row163OA8zZP8B}O?33Z%)@QRHr-U$S~+Q6aw
zc;Vu$Z}S(#nF|@XA1CDHnr;gdZ%sHzEkdB$i!K`;zhewmY2{K8{E65<Sg>3;7ItYJ
zIS{tQ;j+=v-!_a)1#jj0Gc=3&!(r%pCcMh04fA|psbo}B4??GleRe9-+fJim85#?`
z|CamzNhMR^-kC&uWbX$Ufp0{5eCO9Dn5?&RwZtlOes5ky!>$vk8u9#2n8r}yVrZZo
zK4QV+sk>=6#<CJTAl^kU-CYz&dp}$lv9z?hhTC5T7obxn3|_CaSk;!!+b+d(xEgRI
zIrYWZgD+&lVg&0sS6)p;rYj$Y2#r3$%aYsIaO(5s??U6xyDfaVq)U`xV`aFc+tyH-
z(A)8-E;H>_k~Fo8U~FY~eaFty$L|}zz;hAMB)>L9{WkeFbu(1nB>K4s<~-H5n9#bh
zq}VL<dBbIiK@PnB=l!gk5539EBmB769`Jy;W!EiKDLrm#%oMEn>RRpxFew3B0vzM=
z7o<Ev@LuSji4A1@)Atb1X;=jv^BgAzj|CLP0avi8kJ$tCU58rP@o(kZ(65f<LC%=;
zQU<YRz<<rW(U}LLlIrWM!4ChC;&cIS5MeBMeNpr->SgPF_e(OK2j84T+x?f6S;e=g
z_ZufQURGS%+_Y?;`WJCwd2|9{;xnG_U`{AOrSOQK9j96U*JIjRaFjJq#uw%eFNlHE
z9cVW6r8`0ilr&nemk;Ye(ukOI*kbaIORfh*t@|Zb(y54_loOCMcp~MhA3utff9QDH
zHJhULHX+bqdveVwGY+%AlmK7Zk^%ev_9grot=*1Wfs4!|O%+}^GSyaQF-A4ppXZFR
z!vL7NDNNk$7<C<*{^vT~h@oY;%iQ}0f?$6$|L4U*NCIyscw2t|iQ{$#RkFdwPD`~h
z@cH-$ef3O`l=AM|Q=a=tp`$@!t%U(u?Rn?-b$XBbC5zTpp}CE)AVtmr;cI4V4IUAf
z)2mFk@p>;hs-+pHqjz$3nz@u8(&|4+Xd<>6puF7)*%7q}Q8ZC|ar4b;Smv9^&;DPR
z;AByV-%@YG<7?!rsCFRK6Y!8+b^8~4)--w3$c;X~6WO`gGsWMP?x?SSrjR$Cj{T}8
z5SKNZY|GevA~}2KJh%RJ2*i3A*zX&&5`<5?-dWXN@VC89`&9TMu=<rDe}u0dAz1sb
zwyy%t3G?0_UF_ikRrAB7_TC4lgVVZ#SGV(quJ&locJU`BZDz&!zhZqJ1%2vU9WI`&
zGp~Qydk)BQhc$$rNL11SO{o0`S~=+aInUaio#{o+V^jDDRG03u$NQ9bqkUzR+sv8K
z^5DI4UGYL{iLTn>Tg_NrWmXVhrPh&1%^8>@b?D~+k68|b2)WvemlVwM{82g%=BB)(
zl<zPE5(G`@GZpXXo)xab97tIi>*L1NlWEMaI8!ci)5<XvAH;ufl<F>Ou~Y<l3i3tH
z^Z-Z3W*0_5=&rB$GCWVW8%s?`My)Cu>Xs&PoSVmcfpe+2_iZ|JO<b3J+=a*r^3F-B
zDOF>Ur%{oR4*+IVCO+3RhN`cc?|3s@I>S|HwPrSwdHUh~3{{4}OHos`nxRfjZL^>H
zWemcK7_WX<+bh+7eooXB=3-$xY)h7xci4;G6U}VG(EAx{JGa%aI*!};csFq4KfdJS
z##RJ|rRN2b4e{eOT)NpFS%OnJ?U0b_?7b;WtY7x2Mj!-XPm99<m9nly4^+Qt3ZDA(
zz2b3h5A7(Q`nR>ge=cyO9p6mvu?BkL8_hO!A2P~M52_8%KO!8do%vOsqRHKWhK0rp
zmgJ8LrqpA>65aL&$*6C~{Y-evUlY}uh|kT27zL0J?eiGyK&Z&}AxxrFWXmlAEcbNO
z1gal<Vjkw1m+HMITb6he(=0E<+9czGi_tqraqssP%WRHjO#dcx)JM~$apJn$A3>rw
zJDP;o^+;2TMP5!&u9>qOv<R0?kP7sLC>j+b_M<I$T0D)FEqS0jDdKEldy+EHfvzEB
zlJexo<71c}X0Abf_~bN({PT5LS@i5Cw!S*=Q$7#*A2@tz>$IxsD<T0X3D!D6U3^+2
ze{lJ<!W6jFW`4x36|I*`aozN)sPW{*y$GT%S?3|-RU((A)lrraqk>10rEgb~6^o$`
zqW(@z(T@J>tpNfwEUQV|7Mwz#UNtOt*5UclKHlsWia1rP@O_J5TYoLgIqDoqbTiNA
z4*1s7-l{y#fS)KYyj`XaH|}tK`4R9t=j(9be18V641Vkr8pgk|gT@R@XCh}}vgTww
zy79cYvSj#`FDlsdx4FOXHUoFg4NV8VdH(36RX^M<>4??&iF4>taBfH4nGEKL4X*o;
zx%SMF8g^y@%JAxKV*z`a;{e@La(uf%>f!y^5rfdlT@TmgkacLTqqv!nw6+?6D~nJd
z%7<nPKSpUzPyW<5)|yw}q+y1W3P&KIP-5i~87Fm<y!q>PgtH%Q;7Vf%hhNO!P_ISW
zPVQn#;jab>Hb8`uwZ+{w{o<dGZl5J0_KFA^6Nwl>20`X~XE2<Oxp5p45fU|H`Co&V
zL_(F=k=EH={$f#TD?EH!N-Ht?$zgD5pDVM!9?->%N#m-0lDNFe`U-;+l^bImNo>AO
zhr}C?GV>$GI0zp}wut&gUe|+{c$=5!q*cqb?##%3A{{4rR#h>*9jz)XO(t9dLYJg`
zm_ar@zfZyE1a<Z12SLi1(n<xY0wz)PF#q&1RmpN1x+{;607wzi{d(yo3k>?NFv{hC
zJ8fhLk|?czFvh4d#(viqbxKM)B$Ow;Lk~g>#;{p^!~D)E10+JTrK5DiePToWCUu*G
zGndO#4-c=!!{ONn4}NhE*P+8q8&bC=*cc>fyTF6%EOHnpOw}#VguBT6&te!6VUVHQ
zUw{6zp_4e<nN;@x{$#&xnOC(dD=q{A;2sk)p@rfUOXEb`o;e*3t*;=Q+QCsgN3byX
z*w8c>3?%ULo$B{DjJC^3xS0&F^fQj7EVa{1<HEsh0~uC}JMdJQy>?Fq*Jg*){O}gs
z^VHlT0pzmHBzgHn^&~~>G0-8EP^mBSa@+lozzfF~F5C}SS@QA<J*)piwU1T_Wmbse
zB09o)%Dz5Twoe^M?Jw43tsg^lZoXw#0qxFDLV0C(aO>gk{4j8+$Nv1VwxJ^mqK3b&
znBZk}N`Jo>neW0@@W|k}Ot2W$Yi%e){Bn8QpX>UdEMPGWu?#0f#`lh?!5Q`aR_&kn
zG2M_Ut=voINAtr;`|l^WtJ>0?ckC5z{f%O1CFHJLj{7)BbDG~2>mF7Mf&g|zOGb|K
z6|9Ox(Xo>K06`3VayE=P;ch<gGzLu%edtW-3x3ek&1tz`OHbnNxzI$kr%l%-0fnxZ
zSzZw=$i)HVZA?_|6#dp4J~c1YNg*R1TB3u{EM*@@=qOiz3P>Z;%-CzSDePc|Fc-?q
z2==T`!qDzmm>LKKRtO!kbGC1SuRIjD82~XTDZOFFm3j33Np~iZ4Tf}(v_-=M8e$&#
z`=oQ~wQH7@?s}@k>RC;cqjn5|0+&+Jakp*&EDJU47lc^r-^Juv-aRzc+ym>En?ckP
zJNlGVFTK88o@edDkV_i8<j$Vd9(0U%R!OjKtQAJvzf)#f?@s#@*5SnLiS8^WhJi^V
z2@G%ByXz*=zr!fA=%5F~SG)Vtj_%sI%*&|ab{WshPkl&{1bsc9S)JiI%lzR<Uv&5u
z`P>R@o}>eL>6Q!dq-s@zhGA$R^pyBw<kYVgYsYA=ZQ>Z<Y7Hm-*J4pMF%lk9T#29!
zzM$XkBShJgN(7}+FwpiKC}$zmh2*ZFe-7%OqcC$p_=PKTLS<Cq%dvbmgCcfmTK)#p
zV6ByVO7JrG3MF;e#!_TS#3wr>QBweqxJzAqTWi{nBvfL}B4uE@Bwv88PBsq`qjksg
zqRE;xfq=_5Fh|5h$~B1yic)N_-FX*P^JKTS>kGEsAgUO8ZLXi?Pn2c^=D1zfLE2ak
zROB>rf@6!{34b?Xg5L5_B6SUjs1is2C5IG4U|G)EP47~0rkQoka?lc2e(sN|+PMLG
zIT7TwkW3SKg6A`d>R|2{&vXt$O_D9n1^{rdT-uhB{@3;ch%`v@Xa^me<uyq6qAg$M
zewv!1@x9nV>MCP<n`SVl_c9Vv5t{WCb^@u+Dx~L)-;X29t_7*PKD5kxIh&j|n2Jr;
z?2rUafdt;LT}$hyxw@0Ks|DUG^znO*SB__;wB&enY_nc;Ey%(+|G<!Di%lU+7&(8u
z`(oPvF=_JWyX*zfp}dMHGCAAjkFnkCjs|u0IIaDu5QP%@i!;|+Dd<=z5)hEb{p7gR
z6bBOz?-WITG<0hb345<tX5$OAWRJ1;mt?p#=@>bDNPN4TuO1EG#>R8}hmU?3h2`jE
ztrWVvN~ol1@+gT1nVbSP83AL{MS$xWNQ2@^--@rZrxV(%`Vz%gsX|Mk>*F1wtx9d|
z@q3eCa!uK|3SGf44F$45Ws2;tfuRW2j3}9#KZ#%|MKJP8kO&JW3rO$xREciS+oNOO
zp5wWjyLGT9HL0rx_>fgZ9J*d4m^fPBSz*vKUUE%hEg-Y02LI+Ipv(&rG2n>&_8*-l
z`1n^Rmcd<@UMi6YEVbmT6x_3wdxRHic5a8zwwDIHu#9Pf?t72E<OVRE(Gjmjtldl9
zxzhx>)I&_JBCnP%&oQdrNnaNW#tOPq@^pW6us-?_GJShzIV7Los!g>~>GdZG8=}DF
z!Gs5jSKL;_&n9^-3-}uD0SVwS=ne!OWHy`f<~Vx-DT4kWa3g1y<A(%UzyH!+{Z>(q
zUk!=^_M`mVfV9tXA$zUJff$YM<8NLQ;c<XJ+}6bQbh!Mm;lphwU>3aQNi!f&{Kg3h
zCYYRuE_$CeJ;63G|JQcSAn4ekWGg2r!6n_2wUCwyv}i|NLD&+NQ0oFcdi@m1fPHy9
zv&$zchVUXdHz0pU-9p-_cldM%C5_DOu#t3a5ihMz$b;!_xkytz8<2#bF7lu7g!@?9
za7=ius;6sxo1~L4prP|$AC!NIL5F5w7c)XWmk6Bmv#A#RFjz95=&mwXff6}k;A{ZZ
zvir^g9x8Z)@Y3xy55)G;Tot`wM+6J7v*`d3?)skQE(oSfFi$XmB8oH7#X&)<?Khy>
z;?8ohxU?R{zuoi#94dUP3kzQY2KKR6Oa}(_T5J5WKnLlqa5yw_QL*l_1aFP$CuW<b
zaD+n1pR_U5gzOUJF3^K41Dk*?gW{?&hjHof|7QD>OL~{nqFQEAl?8BR>E2uqyWeS1
z^~|-Fe#BQg|L~(QJ4*QD?sp@1tZIn)?DIaN{yVc3N-h!SWemCBzc>0KNOv$J>V#eG
zenT#>#%lrJF-XeBYXosR_paLT7vrM#oZZ!(cbsgRUH!KCf4D#$z7#~3xh83Xi3juZ
zgyBZ=Ai1AuY&fknUVX_K<=E$1c=o{|O4)x5HFOk+JZ_};MMt{PNo(P5;h=L=rD%qd
z$RXx|hnwMV2IG)&;QZNeIbyNz)@L6`Rt$_PoWVTq=V<jKQ@a=rby3x3)`w*~vZgA{
zvnRwq#^8|TqL1I$NIz42I2DBtC*tE@#Hl-A4oZPpYl%WU3}ukCkmduXN4i({cWD=x
z6xMk%{m3T_(Lgnnz~N0C3^~a~C>oOwL2E~eDR^f;6W$H+B}b%uL7)TGaq{7!wCU<g
zw|UH+-Fvqu)&3-WXMd|cHV>x$W&Gk|v7<&*<ovJWU>1O9u3R6?WnD7OR+F}zE<Z8o
z*^!$PcA`9OHnfoT`Zk}2=cX~oI_(MNFuKvOudGxn^P2JCcy5dmQe@oqk&H=4Of?TL
z+))O7HDtI}!|S4SjZ3cLg;c?P9*N0SEikB}L(E70<{G|FgaL$cGTqDitaznm=PJ4v
zyso8R=;|{;2bFjcn@aui%6w5lQ~0d--^Ybqc(`+#M(GCFzovl=uvb}ENxqwxZf|v9
z3$cX|u#hpRic$@>WzyCw6v4=cU@u~RgJ7pw`Vax-*(XbQ(`WnDk5J(uF*JjmY;YJq
zOw?S@>Oa*l8%zt=SmdC7nsHG_sAILxM!O{GmE}Hnkm0`k4O)r836Rh9iGUy6JRb?)
zaWDP)aFR-!PZG}wM_xt<g8+}JhR>CSgjlmb7U4xAERuEoL+Wlr>xkF*XApK>d`flh
z4}T-TcV|XLUHy%h@zTrTe|&fEqpAacXzt@=F%fBpf<@jZHCmlOb^^zwHzX5>GhYff
z--!EE|8eYe)qx#3P+POV6P1|E+ltc1@I8POh>r+NpRwwMh#+?<ukegH-j&(vu)}_S
zq24O8+7Y5R!}9szs)U7S;<<GY-vozvFqQEBCMN&9gj<i_G|@(1lfuL}r)>J$QE}`f
z3T|$7SFap5wAM~{9?N76OpBT#h7}QW_gS7BlgFn>D1?+BtiGl0Wm~!Aa8&>`IRpNk
zmPJoQuv|m-O}!Sgn#TLe(ne44Y0sRySJ^APbm95n;CB-XRj1YE-L8#aU((Yp0;ui|
zT=#M&-{INvNhyhnIoKo>zr0~D<N#pG5q7zJ4`@ioA1)CfVKL>zFA}>l*iDDQ0V`?H
z{f(fz%SG`1#uoIHel8XWa}2Z4BjX0=4qRY6p4p&=PqWEc13U3nT(&Yu(?dYdRMzU5
z?w~BUn99d7#|D5b$H!KOW$X$$U0c@-x5F`1i6+z1;hldyaFW9+@&@138VcIO_KDMj
zOi!O+d0lY0t$7z9M}MM(HdgkM_EYj$PEKyS?c#V|rA-eCjIp%hnAoQ8!}QDYvq>l^
z5x16bIVak#8)c;K&SigO=QeD;VbcC3HEaS%p2oFXxtftsY8~bIz7}0^cV++CZ5{Uh
zSlRM@YI!Hf(l0Wnd}{YhhBHEj;n_8Kl1&+aryP*45jcQ>%M0v`z{UwV+LhUndR%-D
zhKh6oP)l$sDYG0uzSk}C^JZu<?FF`VNeKOTP2w4$gou)3JNdLr%?L~%`~P*>t}f(e
zmJx#1!De27c071;z;TD%g{!It;3@K?9C|*^if_$MaWmxq7}7LA@2%mWM8}|!Q{*Lm
zOGyoC!C0@srPxqP@)y*(Ma2vlxs}5&_=Yj8WKWW-*ADW3|ESJgH%q8k)84~it8IGP
z<+KQ4fS~_2o8q<7R(yY-Do$Yg#=$~bR==Cw$l;m6*W=Nci${y4Uz~6n=xK$fj@`;b
zOZztl3#<-3z}kls;M|fqEIUoC4uHI;^d&9Cl%7G$QF-UKd+k@}A*9XaHspAI;MhL<
zpn0UVhXIAuQAl4@QY<6ny5zVA58Xj#L^lc+eq`F$mNt-8Rp=^<^bpgQk`M}1$n`lQ
zQIwF^_a`Gy^A@5v<i?^5L7A>w$+}%(QLuP;(7x8ckuOT2DErAIieM0VBy$VDI7m=3
zl1txy5(fYREHPYy*f>&INaR`UOg)@3$Apc2|4Nu|xE5bBjaZ3e+EjpukJDC;%Mn>a
zJsXTEiumk)8!EM#HmUN4Y2u?Epi^r>medgqWDt}{fsQAD26(B=zwxTL^GWz>d*O&b
zmPw?KZ2<{hr?DH-2>((Q!Yz`@)zN1fP1*#Tg+Lc3a+Xtn();eo{YhhNkq&6%vT!|s
z9w`LLOcoMvYpuImwE!tnOhZ<W3Y~>?7=lzQ_%|O5OG$b>ROUvREoC^dJQ0Lf1TCbe
z@Y{PYJSUpXkXgpG(d;)~>xtF(=;I-lJ1p((k=~8o!4Gb6amTMO8p*;*7ngQ-hYS0s
zkzP;dz`WkmNf?L>YL&(5b!J3|hQ5h)p3!<z<BSHqVj1w7bAFe>8qYLZg&xX1*#Y!4
z!4yHhvB3;AG~9v+0r+6?5kd2)EF=*K=xVIU)N*(ja{|Wqu-1}S+>SNftMw+*%o*-$
zHOFSb!9qHFJOJ<Qv+Jt7q7kV*TRi&b@hx?r@4G-2N9AoMT?Fs>yf8nT_ch0^d0pxf
zk{GpTPUj*jYD(R>JOgg{qLU}uHmLE2Mvx<%0toNYC<Z}w&j7{(4s759A$-XGV@QTp
zmC!{OnG`F_%=4lN{fZRTK-1mEvyFZd&OJ>!y*Z;FDnsxlaWASe`mM8#3ERs+DLHO`
z$*-KKnIUye&gFPb&cG$Q!!fQsXtVknuuNdvwTvK=u2oyP;7%;%6`OM5ocgf#Ba*Cb
zSzVITSk=qIB>L4wVo;SNPg-#B?$A(Po5rh+?B84mQRljoth}^5w11bar`|`{<uD%1
z#2?JD4yum9fh3IFnFm9t(;FSfa!$1etv)Vy2i>{{$cRqAc#ka>`?5kIk};(W=HE2Z
z+?SaZZMM%|ct=Pe%JR#ZayA31X)wa)V~SnL@9VS5^op;$qwqmE#Nsv<^_J`U_9X~>
zw?fgATf9ipt4@=C7zZN>rZ(pybTYQ){%*6UAtH?jAs~V!vW~mzR1817-#)L5$1(AL
z?TGwt*`OV9n!Bxo9{6Ve!1jnzjCh@uodjH@{~Wiu>mzr}de6-LAI^$}=Gu7_Dy9#2
z1=iT1&TyK5XKhw^JcpY!Nd-5cLr=(XMjO^=;!S~bQwm9;jNL~XjLgki2RR+srh4W{
z*{)1}_yX=>Q6;ndY%#sl62)`U;}Ln<v8e2?h`f>wlyyV^x~ni~e7xHBh<mN!^CZ2O
z-%Q^A;YS4qB#WgBu#Rao=3+5{fFd@~TSqjK{Kx5l1@k@-RBXL+5<NE|WCMy{r~7WJ
zf(Tdy7_9K!PzBx`6rtW|2~`>PQ(3i6+AQ2GoPVA1U<zF}pPpF$o*SLJ)O@~b=xB3C
zgrg!A{_se}-5P;TZXgDolJL57G)$(kV)%@O=^%LrL{3<!)f(Bd))%0cbf}N8dH(Fg
z#v+8K*##9F3SHULJMN+LewfK(_!zxM4qOZ8?CM}jMeUH2tGBe><eln29J=+W*;_5_
zdZ<BcXueB36n@RXHRA_W`7OA3FnS{l*}4ae-f&Z!!+6#Ik$6+U-?t}^WHz|IcLw${
z>xbdA<FdL%2qpTtMUj1c2fSuWrO-(LnHo+@b#Bo=Iw`4SN75#<ns=k&gVyV)vj&!`
zHnmble<F0pyQn)=5o1gWPO(O1Ib@du1oe|$Esz(8AXs;Q9;np&^|oTjaSXMrpYYR`
z@2sxg#VRid6;jiRjivYN8<_7r3#@-I9`08ZVcm0J1kWvXYA$&iNewuZqOxix&Q<5I
zi<^;_vXG!*+HUOi%FU=yt<StanMt3h^ycix6bwEQGoU_qZ`5pwuS-c_5}Y|_$!hu(
zRlPBo5#2uI#Y_^7vxN>Ov-lCH58*s{?QBjR{9Wo&DyTO*%W3femw<9t#82+Oc|LV@
zS7TB=4S6G=G~rkN)LLReX&fk`B!?YNG%}+IOOd)u8vq&+eUK|O>}hA3DZi9Q7AObI
zcAQnDa@5q*28Rki{%FaOL|=br1QFGWL!TFfci-8Me2Z|nNJW?{N?N}XOPe9EWzeON
zU0`0-v)Up7h6WsdyBhl$E&OR2D)Hi{9Q|H6-=G&fHsmV;0VN%N+EFnKc{ZI=G6wx~
ztuZ6SdOGUARHDh5r!)FM;WcU%nOFkbdc!JlnadBz;-N>%-UrI&3k+XM5r1@H&%~PD
z5*M;2<*=VDY`<q~a_It|<4wFe+YT?HMoKbyWL<EC7ACo8u&T^uM|ZezrX<*6C;9C1
z%zn4(LClDE-bcT{=J2>7_nrd@r?CmjddC303+ks+++c7giIEddXlc!lgMx8%>F~ni
zJkM=7iw9HVax%zJE>~ZzCSGzYrdrHDVHlz&+)^{d=&Jm=)SHm55U@09vfUm)NN}Cv
z@g)5;7Ud3#E(I&(dk!372N?sTHfUxF?Fbej()>yFzj?7UC!ZVc1m&l+zpZ}UpHu$K
zt~w-K!=GZ#Llt-9UK0<W<k=lB8ANo<L}~826{p__ZkkwdG;aSbB|%hvH&$t1bVm0=
zZKt8q`_z6D*0^LQh=@2J9{h1#`Q>RGwHxSAcrtf~u91srO+))4FgRvDGzTe)_i4h)
z$l)MlzP7FGk3%UUThhCoP8r~o)YbbtUqJw%0$H%2^+5oQ-|2uSLgr)!IVHh)peAE0
zdHXc4nktFcZ5Cg|z1~q4q(iwB4BQ-K^Os`k`LC=>awHQ3aVJe?nPF=mLC@MrFpAAM
z$z7iTq6L^IFodi><>F*F7WM73I21b5C*Vq8V>*~werM5>B@}t6%q&?)aPqA>01R*X
z;v2uQ`_wEWVpO!42yp!WVIVJgb%}(7AJvqDa##P&&1e=*_{E6!$p3#y;}v%x79N%1
zSCL=|6p@jOcc&Ozd|u%Dg~%Af{72^gAXU||-~$=WI<M=X<5<+nz@OY4*Y>|YSD*a8
zf~@Bv0ZL)<;eJ<rnmDPFtI#r>#)BzUeysR=M;AcQ^Xq$U^v;42f=Kj?;d??KSJ8Vd
zTLXRDhsIEe0s~i_|I*=rhst3Y8L5H-;mw%_njF9NigcZ7K-WGzhB_8?bM;AJx`&Ya
z&m}|DI*~tuXsMkUIv+J)WdLU+Qb*Byf0JrzegdXSCXYlCgV^!$yVbL7qX=Z1*Bi?D
zz)7#CR-b6EF@^c&Dg`jG7(uQ5O^}SotLYl5HLxaHCPY9IL)JSSyTul=pO%jG+rwrF
zrB{|8WHXL2isD*J=tpw3R??CpW=o0dQ6YM+Mqal`b={--u&pI8+MZ{0>?>j4xAC6E
zXIjr)X8=}n3QJYl0&~oMSlE0!FXI6Va5#g){DwEGFGw*|M$07?x;mz)itq$8;+)qj
zrTb56S$(@oMg8&eNs1vN*lAi@%~S53er2+;0H+_6<cxi9BZFy}_3+v=_hB#*cfNt|
z7x78oR#(!6?mM$I%II^&i#}cm!lYE#8`ooviO(*qT{@El&=G^J4d)&wqR3jQTVY@b
zg=pl8%(Tm739RD_xq_=^v(W#wkQMK}*-mAhS&FLGy0*&BdIp>K5YNMH4qm0FJ`ejl
z{WAJxtv@JMYE@i|mROjb11%i}LXOE(zxT>v7wd|JDR-pn&(jGb1L<*k_|`Y^nNx!%
z*UY{Vb*XO3_X^q4s`zRv8fDr;^O3PFfuR2z`qOMmc;prI%k$JMK>ODlVa5535^iOA
z+}$*kHJ|ByniZ6IF$j)7q>R4J6p(r%`Xfn6^mP`Vy%0%4we;X#{mv`>$wxh|Fw7A_
zL|aX%Z}i&!jv|m_UPll#7un8cM7wKU@-JMWlrccY9n!@27zFBm{Xhr2e79#_{<znc
z4b{QBJk=n^A^P`-BH$3b971Kf+tR0m*V!~Nq&M@hs@t(afN;NX$)e3wlG4C+7YO1-
zgsy7hx47xkKYdb2LzxO_67Z8|Z!`F;n3^@c8hYme-LCqXNwAOhSSv*s%OsNW@>8W0
z`!ym(UAyp9O$4hQa=i|s^$hHg0Wvo{Y(rUFO-P6Q7uM)l(b*+}{v%dbWS@Tj61UH6
zG@iz8)^Siuu$#>37P@1&cV|gu!agh~4IZo8+q+D??~y*)jRB#)B!`?JJlsd%YCsD<
zb@Qh(y;XA@V{_#1b`dZ1>jkKQsU_~xGEdiWe{wfAH`wF~A(bxu=KDfdhf54<<Lv1f
zsgslBcEy-Lf8KkB%$55@?5`egGpPfsvm$SPB_E4MS!92q$X0@bN)R>TQy?N_GB-k|
z@I@IkHN|AYnf~mwRJJVQ7o<E<dvvNoaBV2w^}bU`ygg7TDvN<7@q+wMV88~}HNq1t
zp;j&*+D71k!Mr?CHCp;Zy7&>AM(tU3QNSfMY_Gp@6dVR7$-(to%F!9ic}@5fK>~aN
z5p9I!JQ@PtqDD%BQU5=9?zOuCQgJ=Sw@?sN3<k`W^TiO`HSUk$yjZ2PyaEw?=~>G6
zuJL7@6nh@IFDsz~xexNsa|b*{CZT*>6^F4S?tk(g5hKueebq25fUu)ni}@asdGs~X
z$}!cJ8|5<o4Wqi2{kH9a{dQ4XmEGDhdEERN_+O%b|GSObrAKG=1%dS9)-uSQa3Orl
zA_pf<B>i3kwC`udu*(eD463Dg>~E2gwqa9oQWib5&xpQz%vmwCFIGnQdE^>kl<(I_
zZx<y!4L%8NiYvue{QaB6m-n)r6*3RWBUAEfBJo+-ycK9hYeb!)Z&!eBgj1|NP{KjB
zM5!OI$CqE1FVes=(l2;pc+WsZ7*nX{{&x3sj`(4O#>Fg1As4H9n)Thh#0(y2g+nrJ
z=XeUi<usDwmEv+t@LH4FH(oJ_rzt3a-5(atli|jHt9l|YRgCD}(JvTCDFyH-@WD8x
zP!IWBbAeNXt;=dM4R-xfXArxcl;1lW;tsL5<_<Wg3p?y$U&X)g=X<eV@yidG1bHSt
z9DKM)N3${CIr6|lq<T7`D7$e7(!?+-%Ie1d#4A-WoKOLh<#Gys2ph6-;>4%z0Go9*
zhOU=hmP#o3941ws1^nArz68+6&K_?_o#6;)YmDe)RW9{J`Khp{Bj@`dVY@=jfl81b
zXyS2Qy-THyQx<9i@!uLvgwlqua{AEaY`t8Cdz}x=i&6VCh>o6qk4q16k({b;P2!W=
z{X51?B|DZ9NfqkaS~s_6YrS-X;&B*0(2LW|2X($2&w?>1>g?$=apYwy{8$)f6GYDP
zx`;;e%0{Cu--;8Bga0iSMY_!}PVeHYy)n@PKuokeWILBm>s6hDYezw^MU){L@!Tk~
znc;6Jh}P(SsM~EIfxF_MgKkm45pyO(RnG{TZgx=z%D;AEbxQQ(vSOi)UnObE=f^w}
zx0g{kqjZh@)7f5GZO)Q>PP#Sm((BOKo%>u+i@(jPKgjwSq%Zu6y{e8WI4m_)@uE!r
zj(Gk)+9qH)Hv5wh85ahon24d~8m}i?Fd%xmR#^4e*BYBFh-4==%^$x(co7rTbXB~2
z*wzAvK}|nnJ-w#n4Lv?Vs(2u1>ZMsC?wvSnJ#*Td`n1b+=<e;lSlQG3!2u{`w>U5K
zt<!!!y7q740HsWo!c_2zYm4S7Rw#EP?6*iI{jXNWO=WX!gXk-NXy2j>cuXSIy~(I-
z?UuIyCOK{XcQJhQ7Kow8AE%>MHYDd^Iig=PV>a|y5&6d3UT#+mAVOpiko#DO=R4R=
z@Hv!Q9oI6Hp1~+L0E}`bT$T5CnZnj$Ao|EsPsj!lbKA=2<&xGkhSh%q4|SOxeWuI+
zG_2&UFf1~8KGO080r58+3ipt`g(Iw$_Mo4!#tC*+y4!w=n5I*zTwAP7L=vFln|(&s
z{K79ggRRwM?j<#tW0knbq@e61a@GVj#`RJ#wNnK4^o4yJ50>J4d8jwlm)va#(49i1
z>3^3$T#T|W6)B=tcU7NZk<ke9JTnsp*nD{%A81Y3j-pw}YH5!~z75lL*<9*GVP$vt
z^3wMCxxAzGA{(@M;cG9z(Z$=8&yCm@KYtTr|8DEbq`DN09p+@*DZ+ak8mmAO+`rG=
zwVg6HX(k}A_VtKc{@wSNDX$2Bq+mtCjn#PuBYtbY-sN+GGC>S&%h8wfS&8t;h@j@{
zQia>f;ve5T`f0yOxClJ$Q=HlC5hWZg2gaQd45EulLbV*wtO-D5VATr3%^Zy#CW|I?
z!pJ3u5FB=QGHhFxis%bXPv|KO&?%^2c^)NWVcyq<{*y<RpXHHtKf-*B5XHmO;sl3)
zfayd$$y(0(1c%{(X@;Z_#ejB=Lfr9mR3<h3vk6Roa8+@rcT~F?-DWQdJ1ds$-odE+
zd6a!<0^=slx>uivB}89-mS&cvX;WiN$GTG202wxV&VC+=1zZFb5zl8s#+!%SD(oT9
z)*<Q?cV=9-)U(eVa%N}}ZV%c(q)te&Ff8HfR9%X~G>7>jdc()y5`9fvHRlX<4j)T)
z&(sX?gJG?=XKd9C2{b5+saJqpkzK*i3&Snoz7)rpi0}N>ipJ^4VeEMxb<{za|I_==
zS=TqWrQ;uKAA(pM4fO4{>>Z-e(dp1ELIy>0=7x81&d^f`Fgb$NF+|BzaYwMqP;5uI
zX!qoG=VHe>_+}};jVhc`H9EJ0ks4!#hWiMU&m+<_4^cDW{74H4F`D2*=!V@Cz+?ab
zRYA)%`FXpm+hY^|@nyuox-37<YaY1b;*qLSn3VrXeqhE3u}zlhvMF8x`bYx;AKMYF
ztZ2D{-AeDycu+yagTKaS43<3@{sHMaUT>XX`5RB;FEM?VgaJnM6%=9|lZtR4Qhzf4
zIH<oZ{)br)4ZSO&Nkd~!xi8b-K@3*5nDHQ{WMmuI`_R9@IcZE?eipvprP^b!d*X_E
zlm+d%ub!9nG0gR^2!h)yk4EOyO`at96loG%1Y41Tn0b_T%}%E<8kj$c2zShx^4WY#
z^@kU*5ak23SG8tS^j7dzP7Ky$Lno^cd=+d2QvD^lAivwi3s+um^jcS~xfRJR{7+_5
zbp%{pj*Zg`7D}*iCa{3!|5d|Hbq*?Q(6(CNmVaxxhD&8ZKo+#d-h945^sB-1g5|be
z6et@-iMV|q;13cZ@Ib|_C1BgrXR+(>;J+MypQ!5cyG)hE{&B#(a;hy`&5BEPwq+gt
z>^g@ZBQ@B)YAosyv(<!<miohz@{Wl`$f1n|JVM^xiJb#iMrER_${fb2!{mVW*6wiG
zb~4w*5~|VHJQA!7KQOZFT6PIl;I7+hWf&R7yI{y+Q7|KlD4yg4{aq;`avt8a{c(;K
zHo}qqPkJaNDodW7te4E^8n?czOZM29aR`5Yz>Jv#BWUnH>xN}nX6WUnBz5!VH?}ce
zsZoJV;<b~$zYALE(DbMVy_YNI+M&LyKQp4WmL@krv8458MvGa3kx5I3B>i~bK}=YS
z4hE|k0^#(Ll@L_}nJ7Q1ki`A!Aa_){$-I7~3<2rUJeSIxvSn)tX1mgRGlw1G()UWd
z6#Udkv7O`M@t(Fl8aC1erD}O3nC&A7MkfzV+W_6$L2nU43llia=u#D<RQ~(Bc+~?g
zE?t?2WvCz+2KPVq<<iY@VN=i})7baW*Fz~I%lLnASFGP6Pqpx;KWZn$haB38s3sd?
zcI1qm>b_&`5b!1p(fswNFjAqBl~An>b4YDlXJ38ZscpgK!jAQS0E%xht?3eAEax&v
zzerKh^|(xi#Ux6ryP`f42uH|qzF;<q?gWj&ns7XB!!OH9Z?{8btyFVt#j1J`Uq(Hb
zbAsCObmk|XW*aw}ccRWt(5q_O?2>%%63rxbu1n{TsR{jm=nExvvioD*j^e=P1p5k~
zOeqyMQj99?kD;a^h>JbDMxc|r3mctXc|>A*YtPvhw8pHsrMn76MWa<Ck<9oLzu=M6
z9FjIuEB#9}0npO>yuf7d^=>e)AL!_m{{?<Tl<-o>k;~6C-mVKp6om;Lb?^b`B_V}x
zx0V;=QX@WAiim{X3XN<k243V`>YCK!Iv`I|g2(X2Om1n38`5&)j~`IVM&o=!n5@xo
zv~C~3cIUW}Myn^NN^!n2XhFar*2nkByIOH7L0PZpUJiqF3S~UAP)<RWri5^JuYF+i
z`d7=NM6(Py*Vd3>%I+UC+UxG_O1~$ac0$eqB`ab<<$3kZ5G%gSV5#v_z9ZfHpNevH
zv=F%(=5O0oe7~$7oLVUpoblHtIikLP_@^g5k2}QHoLYGgX8Ivtd7ft-vbzul`a@|!
z%><j}laPlE|M+j9G-n7K7R^C&jO=9T_l~L_Vnvs>^Ug81+J{Ia`VyL3qhg4CWo6XI
z3tbZkVkVNOIj%J7_ml}fG07O3G;!12lLP+qJatv;(NVv>Kye9#Xz_6#hA+>PhD_kx
zCZ8t_b-gMtWvS>n+9_xkb4W_C!BT3`XlOcfn=$4#@H-P&-Gx*9OZ|VUI;C&_tLof$
zkWs~MFC9%U9+l9Oh8yW@@d-5L7VZZ8Sx>Uc6R@p3TnPiGxFLB}wF2DIl9b;v&ky@<
z2$YhzHud)}Zv@7%PW-%ne-`@Gy=q9$nxa|@l=X<H+Y~@+JAUsAsvy;D_K{GZUBA<r
zTyr4!Vf)L>5k~m~{R^;8!jBWvNb4UOw8r1kVt=1I+IMIZk_GD(4D;1bZBx402}Uui
z<+-d`k>%(ax;?|oKPv8OSDJVZK`~Nr{)9Iqh@qP!F@)kdTaS3KMgFD)(n0s?EiB&3
zK7s!d*LQ$x_0XJ)b&4{)1moE1x#1i7bd@8l<l@i}{az~wYJcNqRdj>VoB#MMsN(&O
zOyGXp^O~3OFlx&uxpRV3mB6?3d8zMq7P;fl)5`bj1Me(ZKZIS)6c}k(adPuQ$)nSQ
zmsfY2fwzdE+WjI&>)XiW!S8aC%)K5&gSKCfMt(BO%cV)bzAG+tb;(EQi4n|V&(NKl
zpCUK>=9RE@P(W0xQtSh#8i78_VYlRr@uXYnRxX2YdFB?`l%*~k<K^A)AQ|b*QAYGy
zMh_59CRmL2-f{0N?H#n_jXJzRVN-PfkhM=0_t%adZb?1dZ#{x#FKJk^el&CX&RLXP
znG6BhiH2`tNFrAuD8b+K3+Q|lHHAIBUIZ*KBZ6mCNJu_OSL1MmPTo3j!-&-V5S>^I
z4)`4ogqEUG$Dsk3R4_R1X7Ra-G+Igan|Cck#s$F-Z^(n}Z>3YeKM;JG=u8$%W%My_
zO4)ql8GOgh?2DY9fZslRiIxF|%={%S!#PUOK!FW_hy!Ln+prrG;=tiy-Ug!&A}9O%
zYyJXH_E!h?0eLCTF!^{C{<+e}@b1Ek<);1u&ZL#CB`A(W2cY%rCSlIlhyTZe(oF>8
zI2f_%@bte?lPAAMYJd$;N@#EWQ)|g@@&gtEnb$>|?IC8W<saJw(Yk$va@?t7%jYee
z^naE?em5hN)59`R2Bs)~>AVFfQ&+jU6;M{~(Q!usv52=8lwrO@y(@Gv(C}b?rCNQf
zIa$MNW&kYAGJw_M+59KYj{Z-a{bn(*AH@=KN`iw8`CtG_kK`B=_+%M_IPhvX)m`5x
zWi~d;<37}QHQDw4=yw^+NtxuhtxE0gTzG}vDHT>dFcM!41A4u?Dw^FKSuiC#Xqqry
zrki1?a=YyUXIhwVDZ>QMPp*r0{mg}tt(OssbF7=Z9AKFH-wa|)9`^slTRh7keZ%0C
z*8`(3mzz>e+j)v(#kU5j+sevc-ouI@7RScxAAD8(f9m?GsJMcr+rbA3fk1GAOOW91
zE<u93`vAcqI0Sch3-0djg9Hoi9$bP3=Rf(r``@+h!+n~0n6sw4Pwm>ftGcUPLY@*d
zIXdXyRTViHM-Kjt0cw>uD9Z5F$@>d9Kk0Y#;0eBgl?jZk<6!8g00P$-Q>w(t0$l69
zD>EUJti)U8ADnKvixd?Ax5<vIOBhl9+G)EIBp>Z0Zx8$LZpBJhIr({4IXYa}GN?Rd
zGGCfO6&qhxeL43{=k!LQ?Prd?pKcsU95`=fwBQkx0ZU71ca60V$iKE+hz2r<F>6GT
ze%Knzn9T-5y1(ToW&U&g>wh<a7B)tOsA<=W1;i%2CUByLD>p)YF`N}S9BZ}JXpUR$
zqIYSz?|a-hsKA#6)aai|7|MQr!c#xKi7*cfG+2v&J0wG|lU2r|C_NMv<_7?BBIRI~
z2Dl;nMS$Uy6!!YZV`CaDKeg4jM|N<*4ex4HMD0@9O=bhdaXatvX6j8`;njJ}_49^0
zP-kf9mLT8T7+8hhhuHdB_lAG0;hss#6D~W#fJS3X6cJ8%XO~B&Z%2(udFsD=r;HbV
zsjMzJ`-uQ5oW~_t&h+_gMFN?k`Q?_h^{JS5>G!O0yn!N6OSTSZbW}(@4Z|=~xqYMk
zxtgJ>As<I=-kd?bgXZ-A?)IDnU8LJb#Q1TnhDvA@>WdI#NZhg48XtaB>@VIf&=piL
zBu9SSZ-?ym0ws_soe?g*>$x4jWzphjq{o$@+@I{(lV1(blNms(vMUZ!(@X(b^Bw5Y
zCuAo5$LT5>*4lmLJ0rSu3vPMRg}F}gRtDUVCl?Y==q8w0=R_Mwd4+p4PO9va>11L7
zS+3a8&48GHS*~XiRU`WvU)64*GIutBKRow&JEptyANo{}=;xF_=-C8K-(Tu+;7OC{
z(g%=aO7#BO1<sP-!v2tKknY=c^{|p^d^<(s@dMqDOu`?8k;VWFYXE2j4s@Kw1uRrG
zr??D<tj;_^9r`9wzny1TRMgNf|4RjS@-)^Ie-lCm<Al}MvUuX!oxQG;t29ULwS&4L
z@9Z)l_S6~vooPI+_{nqz`j?$=!lGYSmwG|i$4i6CFE?~zo5D$nr8@7&zida6>(lFP
z7r#RI(ekUOo!WW*Ib8i)(sEP?`KJLqT=+ptWA?LODT65;9fSNP@2=#GaeSA2+{42B
z$Rk4YjCFhTwBQpZT!W2%<a!1hxtbq#mLFyRm2!E@#tb*srQ1#7k$PBFR?4g5GAXd1
zeSRPB>tqsDNfp1gf%(<XN!%oAFl0r%lsHF)Z?|ea((Mhab89KFu0&&YloeEcx17UK
zA_jzw9#Li}XQXhDufZj_Qn0K}foI9H(`C`2^Awpo?hWpCBpGF5k3`ZF59^<6AbzYu
z7iW<T#f$vuv<WnNbyR&u&`9Tq*-#@uDUnt2y&VmO-W4_(sRqsvV?&wiZ3NnFhE^!i
zY)cd^{^Typ4gClj2P;$C4>C+(@)j_Rh5I{1OX`s6_Xt@2BAR}`uZd0iO95BcUE%uE
zV%K>p2bG7qw}#>|(Pz{M`{*h-ESe?rAhBjDWEK<V!*>D*A*I}QJvJuuIaA2s-@GG^
zCW0puahn7%N&Yl?>1qVLOl!o{^Gq*FAn?F2rkLdIIVPSmg~)9bM?~!UCRMzR;el!=
zKJw^7>WVR&O4+Yid=XicueHYc9>nv98p3F<)<Z_1kf3koc`X)$e>0fAeI0*ak$XD!
zvel5^A+0Di_=c;ND#wP`uB_;pS|)UCD?9Z-Gt-1;2H_S26C~E%8X8|U_B(m2b#{?T
z71d4BDeFy~OJlm;xhJL?E!4_6e2atw+KVUVp(9{zUpT$)U|=ZMbTp_C)k;;qeo}A`
zp3#y8L-EMRqNy2?*#@*J%gKgk@%%DIzkXDp|A<6*ym0o|!uqsIFHx-Lc{1;oh>eq5
z^gz2_DVTn}=Jz^u80@en-BO8|cMeJP>*4um+U*PLRHQAb3eAT3GHrw06)cS)G?i|0
z4A&@TfhIVSi$NAQ5bDvOP~tk~*HK0suWkn|B@Pb!K1Dp<^PW8v&GbOZ;nM~qaiiF8
zP)|7Y_d)y68ha<XZ10RRUmF^wENaA;a#}+T-I3SgMl<ttqg7tzJa|yez;1kAEx3+}
z6eQ9v>-80e$B9TRukDw{A_GIOi;jaNX}=HmGA`dhOF<Qx<=lcr=9g1aj#bWC3${B;
zjnk=J9bXf)nn6Oi;qrSh;z1K-#je{{tU7I$2Qr%)pRz1#Boay2VAipn3q^&?IZ0cl
zXshwFiVmEArgi_8K{1NMnKXUiS#?*r5o}+IX~I>L@t%F;4=)H3Gf4J3)zVv6@JfcV
z?SXrv10~d|;f<Neh2XZ+9J-n@BxIoUH@abFevq=!FRzwV+vMcsxGV;`D~srrxU_fg
ztx7-YNyZeX7B$p>zq34P_~iapeB<J_M*#hsDc&D<?4vzE^2pto&*cmk0P-!%O7!Ik
zp-QHVJjfL{`Vzap{bDPxHovpAn*Ah#05YiXwavp2Th{DQyvtm^;As?_pLL|+D=Fc0
zWR~Cf?j<1$3CXkHl^>3%&}-TYc@yr!_q|u;u_~K&;-Z0l_T@?A4rpkGRV<a?El*9r
zr;H?qV17PHyI+&HKRO`-#mGuhM6SPTaT$<*s}KZHc_&udMCUD*HTa2k%ch#Phle~e
z4e%Ea<PV`(WyDmw6FKCikdT6?R(vj%vO3o{;Tirc@Ufva6U+hg{Z;6axO-c06EM)&
zq>~>#;ir#W-mhhT>Zg<ZR30bSrinN>TN_@2C#z3kR~ELFuh~|3&5I!-ue0mld!0k$
z^npV&U@$gi{LRXKO<={=oKR!)CR(HgU*=0dR{%uKFfu92z3T@HqR@WLJ2IodA{D;;
z0LW0{q#EC@%IO!AC>TmPMz^|v?*tTtR1boY8t4Hs>Y4fW%o9q^Msc@?e{;GzSVxbP
zV%J)^i`RIPad2`amHylg8kqjPJ#Y8wsCp$=3N489IleABr-K1&V&gR@ec3Kd(<PSR
ziY{Xf;hcBj@2KLmS%{pSdG<y&=T$@Nhi-ern4le2-c*p0CEX^=%AR7tPW78#Q_kEB
z(ZFisf!OjbkO3?Q^8KoyGDNJD$Rnr^q1;5<M3X_MUxL6!S-XBHL3@4`D*jO@wlU|r
zpPHB+9!wG9P*X{@W8J%HVkN4}O7JxUFc%tcjeL0c;=+WTY=^P5lISvb4!9phnh9Q&
z%P{)$@wO&wj~(`M(WCg~FU0Z(70YptfUy)E??Xan>&9l$qdi7NgVm|18pVK$qx5tW
z?4z<>US~eNVu=f69;zTVh>be-lj$n_@9Lp4y9u_)S7*!{&-$ZWQN@QU6p&9!(4R`=
zM0Tp6n6Zc18L_P~a+G&@y>D)An*5U2=B6<07^*PHj8Zvq&asTkh%x<|<lA%w=c4~P
zEgfHLCb{rb&e#SP5D0HIm{$M}H^OE3i%wxWB?}@g8fV1!v4-6qsq?Sp5Et%Ol$>q^
z;R?yYA#Ux}kU#AdXMJ9JzEd(51{EyI_D$Xz{qP5DbtCQ$h=d3n+<M<(q=%bucW&d8
zK|73kgMq!NS()M#&Ac~gHSg@7o46Kd;Rt2Gt>N$F;JC*UA?&7-v8*DE=?Y3Qsh9-?
zn<zs@E9_4ytKqSN+Bw%}Mrz11=njV`P80@6)H-ay-J^3lCPs$7AZ_ea=3hQ8++I@e
ze&L2V0Vca`^H4<qA6S=Ju&&udgq%OekzT`GB4Yql8CH<y!4NFnvMdlYaaf`4z+LXe
zgWjbtatUgKL#mC~4Xn5pCQYJR-^_@}BA6?n_xP0r3Rv*<Z4ay*an*dzK#Q+AFPo7~
z&NSWe4|-i@wpz%o&f_tBKpK=*9O}|?TCA(gQOK#V>Q=xZ_ns}BGX7%NBpl&7<Hj+J
z-_RPA`0Zx+C}&xxgfkms!ddnB__#$rR6JT!JIx8hM4Dhw$%KfyaR(3G;(Y?2Z`r_X
zbg0*C^duo1h&jUPNm^7K=~HgZ?|j!(t=ln5BniQiKVQE+tj`}5t1W%$8z0v+(8%!e
zCH<4&+<!M}Ink9PcM6bOJ|+gOJoU=+KC?!;TKE_r65eE8`*MfB<J=C3wb^FxEp*#>
zfF0+Y(0Dd_o_CpD%MpSUw)l`CzCg0zh27>ABtx^bl^PeI?9Bv~kv{!0KEX3Mb9qzA
z#qV$t2iE@aba|OJ+97-H64dsR08c(-$rT1GJTM*X06~KJvw|JUH>9*Zi0|c0qsk|y
zmMX?1ZTY6U^5$c@^jk-Cmsxv)N%Sl=zE9?IMcde$@Qq@qaJrljDF(%&pz(9Hk(XNs
zHfGX3J<GYw7I9Gs5?d)TvMVajv&YYLMzbW5-`#z=Pu6`~21O_=YRN02r&DfolJHS}
zn?YKA?o!B3yi~)*t{|iz)LQXRBKf)-E1Ag93S{xv-}<3Dfsmm>$1+#icT!(;Su$Xy
z*YW(EqIpLpoAw5=yKuuzf8`syPJCpLW}^$Q%4|_=#407iyTLS}p#a%~VTAU<%!mRb
zvZETPn~{YoNW5v%#a=UW7PF_f&PUWV>vkK#7>^+dZ)IpmknA=0cK4+g=03>r?+N*;
z;%+&IxXwinZrdH+Q$Fx{m^!@lFvCZ9v!Dr!k6+Uft~L${%1b`<SBR!guq^VPkni_)
zSULCBzS|hUr64Tr;SR<u>7MaS4R(!^>34g15`M<HX}%9iTKK?UGetEJghRgY$IIi9
z#Fbn;6ayBnojim{MWuBLS;^V?!)&G4(Mc$VllZJvhDy`<S3cf3#q2r*-2G|ykQuLe
z1kJ~kMIUsI#+$q`T%zj7Au#FZJgGcl6wLUtw`96qVwSRsI)<6Omyr&fC%BHo9T$iz
zA%D%*r>d`7?s|hKbHdn!w*t7ds9+%sBJ2v>il#i%QWX97XbXy~<Ru3Y`%*Ki%U$x_
zOf-K_rIxif8nM>Mr7(lj6r8~@wsNpW>K`J&q}wy{a9O`u7C&w{Uzm)cx~$&rO2rJW
zA0O0c+KA0>313{~Kfj_2F+0DU%%1XQ4$16eM1qOzW;Py81omE4)mTr1FZ7Bj|Mntd
zl<AgUKNVPO`e~^tbmCzj$dRC1WV}nofyF`$9OM~REB9`c-#8s@0H<OiXc7G|!r?)u
zmR@j}SrlMrq|c*VtpEK{FPcihON4j}+}W8=eV2Ab_ff)rI<ctQyW^`5`LEyV{jt=L
zDQt8>`WParu*~{;d*)iMw&k<LXN8BN`~<!mfy}xc!!bR4&^W1h-uB|w(H|A23VI=4
zJ}NUKWlG1!ac0@y?yO_#?$y!V-(7zzY-Rp+v?mgTk(wlPW+oF3SL^c0r|cO9eZ+~i
ziCW|n#)!4EE3m`SAYCHcn6_r8`*G|%hC8)4*|mtd)f(*Ic)SoQrK~K-GjadfNakxK
zEzFja^6+hvZt~`){Sr@RZA!oEwpnvJlg^WwQ4Lfdr}65unnFk7T|@@c`U}L)V<lVu
z=(=w+surFU56ANM@aNgH+Tb2vH8+x*h1q$jM&h8zL?Ia;CgyF?-puEOYNOymmf2!9
zY#h)G1qe=>a|oQepwO3zEzSDkf-o4mnOys51@Ry>(fu@Oi5K`qoHQ@&x38vtr+5Du
zr+xE6fDS9GlIx@1%Vfp8gy01Ib*|tH3?dAemtkNZCzfr6>~sfX4*U{Z>nSB+sN_A<
zOkt-H=m+<E>#glAu2-b(9A&LsIi%M~r7FK{7WTeO6^2<13Fr&moGRaH_MLgmnNCBb
zPU#zOx(Eg}uk}H)C|CW;u45z!BOFEC#i68B-uJfl5p_TyhL~AHM<uAojEP@rV{~*$
zqnb8CET10|it)pnEa4=@lr_TZFAT_OO8|p2?`C}YyQgpM)@I_ggmj~mX=37b-1a-W
zq_PQAW*oeUa?@v+0PyjULZRD`9<%_`wT2EGX~_T~8fO%4LuAX_>;EA;|FlWDC=T9W
zSd<zZAv>fFZ4&`YAq0^d|B8Iv*myl!rUtz!a&q<*;I5z*cGueVZugcTG|7;%o+Sl?
zi&kXPbb2ynl7G3!S@_$l_TaM5cRb5n$(FVCmsrhj-Fj?N!rHFnF}YH8%=O*3$0>P`
zq>>UjQBKEOIhBkjah6p#@TDr{n^$p+0yPA=F*)dPN+aw8$?TQtG<0(eMk?M(qUMGQ
ztLE#yVSK2w{aih$_}Ya4QkXsD4U5%Q4$pa?qW(c`4k4cd0(L;$DP!5q%w!#wtJ;DS
z!+<r)Z!P=u5M#iwsGh;RMZwOc0SBTJkc!n+4j-Ihf|S*0?jdkj6IND+QW$<SSuJd^
zJz<|H>>wPrAsk9QB%%svod*loiq4nBfu&6a+uFa8{FIqWv$3v9R{%=|eL#ySuD-jS
z9qsORY<RAB-9NbXh^2(LJT~{PrQbSLpV%O@RSHlLE+LJpSO9X^dR_R+M8I6iH#F~y
zamyAMUT@w@%N+7E7ZD<^uhb|!(Nf(cP)``3Mb$k5ENefi;&>SJu|Cz@<;~3@8ggo`
zKeFC*i4qbPfpnZ_WgHr@hFOc1`B!3WwJJ|PKTS+@Z)Gi9ceCg3otoE@T95l#{DCMv
zW|D8B-j|vA_+32w!sdWyL}8{OnSf2PQ5!v-at7`Iqo=rwqN-Q={&d^ed0$dhdeShC
zS&h_OY_Yl7+_kd-Y5a8Q)aBdo-s&Fv{iI@7a&!SMiy%tYE53+nQ-q1`G|F7b(v@(Z
z%RG0eA46l>8N+%EHj>O=3`+tsqW8JN!`~qDnI{jx5napTNxpk905jeGo}wG;0Yw#M
zPGXI_pZP({m}I1^jPi?JiE!n*rz$HzcL|C<OPe_y-{+xr*EMD*&P%7ZqE7`V{#Q9!
zF}@@C_1)@9IEXw8!4FT5sNcaEjx^Ip7i?6FeK4s97#yR?$~YG;>d56ze0`zY&{a5g
zfh+F)aNR9fC4LAtNs?!PU1wt%dbN+{I>0;(+qWL}V&*uvK0D~It_jNy_5~?b?_hG_
z=`S)+HpgnB-nnf!iF3gCPAxWB#hkn6A>~`UHT0g|IJZ`=9E%X||9F1PD<_UlS>fKm
zBYvMuF}tMKV7g2MQ5cT>oL#mC@1UO_sg%E(Il&!Bnz(p;*p!E|S2yw(P22^5OGDE`
zl<%sCt@?4)+mxI-`IMO;RWkega5HBE_1kq12`%<%<Sir6vf0vZYW9R#rQ&;d_&##Y
z89eVJr0&;&gp5XPnAh5wvmHuNWtop3NPHC<HI()c@T>_BCA10r<^~N46t9?K6&JY?
z&BbAiQYS>}5Q4GhS{<dL%D_8>?q=*{9HhisLLWGS^Nr_%*KDnmE2~06=QHc{Qq&y)
z$QP6d-@gnaxjv$hnCM8-HTIK|Chkz)Kw<Z!WB(d~rQnxgp?pj}Q!S&-XVhd9d2S`Z
zqNE%7aoRFx%gK3k2VHSVIi_cY+B1L5NS5!+mav(=UKAPu_c=#Le%8z-*mq`)B|q2B
z$AI0ZR-IY>PBCVl^pgcvSR4x|aM`s~JlX#D;bcl6#(VX*>zX$7eZK2cLrFoMtlB;j
z9%}SJ30Z+X-okF2lUJS%J#a)5O_bDTmRJzS=g5xUmD5bG8UH3q&7!e-;aJ2`=A(Z5
z_UImKgqeTNRXznr6vh|q4(3C8XWIHihY|83?$q4DJjr`!)WSVys4$2KyMQm^R!Lu3
zmA9`uR3a)j^xHiNy)`G3S3S;}6@&AmbAt+BlkKmR7G1mMXkpP7N#Sq0?|{oa{U`4x
zVz<GP@af(UsIM0(6okGPSrM^m>-Abr!A6_EV%GDKrrDY}9J?3>kd(xVb4WvLsI2)r
zeI`=U8m#3h$8I>QhNTQhTY0ih&6&}3lRW~fKIn`y;M`M#<4T0N>G*Gk8ZCUTI>}F^
ze3w>Fn~HDn93Vq^sGlJGCE7Ufc!Vso=&%?K<$;8#!)f$(ud4DicVd!D6X8Aeze93R
zpN+IQ=*l|<c%eqsNLTOP{dx%LM&w$unY_na5tyDniQ+_JOykTTz^dP6=o9Gxy3DzY
zHI*nyj~zz(gQb7lI=eaGv+9brg$RS|TfdPqB#%FxLDya%r+>|aXJy?YhXKn{X+YtV
z9A9_QlGnkiJfE2dFJ5dB`#jA+&E`UZ!P)R&{(RU@vQp#wP~aJtu+-j|Y)ba!a_a4$
zfR3jCj_^9tVNKJ5z!(DXIgY+KAqtamXL3HlA*qq$V+*;#uDGdwd*WOx@tEO3%ct`1
zpGLlJhMUg50fD8Ew@PTszlJibHE+vagcR~Awm;)D?g9X}Rp)fw@M!mKAAG3rh5y0_
z57UdCu_VMXNLOqT*!t%Lm4KwXx%J*G?{$j)IDbu<YXTndRAA6@=VzTm=dO9^eR`b8
zDJhZ6@Q?kXadR@yZE5Z;7=IZLT;-$p>BklEj=Uxq;=}cv)5L?nr4zp-jWg#<_T#eS
z=wCg@pbK8k#<86_sn3L{k}*3CI<_(KdD^Xkryfw*5T8G{?s9V_l{!Yjonm~)0Ef+&
zwH>I$2sYkm8uy{F6ALXDo2Yw6la>}>`)2c3-qw^ZJ9xDeF9k&I6owfT)m0!*<81ly
z4zoJk&l9AP(HCFAw_pyAo7~RKUUN=R<$nd4H~P@hHMN%a({nPlq1#n43hfA5m{9@q
zLzydZ^|Gw5#+%UgMTE)a$JN@~WeX9T&>{x~<^`&EzdUSc%y)FVj5H?3&k6+?w;B1(
zS7Ua|%j&O;C252>#o=HYd786y@?OwM;<$hQyQ7pJY23x?#Jj`XNML<v9rSfC_Di`E
zS5PE{t&exFhA(^bLx}GuzVhdZWo5u*73C_sjXs}(%ZRogyc}_;q#tNgxx`3KRg*P>
z_qv~6y5{M-nhB*8|7}yhb$p=2eQ)xd*=pNfk!<0WN%En}DsTsfFj=898<H?2trJMm
zm}<vYtO9TmdQCXSSEfZNs*jge9I&UeQ8z2}u+`mH3v-Kge@|E~El!1amHCp}bnV_q
z%|+9~#Eq1iK01D?A6Z605@seBS1Iqh-jf;?YInL--8El+ytzvY_Rm=NiaHw`Tb}Dl
zqUu*7(m~n^_>O8Re?b*Fq8$j=I_RZn&r20e@WwwPo__FzpQn+)BwqDpN6`D_xx208
zaNXdP|Jd8pw354VM=N2OMW{!!-(Jg!97I+e#N6zcsda0#*M#&|DQfdA0iWQ~v5xFR
z$8cE`Be(M5;lc(Wr2d4BBdQXCeNCGg?M!S|{>av2(Pfo3I4|446OHsAjUm2gjkq5L
zP?`ZMQs2>&c+^zLqs~&NFqDkyt@-9QZozmi@OtMu%S9{%jANH*XEu86pG2334Am%|
zhGJlJQbrKrTCi%}zpxxhwiJEjeeI>;B>qJ7C}vuOKGqXU3tuNIdtt8>wtv9<xjqEH
ze-xR`O7V=+J;EMTj}as-%VLnmljRWmb#wH(|GIEhyCfc1ePLCaz`VTDF0G8ef3Wr+
z&U{YlKb&stv_nsCHb*OLP9BzRUx;yz$%Xbqf_aOn!C-T4>DF359xQ<E5c-t-oBS?%
zM>*uHu>=goov+i|9H?75KP-j`=h)8WIJK(3O=0K5G;-61>EypBFxRB4(2B_qU`WVR
z$}h;AXhAmCj&ggSLN@m?N8O|Y@K&ZBw__O`gg&ncounwY2Y8+7%7rNY^aeiuXXB&m
zDne)_#AFkOx&q0W3GrzKUcPt$BqN{{Lw&DzGj+soKJDw2lnVhp#ycen_rdX>WiKMr
z%yYPi25NN})ETmPBch9TM|1)M5(d3~*9DAaPPpO`d>!Ty#x)-)o&yi$K|{GcDp($_
z^MnjAUM>upZ|?9q>(T+rY%PODx|I@aSTZ02BDD&_E!<%h3sGjS$UhTzz!LE3E7vK$
zT|bU?sABXK@T~&aOcc1#Z0^5R-Z{vQLaBqQ@kZI(MFL$+io^bXW-^nTcA*A2{?{5r
zz@oMC`jwJw5O%<_nATs9FHeQMoZo^pVq#{j?Y7B9wE^Yuk?!`NiWY0!!v@Mi^W5)_
zImx!wVVvjC(YX?%@OQdTioE`P{nj${tl}9#q5rn(=a}fy^Tvg*dAJG5RLB5IAjIMw
zV>Ty}phbGc1*|FVRZ`jveh3|T3rBb4gv`}vVEU<9y`q>7Q+mYuC+^3Nr*=#qKvVNO
zJJ-vOU#pSA`gEPNAyqt`LW`#Z0AEae#%)UD8KXN?rhoxU2Ch$42zc&KHlKH1KV1w@
z;H`?V$b)qo>;=yzN7gioX;H;8-?sTi`fW8jq|8Lus<f_3aPy|-W+)}tDCGn1p#?dx
zry&s1CZk*lO{Gk-ydnI&C)nWn`sTF>t7z4G@)yl<DQSzWZ0(;XRKmAq$@ecJ1+GAG
zKg$2eK)FU)UMcuI^VeBLp~d-2@?Qzdz4==(^nHiuz|iNyVG~ayy3E`<W}){*y_vL_
zn4`)=(-A6FX6b`GB@2Qb7`Rta)f}S;06c_|DxrPei8<JD@qb9dP~iKGHPUEWp6)^n
z-j@qio!-<w{0EtrfI@zyq_M-3n~P=!on8_-X68&#?KZ(OTgdpw{voc6;lCN)Xi_aK
z6b#=D%nifI6qlyM4>5=)^qnoIy@HLrN2DJ=Q&D5c@M3hILBUV47Y-GJ{0E>cu|}KI
zs&Dn!ZfWbKxoM%4r48meU4Jr!9v7YNn}x*erb`{Jm%alNW2<wj*-5s$b4x8FhEq6Y
z27{v<;ylCzow%t8;dx2ly$6=DJjW;hjCk3t+@3d4B!*ze-cvyOJ>tQPJXGl2yAA6t
zuQ@}2ZZSD`6>#igy1$D|tEM9ZzB26tix>?uPnI7(b8!u4TlqH<)^}*s`EUA^L`fM0
zmvr|$U0#3Imp)0K{~OKEYjSz8m-}y&HZFSWF91Y?`ddD|otPFcEhT)3bvO}Lvs|y6
z^NeZls)3n%o&^#De>?=F={^pQey-;3+A!AI-0$!(q>&BO_JsXsq>NQSE|V~Lz_{yl
zuJy8tBR+Sh?IeJmqTQc9+|<{%;j=OVp-uzB+n28@<74MOh$3&idCj}Ir2x)+E<+)r
zjZxu4tjJ2G_WKiPTIXYicJl5M_YfuuDg-i@;{E6r()4t2h8mkC$UmRT`UXz1;xV=>
zBe|<E?rOh3@{Oh|2g>I7^k>V@FYb{)_b_-x9a#*Dq~zJ8TP9Fat<Ovt<(;LJc;YEf
zJu#zE?!=%W=U&TCw@AL-1TWGXLjgWlpJ<f$5DT(WmxR=)#uzCrqC1{dII=cT@jW?H
z?o(ztRD*Cjls3-)LA781?MrNGAT)&JjgN6X<Bda?6p3FqHfAn4Pe`(2+F;&1yp|mT
z2!!?Szuv+hUul)!RY?A)Ws)+$+~D3C%Tg=gK9bD~Q;flDPCF(F(eahT<9IxADe)wa
zp4}VxMhaQs-8{KEgZPn(_&t>{KSND%NJay~#{D=n{31wVSPoww5HDoRXJH0g219#l
zv$Uf#BG9~N0VNb#lw`%Wbab;sX}v%b$*8*U`_b`3_%ZTCC!)sd#y&A+2nVeB>;Wo4
zIZkDn&f|4{w}iR&YvANFWE^18w5wt+BIP-sLRT>CcPrOzC)8B7CGt~yykGFza1YB1
z^CZ4W2TW~3qGIYmLU~pHt!SkG?aBPhq4P_9(pBR#kJ3BwJ9balPM$4UL>Z((Zd05q
z0i`T@yIH&i2t$%;ISUTQy!yOEw$?S&`vob7+b_Kh@cai0#UB+;x$AtQ?s9e$qlVs_
znfWU5^+k%jUg9|_$QZ?6K9j)%tYNMwze}RX@KV9qd*eBKI}<gvQ4)Ql_uOCbkwZ$=
zMc=P>?<I{ElfXuSA&oO6{at;*r#>FCy`NMSU!t6N@ulOODwXhHaj7+aop=Fa0;b;9
z+`8x6?;%AGP9f)oE4+l6D^7>C+zrN~TMuRd8r+rc2bT?n2p1#YFMMdQq6o+Z`Z+^H
z=p*y0yre|$Ex*nfJg-e3HJZ_zD@iW@kuA=SWbX%6>QMOPq&5_#T`fEWEa$>}Yd<iq
zt+nS(X#*n>4|>_YlZtw$zTa2TE$x?cMz<WHE7M58kp;|-0;yay!Yy&*gU@A>Ny*Vr
zQySc*cp<WX_0ttq8x6g+U1uJ{A$=o-=@v%))ri_=ybYKeW{ngu=PiM=&F?WyM3#?z
zaP3<Hyxf4DqQ)!)?8Ym5eS3aW*<Lwc{G93ez^O#ug2iUF)Nrh|n9Mz!!qpI~HI}lw
z=M2kI2}R@5`&g=OQejKocz3;T_j0b+zVCCGGkmlk{tR=$0XVNmRaE;Q{7*_GrNrr=
zK9!lk(@`Sx`e81<a0+ZpN$>imyW5=86_;l-thg*eR>UB={+`qFjURlhtZuVTU6cdv
z)%E2f&|2edHPvhmeF&zB@K+WCYRWz5zDz<>*@`(ZC|wd980p=Pj*v~$_F3mji$40p
z@di~9jAxmyxHQwp!I+3}flf8xFwgfeoW51jy|LJoHq1F>TNsn$<<rWaVFVN_qtE?S
z;9@>N(Ri6Q!|D7DURXN)xq|mt!G%R75@@Scc)dJOo?7A6<T}i-N6%A?)q`9K9W(#f
zNC4n$U@AlVM&d7j6A=BoD38th^=i`Rr!{|iLrPYJ-92`wy*7G}Bv7dt2@0JUrXC79
zj7>UMgohWC{Lx0nd!E~Eb8Bj`M`0EEP<)FMCQ54YK*_JF>D;0X?5*)pZ=Ca-S0Tkz
zDi$hw%FzIu#_EhM=vpKt&LAhWDPhk*<Z9mDETO&0OZO9^Z2G!7(-eQ;lYiSHNZQ-y
z*G7~i2N{~#H^3-%WzV)%kjKCnb&Pj3fgwV!n=ekEjl$`^INNSM+8}=@HER5WiNyg0
z1g;?&tnI6u@A{$k+_V^a1$v(+!i<&P#bBh2cHIVg+Mrih51JNF7PTs7ZzT}9P*VOU
zYOuE+c9#2mUw44S6aG0lhl3$B5T@~K=CXs$8o@=xZsLbh*Fc^CpK(n`1?la&Hc6vf
z#Y#TS_vy`l8rPzbks}<OLmz<!P|K$HM;KlSaD3+{JT>su^ikOvoP;8tb*2u}rGVvW
zm@Cj2MkA?E%|qbF410k%XZ7-IN4Tqx9_>+27$aIs(3<|+qTT4;ri`x_r|fEcs}^ON
zvj`Y;^3*>YP15wHtKI^kUW|0@8{C&2Ux5r9K@WVhs95sMvsfcM?hc=*Z|qbq{SFfD
z_cqkBWYCLC9)rA29;`wf7DUqK|J~T&5n=aa1wEFKYAIAg@g`R^)-b#uxKNwtX!M(E
zH^8rYMqeZ|*vafqg$!~%0qMY|%qQxK;A{fJGsIm$jP4Qukc8RF?ISMnr3dU=fkS-G
z|L8du^plB|vunF%ZQKX$>M6x~0Zbze53)|yLkKlP{HGdczcJ%38t$xk$*5p|{L&9(
z<?0lmK#qUfd}4d<hU84&hq$m9{e;r$OU22pN6m_{GT;2-3Dcc|N-f@d;oa|Ob$aQ8
zHUK>!3o|n`EEgTH{T1r+er>n_a+M#5<3!sqTCNB854uh=y<NgDZ9=cUKAY4Jh8L<>
z{HoopoyOggX56D^E5;&jAcz=^1(2Ul2JVghWxuO=mo{j3X7vFr`|l5Vk?q;&5Yz~M
zsrFJ$O4#ooIgOow2nZyJ*};&!3=@b{4V80pc8g-q5@<(ZYW$ZI(CO)Fx*0CB*Itwf
zcXC|SGEFdy<k#J|1LE-`Ww=SR(NxA#3b6mFS>#mhs0CBvyw6TYg|iTNIkFwVikEjH
zqQ81vZsFr4aQtzu5^0|Sct%(J$LY%TZ-s@UDmomBNRf;*v1q56mVz`m3>g2|UmgY2
zPqRCdHsuO(V*x)|+9J2eMS<v)k3Mf#owC{<0{)d-HZ5i}Z-UiG1J55mmBK8_MzI4!
z(FATtA^<-pemoPaJids(pUdZfJcjUa3Ekgd9NSf;t=FXvS)vbthDKkc^gc%}v%Gew
zlmdO-H#Kofoi5Y*KklD^nIDUr?yeVJ>+<%*@{q(m!g(QiP;Ow%%Avv;LM(eUlwC7p
z_eaq{{@n1J%hUS?r?JGz2z!j`FHwL%;82SN)?uK5U?_2>H69>mVOl!-4td{;^`W<C
z9e;zZhNX!!<)(jV^C_HKYX6P9Gjkjb-a{`VCQIs1O_GY<hNmu7r9cB5Nb6r|``)sf
zN1gbDb^P0Z3}6?zTx$$98~VwZeD5<6gPbbC{WyJTIk;<`_BGDAY!wsO-J*o?WHFZ#
zVgc&lCNE$b!jUdeC~<!q-ou(x&D(I4kSv0VgSfU0c>f(}<lsD^-R)_ECe8zoTL>~(
zQ4Vvy+%Zj0`2`&mNG{6WaPZmAv%&bW+v`S`paOsT3)u|wpWWrRhe)MXI_ca7AB@p#
z%MgAR=}M_tAvi|h!Nuy0adpkpQWRs|>6T+M4-G(ZPFwAs9X0NQ5{BO3ZFSS5h0(}j
z!Pe^=$hcXUpIaGvg$eaSrdCDIh8x%<&9n8IG*oHsV4=}BeA+3<ot)PX!RACCwq~Q)
zwrUmvhT>K1PjYkM1sj)>@9=M{2>B4s6ZN6vv|(-TBQPq-GOzrQ#2V7ic(5}GWtIwE
z8rqeTKw$4X^!A2tMV}lJF;BFB=n`fnoy`r%RAD8SFG?Tut>t|#p!94O%f$r>7f5JZ
zB;i;D9?WFYe9IW#M;osVOIbN8pz^5vT!#`x>&-ZFc;(kS9WSO4{fi<<PPf-I$LpGT
zExqi(f&&}6YhIi=?pYj++^Gjo^k6qFP3M9;1!Cjl4L{VFuNFO|Ai+n5i$c@$w}8;^
z=TGZDqJ<7HuZW94qUR5b76F1%#1&F@e~tRFJn{B;=@~97>!A$<4jR9@Qq}XlIPhvY
z9eyoS+y3%>xmTHd)lmm6kXqlPFFDUj1YS=o^T#DCFQq462gI{>XTgtP;W)#Zq<@P1
z+uWoMSAf-D@_%+>gFzB_)_xyG@UF@x!9F(S>k5(ZAm&(5OIfV35+(_iVwXIg#t%(~
zA62`aV9-uMNix=Bvv1hxB)43bv|Bzil7{VpbkoT$0X|KU^UG5K8@!6m!Gu36wCAo=
zpVTKSlFfK*J#46lm*fMD@A~~6D3$occ}{)|5t>*<B8<pvJ~)CEIk9hu@Wb^q6d$As
zDsuHHpJ~DY(Xrqh=a#_$H&fvcjCzR571(%L-xrfiVmrY)r%&=_Z<KBpzdl<$WoB(X
z8DnuHl5bvC^gGKyS#ZEucx;rTDXGDv-=}eA7XZ$ALc}}_a+e4Yq((nXtf;906ZqeY
z1`jS^ho;x>H0AOalKGeI^+1g5m*2B<lE6(_kMv`PO#@kj7L;xrWi3kSwmdwJMsL&%
zU!unxPz#73Ov-k<-U&h_y~Tj7fsux_pM7e5P9p{RfP(Sc8)FoCdlbH241g?Yf)Z*?
znEh2guv05|--hx^7T4w+ii^8n)@lu%55va7&KCxl8y1vh&28f!@2*MLreeybpl7of
zDeir#s0hI4wUtt*p&1+<Lmn*^<0<7tUssB~DF{PrwpUi27wiptbYN9EG<Bf;VvEu?
zQl4O}Cf(?InhEwZ-YU>+bI^xpR`btuZ5R4j#shPVIDm~14mtIZrt))8cydM^iVUDv
z_wxs>YSiJ@Ly7#UTjc1GX)1bq=#I%lNWML7HYJ)yBxf7rJMZ859;_4WUik+sfj}TC
zX9-PbQzK_HUK2+%-~+_U%)-sc%*x2hqRPz8%gn>e&d&Jn3p1F~V)p-ez}C*x%G~|`
z|A4#uh9&R-{=a`vu`_peGjcQoiJ00Mn~}=c8d;brn;Dt9JB*m|13v=ENGOO`i5h<Y
EAJ6@~0{{R3

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/autograph/impl/config.py b/tensorflow/contrib/autograph/impl/config.py
index 2600088595a127..878bb7e12f2b39 100644
--- a/tensorflow/contrib/autograph/impl/config.py
+++ b/tensorflow/contrib/autograph/impl/config.py
@@ -33,7 +33,7 @@
     (utils.__name__,),
 
     # All of tensorflow's subpackages. Unlike the root tf module, they don't
-    # have well-known names. Not refering to the module directly to avoid
+    # have well-known names. Not referring to the module directly to avoid
     # circular imports.
     (
         utils.__name__[:-len('.contrib.autograph.utils')],),
diff --git a/tensorflow/contrib/autograph/operators/control_flow.py b/tensorflow/contrib/autograph/operators/control_flow.py
index 9f7202821f0d0b..671c9ccc13eaa8 100644
--- a/tensorflow/contrib/autograph/operators/control_flow.py
+++ b/tensorflow/contrib/autograph/operators/control_flow.py
@@ -174,7 +174,7 @@ def while_stmt(test, body, init_state, extra_deps, opts=None):
     Tuple containing the final state.
   """
   # TODO(mdan): Consider adding a generic mechanism for dynamic dispatch.
-  # That could be somethins as simple as a collection of dispatch rules, with
+  # That could be something as simple as a collection of dispatch rules, with
   # some prioritization.
   if any(tensor_util.is_tensor(v) for v in init_state + extra_deps):
     return _tf_while_stmt(test, body, init_state, opts)
diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py
index c492ef19f150e9..5dd2e0c7f254f3 100644
--- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py
+++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py
@@ -371,7 +371,7 @@ def _predict_and_return_dict(self, ensemble_handle, ensemble_stamp, mode):
     Returns:
       a dictionary of prediction results -
         ENSEMBLE_STAMP, PREDICTION, PARTITION_IDS,
-        NUM_LAYER_ATTEMPTED, NUM_TREES_ATTEMPED.
+        NUM_LAYER_ATTEMPTED, NUM_TREES_ATTEMPTED.
     """
     ensemble_stats = training_ops.tree_ensemble_stats(ensemble_handle,
                                                       ensemble_stamp)
diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index 44e39f7f7b5da8..0708d6b7b9f0ba 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -172,19 +172,20 @@ if (tensorflow_OPTIMIZE_FOR_NATIVE_ARCH)
   endif()
 endif()
 
+include(CheckCXXCompilerFlag)
+
+# OpenMP Support
+CHECK_CXX_COMPILER_FLAG("-fopenmp" GCC_OPENMP_SUPPORT)
+if (GCC_OPENMP_SUPPORT)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
+endif()
+CHECK_CXX_COMPILER_FLAG("/openmp" MSVC_OPENMP_SUPPORT)
+if (MSVC_OPENMP_SUPPORT)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp")
+endif()
+
 # MSVC SIMD instructions
 if (tensorflow_WIN_CPU_SIMD_OPTIONS)
-  include(CheckCXXCompilerFlag)
-  if (tensorflow_ENABLE_MKL_SUPPORT)
-    add_definitions(-DINTEL_MKL -DEIGEN_USE_VML)
-    if (NOT tensorflow_ENABLE_MKLDNN_SUPPORT)
-      add_definitions(-DINTEL_MKL_ML)
-    endif()
-  endif()
-  CHECK_CXX_COMPILER_FLAG("-fopenmp" COMPILER_OPT_OPENMP_SUPPORT)
-  if (COMPILER_OPT_OPENMP_SUPPORT)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
-  endif()
   if (WIN32)
     CHECK_CXX_COMPILER_FLAG(${tensorflow_WIN_CPU_SIMD_OPTIONS} COMPILER_OPT_WIN_CPU_SIMD_SUPPORTED)
     if(COMPILER_OPT_WIN_CPU_SIMD_SUPPORTED)
@@ -323,10 +324,13 @@ if(HAIKU)
   list(APPEND tensorflow_EXTERNAL_LIBRARIES network)
 endif()
 
+# MKL Support
 if (tensorflow_ENABLE_MKL_SUPPORT)
+  add_definitions(-DINTEL_MKL -DEIGEN_USE_VML)
   if (WIN32)
     find_path(MKL_HOME_PLATFORM mkl
       PATHS ${MKL_HOME} ${MKL_HOME}/../ ${MKL_HOME}/../../
+      $ENV{MKLROOT} $ENV{MKLROOT}/../ $ENV{MKLROOT}/../../
       PATH_SUFFIXES windows)
     set(MKL_INCLUDE_DIRS ${MKL_HOME_PLATFORM}/mkl/include)
     set(MKL_LINK_DIRS
@@ -345,6 +349,7 @@ if (tensorflow_ENABLE_MKL_SUPPORT)
     # Fix me: complete the path on linux
     find_path(MKL_HOME_PLATFORM mkl
       HINTS ${MKL_HOME} ${MKL_HOME}/../ ${MKL_HOME}/../../
+      $ENV{MKLROOT} $ENV{MKLROOT}/../ $ENV{MKLROOT}/../../
       PATH_SUFFIXES linux)
     set(MKL_INCLUDE_DIRS ${MKL_HOME_PLATFORM}/mkl/include)
     set(MKL_LINK_DIRS) # incompleted
@@ -357,6 +362,8 @@ if (tensorflow_ENABLE_MKL_SUPPORT)
     list(APPEND tensorflow_EXTERNAL_LIBRARIES ${mkldnn_STATIC_LIBRARIES})
     list(APPEND tensorflow_EXTERNAL_DEPENDENCIES mkldnn)
     include_directories(${mkldnn_INCLUDE_DIRS})
+  else (tensorflow_ENABLE_MKLDNN_SUPPORT)
+    add_definitions(-DINTEL_MKL_ML)
   endif()
 endif (tensorflow_ENABLE_MKL_SUPPORT)
 
diff --git a/tensorflow/contrib/cmake/external/zlib.cmake b/tensorflow/contrib/cmake/external/zlib.cmake
index 116d42309394b9..8942f3eecf07ff 100644
--- a/tensorflow/contrib/cmake/external/zlib.cmake
+++ b/tensorflow/contrib/cmake/external/zlib.cmake
@@ -31,7 +31,8 @@ else (systemlib_ZLIB)
   set(ZLIB_URL https://github.com/madler/zlib)
   set(ZLIB_BUILD ${CMAKE_CURRENT_BINARY_DIR}/zlib/src/zlib)
   set(ZLIB_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/zlib/install)
-  set(ZLIB_TAG 50893291621658f355bc5b4d450a8d06a563053d)
+  # Match zlib version in tensorflow/workspace.bzl
+  set(ZLIB_TAG v1.2.11)
 
   if(WIN32)
     if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*")
diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake
index 92f2ab6dea8e7d..5942ff3363a96d 100644
--- a/tensorflow/contrib/cmake/tf_tests.cmake
+++ b/tensorflow/contrib/cmake/tf_tests.cmake
@@ -267,6 +267,8 @@ if (tensorflow_BUILD_PYTHON_TESTS)
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/variable_scope_test.py"
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/functional_ops_test.py"
       "${tensorflow_source_dir}/tensorflow/python/kernel_tests/py_func_test.py"
+      # Flaky on Windows cpu with py36 (b/73556968)
+      "${tensorflow_source_dir}/tensorflow/python/kernel_tests/sparse_reshape_op_test.py"
       # Windows file management related issues.
       "${tensorflow_source_dir}/tensorflow/python/training/evaluation_test.py"
       # training tests
diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index d269b5b69a1304..c483a437694b8a 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -355,11 +355,15 @@ py_test(
     deps = [
         "//tensorflow/contrib/data/python/ops:resampling",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:random_ops",
         "//tensorflow/python:string_ops",
         "//tensorflow/python:util",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
     ],
 )
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/resample_test.py b/tensorflow/contrib/data/python/kernel_tests/resample_test.py
index 5f47dcb3399911..bdc003a8a5bd64 100644
--- a/tensorflow/contrib/data/python/kernel_tests/resample_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/resample_test.py
@@ -18,6 +18,9 @@
 from __future__ import print_function
 
 import numpy as np
+from six.moves import xrange  # pylint: disable=redefined-builtin
+import time
+from absl.testing import parameterized
 
 from tensorflow.contrib.data.python.ops import resampling
 from tensorflow.python.data.ops import dataset_ops
@@ -30,52 +33,98 @@
 from tensorflow.python.util import compat
 
 
-class ResampleTest(test.TestCase):
+def _time_resampling(
+    test_obj, data_np, target_dist, init_dist, num_to_sample):
+  dataset = dataset_ops.Dataset.from_tensor_slices(data_np).repeat()
 
-  def testInitialKnownDistribution(self):
-    self._testDistribution(initial_known=True)
+  # Reshape distribution via rejection sampling.
+  dataset = dataset.apply(
+      resampling.rejection_resample(
+          class_func=lambda x: x,
+          target_dist=target_dist,
+          initial_dist=init_dist,
+          seed=142))
 
-  def testInitialNotKnownDistribution(self):
-    self._testDistribution(initial_known=False)
+  get_next = dataset.make_one_shot_iterator().get_next()
 
-  def _testDistribution(self, initial_known):
+  with test_obj.test_session() as sess:
+    start_time = time.time()
+    for _ in xrange(num_to_sample):
+      sess.run(get_next)
+    end_time = time.time()
+
+  return end_time - start_time
+
+
+class ResampleTest(test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      ("InitialDistributionKnown", True),
+      ("InitialDistributionUnknown", False))
+  def testDistribution(self, initial_known):
     classes = np.random.randint(5, size=(20000,))  # Uniformly sampled
     target_dist = [0.9, 0.05, 0.05, 0.0, 0.0]
     initial_dist = [0.2] * 5 if initial_known else None
-    iterator = (dataset_ops.Dataset.from_tensor_slices(classes).shuffle(
-        200, seed=21).map(lambda c: (c, string_ops.as_string(c))).apply(
-            resampling.rejection_resample(
-                target_dist=target_dist,
-                initial_dist=initial_dist,
-                class_func=lambda c, _: c,
-                seed=27)).make_one_shot_iterator())
-    get_next = iterator.get_next()
+    classes = math_ops.to_int64(classes)  # needed for Windows build.
+    dataset = dataset_ops.Dataset.from_tensor_slices(classes).shuffle(
+        200, seed=21).map(lambda c: (c, string_ops.as_string(c))).repeat()
+
+    get_next = dataset.apply(
+        resampling.rejection_resample(
+            target_dist=target_dist,
+            initial_dist=initial_dist,
+            class_func=lambda c, _: c,
+            seed=27)).make_one_shot_iterator().get_next()
 
     with self.test_session() as sess:
       returned = []
-      with self.assertRaises(errors.OutOfRangeError):
-        while True:
-          returned.append(sess.run(get_next))
+      while len(returned) < 4000:
+        returned.append(sess.run(get_next))
 
     returned_classes, returned_classes_and_data = zip(*returned)
     _, returned_data = zip(*returned_classes_and_data)
     self.assertAllEqual([compat.as_bytes(str(c))
                          for c in returned_classes], returned_data)
     total_returned = len(returned_classes)
-    # Subsampling rejects a large percentage of the initial data in
-    # this case.
-    self.assertGreater(total_returned, 20000 * 0.2)
     class_counts = np.array([
         len([True for v in returned_classes if v == c])
         for c in range(5)])
     returned_dist = class_counts / total_returned
     self.assertAllClose(target_dist, returned_dist, atol=1e-2)
 
+  @parameterized.named_parameters(
+      ("OnlyInitial", True),
+      ("NotInitial", False))
+  def testEdgeCasesSampleFromInitialDataset(self, only_initial_dist):
+    init_dist = [0.5, 0.5]
+    target_dist = [0.5, 0.5] if only_initial_dist else [0.0, 1.0]
+    num_classes = len(init_dist)
+    # We don't need many samples to test that this works.
+    num_samples = 100
+    data_np = np.random.choice(num_classes, num_samples, p=init_dist)
+
+    dataset = dataset_ops.Dataset.from_tensor_slices(data_np)
+
+    # Reshape distribution.
+    dataset = dataset.apply(
+        resampling.rejection_resample(
+            class_func=lambda x: x,
+            target_dist=target_dist,
+            initial_dist=init_dist))
+
+    get_next = dataset.make_one_shot_iterator().get_next()
+
+    with self.test_session() as sess:
+      returned = []
+      with self.assertRaises(errors.OutOfRangeError):
+        while True:
+          returned.append(sess.run(get_next))
+
   def testRandomClasses(self):
     init_dist = [0.25, 0.25, 0.25, 0.25]
     target_dist = [0.0, 0.0, 0.0, 1.0]
     num_classes = len(init_dist)
-    # We don't need many samples to test a dirac-delta target distribution
+    # We don't need many samples to test a dirac-delta target distribution.
     num_samples = 100
     data_np = np.random.choice(num_classes, num_samples, p=init_dist)
 
@@ -109,5 +158,23 @@ def _remap_fn(_):
 
     self.assertAllClose(target_dist, bincount, atol=1e-2)
 
+
+class ResampleDatasetBenchmark(test.Benchmark):
+
+  def benchmarkResamplePerformance(self):
+    init_dist = [0.25, 0.25, 0.25, 0.25]
+    target_dist = [0.0, 0.0, 0.0, 1.0]
+    num_classes = len(init_dist)
+    # We don't need many samples to test a dirac-delta target distribution.
+    num_samples = 1000
+    data_np = np.random.choice(num_classes, num_samples, p=init_dist)
+
+    resample_time = _time_resampling(
+        self, data_np, target_dist, init_dist, num_to_sample=1000)
+
+    self.report_benchmark(
+        iters=1000, wall_time=resample_time, name="benchmark_resample")
+
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD
index 144460fde06401..eceecfd1744d0a 100644
--- a/tensorflow/contrib/data/python/ops/BUILD
+++ b/tensorflow/contrib/data/python/ops/BUILD
@@ -214,6 +214,7 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":batching",
+        ":interleave_ops",
         ":scan_ops",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:control_flow_ops",
@@ -223,6 +224,7 @@ py_library(
         "//tensorflow/python:math_ops",
         "//tensorflow/python:random_ops",
         "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
     ],
 )
 
diff --git a/tensorflow/contrib/data/python/ops/resampling.py b/tensorflow/contrib/data/python/ops/resampling.py
index a182dddd38d23d..bad6edd5147d83 100644
--- a/tensorflow/contrib/data/python/ops/resampling.py
+++ b/tensorflow/contrib/data/python/ops/resampling.py
@@ -20,10 +20,12 @@
 import numpy as np
 
 from tensorflow.contrib.data.python.ops import batching
+from tensorflow.contrib.data.python.ops import interleave_ops
 from tensorflow.contrib.data.python.ops import scan_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import logging_ops
@@ -50,79 +52,182 @@ def rejection_resample(class_func, target_dist, initial_dist=None, seed=None):
     A `Dataset` transformation function, which can be passed to
     @{tf.data.Dataset.apply}.
   """
-
   def _apply_fn(dataset):
     """Function from `Dataset` to `Dataset` that applies the transformation."""
-    dist_estimation_batch_size = 32
     target_dist_t = ops.convert_to_tensor(target_dist, name="target_dist")
     class_values_ds = dataset.map(class_func)
+
+    # Get initial distribution.
     if initial_dist is not None:
       initial_dist_t = ops.convert_to_tensor(initial_dist, name="initial_dist")
-      acceptance_dist = _calculate_acceptance_probs(initial_dist_t,
-                                                    target_dist_t)
+      acceptance_dist, prob_of_original = (
+          _calculate_acceptance_probs_with_mixing(initial_dist_t,
+                                                  target_dist_t))
       initial_dist_ds = dataset_ops.Dataset.from_tensors(
           initial_dist_t).repeat()
       acceptance_dist_ds = dataset_ops.Dataset.from_tensors(
           acceptance_dist).repeat()
+      prob_of_original_ds = dataset_ops.Dataset.from_tensors(
+          prob_of_original).repeat()
+    else:
+      initial_dist_ds = _estimate_initial_dist_ds(
+          target_dist_t, class_values_ds)
+      acceptance_and_original_prob_ds = initial_dist_ds.map(
+          lambda initial: _calculate_acceptance_probs_with_mixing(
+              initial, target_dist_t))
+      acceptance_dist_ds = acceptance_and_original_prob_ds.map(
+          lambda accept_prob, _: accept_prob)
+      prob_of_original_ds = acceptance_and_original_prob_ds.map(
+          lambda _, prob_original: prob_original)
+    filtered_ds = _filter_ds(dataset, acceptance_dist_ds, initial_dist_ds,
+                             class_values_ds, seed)
+    # Prefetch filtered dataset for speed.
+    filtered_ds = filtered_ds.prefetch(3)
+
+    prob_original_static = _get_prob_original_static(
+        initial_dist_t, target_dist_t) if initial_dist is not None else None
+    if prob_original_static == 1:
+      return dataset_ops.Dataset.zip((class_values_ds, dataset))
+    elif prob_original_static == 0:
+      return filtered_ds
     else:
-      num_classes = (target_dist_t.shape[0].value or
-                     array_ops.shape(target_dist_t)[0])
-      smoothing_constant = 10
-      initial_examples_per_class_seen = array_ops.fill(
-          [num_classes], np.int64(smoothing_constant))
-
-      def update_estimate_and_tile(num_examples_per_class_seen, c):
-        updated_examples_per_class_seen, dist = _estimate_data_distribution(
-            c, num_examples_per_class_seen)
-        tiled_dist = array_ops.tile(
-            array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1])
-        return updated_examples_per_class_seen, tiled_dist
-
-      initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size)
-                         .apply(scan_ops.scan(initial_examples_per_class_seen,
-                                              update_estimate_and_tile))
-                         .apply(batching.unbatch()))
-      acceptance_dist_ds = initial_dist_ds.map(
-          lambda initial: _calculate_acceptance_probs(initial, target_dist_t))
-
-    def maybe_warn_on_large_rejection(accept_dist, initial_dist):
-      proportion_rejected = math_ops.reduce_sum(
-          (1 - accept_dist) * initial_dist)
-      return control_flow_ops.cond(
-          math_ops.less(proportion_rejected, .5),
-          lambda: accept_dist,
-          lambda: logging_ops.Print(  # pylint: disable=g-long-lambda
-              accept_dist, [proportion_rejected, initial_dist, accept_dist],
-              message="Proportion of examples rejected by sampler is high: ",
-              summarize=100,
-              first_n=10))
-
-    acceptance_dist_ds = (dataset_ops.Dataset.zip((acceptance_dist_ds,
-                                                   initial_dist_ds))
-                          .map(maybe_warn_on_large_rejection))
-
-    def _gather_and_copy(class_val, acceptance_prob, data):
-      return (class_val, array_ops.gather(acceptance_prob, class_val), data)
-    current_probabilities_and_class_and_data_ds = dataset_ops.Dataset.zip(
-        (class_values_ds, acceptance_dist_ds, dataset)).map(_gather_and_copy)
-    filtered_ds = (
-        current_probabilities_and_class_and_data_ds
-        .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p))
-    return filtered_ds.map(lambda class_value, _, data: (class_value, data))
+      return interleave_ops.sample_from_datasets(
+          [dataset_ops.Dataset.zip((class_values_ds, dataset)), filtered_ds],
+          weights=prob_of_original_ds.map(lambda prob: [(prob, 1.0 - prob)]),
+          seed=seed)
 
   return _apply_fn
 
 
-def _calculate_acceptance_probs(initial_probs, target_probs):
-  """Calculate the per-class acceptance rates.
+def _get_prob_original_static(initial_dist_t, target_dist_t):
+  """Returns the static probability of sampling from the original.
+
+  `tensor_util.constant_value(prob_of_original)` returns `None` if it encounters
+  an Op that it isn't defined for. We have some custom logic to avoid this.
+
+  Args:
+    initial_dist_t: A tensor of the initial distribution.
+    target_dist_t: A tensor of the target distribution.
+
+  Returns:
+    The probability of sampling from the original distribution as a constant,
+    if it is a constant, or `None`.
+  """
+  init_static = tensor_util.constant_value(initial_dist_t)
+  target_static = tensor_util.constant_value(target_dist_t)
+
+  if init_static is None or target_static is None:
+    return None
+  else:
+    return np.min(target_static / init_static)
+
+
+def _filter_ds(dataset, acceptance_dist_ds, initial_dist_ds, class_values_ds,
+               seed):
+  """Filters a dataset based on per-class acceptance probabilities.
 
   Args:
-    initial_probs: The class probabilities of the data.
-    target_probs: The desired class proportion in minibatches.
+    dataset: The dataset to be filtered.
+    acceptance_dist_ds: A dataset of acceptance probabilities.
+    initial_dist_ds: A dataset of the initial probability distribution, given or
+        estimated.
+    class_values_ds: A dataset of the corresponding classes.
+    seed: (Optional.) Python integer seed for the resampler.
+
   Returns:
-    A list of the per-class acceptance probabilities.
+    A dataset of (class value, data) after filtering.
+  """
+  def maybe_warn_on_large_rejection(accept_dist, initial_dist):
+    proportion_rejected = math_ops.reduce_sum((1 - accept_dist) * initial_dist)
+    return control_flow_ops.cond(
+        math_ops.less(proportion_rejected, .5),
+        lambda: accept_dist,
+        lambda: logging_ops.Print(  # pylint: disable=g-long-lambda
+            accept_dist, [proportion_rejected, initial_dist, accept_dist],
+            message="Proportion of examples rejected by sampler is high: ",
+            summarize=100,
+            first_n=10))
+
+  acceptance_dist_ds = (dataset_ops.Dataset.zip((acceptance_dist_ds,
+                                                 initial_dist_ds))
+                        .map(maybe_warn_on_large_rejection))
+
+  def _gather_and_copy(class_val, acceptance_prob, data):
+    return class_val, array_ops.gather(acceptance_prob, class_val), data
+
+  current_probabilities_and_class_and_data_ds = dataset_ops.Dataset.zip(
+      (class_values_ds, acceptance_dist_ds, dataset)).map(_gather_and_copy)
+  filtered_ds = (
+      current_probabilities_and_class_and_data_ds
+      .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p))
+  return filtered_ds.map(lambda class_value, _, data: (class_value, data))
+
+
+def _estimate_initial_dist_ds(
+    target_dist_t, class_values_ds, dist_estimation_batch_size=32,
+    smoothing_constant=10):
+  num_classes = (target_dist_t.shape[0].value or
+                 array_ops.shape(target_dist_t)[0])
+  initial_examples_per_class_seen = array_ops.fill(
+      [num_classes], np.int64(smoothing_constant))
+
+  def update_estimate_and_tile(num_examples_per_class_seen, c):
+    updated_examples_per_class_seen, dist = _estimate_data_distribution(
+        c, num_examples_per_class_seen)
+    tiled_dist = array_ops.tile(
+        array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1])
+    return updated_examples_per_class_seen, tiled_dist
 
-  This method is based on solving the following analysis:
+  initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size)
+                     .apply(scan_ops.scan(initial_examples_per_class_seen,
+                                          update_estimate_and_tile))
+                     .apply(batching.unbatch()))
+
+  return initial_dist_ds
+
+
+def _get_target_to_initial_ratio(initial_probs, target_probs):
+  # Add tiny to initial_probs to avoid divide by zero.
+  denom = (initial_probs + np.finfo(initial_probs.dtype.as_numpy_dtype).tiny)
+  return target_probs / denom
+
+
+def _estimate_data_distribution(c, num_examples_per_class_seen):
+  """Estimate data distribution as labels are seen.
+
+  Args:
+    c: The class labels.  Type `int32`, shape `[batch_size]`.
+    num_examples_per_class_seen: Type `int64`, shape `[num_classes]`,
+      containing counts.
+
+  Returns:
+    num_examples_per_class_seen: Updated counts.  Type `int64`, shape
+      `[num_classes]`.
+    dist: The updated distribution.  Type `float32`, shape `[num_classes]`.
+  """
+  num_classes = num_examples_per_class_seen.get_shape()[0].value
+  # Update the class-count based on what labels are seen in batch.
+  num_examples_per_class_seen = math_ops.add(
+      num_examples_per_class_seen, math_ops.reduce_sum(
+          array_ops.one_hot(c, num_classes, dtype=dtypes.int64), 0))
+  init_prob_estimate = math_ops.truediv(
+      num_examples_per_class_seen,
+      math_ops.reduce_sum(num_examples_per_class_seen))
+  dist = math_ops.cast(init_prob_estimate, dtypes.float32)
+  return num_examples_per_class_seen, dist
+
+
+def _calculate_acceptance_probs_with_mixing(initial_probs, target_probs):
+  """Calculates the acceptance probabilities and mixing ratio.
+
+  In this case, we assume that we can *either* sample from the original data
+  distribution with probability `m`, or sample from a reshaped distribution
+  that comes from rejection sampling on the original distribution. This
+  rejection sampling is done on a per-class basis, with `a_i` representing the
+  probability of accepting data from class `i`.
+
+  This method is based on solving the following analysis for the reshaped
+  distribution:
 
   Let F be the probability of a rejection (on any example).
   Let p_i be the proportion of examples in the data in class i (init_probs)
@@ -151,39 +256,39 @@ def _calculate_acceptance_probs(initial_probs, target_probs):
   0 <= t_i <= 1, sum_i(t_i) = 1
   ```
 
-
   A solution for a_i in terms of the other variables is the following:
     ```a_i = (t_i / p_i) / max_i[t_i / p_i]```
-  """
-  # Add tiny to initial_probs to avoid divide by zero.
-  denom = (initial_probs + np.finfo(initial_probs.dtype.as_numpy_dtype).tiny)
-  ratio_l = target_probs / denom
 
-  # Calculate list of acceptance probabilities.
-  max_ratio = math_ops.reduce_max(ratio_l)
-  return ratio_l / max_ratio
+  If we try to minimize the amount of data rejected, we get the following:
 
+  M_max = max_i [ t_i / p_i ]
+  M_min = min_i [ t_i / p_i ]
 
-def _estimate_data_distribution(c, num_examples_per_class_seen):
-  """Estimate data distribution as labels are seen.
+  The desired probability of accepting data if it comes from class `i`:
+
+  a_i = (t_i/p_i - m) / (M_max - m)
+
+  The desired probability of pulling a data element from the original dataset,
+  rather than the filtered one:
+
+  m = M_min
 
   Args:
-    c: The class labels.  Type `int32`, shape `[batch_size]`.
-    num_examples_per_class_seen: Type `int64`, shape `[num_classes]`,
-      containing counts.
+    initial_probs: A Tensor of the initial probability distribution, given or
+      estimated.
+    target_probs: A Tensor of the target probability distribution.
 
   Returns:
-    num_examples_per_lass_seen: Updated counts.  Type `int64`, shape
-      `[num_classes]`.
-    dist: The updated distribution.  Type `float32`, shape `[num_classes]`.
+    (A 1D Tensor with the per-class acceptance probabilities, the desired
+    probability of pulling from the original distribution.)
   """
-  num_classes = num_examples_per_class_seen.get_shape()[0].value
-  # Update the class-count based on what labels are seen in batch.
-  num_examples_per_class_seen = math_ops.add(
-      num_examples_per_class_seen, math_ops.reduce_sum(
-          array_ops.one_hot(c, num_classes, dtype=dtypes.int64), 0))
-  init_prob_estimate = math_ops.truediv(
-      num_examples_per_class_seen,
-      math_ops.reduce_sum(num_examples_per_class_seen))
-  dist = math_ops.cast(init_prob_estimate, dtypes.float32)
-  return num_examples_per_class_seen, dist
+  ratio_l = _get_target_to_initial_ratio(initial_probs, target_probs)
+  max_ratio = math_ops.reduce_max(ratio_l)
+  min_ratio = math_ops.reduce_min(ratio_l)
+
+  # Target prob to sample from original distribution.
+  m = min_ratio
+
+  # TODO(joelshor): Simplify fraction, if possible.
+  a_i = (ratio_l - m) / (max_ratio - m)
+  return a_i, m
\ No newline at end of file
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py b/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py
index ecdb8967f43e59..268c8d03426d43 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py
@@ -53,7 +53,7 @@ class CholeskyOuterProduct(bijector.Bijector):
   its spectrum), and that the product of two positive-diagonal lower-triangular
   matrices is another positive-diagonal lower-triangular matrix.
 
-  A simple inductive argument (proceding one column of L_3 at a time) shows
+  A simple inductive argument (proceeding one column of L_3 at a time) shows
   that, if `I = L_3 @ L_3.T`, with L_3 being lower-triangular with positive-
   diagonal, then `L_3 = I`. Thus, `L_1 = L_2`, proving injectivity of g.
 
diff --git a/tensorflow/contrib/eager/README.md b/tensorflow/contrib/eager/README.md
index 762685db14b968..4384431e7b9c3e 100644
--- a/tensorflow/contrib/eager/README.md
+++ b/tensorflow/contrib/eager/README.md
@@ -1,6 +1,6 @@
 # Eager Execution
 
-Eager execution provides an imperative interface to TensorFlow (similiar to
+Eager execution provides an imperative interface to TensorFlow (similar to
 [NumPy](http://www.numpy.org)). When you enable eager execution, TensorFlow
 operations execute immediately; you do not execute a pre-constructed graph with
 [`Session.run()`](https://www.tensorflow.org/api_docs/python/tf/Session).
diff --git a/tensorflow/contrib/ffmpeg/ffmpeg_lib.h b/tensorflow/contrib/ffmpeg/ffmpeg_lib.h
index a8d5a0dd83fb50..bf2aa75545813f 100644
--- a/tensorflow/contrib/ffmpeg/ffmpeg_lib.h
+++ b/tensorflow/contrib/ffmpeg/ffmpeg_lib.h
@@ -53,7 +53,7 @@ Status CreateAudioFile(const string& audio_format_id, int32 bits_per_second,
                        int32 samples_per_second, int32 channel_count,
                        const std::vector<float>& samples, string* output_data);
 
-// Reads an video file using ffmpeg adn converts it into a RGB24 in uint8
+// Reads a video file using ffmpeg and converts it into a RGB24 in uint8
 // [frames, height, width, 3]. The w, h, and frames are obtained from ffmpeg.
 Status ReadVideoFile(const string& filename, std::vector<uint8>* output_data,
                      uint32* width, uint32* height, uint32* frames);
diff --git a/tensorflow/contrib/framework/python/ops/critical_section_ops.py b/tensorflow/contrib/framework/python/ops/critical_section_ops.py
index bd764ed57a6da0..72835c3ad86e63 100644
--- a/tensorflow/contrib/framework/python/ops/critical_section_ops.py
+++ b/tensorflow/contrib/framework/python/ops/critical_section_ops.py
@@ -202,7 +202,7 @@ def execute(self, fn, *args, **kwargs):
         or lazy way that may cause a deadlock.
       ValueError: If `exclusive_resource_access` is not provided (is `True`) and
         another `CriticalSection` has an execution requesting the same
-        resources as in `*args`, `**kwargs`, and any additionaly captured
+        resources as in `*args`, `**kwargs`, and any additionally captured
         inputs in `fn`.  Note, even if `exclusive_resource_access` is `True`,
         if another execution in another `CriticalSection` was created without
         `exclusive_resource_access=True`, a `ValueError` will be raised.
diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
index e3fc6bf0f03405..4092b320042162 100644
--- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
+++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
@@ -112,6 +112,7 @@ def __init__(self,
                generator_optimizer=None,
                discriminator_optimizer=None,
                get_hooks_fn=None,
+               get_eval_metric_ops_fn=None,
                add_summaries=None,
                use_loss_summaries=True,
                config=None):
@@ -146,6 +147,9 @@ def __init__(self,
         list of hooks. These hooks are run on the generator and discriminator
         train ops, and can be used to implement the GAN training scheme.
         Defaults to `train.get_sequential_train_hooks()`.
+      get_eval_metric_ops_fn: A function that takes a `GANModel`, and returns a
+        dict of metric results keyed by name. The output of this function is
+        passed into `tf.estimator.EstimatorSpec` during evaluation.
       add_summaries: `None`, a single `SummaryType`, or a list of `SummaryType`.
       use_loss_summaries: If `True`, add loss summaries. If `False`, does not.
         If `None`, uses defaults.
@@ -160,7 +164,8 @@ def _model_fn(features, labels, mode):
               else discriminator_optimizer)
       gan_head = head_lib.gan_head(
           generator_loss_fn, discriminator_loss_fn, gopt, dopt,
-          use_loss_summaries, get_hooks_fn=get_hooks_fn)
+          use_loss_summaries, get_hooks_fn=get_hooks_fn,
+          get_eval_metric_ops_fn=get_eval_metric_ops_fn)
       return _gan_model_fn(
           features, labels, mode, generator_fn, discriminator_fn, gan_head,
           add_summaries)
diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py
index 387a62bd741bd4..955482599b372b 100644
--- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py
+++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py
@@ -38,6 +38,7 @@
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import metrics as metrics_lib
 from tensorflow.python.ops import parsing_ops
 from tensorflow.python.platform import test
 from tensorflow.python.summary.writer import writer_cache
@@ -194,6 +195,12 @@ def make_opt():
       lr = learning_rate_decay.exponential_decay(1.0, gstep, 10, 0.9)
       return training.GradientDescentOptimizer(lr)
 
+    def get_metrics(gan_model):
+      return {
+          'mse_custom_metric': metrics_lib.mean_squared_error(
+              gan_model.real_data, gan_model.generated_data)
+      }
+
     gopt = make_opt if lr_decay else training.GradientDescentOptimizer(1.0)
     dopt = make_opt if lr_decay else training.GradientDescentOptimizer(1.0)
     est = estimator.GANEstimator(
@@ -203,6 +210,7 @@ def make_opt():
         discriminator_loss_fn=losses.wasserstein_discriminator_loss,
         generator_optimizer=gopt,
         discriminator_optimizer=dopt,
+        get_eval_metric_ops_fn=get_metrics,
         model_dir=self._model_dir)
 
     # TRAIN
@@ -213,6 +221,9 @@ def make_opt():
     scores = est.evaluate(eval_input_fn)
     self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
     self.assertIn('loss', six.iterkeys(scores))
+    self.assertEqual(scores['discriminator_loss'] + scores['generator_loss'],
+                     scores['loss'])
+    self.assertIn('mse_custom_metric', six.iterkeys(scores))
 
     # PREDICT
     predictions = np.array([x for x in est.predict(predict_input_fn)])
diff --git a/tensorflow/contrib/gan/python/estimator/python/head_impl.py b/tensorflow/contrib/gan/python/estimator/python/head_impl.py
index a21358c50bbdb4..ff903a78cc36c1 100644
--- a/tensorflow/contrib/gan/python/estimator/python/head_impl.py
+++ b/tensorflow/contrib/gan/python/estimator/python/head_impl.py
@@ -25,17 +25,21 @@
 from tensorflow.python.estimator import model_fn as model_fn_lib
 from tensorflow.python.estimator.canned import head
 from tensorflow.python.framework import ops
+from tensorflow.python.ops import metrics as metrics_lib
 
 __all__ = [
     'GANHead',
     'gan_head',
 ]
 
+def _summary_key(head_name, val):
+  return '%s/%s' % (val, head_name) if head_name else val
+
 
 def gan_head(generator_loss_fn, discriminator_loss_fn, generator_optimizer,
              discriminator_optimizer, use_loss_summaries=True,
              get_hooks_fn=tfgan_train.get_sequential_train_hooks(),
-             name=None):
+             get_eval_metric_ops_fn=None, name=None):
   """Creates a `GANHead`.
 
   Args:
@@ -47,9 +51,12 @@ def gan_head(generator_loss_fn, discriminator_loss_fn, generator_optimizer,
     discriminator_optimizer: Same as `generator_optimizer`, but for the
       discriminator updates.
     use_loss_summaries: If `True`, add loss summaries. If `False`, does not.
-        If `None`, uses defaults.
-    get_hooks_fn: A function that takes a GANTrainOps tuple and returns a list
-        of hooks.
+      If `None`, uses defaults.
+    get_hooks_fn: A function that takes a `GANTrainOps` tuple and returns a
+      list of hooks.
+    get_eval_metric_ops_fn: A function that takes a `GANModel`, and returns a
+      dict of metric results keyed by name. The output of this function is
+      passed into `tf.estimator.EstimatorSpec` during evaluation.
     name: name of the head. If provided, summary and metrics keys will be
       suffixed by `"/" + name`.
 
@@ -62,6 +69,7 @@ def gan_head(generator_loss_fn, discriminator_loss_fn, generator_optimizer,
                  discriminator_optimizer=discriminator_optimizer,
                  use_loss_summaries=use_loss_summaries,
                  get_hooks_fn=get_hooks_fn,
+                 get_eval_metric_ops_fn=get_eval_metric_ops_fn,
                  name=name)
 
 
@@ -72,6 +80,7 @@ def __init__(self, generator_loss_fn, discriminator_loss_fn,
                generator_optimizer, discriminator_optimizer,
                use_loss_summaries=True,
                get_hooks_fn=None,
+               get_eval_metric_ops_fn=None,
                name=None):
     """`Head` for GAN training.
 
@@ -85,8 +94,11 @@ def __init__(self, generator_loss_fn, discriminator_loss_fn,
         discriminator updates.
       use_loss_summaries: If `True`, add loss summaries. If `False`, does not.
         If `None`, uses defaults.
-      get_hooks_fn: A function that takes a GANTrainOps tuple and returns a list
-        of hooks. Defaults to `train.get_sequential_train_hooks()`
+      get_hooks_fn: A function that takes a `GANTrainOps` tuple and returns a
+        list of hooks. Defaults to `train.get_sequential_train_hooks()`
+      get_eval_metric_ops_fn: A function that takes a `GANModel`, and returns a
+        dict of metric results keyed by name. The output of this function is
+        passed into `tf.estimator.EstimatorSpec` during evaluation.
       name: name of the head. If provided, summary and metrics keys will be
         suffixed by `"/" + name`.
     """
@@ -104,6 +116,8 @@ def __init__(self, generator_loss_fn, discriminator_loss_fn,
     self._generator_optimizer = generator_optimizer
     self._discriminator_optimizer = discriminator_optimizer
     self._get_hooks_fn = get_hooks_fn
+    self._get_eval_metric_ops_fn = get_eval_metric_ops_fn
+    self._name = name
 
   @property
   def name(self):
@@ -173,13 +187,26 @@ def create_estimator_spec(
         gan_loss = self.create_loss(
             features=None, mode=mode, logits=gan_model, labels=None)
         scalar_loss = gan_loss.generator_loss + gan_loss.discriminator_loss
+        with ops.name_scope(None, 'metrics',
+                            [gan_loss.generator_loss,
+                             gan_loss.discriminator_loss]):
+          eval_metric_ops = {
+              _summary_key(self._name, 'generator_loss'):
+                  metrics_lib.mean(gan_loss.generator_loss),
+              _summary_key(self._name, 'discriminator_loss'):
+                  metrics_lib.mean(gan_loss.discriminator_loss)
+          }
+          if self._get_eval_metric_ops_fn is not None:
+            custom_eval_metric_ops = self._get_eval_metric_ops_fn(gan_model)
+            if not isinstance(custom_eval_metric_ops, dict):
+              raise TypeError('get_eval_metric_ops_fn must return a dict, '
+                              'received: {}'.format(custom_eval_metric_ops))
+            eval_metric_ops.update(custom_eval_metric_ops)
         return model_fn_lib.EstimatorSpec(
             mode=model_fn_lib.ModeKeys.EVAL,
             predictions=gan_model.generated_data,
             loss=scalar_loss,
-            # TODO(joelshor): Add metrics. If head name provided, append it to
-            # metric keys.
-            eval_metric_ops={})
+            eval_metric_ops=eval_metric_ops)
       elif mode == model_fn_lib.ModeKeys.TRAIN:
         if train_op_fn is None:
           raise ValueError('train_op_fn can not be None.')
diff --git a/tensorflow/contrib/gan/python/estimator/python/head_test.py b/tensorflow/contrib/gan/python/estimator/python/head_test.py
index 8168f005cd1105..6587f1fc600b94 100644
--- a/tensorflow/contrib/gan/python/estimator/python/head_test.py
+++ b/tensorflow/contrib/gan/python/estimator/python/head_test.py
@@ -62,9 +62,14 @@ def setUp(self):
         generator_loss_fn=dummy_loss,
         discriminator_loss_fn=dummy_loss,
         generator_optimizer=training.GradientDescentOptimizer(1.0),
-        discriminator_optimizer=training.GradientDescentOptimizer(1.0))
+        discriminator_optimizer=training.GradientDescentOptimizer(1.0),
+        get_eval_metric_ops_fn=self.get_metrics)
     self.assertTrue(isinstance(self.gan_head, head.GANHead))
 
+  def get_metrics(self, gan_model):
+    self.assertTrue(isinstance(gan_model, tfgan_tuples.GANModel))
+    return {}
+
   def _test_modes_helper(self, mode):
     self.gan_head.create_estimator_spec(
         features=None,
diff --git a/tensorflow/contrib/gan/python/features/python/conditioning_utils.py b/tensorflow/contrib/gan/python/features/python/conditioning_utils.py
index df71187fbd98c8..a9b8faa7126253 100644
--- a/tensorflow/contrib/gan/python/features/python/conditioning_utils.py
+++ b/tensorflow/contrib/gan/python/features/python/conditioning_utils.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Miscellanous utilities for TFGAN code and examples."""
+"""Miscellaneous utilities for TFGAN code and examples."""
 
 from __future__ import absolute_import
 from __future__ import division
diff --git a/tensorflow/contrib/graph_editor/transform.py b/tensorflow/contrib/graph_editor/transform.py
index a320a3f232fc1d..592d37b432ee60 100644
--- a/tensorflow/contrib/graph_editor/transform.py
+++ b/tensorflow/contrib/graph_editor/transform.py
@@ -677,7 +677,7 @@ def replace_t_with_replacement_handler(info, t):
 
 
 def _add_control_flow_ops(ops, control_ios):
-  """Complete `ops` so that the tranformed graph is valid.
+  """Complete `ops` so that the transformed graph is valid.
 
   Partially copying a graph can lead to a malformed graph. For instance,
   copying half of a while construct is likely to result in an invalid graph.
diff --git a/tensorflow/contrib/hvx/hvx_ops_support_checker/hvx_ops_support_checker_main.cc b/tensorflow/contrib/hvx/hvx_ops_support_checker/hvx_ops_support_checker_main.cc
index 60281951dda940..66939fbb0f0d3b 100644
--- a/tensorflow/contrib/hvx/hvx_ops_support_checker/hvx_ops_support_checker_main.cc
+++ b/tensorflow/contrib/hvx/hvx_ops_support_checker/hvx_ops_support_checker_main.cc
@@ -115,7 +115,7 @@ static void CheckOpsSupport(const GraphDef& graph_def,
       HexagonOpsDefinitions::getInstance();
   LOG(INFO) << "Checking " << graph_def.node_size() << " nodes";
   LOG(INFO) << "dump_all_nodes = " << dump_all_nodes
-            << ", dump_shape_and_tpye = " << dump_shape_and_type;
+            << ", dump_shape_and_type = " << dump_shape_and_type;
 
   std::unordered_set<string> unsupported_ops;
   bool all_supported = true;
diff --git a/tensorflow/contrib/image/__init__.py b/tensorflow/contrib/image/__init__.py
index 8f406ace1d5dcc..f230d93da4a9c0 100755
--- a/tensorflow/contrib/image/__init__.py
+++ b/tensorflow/contrib/image/__init__.py
@@ -17,7 +17,7 @@
 ### API
 
 This module provides functions for image manipulation; currently, chrominance
-transformas (including changing saturation and hue) in YIQ space and
+transforms (including changing saturation and hue) in YIQ space and
 projective transforms (including rotation) are supported.
 
 ## Image Transformation `Ops`
diff --git a/tensorflow/contrib/kfac/examples/convnet.py b/tensorflow/contrib/kfac/examples/convnet.py
index b261f41bf97db1..d6b1a61b716ab7 100644
--- a/tensorflow/contrib/kfac/examples/convnet.py
+++ b/tensorflow/contrib/kfac/examples/convnet.py
@@ -325,7 +325,7 @@ def distributed_grads_only_and_ops_chief_worker(
 
   All workers perform gradient computation. Chief worker applies gradient after
   averaging the gradients obtained from all the workers. All workers block
-  execution untill the update is applied. Chief worker runs covariance and
+  execution until the update is applied. Chief worker runs covariance and
   inverse update ops. Covariance and inverse matrices are placed on parameter
   servers in a round robin manner. For further details on synchronous
   distributed optimization check `tf.train.SyncReplicasOptimizer`.
diff --git a/tensorflow/contrib/kfac/python/ops/optimizer.py b/tensorflow/contrib/kfac/python/ops/optimizer.py
index 45a760c9f1013d..b7f63d8d94a7a4 100644
--- a/tensorflow/contrib/kfac/python/ops/optimizer.py
+++ b/tensorflow/contrib/kfac/python/ops/optimizer.py
@@ -66,7 +66,7 @@ def __init__(self,
           the local approximation with the Fisher information matrix, and to
           regularize the update direction by making it closer to the gradient.
           If damping is adapted during training then this value is used for
-          initializing damping varaible.
+          initializing damping variable.
           (Higher damping means the update looks more like a standard gradient
           update - see Tikhonov regularization.)
       layer_collection: The layer collection object, which holds the fisher
@@ -114,7 +114,7 @@ def __init__(self,
     self._estimation_mode = estimation_mode
     self._colocate_gradients_with_ops = colocate_gradients_with_ops
 
-    # The below paramaters are required only if damping needs to be adapated.
+    # The below parameters are required only if damping needs to be adapted.
     # These parameters can be set by calling
     # set_damping_adaptation_params() explicitly.
     self._damping_adaptation_decay = 0.95
@@ -195,7 +195,7 @@ def set_damping_adaptation_params(self,
       min_damping: `float`(Optional), Minimum value the damping parameter
         can take. Default value 1e-5.
       damping_adaptation_decay: `float`(Optional), The `damping` parameter is
-        multipled by the `damping_adaptation_decay` every
+        multiplied by the `damping_adaptation_decay` every
         `damping_adaptation_interval` number of iterations. Default value 0.99.
       damping_adaptation_interval: `int`(Optional), Number of steps in between
         updating the `damping` parameter. Default value 5.
diff --git a/tensorflow/contrib/kfac/python/ops/placement.py b/tensorflow/contrib/kfac/python/ops/placement.py
index 8a20ebe19844e6..c4454325aebe13 100644
--- a/tensorflow/contrib/kfac/python/ops/placement.py
+++ b/tensorflow/contrib/kfac/python/ops/placement.py
@@ -51,7 +51,7 @@ def __init__(self, cov_devices=None, inv_devices=None, **kwargs):
     self._inv_devices = inv_devices
 
   def make_vars_and_create_op_thunks(self, scope=None):
-    """Make vars and create op thunks w/ a round-robin device placement strat.
+    """Make vars and create op thunks w/ round-robin device placement strategy.
 
     For each factor, all of that factor's cov variables and their associated
     update ops will be placed on a particular device.  A new device is chosen
diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py
index f708da66937ed7..b7194ae3330450 100644
--- a/tensorflow/contrib/layers/python/layers/layers.py
+++ b/tensorflow/contrib/layers/python/layers/layers.py
@@ -932,7 +932,8 @@ def convolution(inputs,
                 variables_collections=None,
                 outputs_collections=None,
                 trainable=True,
-                scope=None):
+                scope=None,
+                conv_dims=None):
   """Adds an N-D convolution followed by an optional batch_norm layer.
 
   It is required that 1 <= N <= 3.
@@ -993,6 +994,10 @@ def convolution(inputs,
     trainable: If `True` also add variables to the graph collection
       `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
     scope: Optional scope for `variable_scope`.
+    conv_dims: Optional convolution dimensionality, when set it would use the
+      corresponding convolution (e.g. 2 for Conv 2D, 3 for Conv 3D, ..). When
+      left to None it would select the convolution dimensionality based on
+      the input rank (i.e. Conv ND, with N = input_rank - 2).
 
   Returns:
     A tensor representing the output of the operation.
@@ -1015,6 +1020,9 @@ def convolution(inputs,
     inputs = ops.convert_to_tensor(inputs)
     input_rank = inputs.get_shape().ndims
 
+    if conv_dims is not None and conv_dims + 2 != input_rank:
+      raise ValueError('Convolution expects input with rank %d, got %d' %
+                       (conv_dims + 2, input_rank))
     if input_rank == 3:
       layer_class = convolutional_layers.Convolution1D
     elif input_rank == 4:
@@ -1061,10 +1069,134 @@ def convolution(inputs,
       outputs = activation_fn(outputs)
     return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
 
+@add_arg_scope
+def convolution1d(inputs,
+                  num_outputs,
+                  kernel_size,
+                  stride=1,
+                  padding='SAME',
+                  data_format=None,
+                  rate=1,
+                  activation_fn=nn.relu,
+                  normalizer_fn=None,
+                  normalizer_params=None,
+                  weights_initializer=initializers.xavier_initializer(),
+                  weights_regularizer=None,
+                  biases_initializer=init_ops.zeros_initializer(),
+                  biases_regularizer=None,
+                  reuse=None,
+                  variables_collections=None,
+                  outputs_collections=None,
+                  trainable=True,
+                  scope=None):
+  return convolution(inputs,
+                     num_outputs,
+                     kernel_size,
+                     stride,
+                     padding,
+                     data_format,
+                     rate,
+                     activation_fn,
+                     normalizer_fn,
+                     normalizer_params,
+                     weights_initializer,
+                     weights_regularizer,
+                     biases_initializer,
+                     biases_regularizer,
+                     reuse,
+                     variables_collections,
+                     outputs_collections,
+                     trainable,
+                     scope,
+                     conv_dims=1)
+
+convolution1d.__doc__ = convolution.__doc__
 
-convolution2d = convolution
-convolution3d = convolution
+@add_arg_scope
+def convolution2d(inputs,
+                  num_outputs,
+                  kernel_size,
+                  stride=1,
+                  padding='SAME',
+                  data_format=None,
+                  rate=1,
+                  activation_fn=nn.relu,
+                  normalizer_fn=None,
+                  normalizer_params=None,
+                  weights_initializer=initializers.xavier_initializer(),
+                  weights_regularizer=None,
+                  biases_initializer=init_ops.zeros_initializer(),
+                  biases_regularizer=None,
+                  reuse=None,
+                  variables_collections=None,
+                  outputs_collections=None,
+                  trainable=True,
+                  scope=None):
+  return convolution(inputs,
+                     num_outputs,
+                     kernel_size,
+                     stride,
+                     padding,
+                     data_format,
+                     rate,
+                     activation_fn,
+                     normalizer_fn,
+                     normalizer_params,
+                     weights_initializer,
+                     weights_regularizer,
+                     biases_initializer,
+                     biases_regularizer,
+                     reuse,
+                     variables_collections,
+                     outputs_collections,
+                     trainable,
+                     scope,
+                     conv_dims=2)
+
+convolution2d.__doc__ = convolution.__doc__
 
+@add_arg_scope
+def convolution3d(inputs,
+                  num_outputs,
+                  kernel_size,
+                  stride=1,
+                  padding='SAME',
+                  data_format=None,
+                  rate=1,
+                  activation_fn=nn.relu,
+                  normalizer_fn=None,
+                  normalizer_params=None,
+                  weights_initializer=initializers.xavier_initializer(),
+                  weights_regularizer=None,
+                  biases_initializer=init_ops.zeros_initializer(),
+                  biases_regularizer=None,
+                  reuse=None,
+                  variables_collections=None,
+                  outputs_collections=None,
+                  trainable=True,
+                  scope=None):
+  return convolution(inputs,
+                     num_outputs,
+                     kernel_size,
+                     stride,
+                     padding,
+                     data_format,
+                     rate,
+                     activation_fn,
+                     normalizer_fn,
+                     normalizer_params,
+                     weights_initializer,
+                     weights_regularizer,
+                     biases_initializer,
+                     biases_regularizer,
+                     reuse,
+                     variables_collections,
+                     outputs_collections,
+                     trainable,
+                     scope,
+                     conv_dims=3)
+
+convolution3d.__doc__ = convolution.__doc__
 
 @add_arg_scope
 def convolution2d_in_plane(
@@ -1411,7 +1543,7 @@ def dense_to_sparse(tensor, eos_token=0, outputs_collections=None, scope=None):
   Args:
      tensor: An `int` `Tensor` to be converted to a `Sparse`.
      eos_token: An integer.
-       It is part of the target label that signfies the end of a sentence.
+       It is part of the target label that signifies the end of a sentence.
      outputs_collections: Collection to add the outputs.
      scope: Optional scope for name_scope.
   """
@@ -1555,7 +1687,7 @@ def _inner_flatten(inputs, new_rank, output_collections=None, scope=None):
     output_collections: Collection to which the outputs will be added.
     scope: Optional scope for `name_scope`.
   Returns:
-    A `Tensor` or `SparseTensor` conataining the same values as `inputs`, but
+    A `Tensor` or `SparseTensor` containing the same values as `inputs`, but
     with innermost dimensions flattened to obtain rank `new_rank`.
 
   Raises:
diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py
index 997f910a2a9756..b01fd5d5c95ac1 100644
--- a/tensorflow/contrib/layers/python/layers/layers_test.py
+++ b/tensorflow/contrib/layers/python/layers/layers_test.py
@@ -310,6 +310,17 @@ def testCreateDimensions(self):
 
 class ConvolutionTest(test.TestCase):
 
+  def testInvalidShape(self):
+    with self.test_session():
+      images_2d = random_ops.random_uniform((5, 7, 9, 3), seed=1)
+      with self.assertRaisesRegexp(
+          ValueError, 'Convolution expects input with rank 5, got 4'):
+        layers_lib.convolution3d(images_2d, 32, 3)
+      images_3d = random_ops.random_uniform((5, 6, 7, 9, 3), seed=1)
+      with self.assertRaisesRegexp(
+          ValueError, 'Convolution expects input with rank 4, got 5'):
+        layers_lib.convolution2d(images_3d, 32, 3)
+
   def testInvalidDataFormat(self):
     height, width = 7, 9
     with self.test_session():
@@ -3155,7 +3166,7 @@ def testRepeat(self):
     with self.test_session():
       images = np.random.uniform(size=(5, height, width, 3)).astype(np.float32)
       output = _layers.repeat(images, 3, layers_lib.conv2d, 32, [3, 3])
-      self.assertEqual(output.op.name, 'Repeat/convolution_3/Relu')
+      self.assertEqual(output.op.name, 'Repeat/convolution2d_3/Relu')
       self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 32])
 
   def testRepeatWithScope(self):
@@ -3749,7 +3760,7 @@ def testStackConvolution2d(self):
           layers_lib.convolution2d, [10, 20, 30],
           kernel_size=[3, 3],
           padding='SAME')
-      self.assertEqual(output.op.name, 'Stack/convolution_3/Relu')
+      self.assertEqual(output.op.name, 'Stack/convolution2d_3/Relu')
       self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 30])
 
   def testStackWithScope(self):
diff --git a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py
index c7cdb4131215c3..f8106d1e4a7e79 100644
--- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py
+++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py
@@ -343,7 +343,8 @@ def get_temp_export_dir(timestamped_export_dir):
   """
   (dirname, basename) = os.path.split(timestamped_export_dir)
   temp_export_dir = os.path.join(
-      compat.as_bytes(dirname), compat.as_bytes('temp-{}'.format(basename)))
+      compat.as_bytes(dirname),
+      compat.as_bytes('temp-{}'.format(compat.as_text(basename))))
   return temp_export_dir
 
 
diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD
index 01c76b7a66e93a..55b984f260ec49 100644
--- a/tensorflow/contrib/lite/BUILD
+++ b/tensorflow/contrib/lite/BUILD
@@ -6,8 +6,6 @@ licenses(["notice"])  # Apache 2.0
 
 load("//tensorflow/contrib/lite:build_def.bzl", "tflite_copts", "gen_selected_ops")
 
-exports_files(["LICENSE"])
-
 exports_files(glob([
     "testdata/*.bin",
     "testdata/*.pb",
diff --git a/tensorflow/contrib/lite/Makefile b/tensorflow/contrib/lite/Makefile
index 1053cce3857420..cc8a8035d1dade 100644
--- a/tensorflow/contrib/lite/Makefile
+++ b/tensorflow/contrib/lite/Makefile
@@ -1,4 +1,3 @@
-
 # Find where we're running from, so we can store generated files here.
 ifeq ($(origin MAKEFILE_DIR), undefined)
 	MAKEFILE_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
@@ -69,12 +68,12 @@ LIB_NAME := libtensorflow-lite.a
 LIB_PATH := $(LIBDIR)$(LIB_NAME)
 
 # A small example program that shows how to link against the library.
-BENCHMARK_PATH := $(BINDIR)benchmark_model
+MINIMAL_PATH := $(BINDIR)minimal
 
-BENCHMARK_SRCS := \
-tensorflow/contrib/lite/tools/benchmark_model.cc
-BENCHMARK_OBJS := $(addprefix $(OBJDIR), \
-$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(BENCHMARK_SRCS))))
+MINIMAL_SRCS := \
+tensorflow/contrib/lite/examples/minimal/minimal.cc
+MINIMAL_OBJS := $(addprefix $(OBJDIR), \
+$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(MINIMAL_SRCS))))
 
 # What sources we want to compile, must be kept in sync with the main Bazel
 # build files.
@@ -100,7 +99,7 @@ $(wildcard tensorflow/contrib/lite/*/*test.cc) \
 $(wildcard tensorflow/contrib/lite/*/*/*test.cc) \
 $(wildcard tensorflow/contrib/lite/*/*/*/*test.cc) \
 $(wildcard tensorflow/contrib/lite/kernels/test_util.cc) \
-$(BENCHMARK_SRCS)
+$(MINIMAL_SRCS)
 # Filter out all the excluded files.
 TF_LITE_CC_SRCS := $(filter-out $(CORE_CC_EXCLUDE_SRCS), $(CORE_CC_ALL_SRCS))
 # File names of the intermediate files target compilation generates.
@@ -119,17 +118,17 @@ $(OBJDIR)%.o: %.c
 	$(CC) $(CCFLAGS) $(INCLUDES) -c $< -o $@
 
 # The target that's compiled if there's no command-line arguments.
-all: $(LIB_PATH) $(BENCHMARK_PATH)
+all: $(LIB_PATH)  $(MINIMAL_PATH)
 
 # Gathers together all the objects we've compiled into a single '.a' archive.
 $(LIB_PATH): $(LIB_OBJS)
 	@mkdir -p $(dir $@)
 	$(AR) $(ARFLAGS) $(LIB_PATH) $(LIB_OBJS)
 
-$(BENCHMARK_PATH): $(BENCHMARK_OBJS) $(LIB_PATH)
+$(MINIMAL_PATH): $(MINIMAL_OBJS) $(LIB_PATH)
 	@mkdir -p $(dir $@)
 	$(CXX) $(CXXFLAGS) $(INCLUDES) \
-	-o $(BENCHMARK_PATH) $(BENCHMARK_OBJS) \
+	-o $(MINIMAL_PATH) $(MINIMAL_OBJS) \
 	$(LIBFLAGS) $(LIB_PATH) $(LDFLAGS) $(LIBS)
 
 # Gets rid of all generated files.
diff --git a/tensorflow/contrib/lite/examples/minimal/minimal.cc b/tensorflow/contrib/lite/examples/minimal/minimal.cc
new file mode 100644
index 00000000000000..106e3b027055b6
--- /dev/null
+++ b/tensorflow/contrib/lite/examples/minimal/minimal.cc
@@ -0,0 +1,71 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/model.h"
+#include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include <cstdio>
+
+// This is a minimal example that reads a model from disk and performs
+// inference. No input data is loaded here; filling the input tensors
+// is left to you as the user.
+//
+// NOTE: Do not add any dependencies to this that cannot be built with
+// the minimal makefile. This example must remain trivial to build with
+// the minimal build tool.
+//
+// Usage: minimal <tflite model>
+
+using namespace tflite;
+
+// Prints the failing source location to stderr and exits with status 1.
+#define TFLITE_MINIMAL_CHECK(x)                                  \
+  do { if (!(x)) {                                               \
+    fprintf(stderr, "Error at %s:%d\n", __FILE__, __LINE__);     \
+    exit(1);                                                     \
+  } } while (0)
+
+int main(int argc, char *argv[]) {
+  if(argc != 2) {
+    fprintf(stderr, "Usage: %s <model>\n", argv[0]);
+    return 1;
+  }
+  const char* filename = argv[1];
+
+  // Load the model from the path given on the command line.
+  std::unique_ptr<tflite::FlatBufferModel> model
+      = tflite::FlatBufferModel::BuildFromFile(filename);
+  TFLITE_MINIMAL_CHECK(model != nullptr);
+
+  // Build the interpreter; it is left null if building fails.
+  tflite::ops::builtin::BuiltinOpResolver resolver;
+  InterpreterBuilder builder(*model, resolver);
+  std::unique_ptr<Interpreter> interpreter;
+  builder(&interpreter);
+  TFLITE_MINIMAL_CHECK(interpreter != nullptr);
+
+  // Allocate tensor buffers.
+  TFLITE_MINIMAL_CHECK(interpreter->AllocateTensors() == kTfLiteOk);
+
+  // Fill input buffers
+  // TODO(user): Insert code to fill input tensors
+
+  // Run inference
+  TFLITE_MINIMAL_CHECK(interpreter->Invoke() == kTfLiteOk);
+
+  // Read output buffers
+  // TODO(user): Insert code to read the output tensors.
+
+  return 0;
+}
diff --git a/tensorflow/contrib/lite/g3doc/rpi.md b/tensorflow/contrib/lite/g3doc/rpi.md
index 7a3a231626d0e1..ab507893074142 100644
--- a/tensorflow/contrib/lite/g3doc/rpi.md
+++ b/tensorflow/contrib/lite/g3doc/rpi.md
@@ -32,7 +32,7 @@ This has been tested on Raspberry Pi 3b, Raspbian GNU/Linux 9.1 (stretch), gcc v
 
 Log in to you RPI, install the toolchain.
 ```bash
-sudo apt-get instal build-essential
+sudo apt-get install build-essential
 ```
 
 First, clone this TensorFlow repository. Run this at the root of the repository:
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index f23b90d9dce694..d48178d608b905 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -3387,7 +3387,7 @@ inline void Concatenation(int concat_dim, const uint8* const* input_data,
                           const int32 output_zeropoint,
                           const float output_scale) {
   // The arguments input_zeropoint and input_scale are expected to be an array
-  // that have the quantization paramaters for all the inputs to the concat
+  // that have the quantization parameters for all the inputs to the concat
   // operator.
   gemmlowp::ScopedProfilingLabel label("Concatenation");
   TFLITE_DCHECK_GT(inputs_count, 1);
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index f6d8d3257b8795..62d6fe0bb300cf 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -1789,7 +1789,7 @@ inline void Concatenation(int concat_dim, const uint8* const* input_data,
                           const int32 output_zeropoint,
                           const float output_scale) {
   // The arguments input_zeropoint and input_scale are expected to be an array
-  // that have the quantization paramaters for all the inputs to the concat
+  // that have the quantization parameters for all the inputs to the concat
   // operator.
   TFLITE_DCHECK_GT(inputs_count, 1);
   int64_t concat_size = 0;
@@ -1975,7 +1975,7 @@ inline void LstmCell(const float* input_data, const Dims<4>& input_dims,
 // requiring a power-of-two representation interval. Thus, we should right
 // away quantize this array to a power-of-two interval; otherwise,
 // implementation will need to rescale that, losing any benefit that a tighter
-// representation interval might otherwise yield, while introducting some
+// representation interval might otherwise yield, while introducing some
 // numerical error and computational overhead.
 //
 // Now, Logistic and Tanh
diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs
index e5b640fcee82d2..8bdeb035f5a778 100644
--- a/tensorflow/contrib/lite/schema/schema.fbs
+++ b/tensorflow/contrib/lite/schema/schema.fbs
@@ -65,7 +65,7 @@ table Tensor {
   quantization:QuantizationParameters;  // Optional.
 }
 
-// A list of builtin operators. Builtin operators a slighlty faster than custom
+// A list of builtin operators. Builtin operators are slightly faster than custom
 // ones, but not by much. Moreover, while custom operators accept an opaque
 // object containing configuration parameters, builtins have a predetermined
 // set of acceptable options.
diff --git a/tensorflow/contrib/lite/schema/schema_v0.fbs b/tensorflow/contrib/lite/schema/schema_v0.fbs
index 852ea988f3ddc7..891d8366ccae35 100644
--- a/tensorflow/contrib/lite/schema/schema_v0.fbs
+++ b/tensorflow/contrib/lite/schema/schema_v0.fbs
@@ -48,7 +48,7 @@ table Tensor {
   quantization:QuantizationParameters;  // Optional.
 }
 
-// A list of builtin operators. Builtin operators a slighlty faster than custom
+// A list of builtin operators. Builtin operators are slightly faster than custom
 // ones, but not by much. Moreover, while custom operators accept an opaque
 // object containing configuration parameters, builtins have a predetermined
 // set of acceptable options.
diff --git a/tensorflow/contrib/lite/schema/schema_v1.fbs b/tensorflow/contrib/lite/schema/schema_v1.fbs
index 06cd9408edb710..b438b569e67ac5 100644
--- a/tensorflow/contrib/lite/schema/schema_v1.fbs
+++ b/tensorflow/contrib/lite/schema/schema_v1.fbs
@@ -53,7 +53,7 @@ table Tensor {
   quantization:QuantizationParameters;  // Optional.
 }
 
-// A list of builtin operators. Builtin operators a slighlty faster than custom
+// A list of builtin operators. Builtin operators are slightly faster than custom
 // ones, but not by much. Moreover, while custom operators accept an opaque
 // object containing configuration parameters, builtins have a predetermined
 // set of acceptable options.
diff --git a/tensorflow/contrib/lite/schema/schema_v2.fbs b/tensorflow/contrib/lite/schema/schema_v2.fbs
index 96731c8aaebf69..b90408ff6d09fd 100644
--- a/tensorflow/contrib/lite/schema/schema_v2.fbs
+++ b/tensorflow/contrib/lite/schema/schema_v2.fbs
@@ -54,7 +54,7 @@ table Tensor {
   quantization:QuantizationParameters;  // Optional.
 }
 
-// A list of builtin operators. Builtin operators a slighlty faster than custom
+// A list of builtin operators. Builtin operators are slightly faster than custom
 // ones, but not by much. Moreover, while custom operators accept an opaque
 // object containing configuration parameters, builtins have a predetermined
 // set of acceptable options.
diff --git a/tensorflow/contrib/lite/schema/schema_v3.fbs b/tensorflow/contrib/lite/schema/schema_v3.fbs
index cedefe08f35cbb..020da38493980d 100644
--- a/tensorflow/contrib/lite/schema/schema_v3.fbs
+++ b/tensorflow/contrib/lite/schema/schema_v3.fbs
@@ -53,7 +53,7 @@ table Tensor {
   type:TensorType;
   // An index that refers to the buffers table at the root of the model. Or,
   // if there is no data buffer associated (i.e. intermediate results), then
-  // this is 0 (which refers to an always existant empty buffer).
+  // this is 0 (which refers to an always existent empty buffer).
   //
   // The data_buffer itself is an opaque container, with the assumption that the
   // target device is little-endian. In addition, all builtin operators assume
@@ -64,7 +64,7 @@ table Tensor {
   quantization:QuantizationParameters;  // Optional.
 }
 
-// A list of builtin operators. Builtin operators a slighlty faster than custom
+// A list of builtin operators. Builtin operators are slightly faster than custom
 // ones, but not by much. Moreover, while custom operators accept an opaque
 // object containing configuration parameters, builtins have a predetermined
 // set of acceptable options.
diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py
index 07d2b28bbe1f1d..0e036bda92e4c4 100644
--- a/tensorflow/contrib/lite/testing/generate_examples.py
+++ b/tensorflow/contrib/lite/testing/generate_examples.py
@@ -109,7 +109,7 @@
 
 
 class ExtraTocoOptions(object):
-  """Additonal toco options besides input, output, shape."""
+  """Additional toco options besides input, output, shape."""
 
   def __init__(self):
     # Whether to ignore control dependency nodes.
@@ -2016,7 +2016,7 @@ def build_graph(parameters):
     return inputs_after_split, [out]
 
   def build_inputs(parameters, sess, inputs, outputs):
-    """Feed inputs, assign vairables, and freeze graph."""
+    """Feed inputs, assign variables, and freeze graph."""
 
     with tf.variable_scope("", reuse=True):
       kernel = tf.get_variable("rnn/basic_lstm_cell/kernel")
diff --git a/tensorflow/contrib/lite/testing/tflite_driver.cc b/tensorflow/contrib/lite/testing/tflite_driver.cc
index 1f07068aee2be4..8cab6cd8cdc41d 100644
--- a/tensorflow/contrib/lite/testing/tflite_driver.cc
+++ b/tensorflow/contrib/lite/testing/tflite_driver.cc
@@ -227,8 +227,8 @@ void TfLiteDriver::SetExpectation(int id, const string& csv_values) {
   if (!IsValid()) return;
   auto* tensor = interpreter_->tensor(id);
   if (expected_output_.count(id) != 0) {
-    fprintf(stderr, "Overriden expectation for tensor %d\n", id);
-    Invalidate("Overriden expectation");
+    fprintf(stderr, "Overridden expectation for tensor %d\n", id);
+    Invalidate("Overridden expectation");
   }
   expected_output_[id].reset(new Expectation);
   switch (tensor->type) {
diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md
index 495014c6fc67ab..7680cdd344814b 100644
--- a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md
+++ b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md
@@ -115,7 +115,7 @@ bazel run --config=opt \
 
 In order to evaluate the possible benefit of generating a quantized graph, TOCO
 allows "dummy-quantization" on float graphs. The flags `--default_ranges_min`
-and `--default_ranges_max` accept plausable values for the min-max ranges of the
+and `--default_ranges_max` accept plausible values for the min-max ranges of the
 values in all arrays that do not have min-max information. "Dummy-quantization"
 will produce lower accuracy but will emulate the performance of a correctly
 quantized model.
@@ -338,7 +338,7 @@ below outline the use cases for each.
 ### Using `--output_format=GRAPHVIZ_DOT`
 
 The first way to get a graphviz rendering is to pass `GRAPHVIZ_DOT` into
-`--output_format`. This results in a plausable visualization of the graph. This
+`--output_format`. This results in a plausible visualization of the graph. This
 reduces the requirements that normally exist during conversion between other
 input and output formats. For example, this may be useful if conversion from
 TENSORFLOW_GRAPHDEF to TFLITE is failing.
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.h b/tensorflow/contrib/lite/toco/tflite/operator.h
index 50f0620b3cce49..5e9c20e40dd627 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.h
+++ b/tensorflow/contrib/lite/toco/tflite/operator.h
@@ -25,10 +25,10 @@ namespace tflite {
 
 class BaseOperator;
 
-// Return a map contained all knwo TF Lite Operators, keyed by their names.
+// Return a map containing all known TF Lite Operators, keyed by their names.
 std::map<string, std::unique_ptr<BaseOperator>> BuildOperatorByNameMap();
 
-// Return a map contained all knwo TF Lite Operators, keyed by the type of
+// Return a map containing all known TF Lite Operators, keyed by the type of
 // their tf.mini counterparts.
 std::map<OperatorType, std::unique_ptr<BaseOperator>> BuildOperatorByTypeMap();
 
diff --git a/tensorflow/contrib/lite/toco/toco_flags.proto b/tensorflow/contrib/lite/toco/toco_flags.proto
index 253f022e6b3ade..8589ca361dae25 100644
--- a/tensorflow/contrib/lite/toco/toco_flags.proto
+++ b/tensorflow/contrib/lite/toco/toco_flags.proto
@@ -127,7 +127,7 @@ message TocoFlags {
   // transformations that are necessary in order to generate inference
   // code for these graphs. Such graphs should be fixed, but as a
   // temporary work-around, setting this reorder_across_fake_quant flag
-  // allows toco to perform necessary graph transformaitons on them,
+  // allows toco to perform necessary graph transformations on them,
   // at the cost of no longer faithfully matching inference and training
   // arithmetic.
   optional bool reorder_across_fake_quant = 8;
diff --git a/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py b/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py
index 37539b959959b5..5ed8057b865cf4 100644
--- a/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py
+++ b/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py
@@ -58,7 +58,7 @@ def create_local_cluster(num_workers, num_ps, protocol="grpc"):
 
 
 # Creates the workers and return their sessions, graphs, train_ops.
-# Cheif worker will update at last
+# Chief worker will update at last
 def _get_workers(num_workers, period, workers, moving_rate):
   sessions = []
   graphs = []
diff --git a/tensorflow/contrib/opt/python/training/model_average_optimizer_test.py b/tensorflow/contrib/opt/python/training/model_average_optimizer_test.py
index 6cca0a8a009456..3acd9402684fa2 100644
--- a/tensorflow/contrib/opt/python/training/model_average_optimizer_test.py
+++ b/tensorflow/contrib/opt/python/training/model_average_optimizer_test.py
@@ -57,7 +57,7 @@ def create_local_cluster(num_workers, num_ps, protocol="grpc"):
 
 
 # Creates the workers and return their sessions, graphs, train_ops.
-# Cheif worker will update at last
+# Chief worker will update at last
 def _get_workers(num_workers, steps, workers):
   sessions = []
   graphs = []
@@ -146,7 +146,7 @@ def test1Workers2Period(self):
     self.assertAllEqual(1.0, sessions[0].run(global_var_1))
     self.assertAllEqual(0, sessions[0].run(global_step))
 
-    # iteration 2, global varibale update
+    # iteration 2, global variable update
     thread_0 = self.checkedThread(
         target=self._run, args=(train_ops[0], sessions[0]))
     thread_1 = self.checkedThread(
diff --git a/tensorflow/contrib/signal/python/ops/window_ops.py b/tensorflow/contrib/signal/python/ops/window_ops.py
index 50094010dc75cf..59e67e8ba414df 100644
--- a/tensorflow/contrib/signal/python/ops/window_ops.py
+++ b/tensorflow/contrib/signal/python/ops/window_ops.py
@@ -47,7 +47,7 @@ def hann_window(window_length, periodic=True, dtype=dtypes.float32, name=None):
   Raises:
     ValueError: If `dtype` is not a floating point type.
 
-  [hann]: https://en.wikipedia.org/wiki/Window_function#Hann_window
+  [hann]: https://en.wikipedia.org/wiki/Window_function#Hann_and_Hamming_windows
   """
   return _raised_cosine_window(name, 'hann_window', window_length, periodic,
                                dtype, 0.5, 0.5)
@@ -72,7 +72,7 @@ def hamming_window(window_length, periodic=True, dtype=dtypes.float32,
   Raises:
     ValueError: If `dtype` is not a floating point type.
 
-  [hamming]: https://en.wikipedia.org/wiki/Window_function#Hamming_window
+  [hamming]: https://en.wikipedia.org/wiki/Window_function#Hann_and_Hamming_windows
   """
   return _raised_cosine_window(name, 'hamming_window', window_length, periodic,
                                dtype, 0.54, 0.46)
diff --git a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py
index f2d31dc8db5688..d877831fce99a3 100644
--- a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py
+++ b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py
@@ -102,7 +102,7 @@ class BoundingBox(ItemHandler):
   """An ItemHandler that concatenates a set of parsed Tensors to Bounding Boxes.
   """
 
-  def __init__(self, keys=None, prefix=None):
+  def __init__(self, keys=None, prefix=''):
     """Initialize the bounding box handler.
 
     Args:
diff --git a/tensorflow/contrib/slim/python/slim/learning.py b/tensorflow/contrib/slim/python/slim/learning.py
index 8a2c74742a8ebb..6e55b9407bce5c 100644
--- a/tensorflow/contrib/slim/python/slim/learning.py
+++ b/tensorflow/contrib/slim/python/slim/learning.py
@@ -571,7 +571,7 @@ def train(train_op,
       default, two `Boolean`, scalar ops called "should_stop" and "should_log"
       are provided.
     log_every_n_steps: The frequency, in terms of global steps, that the loss
-      and global step and logged.
+      and global step are logged.
     graph: The graph to pass to the supervisor. If no graph is supplied the
       default graph is used.
     master: The address of the tensorflow master.
diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
index d5d8e4100f11ca..cfdc884277a025 100644
--- a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
+++ b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
@@ -1080,14 +1080,20 @@ class SummaryDbWriter : public SummaryWriterInterface {
     // See tensorboard/plugins/histogram/summary.py and data_compat.py
     Tensor t{DT_DOUBLE, {k, 3}};
     auto data = t.flat<double>();
-    for (int i = 0; i < k; ++i) {
-      double left_edge = ((i - 1 >= 0) ? histo.bucket_limit(i - 1)
-                                       : std::numeric_limits<double>::min());
-      double right_edge = ((i + 1 < k) ? histo.bucket_limit(i + 1)
-                                       : std::numeric_limits<double>::max());
-      data(i + 0) = left_edge;
-      data(i + 1) = right_edge;
-      data(i + 2) = histo.bucket(i);
+    for (int i = 0, j = 0; i < k; ++i) {
+      // TODO(nickfelt): reconcile with TensorBoard's data_compat.py
+      // From summary.proto: parallel arrays encode the bucket boundaries and
+      // the bucket values. bucket(i) is the count for bucket i; its range is:
+      //   i == 0:  -DBL_MAX .. bucket_limit(0)
+      //   i != 0:  bucket_limit(i-1) .. bucket_limit(i)
+      // NOTE(review): numeric_limits<double>::min() below is the smallest
+      // positive double, not -DBL_MAX (that is lowest()) -- confirm intent.
+      double left_edge = (i == 0) ? std::numeric_limits<double>::min()
+                                  : histo.bucket_limit(i - 1);
+
+      data(j++) = left_edge;
+      data(j++) = histo.bucket_limit(i);
+      data(j++) = histo.bucket(i);
     }
     int64 tag_id;
     PatchPluginName(s->mutable_metadata(), kHistogramPluginName);
diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc
index c34b6763a1420d..2e8d4109dd624a 100644
--- a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc
+++ b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc
@@ -100,6 +100,56 @@ class SummaryDbWriterTest : public ::testing::Test {
   SummaryWriterInterface* writer_ = nullptr;
 };
 
+TEST_F(SummaryDbWriterTest, WriteHistogram_VerifyTensorValues) {
+  TF_ASSERT_OK(CreateSummaryDbWriter(db_, "histtest", "test1", "user1", &env_,
+                                     &writer_));
+  int step = 0;
+  std::unique_ptr<Event> e{new Event};
+  e->set_step(step);
+  e->set_wall_time(123);
+  Summary::Value* s = e->mutable_summary()->add_value();
+  s->set_tag("normal/myhisto");
+
+  double dummy_value = 10.123;
+  HistogramProto* proto = s->mutable_histo();
+  proto->Clear();
+  proto->set_min(dummy_value);
+  proto->set_max(dummy_value);
+  proto->set_num(dummy_value);
+  proto->set_sum(dummy_value);
+  proto->set_sum_squares(dummy_value);
+
+  int size = 3;
+  double bucket_limits[] = {-30.5, -10.5, -5.5};
+  double bucket[] = {-10, 10, 20};
+  for (int i = 0; i < size; i++) {
+    proto->add_bucket_limit(bucket_limits[i]);
+    proto->add_bucket(bucket[i]);
+  }
+  TF_ASSERT_OK(writer_->WriteEvent(std::move(e)));
+  TF_ASSERT_OK(writer_->Flush());
+  writer_->Unref();
+  writer_ = nullptr;
+
+  // TODO(nickfelt): implement QueryTensor() to encapsulate this
+  // Verify the data
+  string result = QueryString("SELECT data FROM Tensors");
+  const double* val = reinterpret_cast<const double*>(result.data());
+  double histarray[] = {std::numeric_limits<double>::min(),
+                        -30.5,
+                        -10,
+                        -30.5,
+                        -10.5,
+                        10,
+                        -10.5,
+                        -5.5,
+                        20};
+  int histarray_size = 9;
+  for (int i = 0; i < histarray_size; i++) {
+    EXPECT_EQ(histarray[i], val[i]);
+  }
+}
+
 TEST_F(SummaryDbWriterTest, NothingWritten_NoRowsCreated) {
   TF_ASSERT_OK(CreateSummaryDbWriter(db_, "mad-science", "train", "jart", &env_,
                                      &writer_));
diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD
index 6d6feb3c399996..a5d8b061b6b26f 100644
--- a/tensorflow/contrib/tensorrt/BUILD
+++ b/tensorflow/contrib/tensorrt/BUILD
@@ -67,6 +67,7 @@ tf_cuda_library(
     visibility = ["//visibility:public"],
     deps = [
         ":trt_logging",
+        ":trt_plugins",
     ] + if_tensorrt([
         "@local_config_tensorrt//:nv_infer",
     ]) + tf_custom_op_library_additional_deps(),
@@ -86,6 +87,7 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":trt_logging",
+        ":trt_plugins",
         ":trt_resources",
         "//tensorflow/core:gpu_headers_lib",
         "//tensorflow/core:lib_proto_parsing",
@@ -197,10 +199,12 @@ tf_py_wrap_cc(
 tf_cuda_library(
     name = "trt_resources",
     srcs = [
+        "resources/trt_allocator.cc",
         "resources/trt_int8_calibrator.cc",
         "resources/trt_resource_manager.cc",
     ],
     hdrs = [
+        "resources/trt_allocator.h",
         "resources/trt_int8_calibrator.h",
         "resources/trt_resource_manager.h",
         "resources/trt_resources.h",
@@ -221,18 +225,25 @@ tf_cuda_library(
     srcs = [
         "convert/convert_graph.cc",
         "convert/convert_nodes.cc",
+        "convert/trt_optimization_pass.cc",
     ],
     hdrs = [
         "convert/convert_graph.h",
         "convert/convert_nodes.h",
+        "convert/trt_optimization_pass.h",
     ],
     deps = [
         ":segment",
+        ":trt_plugins",
         ":trt_logging",
         ":trt_resources",
+        "//tensorflow/core/grappler/clusters:cluster",
+        "//tensorflow/core/grappler/optimizers:custom_graph_optimizer",
+        "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry",
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core:framework",
+        "//tensorflow/core:gpu_runtime",
         "//tensorflow/core:framework_lite",
         "//tensorflow/core:graph",
         "//tensorflow/core:lib",
@@ -241,8 +252,7 @@ tf_cuda_library(
         "//tensorflow/core/grappler:devices",
         "//tensorflow/core/grappler/clusters:virtual_cluster",
         "//tensorflow/core/grappler/costs:graph_properties",
-        "//tensorflow/core/grappler/optimizers:constant_folding",
-        "//tensorflow/core/grappler/optimizers:layout_optimizer",
+        "//tensorflow/core/grappler/optimizers:meta_optimizer",
     ] + if_tensorrt([
         "@local_config_tensorrt//:nv_infer",
     ]) + tf_custom_op_library_additional_deps(),
@@ -256,7 +266,6 @@ cc_library(
         "segment/segment.h",
         "segment/union_find.h",
     ],
-    linkstatic = 1,
     deps = [
         "//tensorflow/core:graph",
         "//tensorflow/core:lib_proto_parsing",
@@ -279,6 +288,46 @@ tf_cc_test(
     ],
 )
 
+# Library for the plugin factory
+tf_cuda_library(
+    name = "trt_plugins",
+    srcs = [
+        "plugin/trt_plugin.cc",
+        "plugin/trt_plugin_factory.cc",
+        "plugin/trt_plugin_utils.cc",
+    ],
+    hdrs = [
+        "plugin/trt_plugin.h",
+        "plugin/trt_plugin_factory.h",
+        "plugin/trt_plugin_utils.h",
+    ],
+    deps = [
+        "//tensorflow/core:framework_lite",
+        "//tensorflow/core:lib_proto_parsing",
+    ] + if_tensorrt([
+        "@local_config_tensorrt//:nv_infer",
+    ]),
+)
+
+tf_cuda_cc_test(
+    name = "trt_plugin_factory_test",
+    size = "small",
+    srcs = ["plugin/trt_plugin_factory_test.cc"],
+    tags = [
+        "manual",
+        "notap",
+    ],
+    deps = [
+        ":trt_plugins",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ] + if_tensorrt([
+        "@local_config_cuda//cuda:cuda_headers",
+        "@local_config_tensorrt//:nv_infer",
+    ]),
+)
+
 py_test(
     name = "tf_trt_integration_test",
     srcs = ["test/tf_trt_integration_test.py"],
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
index 07740277115fe4..b7b26cfb1c05ae 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/contrib/tensorrt/convert/convert_graph.h"
+#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h"
 
 #include <list>
 #include <map>
@@ -24,6 +25,9 @@ limitations under the License.
 
 #include "tensorflow/contrib/tensorrt/convert/convert_nodes.h"
 #include "tensorflow/contrib/tensorrt/segment/segment.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h"
+#include "tensorflow/core/common_runtime/gpu/process_state.h"
 #include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/graph/graph_constructor.h"
@@ -31,8 +35,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/costs/graph_properties.h"
 #include "tensorflow/core/grappler/devices.h"
 #include "tensorflow/core/grappler/grappler_item.h"
-#include "tensorflow/core/grappler/optimizers/constant_folding.h"
-#include "tensorflow/core/grappler/optimizers/layout_optimizer.h"
+#include "tensorflow/core/grappler/optimizers/meta_optimizer.h"
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
@@ -75,7 +78,8 @@ bool IsTensorRTCandidate(const tensorflow::Node* node) {
       // TODO(ben,jie): ...
   };
   // LINT.ThenChange(//tensorflow/contrib/tensorrt/convert/convert_nodes.h)
-  return candidate_ops.count(node->type_string());
+  return (candidate_ops.count(node->type_string()) ||
+          PluginFactoryTensorRT::GetInstance()->IsPlugin(node->type_string()));
 }
 
 void GetSubGraphIncomingEdges(const tensorflow::Graph& graph,
@@ -144,7 +148,8 @@ struct ConvertGraphParams {
       size_t max_supported_batch_size, size_t max_consumed_workspace_size_bytes,
       const tensorflow::grappler::GraphProperties& current_graph_properties,
       std::unordered_map<string, std::pair<int, string>>* output_edges,
-      int engine_precision_mode)
+      int engine_precision_mode, const string& device_name,
+      std::shared_ptr<nvinfer1::IGpuAllocator> allocator, int cuda_gpu_id)
       : graph(inp_graph),
         output_names(output_node_names),
         subgraph_node_ids(subgraph_node_id_numbers),
@@ -152,7 +157,10 @@ struct ConvertGraphParams {
         max_workspace_size_bytes(max_consumed_workspace_size_bytes),
         graph_properties(current_graph_properties),
         output_edge_map(output_edges),
-        precision_mode(engine_precision_mode) {}
+        precision_mode(engine_precision_mode),
+        device_name_(device_name),
+        allocator_(allocator),
+        cuda_gpu_id_(cuda_gpu_id) {}
   tensorflow::Graph& graph;
   const std::vector<string>& output_names;
   const std::set<int>& subgraph_node_ids;
@@ -161,6 +169,9 @@ struct ConvertGraphParams {
   const tensorflow::grappler::GraphProperties& graph_properties;
   std::unordered_map<string, std::pair<int, string>>* output_edge_map;
   int precision_mode;
+  string device_name_;
+  std::shared_ptr<nvinfer1::IGpuAllocator> allocator_;
+  int cuda_gpu_id_;
   std::vector<std::pair<int, int>> subgraph_inputs;
   std::vector<std::pair<int, int>> subgraph_outputs;
   tensorflow::EdgeSet subgraph_incoming_edges;
@@ -194,7 +205,7 @@ static tensorflow::Status FillSubGraphEdgeSets(ConvertGraphParams* p) {
                              subgraph_outputs_set.begin(),
                              subgraph_outputs_set.end());
   return tensorflow::Status::OK();
-};
+}
 
 tensorflow::Status GetCalibNode(ConvertGraphParams* params) {
   TF_RETURN_IF_ERROR(FillSubGraphEdgeSets(params));
@@ -203,7 +214,8 @@ tensorflow::Status GetCalibNode(ConvertGraphParams* params) {
                    params->subgraph_inputs, params->subgraph_outputs,
                    params->max_batch_size, params->max_workspace_size_bytes,
                    params->graph_properties, params->output_edge_map,
-                   &trt_node_def, params->precision_mode);
+                   &trt_node_def, params->precision_mode, params->device_name_,
+                   params->allocator_, params->cuda_gpu_id_);
   TF_RETURN_IF_ERROR(InjectCalibrationNode(s));
   tensorflow::Status status;
   tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status);
@@ -233,7 +245,8 @@ tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) {
                    params->subgraph_inputs, params->subgraph_outputs,
                    params->max_batch_size, params->max_workspace_size_bytes,
                    params->graph_properties, params->output_edge_map,
-                   &trt_node_def, params->precision_mode);
+                   &trt_node_def, params->precision_mode, params->device_name_,
+                   params->allocator_, params->cuda_gpu_id_);
   TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef(s));
   tensorflow::Status status;
   tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status);
@@ -331,19 +344,12 @@ tensorflow::Status ConvertGraphDefToTensorRT(
   // optimization pass
   tensorflow::grappler::GrapplerItem item;
   item.fetch = output_names;
-  tensorflow::GraphDef gdef;
-
-  // Layout optimization
   item.graph = graph_def;
-  tensorflow::grappler::LayoutOptimizer optimizer;
-  tensorflow::grappler::Cluster* cluster;
 
-  // virtual cluster
   tensorflow::DeviceProperties device_properties;
-
   device_properties.set_type("GPU");
   device_properties.mutable_environment()->insert({"architecture", "6"});
-  cluster =
+  tensorflow::grappler::Cluster* cluster =
       new tensorflow::grappler::VirtualCluster({{"/GPU:0", device_properties}});
 
   // single machine
@@ -351,27 +357,38 @@ tensorflow::Status ConvertGraphDefToTensorRT(
   int num_gpus = tensorflow::grappler::GetNumAvailableGPUs();
   VLOG(2) << "cpu_cores: " << num_cpu_cores;
   VLOG(2) << "gpus: " << num_gpus;
-
-  TF_RETURN_IF_ERROR(optimizer.Optimize(cluster, item, &gdef));
-
-  // constant folding
+  tensorflow::RewriterConfig rw_cfg;
+  tensorflow::grappler::MetaOptimizer meta_opt(nullptr, rw_cfg);
+  tensorflow::GraphDef gdef;
+  TF_RETURN_IF_ERROR(meta_opt.Optimize(cluster, item, &gdef));
   item.graph = gdef;
-  tensorflow::grappler::ConstantFolding fold(nullptr);
-  TF_RETURN_IF_ERROR(fold.Optimize(nullptr, item, &gdef));
 
   // AJ refactoring shape inference through grappler/GraphProperties.
   tensorflow::grappler::GraphProperties static_graph_properties(item);
-  TF_RETURN_IF_ERROR(static_graph_properties.InferStatically(false));
+  TF_RETURN_IF_ERROR(static_graph_properties.InferStatically(true));
   // Build full graph
+
+  return ConvertAfterShapes(gdef, output_names, max_batch_size,
+                            max_workspace_size_bytes, new_graph_def,
+                            precision_mode, minimum_segment_size,
+                            static_graph_properties, nullptr);
+}
+
+tensorflow::Status ConvertAfterShapes(
+    const tensorflow::GraphDef& gdef, const std::vector<string>& output_names,
+    size_t max_batch_size, size_t max_workspace_size_bytes,
+    tensorflow::GraphDef* new_graph_def, int precision_mode,
+    int minimum_segment_size,
+    const tensorflow::grappler::GraphProperties& graph_properties,
+    const tensorflow::grappler::Cluster* cluster) {
+  // Segment the graph into subgraphs that can be converted to TensorRT
+  tensorflow::tensorrt::segment::SegmentOptions segment_options;
   tensorflow::FunctionLibraryDefinition flib(tensorflow::OpRegistry::Global(),
                                              gdef.library());
   tensorflow::Graph graph(flib);
   TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph(
       tensorflow::GraphConstructorOptions(), gdef, &graph));
 
-  // Segment the graph into subgraphs that can be converted to TensorRT
-  tensorflow::tensorrt::segment::SegmentOptions segment_options;
-
   // TODO(ben,jie,sami): exclude output nodes (DISCUSS IT)
   for (auto node : output_names) {
     segment_options.exclude_node_list.insert(node);
@@ -381,7 +398,7 @@ tensorflow::Status ConvertGraphDefToTensorRT(
   segment_options.minimum_segment_size = minimum_segment_size;
   tensorflow::tensorrt::segment::SegmentNodesVector segments;
   TF_RETURN_IF_ERROR(tensorrt::segment::SegmentGraph(
-      gdef, IsTensorRTCandidate, segment_options, &segments));
+      &graph, IsTensorRTCandidate, segment_options, &segments));
   if (segments.size() > 1) {
     VLOG(0) << "MULTIPLE tensorrt candidate conversion: " << segments.size();
   }
@@ -391,9 +408,21 @@ tensorflow::Status ConvertGraphDefToTensorRT(
   int count = 0;
   float total_num_nodes_in_segments = 0.;
   for (auto s : segments) {
-    total_num_nodes_in_segments += s.size();
+    total_num_nodes_in_segments += s.first.size();
   }
-  for (const std::set<string>& subgraph_node_names : segments) {
+  // We create the map here since cluster may not be available in all cases.
+  std::map<string, tensorflow::Device*> name_to_device_map;
+  if (cluster) {
+    // TODO(aaroey): consider using DeviceSet::FindDeviceByName(), as in a
+    // distributed environment, devices from different workers can have same
+    // short name.
+    for (const auto dm : cluster->GetDeviceSet()->devices()) {
+      name_to_device_map[dm->name()] = dm;
+    }
+  }
+  for (const auto& segment_nodes_and_device : segments) {
+    const std::set<string>& subgraph_node_names =
+        segment_nodes_and_device.first;
     std::set<int> subgraph_node_ids;
     size_t max_mem_per_engine =
         max_workspace_size_bytes *
@@ -403,10 +432,40 @@ tensorflow::Status ConvertGraphDefToTensorRT(
       oss << " " << node_name;
       subgraph_node_ids.insert(node_map.at(node_name)->id());
     }
-    VLOG(2) << "Subgraph nodes" << oss.str();
+    VLOG(1) << "Subgraph nodes at device " << segment_nodes_and_device.second
+            << " : " << oss.str();
+    auto target_device =
+        name_to_device_map.find(segment_nodes_and_device.second);
+    std::shared_ptr<nvinfer1::IGpuAllocator> allocator(0);
+
+    int cuda_device_id = 0;
+    if (target_device != name_to_device_map.end()) {
+      tensorflow::TfGpuId tf_gpu_id(target_device->second->parsed_name().id);
+      CudaGpuId cuda_gpu_id;
+      Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id);
+      if (!s.ok()) {
+        LOG(ERROR)
+            << "Cuda device identification failed, using device 0. Error= "
+            << s;
+      } else {
+        cuda_device_id = cuda_gpu_id.value();
+      }
+      tensorflow::GPUOptions gpuoptions;
+      // we need to use PM here since in python path there is no way to get to
+      // allocators
+      auto pm = tensorflow::ProcessState::singleton();
+      // this should be instantiated by now
+      auto dev_allocator = pm->GetGPUAllocator(gpuoptions, tf_gpu_id, 1);
+      VLOG(1) << "Got an allocator for device tf_device=" << tf_gpu_id.value()
+              << " cuda device= " << cuda_device_id << " at " << dev_allocator;
+      allocator = std::make_shared<TRTDeviceAllocator>(dev_allocator);
+    } else {  // device unknown or not available
+      allocator = std::make_shared<TRTCudaAllocator>();
+    }
     ConvertGraphParams p(graph, output_names, subgraph_node_ids, max_batch_size,
-                         max_mem_per_engine, static_graph_properties,
-                         &output_edge_map, precision_mode);
+                         max_mem_per_engine, graph_properties, &output_edge_map,
+                         precision_mode, segment_nodes_and_device.second,
+                         allocator, cuda_device_id);
     if (precision_mode == INT8MODE) {
       tensorflow::Status status = GetCalibNode(&p);
       if (status != tensorflow::Status::OK()) {
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.h b/tensorflow/contrib/tensorrt/convert/convert_graph.h
index e01e4a5328061a..65a67d7e73e32f 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.h
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.h
@@ -18,6 +18,8 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/grappler/clusters/cluster.h"
+#include "tensorflow/core/grappler/costs/graph_properties.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -43,6 +45,14 @@ tensorflow::Status ConvertGraphDefToTensorRT(
     size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def,
     int precision_mode, int minimum_segment_size);
 
+// Method to call from optimization pass
+tensorflow::Status ConvertAfterShapes(
+    const tensorflow::GraphDef& graph, const std::vector<string>& output_names,
+    size_t max_batch_size, size_t max_workspace_size_bytes,
+    tensorflow::GraphDef* new_graph_def, int precision_mode,
+    int minimum_segment_size,
+    const tensorflow::grappler::GraphProperties& graph_properties,
+    const tensorflow::grappler::Cluster* cluster);
 }  // namespace convert
 }  // namespace tensorrt
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index b81ae9dc3eeed6..32b211dcd1e282 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/contrib/tensorrt/convert/convert_nodes.h"
+#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h"
 
 #include <algorithm>
 #include <list>
@@ -240,35 +241,49 @@ class TFAttrs {
     return attrs_.at(key);
   }
   template <typename T>
-  T get(string key) const;
+  T get(const string& key) const;
   template <typename T>
-  T get(string key, const T& default_value) const {
+  T get(const string& key, const T& default_value) const {
     return attrs_.count(key) ? this->get<T>(key) : default_value;
   }
 
+  std::vector<string> GetAllAttrKey() {
+    std::vector<string> attr_list;
+    for (const auto& attr_item : attrs_) {
+      attr_list.emplace_back(attr_item.first);
+    }
+    return attr_list;
+  }
+
  private:
   typedef std::map<string, tensorflow::AttrValue const*> AttrMap;
   AttrMap attrs_;
 };
 
 template <>
-string TFAttrs::get<string>(string key) const {
+string TFAttrs::get<string>(const string& key) const {
   return this->at(key)->s();
 }
 
 template <>
-std::vector<int> TFAttrs::get<std::vector<int>>(string key) const {
+std::vector<int> TFAttrs::get<std::vector<int>>(const string& key) const {
   auto attr = this->at(key)->list().i();
   return std::vector<int>(attr.begin(), attr.end());
 }
 
 template <>
-std::vector<string> TFAttrs::get<std::vector<string>>(string key) const {
+std::vector<float> TFAttrs::get<std::vector<float>>(const string& key) const {
+  auto attr = this->at(key)->list().f();
+  return std::vector<float>(attr.begin(), attr.end());
+}
+
+template <>
+std::vector<string> TFAttrs::get<std::vector<string>>(const string& key) const {
   auto attr = this->at(key)->list().s();
   return std::vector<string>(attr.begin(), attr.end());
 }
 template <>
-nvinfer1::Dims TFAttrs::get<nvinfer1::Dims>(string key) const {
+nvinfer1::Dims TFAttrs::get<nvinfer1::Dims>(const string& key) const {
   auto values = this->get<std::vector<int>>(key);
   nvinfer1::Dims dims;
   dims.nbDims = values.size();
@@ -278,24 +293,25 @@ nvinfer1::Dims TFAttrs::get<nvinfer1::Dims>(string key) const {
 }
 
 template <>
-nvinfer1::DataType TFAttrs::get<nvinfer1::DataType>(string key) const {
+nvinfer1::DataType TFAttrs::get<nvinfer1::DataType>(const string& key) const {
   nvinfer1::DataType trt_dtype(nvinfer1::DataType::kFLOAT);
   TF_CHECK_OK(ConvertDType(this->at(key)->type(), &trt_dtype));
   return trt_dtype;
 }
 
 template <>
-tensorflow::DataType TFAttrs::get<tensorflow::DataType>(string key) const {
+tensorflow::DataType TFAttrs::get<tensorflow::DataType>(
+    const string& key) const {
   return this->at(key)->type();
 }
 
 template <>
-float TFAttrs::get<float>(string key) const {
+float TFAttrs::get<float>(const string& key) const {
   return this->at(key)->f();
 }
 
 template <>
-bool TFAttrs::get<bool>(string key) const {
+bool TFAttrs::get<bool>(const string& key) const {
   return this->at(key)->b();
 }
 
@@ -424,6 +440,7 @@ using OpConverter =
 class Converter {
   std::unordered_map<string, TRT_TensorOrWeights> trt_tensors_;
   std::unordered_map<string, OpConverter> op_registry_;
+  OpConverter plugin_converter_;
   nvinfer1::INetworkDefinition* trt_network_;
   std::list<std::vector<uint8_t>> temp_bufs_;
   tensorflow::tensorrt::TRTWeightStore* weight_store_;
@@ -481,7 +498,7 @@ class Converter {
     weights.SetValues(weight_store_->store_.back().data());
     return weights;
   }
-  bool isFP16() { return fp16_; };
+  bool isFP16() { return fp16_; }
   TRT_ShapedWeights get_temp_weights_like(const TRT_ShapedWeights& weights) {
     return this->get_temp_weights(weights.type_, weights.shape_);
   }
@@ -490,13 +507,17 @@ class Converter {
     std::vector<TRT_TensorOrWeights> inputs;
     TF_RETURN_IF_ERROR(this->get_inputs(node_def, &inputs));
     string op = node_def.op();
-    if (!op_registry_.count(op)) {
-      return tensorflow::errors::Unimplemented(
-          "No converter registered for op: " + op);
-    }
-    OpConverter op_converter = op_registry_.at(op);
     std::vector<TRT_TensorOrWeights> outputs;
-    TF_RETURN_IF_ERROR(op_converter(*this, node_def, inputs, &outputs));
+    if (PluginFactoryTensorRT::GetInstance()->IsPlugin(op)) {
+      TF_RETURN_IF_ERROR(plugin_converter_(*this, node_def, inputs, &outputs));
+    } else {
+      if (!op_registry_.count(op)) {
+        return tensorflow::errors::Unimplemented(
+            "No converter registered for op: " + op);
+      }
+      OpConverter op_converter = op_registry_.at(op);
+      TF_RETURN_IF_ERROR(op_converter(*this, node_def, inputs, &outputs));
+    }
     for (size_t i = 0; i < outputs.size(); ++i) {
       TRT_TensorOrWeights output = outputs.at(i);
       // TODO(jie): tf protobuf seems to be omitting the :0 suffix
@@ -672,7 +693,7 @@ std::function<Eigen::half(Eigen::half)> LambdaFactory::unary<Eigen::half>() {
     case OP_CATEGORY::RSQRT: {
       VLOG(2) << "RSQRT GETS DONE";
       return [](Eigen::half t) -> Eigen::half {
-        return Eigen::half(1.0 / sqrt(float(t)));
+        return Eigen::half(1.0 / sqrt(static_cast<float>(t)));
       };
     }
     case OP_CATEGORY::NEG:
@@ -1158,9 +1179,9 @@ tensorflow::Status BinaryTensorOpTensor(
   CHECK_EQ_TYPE(tensor_r->getType(), dtype);
   auto op_pair = ops.find(node_def.op());
   if (op_pair == ops.end())
-    return tensorflow::errors::Unimplemented(
-        "binary op: " + node_def.op() +
-        " not supported at: " + node_def.name());
+    return tensorflow::errors::Unimplemented("binary op: " + node_def.op() +
+                                             " not supported at: " +
+                                             node_def.name());
 
   nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise(
       *const_cast<nvinfer1::ITensor*>(tensor_l),
@@ -1173,6 +1194,45 @@ tensorflow::Status BinaryTensorOpTensor(
   return tensorflow::Status::OK();
 }
 
+tensorflow::Status ConvertPlugin(Converter& ctx,
+                                 const tensorflow::NodeDef& node_def,
+                                 const std::vector<TRT_TensorOrWeights>& inputs,
+                                 std::vector<TRT_TensorOrWeights>* outputs) {
+  // prepare input
+  std::vector<nvinfer1::ITensor*> all_inputs;
+  for (auto input : inputs) {
+    all_inputs.emplace_back(const_cast<nvinfer1::ITensor*>(input.tensor()));
+  }
+
+  // plugin is owned by PluginFactory
+  // TODO(jie): destroy plugins later (resource management)
+  PluginTensorRT* plugin =
+      PluginFactoryTensorRT::GetInstance()->CreatePlugin(node_def.op());
+
+  // passing attributes
+  // TODO(jie): support more general attribute
+  TFAttrs attrs(node_def);
+  auto attr_key_vector = attrs.GetAllAttrKey();
+  for (auto attr_key : attr_key_vector) {
+    // TODO(jie): support only list of float for toy example here.
+    auto data = attrs.get<std::vector<float>>(attr_key);
+    size_t size_data = data.size() * sizeof(float);
+    if (!plugin->SetAttribute(attr_key, static_cast<void*>(data.data()),
+                              size_data)) {
+      return tensorflow::errors::InvalidArgument("plugin SetAttribute failed");
+    }
+  }
+
+  nvinfer1::IPluginLayer* layer = ctx.network()->addPlugin(
+      &all_inputs[0], static_cast<int>(inputs.size()), *plugin);
+
+  for (int i = 0; i < layer->getNbOutputs(); i++) {
+    nvinfer1::ITensor* output_tensor = layer->getOutput(i);
+    outputs->push_back(TRT_TensorOrWeights(output_tensor));
+  }
+  return tensorflow::Status::OK();
+}
+
 tensorflow::Status ConvertPlaceholder(
     Converter& ctx, const tensorflow::NodeDef& node_def,
     const std::vector<TRT_TensorOrWeights>& inputs,
@@ -2073,6 +2133,8 @@ void Converter::register_op_converters() {
   op_registry_["Reshape"] = ConvertReshape;
   op_registry_["FusedBatchNorm"] = ConvertFusedBatchNorm;
   op_registry_["FusedBatchNormV2"] = ConvertFusedBatchNorm;
+
+  plugin_converter_ = ConvertPlugin;
 }
 
 }  // namespace
@@ -2144,7 +2206,7 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode(
   if (!status.ok() || !calib_res->calibrator_) {
     return tensorflow::errors::FailedPrecondition(
         "You must run calibration"
-        " and inference conversion in the same proces");
+        " and inference conversion in the same process");
   }
 
   calib_res->calibrator_->setDone();
@@ -2213,60 +2275,63 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode(
   return tensorflow::Status::OK();
 }
 
-tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) {
-  // Visit nodes in reverse topological order and construct the TRT network.
-
-  // Toposort
+tensorflow::Status ReverseTopologicalSort(
+    const tensorrt::convert::SubGraphParams& s,
+    std::list<tensorflow::Node*>* order) {
   std::vector<tensorflow::Node*> order_vec;
   tensorflow::GetPostOrder(s.graph, &order_vec);
   // Select just the subgraph
-  std::list<tensorflow::Node*> order;
   for (tensorflow::Node* node : order_vec) {
     if (s.subgraph_node_ids.count(node->id())) {
-      order.push_front(node);  // we want topological order to construct the
+      // We want topological order to construct the
       // network layer by layer
+      order->push_front(node);
     }
   }
-  // topological order is needed to build TRT network
-  static int static_id = 0;
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status SetInputList(
+    const tensorrt::convert::SubGraphParams& s,
+    tensorflow::NodeDefBuilder* op_builder,
+    const std::vector<string>* input_names,
+    std::vector<tensorflow::DataType>* input_dtypes) {
+  std::vector<tensorflow::NodeDefBuilder::NodeOut> income_edges;
+  VLOG(2) << "input edge size: " << input_names->size();
+  for (size_t i = 0; i < input_names->size(); ++i) {
+    VLOG(2) << "input edges: " << i << " " << input_names->at(i);
+    int output_idx = s.input_inds.at(i).second;
+    // we wired up the input here already; it is redundant to do it again in
+    // ConvertSubGraphToTensorRT (convert_graph.cc)
+    auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut(
+        input_names->at(i), output_idx, input_dtypes->at(i));
+    income_edges.push_back(incoming_edge);
+  }
+  tensorflow::gtl::ArraySlice<tensorflow::NodeDefBuilder::NodeOut> input_list(
+      income_edges);
+  op_builder->Input(input_list);
+  return tensorflow::Status::OK();
+}
+
+string SubgraphNameScopeGenerator(const std::list<tensorflow::Node*>* order) {
   string subgraph_name_scope;
-  if (!order.empty()) {
-    subgraph_name_scope = order.front()->name();
+  if (!order->empty()) {
+    subgraph_name_scope = order->front()->name();
   }
-  for (const tensorflow::Node* node : order) {
+  for (const tensorflow::Node* node : *order) {
     subgraph_name_scope = GetCommonNameScope(subgraph_name_scope, node->name());
   }
   // TODO(sami,ben,jie): proper naming!
-  string calib_op_name =
-      StrCat(subgraph_name_scope, "my_trt_calib_op_", static_id);
-  string engine_name = StrCat(subgraph_name_scope, "my_trt_op", static_id);
-  static_id++;
-  auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance();
-  auto op_rmgr = trt_rmgr->getManager("TRTCalibOps");
-  auto op_res = new tensorflow::tensorrt::TRTCalibrationResource();
-  TF_CHECK_OK(op_rmgr->Create(calib_op_name, calib_op_name, op_res));
-  op_res->logger_ = new tensorflow::tensorrt::Logger();
-  op_res->builder_ = nvinfer1::createInferBuilder(*(op_res->logger_));
-
-  if (!op_res->builder_) {
-    return tensorflow::errors::Internal(
-        "failed to create TensorRT builder object");
-  }
-
-  op_res->network_ = op_res->builder_->createNetwork();
-  if (!op_res->network_) {
-    return tensorflow::errors::Internal(
-        "failed to create TensorRT network object");
-  }
-
-  // Build the network
-  auto weight_rmgr = trt_rmgr->getManager("WeightStore");
-  auto ws = new tensorflow::tensorrt::TRTWeightStore();
-  TF_CHECK_OK(weight_rmgr->Create(calib_op_name, calib_op_name, ws));
-  Converter converter(op_res->network_, ws, s.precision_mode == FP16MODE);
+  return subgraph_name_scope;
+}
 
-  std::vector<string> input_names;
-  std::vector<tensorflow::DataType> input_dtypes;
+tensorflow::Status ConvertSubgraph(
+    Converter& converter, tensorrt::convert::SubGraphParams& s,
+    std::list<tensorflow::Node*>* order, std::vector<string>* input_names,
+    std::vector<tensorflow::DataType>* input_dtypes,
+    std::vector<string>* output_names,
+    std::vector<tensorflow::DataType>* output_dtypes,
+    const string& engine_name) {
   for (const std::pair<int, int>& input : s.input_inds) {
     VLOG(2) << "parsing input. Node id= " << input.first;
     int node_id = input.first;
@@ -2309,22 +2374,21 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) {
 
     auto op_info = op_info_vec.at(shape_inference_output_idx);
     tensorflow::DataType tf_dtype = op_info.dtype();
-    input_dtypes.push_back(tf_dtype);
+    input_dtypes->push_back(tf_dtype);
 
     nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT);
     auto type_status = ConvertDType(tf_dtype, &dtype);
     if (type_status != tensorflow::Status::OK()) {
-      LOG(WARNING) << "Data type conversion for input '" << node_name
-                   << "' failed";
+      LOG(WARNING) << "Type conversion failed for " << node_name;
       return type_status;
     }
 
-    VLOG(2) << "accessing output index of: " << output_idx
+    VLOG(2) << "Accessing output index of: " << output_idx
             << ", at node: " << node_name
-            << "with output entry from shape_map: " << op_info_vec.size();
+            << " with output entry from shape_map: " << op_info_vec.size();
     // TODO(ben,jie): update TRT input format/dimension
-    nvinfer1::DimsCHW input_dim_psuedo_chw;
-    for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1;
+    nvinfer1::DimsCHW input_dim_pseudo_chw;
+    for (int i = 0; i < 3; i++) input_dim_pseudo_chw.d[i] = 1;
 
     // TODO(jie): TRT 3.x only support 4 dimensional input tensor.
     //            update the code once TRT 4.0 comes out.
@@ -2338,7 +2402,7 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) {
     for (int i = 1; i < op_info.shape().dim_size(); i++) {
       VLOG(2) << "dimension: " << i
               << " , size: " << op_info.shape().dim(i).size();
-      input_dim_psuedo_chw.d[i - 1] = op_info.shape().dim(i).size();
+      input_dim_pseudo_chw.d[i - 1] = op_info.shape().dim(i).size();
     }
 
     // TODO(ben,jie): proper way to restore input tensor name?
@@ -2347,33 +2411,29 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) {
       input_tensor_name = StrCat(node_name, ":", output_idx);
     }
 
-    input_names.push_back(input_tensor_name);
+    input_names->push_back(input_tensor_name);
     nvinfer1::ITensor* input_tensor = converter.network()->addInput(
-        input_tensor_name.c_str(), dtype, input_dim_psuedo_chw);
+        input_tensor_name.c_str(), dtype, input_dim_pseudo_chw);
 
     if (!input_tensor)
       return tensorflow::errors::InvalidArgument(
           "Failed to create Input layer");
-    VLOG(2) << "input tensor name :" << input_tensor_name;
+    VLOG(2) << "Input tensor name :" << input_tensor_name;
 
     if (!converter.insert_input_tensor(input_tensor_name, input_tensor))
       return tensorflow::errors::AlreadyExists(
-          "output tensor already exists for op: " + input_tensor_name);
+          "Output tensor already exists for op: " + input_tensor_name);
   }
 
-  VLOG(2) << "finished sorting";
-
-  for (const tensorflow::Node* node : order) {
+  for (const tensorflow::Node* node : *order) {
     const tensorflow::NodeDef& node_def = node->def();
-    VLOG(2) << "converting node: " << node_def.name() << " , " << node_def.op();
+    VLOG(2) << "Converting node: " << node_def.name() << " , " << node_def.op();
     TF_RETURN_IF_ERROR(converter.convert_node(node_def));
   }
 
-  VLOG(2) << "finished conversion";
+  VLOG(2) << "Finished conversion";
 
   // Gather output metadata
-  std::vector<string> output_names;
-  std::vector<tensorflow::DataType> output_dtypes;
   int trt_engine_op_output_idx = 0;
   for (const std::pair<int, int>& output : s.output_inds) {
     int node_id = output.first;
@@ -2388,14 +2448,13 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) {
              : StrCat(engine_name, ":", trt_engine_op_output_idx),
          {output_idx, tensor_name}});
     trt_engine_op_output_idx++;
-    if (output_idx != 0) {
-      tensor_name = StrCat(tensor_name, ":", output_idx);
-    }
-    VLOG(1) << "output tensor name: " << tensor_name;
-    output_names.push_back(tensor_name);
+    if (output_idx != 0)
+      tensorflow::strings::StrAppend(&tensor_name, ":", output_idx);
+    VLOG(2) << "Output tensor name: " << tensor_name;
+    output_names->push_back(tensor_name);
     auto tensor_or_weights = converter.get_tensor(tensor_name);
     if (!tensor_or_weights.is_tensor()) {
-      return tensorflow::errors::InvalidArgument("Output node'" + tensor_name +
+      return tensorflow::errors::InvalidArgument("Output node '" + tensor_name +
                                                  "' is weights not tensor");
     }
     nvinfer1::ITensor* tensor = tensor_or_weights.tensor();
@@ -2405,12 +2464,65 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) {
     }
     converter.network()->markOutput(*tensor);
     tensorflow::DataType tf_dtype = node->output_type(output_idx);
-    output_dtypes.push_back(tf_dtype);
+    output_dtypes->push_back(tf_dtype);
     nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT;
     TF_RETURN_IF_ERROR(ConvertDType(tf_dtype, &trt_dtype));
     tensor->setType(trt_dtype);
   }
 
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) {
+  // Visit nodes in reverse topological order and construct the TRT network.
+  // Toposort
+  std::list<tensorflow::Node*> order;
+  TF_RETURN_IF_ERROR(ReverseTopologicalSort(s, &order));
+
+  static int static_id = 0;
+  string subgraph_name_scope = SubgraphNameScopeGenerator(&order);
+  // TODO(sami,ben,jie): proper naming!
+  string calib_op_name =
+      StrCat(subgraph_name_scope, "my_trt_calib_op_", static_id);
+  string engine_name = StrCat(subgraph_name_scope, "my_trt_op", static_id);
+  static_id++;
+
+  auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance();
+  auto op_rmgr = trt_rmgr->getManager("TRTCalibOps");
+  auto op_res = new tensorflow::tensorrt::TRTCalibrationResource();
+  TF_CHECK_OK(op_rmgr->Create(calib_op_name, calib_op_name, op_res));
+  op_res->logger_ = new tensorflow::tensorrt::Logger();
+  cudaSetDevice(s.cuda_gpu_id_);
+  op_res->builder_ = nvinfer1::createInferBuilder(*(op_res->logger_));
+  op_res->allocator_ = s.allocator_;
+#if NV_TENSORRT_MAJOR > 3
+  op_res->builder_->setGpuAllocator(s.allocator_.get());
+#endif
+  if (!op_res->builder_) {
+    return tensorflow::errors::Internal(
+        "failed to create TensorRT builder object");
+  }
+
+  op_res->network_ = op_res->builder_->createNetwork();
+  if (!op_res->network_) {
+    return tensorflow::errors::Internal(
+        "failed to create TensorRT network object");
+  }
+
+  // Build the network
+  auto weight_rmgr = trt_rmgr->getManager("WeightStore");
+  auto ws = new tensorflow::tensorrt::TRTWeightStore();
+  TF_CHECK_OK(weight_rmgr->Create(calib_op_name, calib_op_name, ws));
+  Converter converter(op_res->network_, ws, s.precision_mode == FP16MODE);
+
+  std::vector<string> input_names;
+  std::vector<tensorflow::DataType> input_dtypes;
+  std::vector<string> output_names;
+  std::vector<tensorflow::DataType> output_dtypes;
+  TF_RETURN_IF_ERROR(ConvertSubgraph(converter, s, &order, &input_names,
+                                     &input_dtypes, &output_names,
+                                     &output_dtypes, engine_name));
+
   VLOG(2) << "Finished processing outputs";
 
   // Build the engine
@@ -2422,21 +2534,8 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) {
   // Build the TRT op
   // TODO(sami,ben,jie): proper naming!
   tensorflow::NodeDefBuilder op_builder(calib_op_name, "TRTCalibOp");
-  std::vector<tensorflow::NodeDefBuilder::NodeOut> income_edges;
-  for (size_t i = 0; i < input_names.size(); ++i) {
-    int output_idx = s.input_inds.at(i).second;
-    // we wired up the input here already, it is redundant to do it again in
-    //  ConvertSubGraphToTensorRT(convert_graph.cc)
-    auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut(
-        input_names.at(i), output_idx, input_dtypes.at(i));
-    VLOG(1) << calib_op_name << " input " << i << " = " << input_names.at(i)
-            << ":" << output_idx
-            << " dType= " << tensorflow::DataTypeString(input_dtypes.at(i));
-    income_edges.push_back(incoming_edge);
-  }
-  tensorflow::gtl::ArraySlice<tensorflow::NodeDefBuilder::NodeOut> input_list(
-      income_edges);
-  op_builder.Input(input_list);
+  SetInputList(s, &op_builder, &input_names, &input_dtypes);
+
   std::vector<string> segment_names;
   segment_names.reserve(s.subgraph_node_ids.size());
   for (int i : s.subgraph_node_ids) {
@@ -2460,46 +2559,29 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) {
 tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
     tensorrt::convert::SubGraphParams& s) {
   // Visit nodes in reverse topological order and construct the TRT network.
-
-  // Toposort
-  std::vector<tensorflow::Node*> order_vec;
-  tensorflow::GetPostOrder(s.graph, &order_vec);
-  // Select just the subgraph
   std::list<tensorflow::Node*> order;
-  for (tensorflow::Node* node : order_vec) {
-    if (s.subgraph_node_ids.count(node->id())) {
-      // We want topological order to contstruct the
-      // network layer by layer
-      order.push_front(node);
-    }
-  }
-  // Topological order is needed to build TRT network
+  TF_RETURN_IF_ERROR(ReverseTopologicalSort(s, &order));
 
-  tensorflow::tensorrt::Logger trt_logger;
+  static int static_id = 0;
+  string subgraph_name_scope = SubgraphNameScopeGenerator(&order);
+  string engine_name = StrCat(subgraph_name_scope, "my_trt_op", static_id++);
 
+  tensorflow::tensorrt::Logger trt_logger;
+  cudaSetDevice(s.cuda_gpu_id_);
   auto trt_builder = infer_object(nvinfer1::createInferBuilder(trt_logger));
   if (!trt_builder) {
     return tensorflow::errors::Internal(
         "Failed to create TensorRT builder object");
   }
-
+#if NV_TENSORRT_MAJOR > 3
+  trt_builder->setGpuAllocator(s.allocator_.get());
+#endif
   auto trt_network = infer_object(trt_builder->createNetwork());
   if (!trt_network) {
     return tensorflow::errors::Internal(
         "Failed to create TensorRT network object");
   }
 
-  string subgraph_name_scope;
-  if (!order.empty()) {
-    subgraph_name_scope = order.front()->name();
-  }
-  for (const tensorflow::Node* node : order) {
-    subgraph_name_scope = GetCommonNameScope(subgraph_name_scope, node->name());
-  }
-  static int static_id = 0;
-  // TODO(sami,ben,jie): proper naming!
-  string engine_name = StrCat(subgraph_name_scope, "my_trt_op");
-  engine_name = StrCat(engine_name, static_id++);
   auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance();
   auto weight_rmgr = trt_rmgr->getManager("WeightStore");
   auto ws = new tensorflow::tensorrt::TRTWeightStore();
@@ -2510,147 +2592,11 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
 
   std::vector<string> input_names;
   std::vector<tensorflow::DataType> input_dtypes;
-  for (const std::pair<int, int>& input : s.input_inds) {
-    VLOG(2) << "parsing input. Node id= " << input.first;
-    int node_id = input.first;
-    int output_idx = input.second;
-    tensorflow::Node* node = s.graph.FindNodeId(node_id);
-    auto node_name = node->name();
-    // input_names should use the node name in the graph
-    // here it should be the input tensor name -> matching the binding
-    // insert original node name without port
-    auto tensor_name = node_name;
-    if (output_idx != 0) {
-      tensor_name = StrCat(tensor_name, ":", output_idx);
-    }
-
-    VLOG(2) << "input name: " << node_name << " tensor_name: " << tensor_name
-            << " idx: " << output_idx;
-
-    auto shape_inference_node_name = node_name;
-    auto shape_inference_output_idx = output_idx;
-    // rewire the shape inference to original node in the graph
-    if (s.output_edge_map->count(tensor_name)) {
-      shape_inference_node_name = s.output_edge_map->at(tensor_name).second;
-      shape_inference_output_idx = s.output_edge_map->at(tensor_name).first;
-    }
-    if (shape_inference_output_idx < 0) continue;
-    VLOG(2) << "shapeinference name: " << shape_inference_node_name
-            << " idx: " << shape_inference_output_idx;
-
-    if (!s.graph_properties.HasOutputProperties(shape_inference_node_name))
-      return tensorflow::errors::Internal("failed to find input node: " +
-                                          shape_inference_node_name);
-
-    auto op_info_vec =
-        s.graph_properties.GetOutputProperties(shape_inference_node_name);
-    if (static_cast<int>(op_info_vec.size()) <= shape_inference_output_idx)
-      return tensorflow::errors::Internal(
-          "accessing output index of: ", shape_inference_output_idx,
-          ", at node: ", shape_inference_node_name,
-          " with output entry from shape_map: ", op_info_vec.size());
-
-    auto op_info = op_info_vec.at(shape_inference_output_idx);
-    tensorflow::DataType tf_dtype = op_info.dtype();
-    input_dtypes.push_back(tf_dtype);
-
-    nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT);
-    auto type_status = ConvertDType(tf_dtype, &dtype);
-    if (type_status != tensorflow::Status::OK()) {
-      LOG(WARNING) << "Type conversion failed for " << node_name;
-      return type_status;
-    }
-
-    VLOG(2) << "Accessing output index of: " << output_idx
-            << ", at node: " << node_name
-            << " with output entry from shape_map: " << op_info_vec.size();
-    // TODO(ben,jie): update TRT input format/dimension
-    nvinfer1::DimsCHW input_dim_psuedo_chw;
-    for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1;
-
-    // TODO(jie): TRT 3.x only support 4 dimensional input tensor.
-    //            update the code once TRT 4.0 comes out.
-    if (op_info.shape().dim_size() != 4) {
-      string err_str = "Require 4 dimensional input.";
-      StrAppend(&err_str, " Got ", op_info.shape().dim_size(), " ",
-                shape_inference_node_name);
-      return tensorflow::errors::Unimplemented(err_str);
-    }
-
-    for (int i = 1; i < op_info.shape().dim_size(); i++) {
-      VLOG(2) << "dimension: " << i
-              << " , size: " << op_info.shape().dim(i).size();
-      input_dim_psuedo_chw.d[i - 1] = op_info.shape().dim(i).size();
-    }
-
-    // TODO(ben,jie): proper way to restore input tensor name?
-    auto input_tensor_name = node_name;
-    if (output_idx != 0) {
-      input_tensor_name = StrCat(node_name, ":", output_idx);
-    }
-
-    input_names.push_back(input_tensor_name);
-    nvinfer1::ITensor* input_tensor = converter.network()->addInput(
-        input_tensor_name.c_str(), dtype, input_dim_psuedo_chw);
-
-    if (!input_tensor)
-      return tensorflow::errors::InvalidArgument(
-          "Failed to create Input layer");
-    VLOG(2) << "Input tensor name :" << input_tensor_name;
-
-    if (!converter.insert_input_tensor(input_tensor_name, input_tensor))
-      return tensorflow::errors::AlreadyExists(
-          "Output tensor already exists for op: " + input_tensor_name);
-  }
-
-  VLOG(2) << "Finished sorting";
-
-  for (const tensorflow::Node* node : order) {
-    const tensorflow::NodeDef& node_def = node->def();
-    VLOG(2) << "Converting node: " << node_def.name() << " , " << node_def.op();
-    TF_RETURN_IF_ERROR(converter.convert_node(node_def));
-  }
-
-  VLOG(2) << "Finished conversion";
-
-  // Gather output metadata
   std::vector<string> output_names;
   std::vector<tensorflow::DataType> output_dtypes;
-  int trt_engine_op_output_idx = 0;
-  for (const std::pair<int, int>& output : s.output_inds) {
-    int node_id = output.first;
-    int output_idx = output.second;
-    tensorflow::Node* node = s.graph.FindNodeId(node_id);
-    string op_name = node->name();
-    string tensor_name = op_name;
-
-    s.output_edge_map->insert(
-        {trt_engine_op_output_idx == 0
-             ? engine_name
-             : StrCat(engine_name, ":", trt_engine_op_output_idx),
-         {output_idx, tensor_name}});
-    trt_engine_op_output_idx++;
-    if (output_idx != 0)
-      tensorflow::strings::StrAppend(&tensor_name, ":", output_idx);
-    VLOG(2) << "Output tensor name: " << tensor_name;
-    output_names.push_back(tensor_name);
-    auto tensor_or_weights = converter.get_tensor(tensor_name);
-    if (!tensor_or_weights.is_tensor()) {
-      return tensorflow::errors::InvalidArgument("Output node '" + tensor_name +
-                                                 "' is weights not tensor");
-    }
-    nvinfer1::ITensor* tensor = tensor_or_weights.tensor();
-    if (!tensor) {
-      return tensorflow::errors::NotFound("Output tensor not found: " +
-                                          tensor_name);
-    }
-    converter.network()->markOutput(*tensor);
-    tensorflow::DataType tf_dtype = node->output_type(output_idx);
-    output_dtypes.push_back(tf_dtype);
-    nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT;
-    TF_RETURN_IF_ERROR(ConvertDType(tf_dtype, &trt_dtype));
-    tensor->setType(trt_dtype);
-  }
+  TF_RETURN_IF_ERROR(ConvertSubgraph(converter, s, &order, &input_names,
+                                     &input_dtypes, &output_names,
+                                     &output_dtypes, engine_name));
 
   VLOG(2) << "Finished output";
 
@@ -2686,20 +2632,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
 
   // Build the TRT op
   tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp");
-  std::vector<tensorflow::NodeDefBuilder::NodeOut> income_edges;
-  VLOG(2) << "input edge size: " << input_names.size();
-  for (size_t i = 0; i < input_names.size(); ++i) {
-    VLOG(2) << "input edges: " << i << " " << input_names.at(i);
-    int output_idx = s.input_inds.at(i).second;
-    // we wired up the input here already, it is redundant to do it again in
-    //  ConvertSubGraphToTensorRT(convert_graph.cc)
-    auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut(
-        input_names.at(i), output_idx, input_dtypes.at(i));
-    income_edges.push_back(incoming_edge);
-  }
-  tensorflow::gtl::ArraySlice<tensorflow::NodeDefBuilder::NodeOut> input_list(
-      income_edges);
-  op_builder.Input(input_list);
+  SetInputList(s, &op_builder, &input_names, &input_dtypes);
 
   VLOG(0) << "Finished op preparation";
 
@@ -2707,9 +2640,11 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
                     .Attr("input_nodes", input_names)
                     .Attr("output_nodes", output_names)
                     .Attr("OutT", output_dtypes)
+                    .Device(s.device_name_)
                     .Finalize(s.trt_node);
 
-  VLOG(0) << status.ToString() << " finished op building";
+  VLOG(0) << status.ToString() << " finished op building for " << engine_name
+          << " on device " << s.device_name_;
 
   return tensorflow::Status::OK();
 }
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h
index 954a1e72f86043..3f6592cd25ff01 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h
@@ -22,11 +22,11 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/grappler/costs/graph_properties.h"
 #include "tensorflow/core/lib/core/status.h"
-
 #if GOOGLE_CUDA
 #if GOOGLE_TENSORRT
 
@@ -48,7 +48,9 @@ struct SubGraphParams {
       const tensorflow::grappler::GraphProperties& current_graph_properties,
       std::unordered_map<string, std::pair<int, string>>* output_edges,
       tensorflow::NodeDef* constructed_trt_node,
-      int engine_precision_mode = FP32MODE)
+      int engine_precision_mode = FP32MODE, const string& device_name = "",
+      std::shared_ptr<nvinfer1::IGpuAllocator> allocator = nullptr,
+      int cuda_gpu_id = 0)
       : graph(inp_graph),
         subgraph_node_ids(subgraph_node_id_numbers),
         input_inds(input_indices),
@@ -58,7 +60,10 @@ struct SubGraphParams {
         graph_properties(current_graph_properties),
         output_edge_map(output_edges),
         trt_node(constructed_trt_node),
-        precision_mode(engine_precision_mode) {}
+        precision_mode(engine_precision_mode),
+        device_name_(device_name),
+        allocator_(allocator),
+        cuda_gpu_id_(cuda_gpu_id) {}
 
   tensorflow::Graph& graph;
   const std::set<int>& subgraph_node_ids;
@@ -70,6 +75,9 @@ struct SubGraphParams {
   std::unordered_map<string, std::pair<int, string>>* output_edge_map;
   tensorflow::NodeDef* trt_node;
   const int precision_mode;
+  const string device_name_;
+  std::shared_ptr<nvinfer1::IGpuAllocator> allocator_;
+  const int cuda_gpu_id_;
 };
 
 // TODO(sami): Replace references with const reference or pointers
diff --git a/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc b/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc
new file mode 100644
index 00000000000000..8f634b1f747173
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc
@@ -0,0 +1,246 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tensorrt/convert/trt_optimization_pass.h"
+#include "tensorflow/contrib/tensorrt/convert/convert_graph.h"
+#include "tensorflow/core/grappler/clusters/cluster.h"
+#include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/logging.h"
+
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
+namespace tensorflow {
+namespace tensorrt {
+namespace convert {
+// TODO(sami): Remove VLOG messages once the code matures
+using tensorflow::str_util::Uppercase;
+using tensorflow::strings::StrAppend;
+using tensorflow::strings::StrCat;
+
+// Reads minimum_segment_size, max_batch_size, max_workspace_size_bytes and
+// precision_mode from `config`; a null config only sets a 2 GiB workspace.
+tensorflow::Status TRTOptimizationPass::Init(
+    const tensorflow::RewriterConfig_CustomGraphOptimizer* config) {
+  VLOG(1) << "Called INIT for " << name_ << " with config = " << config;
+  if (config == nullptr) {
+    maximum_workspace_size_ = 2LL << 30;  // 2 GiB; 2 << 30 overflows int.
+    return tensorflow::Status::OK();
+  }
+  const auto& params = config->parameter_map();  // Reference: no map copy.
+  if (params.count("minimum_segment_size"))
+    minimum_segment_size_ = params.at("minimum_segment_size").i();
+  if (params.count("max_batch_size"))
+    maximum_batch_size_ = params.at("max_batch_size").i();
+  if (params.count("max_workspace_size_bytes"))
+    maximum_workspace_size_ = params.at("max_workspace_size_bytes").i();
+  if (params.count("precision_mode")) {
+    string pm = Uppercase(params.at("precision_mode").s());  // Case-insensitive.
+    if (pm == "FP32") {
+      precision_mode_ = 0;
+    } else if (pm == "FP16") {
+      precision_mode_ = 1;
+    } else if (pm == "INT8") {
+      precision_mode_ = 2;
+    } else {  // Any other value is rejected with InvalidArgument.
+      LOG(ERROR) << "Unknown precision mode '" << pm << "'";
+      return tensorflow::errors::InvalidArgument(
+          "Unknown precision mode argument '" + pm +
+          "'. Valid values are FP32, FP16, INT8");
+    }
+  }
+  return tensorflow::Status::OK();
+}
+
+// Logs cluster details and the item's feeds, fetches, init ops and keep ops at
+// VLOG(1) (full graph at VLOG(3)). A null `cluster` is tolerated.
+void TRTOptimizationPass::PrintDebugInfo(
+    tensorflow::grappler::Cluster* cluster,
+    const tensorflow::grappler::GrapplerItem& item) {
+  VLOG(1) << "Cluster = " << cluster;
+  string offset("  ");
+  string offset2 = StrCat(offset, offset);
+  string offset3 = StrCat(offset2, offset);
+  string offset4 = StrCat(offset2, offset2);
+  if (cluster) {
+    VLOG(1) << offset << "type             = " << cluster->type();
+    VLOG(1) << offset << "num warmup steps = " << cluster->NumWarmupSteps();
+    const auto dev_names = cluster->GetDeviceNames();
+    if (dev_names.size()) {
+      VLOG(1) << offset << " Device names:";
+      for (const auto& s : dev_names) VLOG(1) << offset2 << s;
+    }
+    std::unordered_map<string, uint64> peak_mem;
+    auto status = cluster->GetPeakMemoryUsage(&peak_mem);
+    if (status == tensorflow::Status::OK()) {
+      VLOG(1) << offset << "Peak Memory Usage :";
+      for (const auto& s : peak_mem) {
+        VLOG(1) << offset2 << s.first << " = " << s.second;
+      }
+    }
+    const auto dev_props = cluster->GetDevices();
+    if (dev_props.size()) {
+      VLOG(1) << offset << "Device properties:";
+      for (const auto& k : dev_props) {
+        VLOG(1) << offset2 << k.first;
+        const auto& dt = k.second;
+        VLOG(1) << offset3 << "type          = " << dt.type();
+        VLOG(1) << offset3 << "vendor        = " << dt.vendor();
+        VLOG(1) << offset3 << "model         = " << dt.model();
+        VLOG(1) << offset3 << "frequency     = " << dt.frequency();
+        VLOG(1) << offset3 << "num cores     = " << dt.num_cores();
+        VLOG(1) << offset3 << "num registers = " << dt.num_registers();
+        VLOG(1) << offset3 << "L1 cache size = " << dt.l1_cache_size();
+        VLOG(1) << offset3 << "L2 cache size = " << dt.l2_cache_size();
+        VLOG(1) << offset3 << "L3 cache size = " << dt.l3_cache_size();
+        VLOG(1) << offset3 << "SHMem per SMP = "
+                << dt.shared_memory_size_per_multiprocessor();
+        VLOG(1) << offset3 << "memory size   = " << dt.memory_size();
+        VLOG(1) << offset3 << "bandwidth     = " << dt.bandwidth();
+        if (dt.environment_size()) {
+          VLOG(1) << offset3 << "environment   :";
+          for (const auto& e : dt.environment()) {
+            VLOG(1) << offset4 << e.first << " = " << e.second;
+          }
+        }
+      }
+    }
+  }
+  VLOG(1) << "item: " << item.id;
+  if (item.feed.size()) {
+    VLOG(1) << offset << "Feeds  :";
+    for (const auto& f : item.feed) {
+      const auto& shape = f.second.shape();
+      VLOG(1) << offset2 << f.first << " = shaped " << shape.DebugString();
+    }
+  } else {
+    VLOG(1) << offset << "No Feeds";
+  }
+  if (item.fetch.size()) {
+    VLOG(1) << offset << "Fetches  :";
+    for (const auto& f : item.fetch) {
+      VLOG(1) << offset2 << f;
+    }
+  } else {
+    VLOG(1) << offset << "No Fetches";
+  }
+
+  if (item.init_ops.size()) {
+    VLOG(1) << offset << "init ops  :";
+    for (const auto& f : item.init_ops) {
+      VLOG(1) << offset2 << f;
+    }
+  } else {
+    VLOG(1) << offset << "No init ops";
+  }
+  VLOG(1) << "Save Op = " << item.save_op;
+  VLOG(1) << "Restore Op = " << item.restore_op;
+  VLOG(1) << "save_restore_loc_tensor = " << item.save_restore_loc_tensor;
+  if (item.keep_ops.size()) {
+    VLOG(1) << offset << "keep ops  :";
+    for (const auto& f : item.keep_ops) {
+      VLOG(1) << offset2 << f;
+    }
+  } else {
+    VLOG(1) << offset << "No keep ops";
+  }
+  VLOG(3) << item.graph.DebugString();
+  if (!cluster || !cluster->GetDeviceSet()) return;  // No device set to list.
+  for (const auto dev : cluster->GetDeviceSet()->devices()) {
+    const auto& pname = dev->parsed_name();
+    VLOG(1) << "Device name= " << dev->name()
+            << " parsedname job= " << pname.job << " id= " << pname.id
+            << " has_id: " << pname.has_id << " has_job: " << pname.has_job
+            << "has_type: " << pname.has_type << " type =" << pname.type;
+  }
+}
+
+// Deduces the batch size from the feeds when it was not configured, infers
+// static shapes and rewrites `item.graph` into `optimized_graph` through
+// ConvertAfterShapes. Returns the shape-inference or conversion status.
+tensorflow::Status TRTOptimizationPass::Optimize(
+    tensorflow::grappler::Cluster* cluster,
+    const tensorflow::grappler::GrapplerItem& item, GraphDef* optimized_graph) {
+  VLOG(1) << "Called TRTOptimization Pass " << name_;
+  if (VLOG_IS_ON(1)) PrintDebugInfo(cluster, item);
+  // Largest leading dimension over all feeds, used as the input batch size.
+  int max_dim = -1;
+  for (const auto& f : item.feed) {
+    const auto& shape = f.second.shape();
+    if (shape.dims() > 0 && shape.dim_size(0) > max_dim) {
+      max_dim = shape.dim_size(0);
+    }
+  }
+  if (maximum_batch_size_ < 0) {  // automatic batch size from input
+    if (max_dim > 0) {
+      maximum_batch_size_ = max_dim;
+      VLOG(1) << "Setting maximum batch size to " << max_dim;
+    } else {
+      maximum_batch_size_ = 128;
+      LOG(WARNING) << "Maximum batch size is not set"
+                      " and can't be deduced from inputs setting it to"
+                   << maximum_batch_size_
+                   << ". Suggest configuring it from configuration parameters";
+    }
+  } else if (max_dim > maximum_batch_size_) {
+    LOG(WARNING) << "Configured batch size " << maximum_batch_size_
+                 << " is less than input batch size " << max_dim
+                 << " adjusting maximum batch size to match input batch size";
+    // Actually apply the adjustment announced by the warning above.
+    maximum_batch_size_ = max_dim;
+  }
+  tensorflow::grappler::GraphProperties static_graph_properties(item);
+  TF_RETURN_IF_ERROR(static_graph_properties.InferStatically(true));
+  auto status = tensorflow::tensorrt::convert::ConvertAfterShapes(
+      item.graph, item.fetch, maximum_batch_size_, maximum_workspace_size_,
+      optimized_graph, precision_mode_, minimum_segment_size_,
+      static_graph_properties, cluster);
+  VLOG(2) << optimized_graph->DebugString();
+  return status;
+}
+
+void TRTOptimizationPass::Feedback(
+    tensorflow::grappler::Cluster* cluster,
+    const tensorflow::grappler::GrapplerItem& item,
+    const GraphDef& optimized_graph, double result) {}  // No-op: args unused.
+
+}  // namespace convert
+}  // namespace tensorrt
+}  // namespace tensorflow
+
+class VerboseCustomGraphOptimizerRegistrar  // Registrar that VLOGs when built.
+    : public tensorflow::grappler::CustomGraphOptimizerRegistrar {
+ public:
+  VerboseCustomGraphOptimizerRegistrar(
+      const tensorflow::grappler::CustomGraphOptimizerRegistry::Creator& cr,
+      const tensorflow::string& name)  // Both are forwarded to the base class.
+      : tensorflow::grappler::CustomGraphOptimizerRegistrar(cr, name) {
+    VLOG(1) << "Constructing a CustomOptimizationPass registration object for "
+            << name;
+  }
+};
+
+static VerboseCustomGraphOptimizerRegistrar TRTOptimizationPass_Registrar(
+    []() {  // Creator callback: returns a new TRTOptimizationPass per call.
+      VLOG(1)
+          << "Instantiating CustomOptimizationPass object TensorRTOptimizer";
+      return new tensorflow::tensorrt::convert::TRTOptimizationPass(
+          "TensorRTOptimizer");
+    },
+    ("TensorRTOptimizer"));  // Name handed to the registrar.
+
+#endif
+#endif
diff --git a/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.h b/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.h
new file mode 100644
index 00000000000000..d8ecead23efaa5
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.h
@@ -0,0 +1,73 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_TENSORRT_CONVERT_TRT_OPTIMIZATION_PASS_H_
+#define TENSORFLOW_CONTRIB_TENSORRT_CONVERT_TRT_OPTIMIZATION_PASS_H_
+
+#include <string>
+
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h"
+#include "tensorflow/core/platform/logging.h"
+
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
+
+namespace tensorflow {
+namespace tensorrt {
+namespace convert {
+
+// Grappler custom graph optimizer that runs the TensorRT graph conversion.
+class TRTOptimizationPass : public tensorflow::grappler::CustomGraphOptimizer {
+ public:
+  TRTOptimizationPass(const string& name = "TRTOptimizationPass")
+      : name_(name),
+        minimum_segment_size_(3),
+        precision_mode_(0),
+        maximum_batch_size_(-1),
+        maximum_workspace_size_(-1) {
+    VLOG(1) << "Constructing " << name_;
+  }
+  string name() const override { return name_; };
+  // Applies optional settings from `config` (see the .cc for recognized keys).
+  tensorflow::Status Init(const tensorflow::RewriterConfig_CustomGraphOptimizer*
+                              config = nullptr) override;
+  // Runs the conversion on `item.graph`, passing `optimized_graph` as output.
+  tensorflow::Status Optimize(tensorflow::grappler::Cluster* cluster,
+                              const tensorflow::grappler::GrapplerItem& item,
+                              GraphDef* optimized_graph) override;
+  // No-op in the current implementation.
+  void Feedback(tensorflow::grappler::Cluster* cluster,
+                const tensorflow::grappler::GrapplerItem& item,
+                const GraphDef& optimized_graph, double result) override;
+  // Dumps cluster and item details to VLOG; Optimize calls it under VLOG(1).
+  void PrintDebugInfo(tensorflow::grappler::Cluster* cluster,
+                      const tensorflow::grappler::GrapplerItem& item);
+
+ private:
+  string name_;
+  int minimum_segment_size_;  // Defaults to 3.
+  int precision_mode_;  // 0 = FP32 (default), 1 = FP16, 2 = INT8.
+  int maximum_batch_size_;  // -1 (default) = deduce from feeds in Optimize.
+  int64_t maximum_workspace_size_;  // Defaults to -1; Init may override it.
+};
+
+}  // namespace convert
+}  // namespace tensorrt
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_TENSORRT
+#endif  // TENSORFLOW_CONTRIB_TENSORRT_CONVERT_TRT_OPTIMIZATION_PASS_H_
diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/BUILD b/tensorflow/contrib/tensorrt/custom_plugin_examples/BUILD
new file mode 100644
index 00000000000000..a89cf3ab8bfaec
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/BUILD
@@ -0,0 +1,118 @@
+# Description:
+#   Example of plugin support in TensorRT (http://developer.nvidia.com/tensorrt)
+#   through the TensorFlow integration. Targets TensorRT 3.0.4.
+#   APIs are expected to change when TRT is upgraded.
+#   Add init_py to the pip package BUILD dependencies to install it.
+
+package(default_visibility = ["//tensorflow:__subpackages__"])
+
+licenses(["notice"])  # Apache 2.0
+
+load(
+    "//tensorflow:tensorflow.bzl",
+    "tf_custom_op_library",
+    "tf_custom_op_library_additional_deps",
+    "tf_gen_op_libs",
+    "tf_gen_op_wrapper_py",
+    "tf_kernel_library",
+)
+load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
+load(
+    "@local_config_tensorrt//:build_defs.bzl",
+    "if_tensorrt",
+)
+
+tf_gen_op_libs(
+    op_lib_names = ["inc_op"],
+)
+
+tf_gen_op_wrapper_py(
+    name = "inc_op",
+    deps = [":inc_op_op_lib"],
+)
+
+tf_custom_op_library(
+    name = "_inc_op.so",
+    srcs = [
+        "inc_op_kernel.h",
+        "inc_op_plugin.cc",
+        "inc_op_plugin.h",
+        "ops/inc_op.cc",
+    ],
+    gpu_srcs = [
+        "inc_op_kernel.h",
+        "inc_op_kernel.cu.cc",
+    ],
+    deps = [
+        "//tensorflow/contrib/tensorrt:trt_plugins",
+        "//tensorflow/core:framework_lite",
+    ] + if_tensorrt([
+        "@local_config_tensorrt//:nv_infer",
+    ]),
+)
+
+tf_kernel_library(
+    name = "inc_op_plugin_kernel",
+    srcs = ["inc_op_plugin.cc"],
+    hdrs = [
+        "inc_op_kernel.h",
+        "inc_op_plugin.h",
+    ],
+    gpu_srcs = [
+        "inc_op_kernel.h",
+        "inc_op_kernel.cu.cc",
+    ],
+    deps = [
+        "//tensorflow/contrib/tensorrt:trt_plugins",
+        "//tensorflow/core:stream_executor_headers_lib",
+    ] + if_tensorrt([
+        "@local_config_tensorrt//:nv_infer",
+    ]) + tf_custom_op_library_additional_deps(),
+)
+
+tf_custom_op_py_library(
+    name = "inc_op_loader",
+    srcs = ["inc_op.py"],
+    dso = [
+        ":_inc_op.so",
+    ],
+    kernels = [
+        ":inc_op_op_lib",
+        ":inc_op_plugin_kernel",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:resources",
+    ],
+)
+
+py_library(
+    name = "init_py",
+    srcs = ["__init__.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":inc_op",
+        ":inc_op_loader",
+    ],
+)
+
+cuda_py_test(
+    name = "plugin_test",
+    size = "small",
+    srcs = ["plugin_test.py"],
+    additional_deps = [
+        ":init_py",
+        "//tensorflow/contrib/util:util_py",
+        "//tensorflow/contrib/tensorrt:init_py",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:tf_optimizer",
+    ],
+    tags = [
+        "manual",
+        "noguitar",
+        "notap",
+    ],
+)
diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/__init__.py b/tensorflow/contrib/tensorrt/custom_plugin_examples/__init__.py
new file mode 100644
index 00000000000000..363edab2e80ada
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/__init__.py
@@ -0,0 +1,24 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+"""Import custom op for plugin and register it in plugin factory registry."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.tensorrt.custom_plugin_examples import inc_op as import_inc_op_so
+from tensorflow.contrib.tensorrt.custom_plugin_examples.ops import gen_inc_op
+# Expose the generated op wrapper under the package-level name `inc_op`.
+inc_op = gen_inc_op.inc_plugin_trt
diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op.py b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op.py
new file mode 100644
index 00000000000000..a007c3f54e208b
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op.py
@@ -0,0 +1,32 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+"""Loader for the custom inc_op."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import platform
+
+if platform.system() != "Windows":
+  # pylint: disable=g-import-not-at-top
+  from tensorflow.contrib.util import loader
+  from tensorflow.python.platform import resource_loader
+  # pylint: enable=g-import-not-at-top
+
+  _inc_op = loader.load_op_library(
+      resource_loader.get_path_to_datafile("_inc_op.so"))
+else:
+  raise RuntimeError("Windows not supported")
diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.cu.cc b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.cu.cc
new file mode 100644
index 00000000000000..988b35f74f3989
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.cu.cc
@@ -0,0 +1,84 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.h"
+
+#include <vector>
+
+#include "tensorflow/core/framework/op_kernel.h"
+
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
+#include "cuda/include/cuda_runtime_api.h"
+#include "tensorflow/core/platform/stream_executor.h"
+
+namespace tensorflow {
+namespace tensorrt {
+
+__global__ void VecInc(const float* vec, float inc, float* dest, int n) {
+  const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+  if (idx < n) dest[idx] = vec[idx] + inc;  // Threads past n do nothing.
+}
+
+void IncrementKernel(const float* d_input, float inc, float* d_output,
+                     int count, cudaStream_t stream) {
+  if (count <= 0) return;  // A launch with zero blocks is invalid.
+  const int threads = 256;  // Threads per block.
+  const int blocks = (count + threads - 1) / threads;
+  // Execution configuration order is <<<grid, block, shared bytes, stream>>>.
+  VecInc<<<blocks, threads, 0, stream>>>(d_input, inc, d_output, count);
+}
+
+// Note: this kernel definition is not needed in the plugin_test rule, but it is
+// required for correctness of the TF program, i.e. if not using plugin or when
+// run with trt optimization pass, the test should work.
+class IncPluginTRT : public OpKernel {
+ public:
+  explicit IncPluginTRT(OpKernelConstruction* context) : OpKernel(context) {
+    // The "inc" attr is a float list that must hold exactly one value.
+    std::vector<float> increments;
+    OP_REQUIRES_OK(context, context->GetAttr("inc", &increments));
+    OP_REQUIRES(context, increments.size() == 1,
+                errors::InvalidArgument(
+                    "The increment list should contain single element."));
+    inc_ = increments.front();
+  }
+
+  void Compute(OpKernelContext* context) override {
+    const Tensor& input = context->input(0);
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, input.shape(), &output));
+    const cudaStream_t* cuda_stream = CHECK_NOTNULL(
+        reinterpret_cast<const cudaStream_t*>(context->op_device_context()
+                                                  ->stream()
+                                                  ->implementation()
+                                                  ->CudaStreamMemberHack()));
+    IncrementKernel(input.flat<float>().data(), inc_,
+                    output->flat<float>().data(),
+                    input.shape().num_elements(), *cuda_stream);
+  }
+
+ private:
+  float inc_;  // Amount added to every input element.
+};
+
+REGISTER_KERNEL_BUILDER(Name("IncPluginTRT").Device(DEVICE_GPU), IncPluginTRT);
+
+}  // namespace tensorrt
+}  // namespace tensorflow
+
+#endif  // GOOGLE_TENSORRT
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.h b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.h
new file mode 100644
index 00000000000000..c35955e105798b
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.h
@@ -0,0 +1,35 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_TENSORRT_CUSTOM_PLUGIN_EXAMPLES_INC_OP_KERNEL_H_
+#define TENSORFLOW_CONTRIB_TENSORRT_CUSTOM_PLUGIN_EXAMPLES_INC_OP_KERNEL_H_
+
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
+#include "cuda/include/cuda_runtime_api.h"
+
+namespace tensorflow {
+namespace tensorrt {
+// Writes d_input[i] + inc to d_output[i] for i in [0, count) on `stream`.
+void IncrementKernel(const float* d_input, float inc, float* d_output,
+                     int count, cudaStream_t stream);
+
+}  // namespace tensorrt
+}  // namespace tensorflow
+
+#endif  // GOOGLE_TENSORRT
+#endif  // GOOGLE_CUDA
+
+#endif  // TENSORFLOW_CONTRIB_TENSORRT_CUSTOM_PLUGIN_EXAMPLES_INC_OP_KERNEL_H_
diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.cc b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.cc
new file mode 100644
index 00000000000000..8d4c893af56689
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.cc
@@ -0,0 +1,86 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.h"
+
+#include "tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.h"
+#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h"
+
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
+
+namespace tensorflow {
+namespace tensorrt {
+
+const char* kPluginName = "IncPluginTRT";
+// Factory for a freshly constructed plugin (attributes are set afterwards).
+IncOpPlugin* CreateIncPlugin() { return new IncOpPlugin(); }
+// Factory that rebuilds a plugin from previously serialized bytes.
+IncOpPlugin* CreateIncPluginDeserialize(const void* buffer, size_t length) {
+  return new IncOpPlugin(buffer, length);
+}
+// Passes both factories to the registration macro under kPluginName.
+REGISTER_TRT_PLUGIN(kPluginName, CreateIncPluginDeserialize, CreateIncPlugin);
+
+// inc_ starts at zero so serialize() never copies an indeterminate value.
+IncOpPlugin::IncOpPlugin() : inc_(0.0f), plugin_name_(kPluginName) {}
+IncOpPlugin::IncOpPlugin(const void* serialized_data, size_t length)
+    : PluginTensorRT(serialized_data, length), plugin_name_(kPluginName) {
+  size_t consumed_data = PluginTensorRT::getSerializationSize();
+  // Add rather than subtract: `length - consumed_data` can wrap around.
+  assert(length >= consumed_data + sizeof(float));
+  const char* buffer = reinterpret_cast<const char*>(serialized_data);
+  SetAttribute("inc", buffer + consumed_data, sizeof(float));
+}
+
+bool IncOpPlugin::SetAttribute(const string& key, const void* ptr,
+                               const size_t size) {
+  if (key != "inc" || size != sizeof(float)) return false;
+  // Keep attr_map_ and inc_ in sync: bail out if the store is rejected.
+  if (!StoreAttribute(key, ptr, size)) return false;
+  // ptr may point into a packed buffer, so copy instead of dereferencing.
+  std::memcpy(&inc_, ptr, sizeof(float));
+  return true;
+}
+
+bool IncOpPlugin::GetAttribute(const string& key, const void** ptr,
+                               size_t* size) const {
+  const auto entry = attr_map_.find(key);
+  if (entry == attr_map_.end()) return false;
+  // Hand back a view of the stored bytes; attr_map_ keeps ownership.
+  const std::vector<char>& bytes = entry->second;
+  *ptr = bytes.data();
+  *size = bytes.size();
+  return true;
+}
+
+int IncOpPlugin::enqueue(int batch_size, const void* const* inputs,
+                         void** outputs, void*, cudaStream_t stream) {
+  // Elements to process: product of the first input's dims times the batch.
+  const nvinfer1::Dims& dims = input_dim_list_[0];
+  int num_elements = 1;
+  for (int axis = 0; axis < dims.nbDims; ++axis) num_elements *= dims.d[axis];
+  num_elements *= batch_size;
+  const auto* src = static_cast<const float*>(inputs[0]);
+  auto* dst = static_cast<float*>(outputs[0]);
+  IncrementKernel(src, inc_, dst, num_elements, stream);
+  return 0;
+}
+
+}  // namespace tensorrt
+}  // namespace tensorflow
+
+#endif  // GOOGLE_TENSORRT
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.h b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.h
new file mode 100644
index 00000000000000..189e9c939b9ffd
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.h
@@ -0,0 +1,102 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_TENSORRT_CUSTOM_PLUGIN_EXAMPLES_INC_OP_PLUGIN_H_
+#define TENSORFLOW_CONTRIB_TENSORRT_CUSTOM_PLUGIN_EXAMPLES_INC_OP_PLUGIN_H_
+
+#include <cassert>
+#include <cstring>
+
+#include "tensorflow/contrib/tensorrt/plugin/trt_plugin.h"
+
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
+#include "tensorrt/include/NvInfer.h"
+
+namespace tensorflow {
+namespace tensorrt {
+// Example plugin with one input and one output of identical dimensions.
+class IncOpPlugin : public PluginTensorRT {
+ public:
+  IncOpPlugin();
+  // Deserializing constructor: base-class data followed by the float inc_.
+  IncOpPlugin(const void* serialized_data, size_t length);
+
+  const string& GetPluginName() const override { return plugin_name_; };
+
+  bool Finalize() override { return true; };
+  // Accepts only the key "inc" with a value of exactly sizeof(float) bytes.
+  bool SetAttribute(const string& key, const void* ptr,
+                    const size_t size) override;
+  // Returns a pointer/size pair into attr_map_ for a previously stored key.
+  bool GetAttribute(const string& key, const void** ptr,
+                    size_t* size) const override;
+
+  int getNbOutputs() const override { return 1; }
+  // The single output has the same dimensions as the single input.
+  nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs,
+                                     int num_input_dims) override {
+    assert(index == 0);
+    assert(num_input_dims == 1);
+    return inputs[0];
+  }
+
+  // Checks there is exactly one input, then lets the base class record it.
+  void configure(const nvinfer1::Dims* inputs, int num_inputs,
+                 const nvinfer1::Dims* outputs, int num_outputs,
+                 int max_batch_size) override {
+    assert(num_inputs == 1);
+    PluginTensorRT::configure(inputs, num_inputs, outputs, num_outputs,
+                              max_batch_size);
+  }
+
+  int initialize() override { return 0; }
+
+  void terminate() override {}
+
+  size_t getWorkspaceSize(int max_batch_size) const override { return 0; }
+  // Defined in inc_op_plugin.cc; the workspace argument is unused there.
+  int enqueue(int batch_size, const void* const* inputs, void** outputs,
+              void* workspace, cudaStream_t stream) override;
+  // Base-class bytes plus one float for inc_.
+  size_t getSerializationSize() override {
+    return PluginTensorRT::getSerializationSize() + sizeof(float);
+  }
+
+  void serialize(void* buffer) override {
+    // Write the base-class portion at the start of the buffer.
+    PluginTensorRT::serialize(buffer);
+    // Advance past the base-class bytes, then append inc_.
+    buffer =
+        static_cast<char*>(buffer) + PluginTensorRT::getSerializationSize();
+    std::memcpy(buffer, &inc_, sizeof(float));
+    buffer = static_cast<char*>(buffer) + sizeof(float);
+  }
+
+ protected:
+  float inc_;
+  nvinfer1::Dims dim_;
+
+ private:
+  const string plugin_name_;
+};
+
+}  // namespace tensorrt
+}  // namespace tensorflow
+
+#endif  // GOOGLE_TENSORRT
+#endif  // GOOGLE_CUDA
+
+#endif  // TENSORFLOW_CONTRIB_TENSORRT_CUSTOM_PLUGIN_EXAMPLES_INC_OP_PLUGIN_H_
diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/ops/inc_op.cc b/tensorflow/contrib/tensorrt/custom_plugin_examples/ops/inc_op.cc
new file mode 100644
index 00000000000000..d0eb0d299dd61d
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/ops/inc_op.cc
@@ -0,0 +1,36 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
+
+namespace tensorflow {
+// IncPluginTRT: one float32 input; the output has the same shape.
+REGISTER_OP("IncPluginTRT")
+    .Attr("inc: list(float)")
+    .Input("input: float32")
+    .Output("output: float32")
+    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
+      c->set_output(0, c->input(0));
+      return Status::OK();
+    });
+
+}  // namespace tensorflow
+
+#endif  // GOOGLE_TENSORRT
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/plugin_test.py b/tensorflow/contrib/tensorrt/custom_plugin_examples/plugin_test.py
new file mode 100644
index 00000000000000..bc4d270bec4fb8
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/plugin_test.py
@@ -0,0 +1,95 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Script to show usage of TensorRT custom op & plugin."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy
+
+from tensorflow.contrib import tensorrt
+from tensorflow.contrib.tensorrt import custom_plugin_examples
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.python.client import session
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import importer
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.platform import test
+
+
+class TrtPluginTest(test_util.TensorFlowTestCase):
+  """Checks that a graph containing the custom inc_op converts and runs."""
+  def _get_plugin_graph_def(self):
+    """Create a simple graph and return its graph_def."""
+    g = ops.Graph()
+    with g.as_default():
+      a = array_ops.placeholder(
+          dtype=dtypes.float32, shape=(None, 24, 24, 2), name="input")
+      relu = nn.relu(a, "relu")
+      v = nn_ops.max_pool(
+          relu, [1, 2, 2, 1], [1, 2, 2, 1], "VALID", name="max_pool")
+
+      # insert custom_op in the graph
+      v = custom_plugin_examples.inc_op(v, inc=[16.5], name="plugin_test")
+
+      v *= 2.0
+      v = nn.relu(v)
+      v = nn.relu(v)
+      array_ops.squeeze(v, name="output")
+    return g.as_graph_def()
+
+  def _run_graph(self, gdef, dumm_inp):
+    """Import gdef into a fresh graph, feed dumm_inp, return the output."""
+    gpu_options = config_pb2.GPUOptions(per_process_gpu_memory_fraction=0.50)
+    ops.reset_default_graph()
+    g = ops.Graph()
+    with g.as_default():
+      inp, out = importer.import_graph_def(
+          graph_def=gdef, return_elements=["input", "output"])
+      inp = inp.outputs[0]
+      out = out.outputs[0]
+
+    with session.Session(
+        config=config_pb2.ConfigProto(gpu_options=gpu_options),
+        graph=g) as sess:
+      val = sess.run(out, {inp: dumm_inp})
+    return val
+
+  def testIncOpPlugin(self):
+    inp_dims = (5, 24, 24, 2)
+    dummy_input = numpy.ones(inp_dims).astype(numpy.float32)
+    orig_graph = self._get_plugin_graph_def()  # graph with plugin node
+    # All-ones input gives (1 + 16.5) * 2 = 35 at every output element.
+    # trigger conversion.
+    # plugin nodes have been registered during import, converter will be able to
+    # create corresponding plugin layer during conversion.
+    trt_graph = tensorrt.create_inference_graph(
+        input_graph_def=orig_graph,
+        outputs=["output"],
+        max_batch_size=inp_dims[0],
+        max_workspace_size_bytes=1 << 25,
+        precision_mode="FP32",
+        minimum_segment_size=2)
+    o2 = self._run_graph(trt_graph, dummy_input)
+    self.assertEqual(35, o2.reshape([-1])[0])
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc
index b8f881ceb16a48..9ac80479448741 100644
--- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc
+++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc
@@ -15,6 +15,7 @@ limitations under the License.
 #include "tensorflow/contrib/tensorrt/kernels/trt_engine_op.h"
 
 #include "tensorflow/contrib/tensorrt/log/trt_logger.h"
+#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/stream_executor.h"
 #include "tensorflow/core/platform/types.h"
@@ -32,38 +33,40 @@ namespace tensorrt {
 
 TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) {
   // read serialized_engine
-  string serialized_engine;
   OP_REQUIRES_OK(context,
-                 context->GetAttr("serialized_engine", &serialized_engine));
+                 context->GetAttr("serialized_engine", &serialized_engine_));
 
   // register input output node name in trt_sub_graph
   OP_REQUIRES_OK(context, context->GetAttr("input_nodes", &input_nodes_));
   OP_REQUIRES_OK(context, context->GetAttr("output_nodes", &output_nodes_));
+}
 
-  // TODO(samikama) runtime should be taken from a resourcemanager as well.
-  // Only engine should be in the op and context and runtime should be taken
-  // from resourcemanager
-  // TODO(jie): cudaSetDevice make sure trt engine is allocated on the same
-  // gpu where the input/output is also located.
-  int gpu_id = context->device()->tensorflow_gpu_device_info()->gpu_id;
-  cudaSetDevice(gpu_id);
-  int device;
-  cudaGetDevice(&device);
-  if (gpu_id != device) LOG(FATAL) << "set device failed!";
-
+void TRTEngineOp::Compute(OpKernelContext* context) {
   // TODO(samikama) runtime should be taken from a resourcemanager as well.
   // Only engine should be in the op and context and runtime should be taken
   // from resourcemanager
 
-  IRuntime* infer = nvinfer1::createInferRuntime(logger);
-  trt_engine_ptr_.reset(infer->deserializeCudaEngine(
-      serialized_engine.c_str(), serialized_engine.size(), nullptr));
-  trt_execution_context_ptr_.reset(trt_engine_ptr_->createExecutionContext());
-  // Runtime is safe to delete after engine creation
-  infer->destroy();
-}
-
-void TRTEngineOp::Compute(OpKernelContext* context) {
+  if (!trt_execution_context_ptr_) {
+    IRuntime* infer = nvinfer1::createInferRuntime(logger);
+#if NV_TENSORRT_MAJOR > 3
+    auto device = context->device();
+    auto dev_allocator =
+        device->GetAllocator(tensorflow::AllocatorAttributes());
+    if (!dev_allocator) {
+      LOG(FATAL) << "Can't find device allocator for gpu device "
+                 << device->name();
+    }
+    allocator_ = std::make_shared<TRTDeviceAllocator>(dev_allocator);
+    infer->setGpuAllocator(allocator_.get());
+#endif
+    trt_engine_ptr_.reset(infer->deserializeCudaEngine(
+        serialized_engine_.c_str(), serialized_engine_.size(),
+        PluginFactoryTensorRT::GetInstance()));
+    trt_execution_context_ptr_.reset(trt_engine_ptr_->createExecutionContext());
+    // Runtime is safe to delete after engine creation
+    infer->destroy();
+    serialized_engine_.clear();
+  }
   int num_binding = context->num_inputs() + context->num_outputs();
   std::vector<void*> buffers(num_binding);
 
@@ -154,7 +157,12 @@ void TRTEngineOp::Compute(OpKernelContext* context) {
   VLOG(2) << "enqueue returns: " << ret;
   // sync should be done by TF.
 }
-
+TRTEngineOp::~TRTEngineOp() {
+  // Order matters: the context goes first, then the engine, then allocator_.
+  trt_execution_context_ptr_.reset();
+  trt_engine_ptr_.reset();
+  allocator_.reset();
+}
 REGISTER_KERNEL_BUILDER(Name("TRTEngineOp").Device(DEVICE_GPU), TRTEngineOp);
 
 }  // namespace tensorrt
diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h
index 0964b4b18a7811..e613a71422852e 100644
--- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h
+++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h
@@ -17,25 +17,28 @@ limitations under the License.
 #define TENSORFLOW_CONTRIB_TENSORRT_KERNELS_TRT_ENGINE_OP_H_
 
 #include <memory>
-#include <string>
 #include <vector>
 
+#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+
 #if GOOGLE_CUDA
 #if GOOGLE_TENSORRT
 #include "cuda/include/cuda_runtime_api.h"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/op_kernel.h"
 #include "tensorrt/include/NvInfer.h"
 
 namespace tensorflow {
 namespace tensorrt {
 class Logger;
 
+//  TODO(Sami): Remove this file?
 class TRTEngineOp : public OpKernel {
  public:
   explicit TRTEngineOp(OpKernelConstruction* context);
 
   void Compute(OpKernelContext* context) override;
+  ~TRTEngineOp();
 
  private:
   template <typename T>
@@ -51,6 +54,8 @@ class TRTEngineOp : public OpKernel {
 
   std::vector<string> input_nodes_;
   std::vector<string> output_nodes_;
+  std::shared_ptr<nvinfer1::IGpuAllocator> allocator_;
+  string serialized_engine_;
 };
 
 }  // namespace tensorrt
diff --git a/tensorflow/contrib/tensorrt/log/trt_logger.h b/tensorflow/contrib/tensorrt/log/trt_logger.h
index 7f3544f8cfda8d..96ccacb791e401 100644
--- a/tensorflow/contrib/tensorrt/log/trt_logger.h
+++ b/tensorflow/contrib/tensorrt/log/trt_logger.h
@@ -28,7 +28,7 @@ namespace tensorrt {
 // Logger for GIE info/warning/errors
 class Logger : public nvinfer1::ILogger {
  public:
-  Logger(string name = "DefaultLogger") : name_(name){};
+  Logger(string name = "DefaultLogger") : name_(name) {}
   void log(nvinfer1::ILogger::Severity severity, const char* msg) override;
 
  private:
diff --git a/tensorflow/contrib/tensorrt/plugin/trt_plugin.cc b/tensorflow/contrib/tensorrt/plugin/trt_plugin.cc
new file mode 100644
index 00000000000000..062f86e8bb4dc7
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/plugin/trt_plugin.cc
@@ -0,0 +1,106 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tensorrt/plugin/trt_plugin.h"
+#include <cassert>
+#include <cstring>
+#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.h"
+
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
+
+namespace tensorflow {
+namespace tensorrt {
+
+PluginTensorRT::PluginTensorRT(const void* serialized_data, size_t length) {
+  // Fields are read with memcpy: the packed buffer guarantees no alignment.
+  const char* buffer = static_cast<const char*>(serialized_data);
+  size_t op_name_char_count = 0;
+  std::memcpy(&op_name_char_count, buffer, sizeof(size_t));
+  buffer += sizeof(size_t) + op_name_char_count;  // Skip length and name.
+  size_t count = 0;
+  std::memcpy(&count, buffer, sizeof(size_t));
+  buffer += sizeof(size_t);
+  for (size_t i = 0; i < count; i++) {
+    nvinfer1::Dims dim;
+    std::memcpy(&(dim.nbDims), buffer, sizeof(dim.nbDims));
+    buffer += sizeof(dim.nbDims);
+    std::memcpy(dim.d, buffer, sizeof(dim.d));
+    buffer += sizeof(dim.d);
+    std::memcpy(dim.type, buffer, sizeof(dim.type));
+    buffer += sizeof(dim.type);
+    input_dim_list_.emplace_back(dim);
+  }
+}
+
+void PluginTensorRT::configure(const nvinfer1::Dims* inputs, int num_inputs,
+                               const nvinfer1::Dims* outputs, int num_outputs,
+                               int max_batch_size) {
+  for (int index = 0; index < num_inputs; index++) {
+    nvinfer1::Dims dim{};  // Zeroed: serialize() copies all of d and type.
+    dim.nbDims = inputs[index].nbDims;
+    for (int i = 0; i < dim.nbDims; i++) {
+      dim.d[i] = inputs[index].d[i];
+      dim.type[i] = inputs[index].type[i];
+    }
+    input_dim_list_.emplace_back(dim);
+  }
+}
+
+size_t PluginTensorRT::getSerializationSize() {
+  nvinfer1::Dims dim;  // sizeof operand only; serialize() writes every Dims.
+  return sizeof(size_t) + GetPluginName().size() +
+         sizeof(input_dim_list_.size()) + input_dim_list_.size() *
+         (sizeof(dim.nbDims) + sizeof(dim.d) + sizeof(dim.type));
+}
+
+void PluginTensorRT::serialize(void* serialized_data) {
+  // Layout: name length, name bytes, number of Dims, then each Dims' fields.
+  char* cursor = static_cast<char*>(serialized_data);
+  // Copies num_bytes from src to the cursor and advances the cursor.
+  auto append = [&cursor](const void* src, size_t num_bytes) {
+    std::memcpy(cursor, src, num_bytes);
+    cursor += num_bytes;
+  };
+
+  const string& name = GetPluginName();
+  const size_t name_length = name.size();
+  append(&name_length, sizeof(size_t));
+  append(name.data(), name_length);
+
+  const auto num_dims = input_dim_list_.size();
+  append(&num_dims, sizeof(num_dims));
+
+  for (const auto& dims : input_dim_list_) {
+    append(&(dims.nbDims), sizeof(dims.nbDims));
+    append(dims.d, sizeof(dims.d));
+    append(dims.type, sizeof(dims.type));
+  }
+}
+
+bool PluginTensorRT::StoreAttribute(const string& key, const void* ptr,
+                                    const size_t size) {
+  // An attribute can be stored only once; an existing key is left untouched.
+  if (attr_map_.find(key) != attr_map_.end()) return false;
+  const char* first = static_cast<const char*>(ptr);
+  attr_map_[key].assign(first, first + size);
+  return true;
+}
+
+}  // namespace tensorrt
+}  // namespace tensorflow
+
+#endif  // GOOGLE_TENSORRT
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/contrib/tensorrt/plugin/trt_plugin.h b/tensorflow/contrib/tensorrt/plugin/trt_plugin.h
new file mode 100644
index 00000000000000..754920b60ca743
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/plugin/trt_plugin.h
@@ -0,0 +1,74 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_H_
+#define TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_H_
+
+#include <iostream>
+#include <unordered_map>
+#include <vector>
+
+#include "tensorflow/core/platform/types.h"
+
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
+#include "tensorrt/include/NvInfer.h"
+
+namespace tensorflow {
+namespace tensorrt {
+
+// Base class for TensorRT plugins that implement a custom op in an engine.
+// User code derives from it and supplies the name, attribute handling and
+// the remaining nvinfer1::IPlugin methods. This class stores attribute bytes
+// and input dimensions and serializes the name and dimensions. Registration
+// takes a PluginDeserializeFunc and a PluginConstructFunc (see the factory).
+class PluginTensorRT : public nvinfer1::IPlugin {
+ public:
+  PluginTensorRT() {}
+  PluginTensorRT(const void* serialized_data, size_t length);
+  // Name of the op this plugin implements; serialize() writes it first.
+  virtual const string& GetPluginName() const = 0;
+
+  virtual bool Finalize() = 0;
+  // Attribute accessors; accepted keys and encodings are plugin-defined.
+  virtual bool SetAttribute(const string& key, const void* ptr,
+                            const size_t size) = 0;
+  virtual bool GetAttribute(const string& key, const void** ptr,
+                            size_t* size) const = 0;
+  // Appends the dimensions of every input to input_dim_list_.
+  void configure(const nvinfer1::Dims* inputs, int num_inputs,
+                 const nvinfer1::Dims* outputs, int num_outputs,
+                 int max_batch_size) override;
+  // Copies size bytes from ptr into attr_map_[key]; false if the key exists.
+  virtual bool StoreAttribute(const string& key, const void* ptr,
+                              const size_t size);
+  // Size and writer for: name length, name, dim count, dimension entries.
+  size_t getSerializationSize() override;
+
+  void serialize(void* buffer) override;
+
+ protected:
+  std::unordered_map<string, std::vector<char> > attr_map_;
+  // Filled by configure() or by the deserializing constructor.
+  std::vector<nvinfer1::Dims> input_dim_list_;
+};
+
+}  // namespace tensorrt
+}  // namespace tensorflow
+
+#endif  // GOOGLE_TENSORRT
+#endif  // GOOGLE_CUDA
+
+#endif  // TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_H_
diff --git a/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.cc b/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.cc
new file mode 100644
index 00000000000000..2bc591484dcaf5
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.cc
@@ -0,0 +1,78 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h"
+
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
+
+namespace tensorflow {
+namespace tensorrt {
+
+PluginTensorRT* PluginFactoryTensorRT::createPlugin(const char* layer_name,
+                                                    const void* serial_data,
+                                                    size_t serial_length) {
+  // The op name read from the front of the blob selects the deserializer.
+  // parsed_byte only satisfies ExtractOpName's signature; it is unused here.
+  size_t parsed_byte = 0;
+  string encoded_op_name =
+      ExtractOpName(serial_data, serial_length, &parsed_byte);
+  // Look up under the lock: the unlocked IsPlugin() pre-check could race with
+  // RegisterPlugin(). find() also avoids operator[] default-inserting on a
+  // miss. Unregistered op names yield nullptr.
+  tensorflow::mutex_lock lock(instance_m_);
+  auto it = plugin_registry_.find(encoded_op_name);
+  if (it == plugin_registry_.end()) return nullptr;
+  // The deserializer gets the whole blob; the factory owns the result.
+  PluginTensorRT* plugin_ptr = it->second.first(serial_data, serial_length);
+  owned_plugins_.emplace_back(plugin_ptr);
+  return plugin_ptr;
+}
+
+PluginTensorRT* PluginFactoryTensorRT::CreatePlugin(const string& op_name) {
+  // Single locked find(): no unlocked pre-check, no second operator[] lookup.
+  tensorflow::mutex_lock lock(instance_m_);
+  auto it = plugin_registry_.find(op_name);
+  if (it == plugin_registry_.end()) return nullptr;  // unregistered op
+  PluginTensorRT* plugin_ptr = it->second.second();
+  owned_plugins_.emplace_back(plugin_ptr);  // the factory keeps ownership
+  return plugin_ptr;
+}
+
+bool PluginFactoryTensorRT::RegisterPlugin(
+    const string& op_name, PluginDeserializeFunc deserialize_func,
+    PluginConstructFunc construct_func) {
+  // emplace() leaves an existing entry untouched, so one locked call both
+  // detects a duplicate op_name (returns false) and inserts a new one (true).
+  tensorflow::mutex_lock lock(instance_m_);
+  auto ret = plugin_registry_.emplace(
+      op_name, std::make_pair(deserialize_func, construct_func));
+
+  return ret.second;
+}
+
+void PluginFactoryTensorRT::DestroyPlugins() {
+  tensorflow::mutex_lock lock(instance_m_);
+  for (auto& owned_plugin_ptr : owned_plugins_) {
+    owned_plugin_ptr.release();  // NOTE(review): drops ownership, no delete
+  }
+  owned_plugins_.clear();  // the vector is emptied; pointees are not freed here
+}
+
+}  // namespace tensorrt
+}  // namespace tensorflow
+
+#endif  // GOOGLE_TENSORRT
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h b/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h
new file mode 100644
index 00000000000000..bbae9fb65c22cf
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h
@@ -0,0 +1,102 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_FACTORY_H_
+#define TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_FACTORY_H_
+
+#include <memory>
+#include <unordered_map>
+
+#include "tensorflow/contrib/tensorrt/plugin/trt_plugin.h"
+#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/platform/mutex.h"
+
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
+#include "tensorrt/include/NvInfer.h"
+
+namespace tensorflow {
+namespace tensorrt {
+// Singleton registry of plugin factories by op name; owns created plugins.
+class PluginFactoryTensorRT : public nvinfer1::IPluginFactory {
+ public:
+  // TODO(aaroey): this static method has to be inlined to make the singleton a
+  // unique global symbol. Find a way to fix it.
+  static PluginFactoryTensorRT* GetInstance() {
+    static PluginFactoryTensorRT* factory_instance =
+        new PluginFactoryTensorRT();
+    return factory_instance;
+  }
+
+  // Deserialization entry point (overrides the nvinfer1::IPluginFactory hook).
+  PluginTensorRT* createPlugin(const char* layer_name, const void* serial_data,
+                               size_t serial_length) override;
+
+  // Plugin construction, PluginFactoryTensorRT owns the plugin.
+  PluginTensorRT* CreatePlugin(const string& op_name);
+  // Adds op_name with its deserialize/construct functions to the registry.
+  bool RegisterPlugin(const string& op_name,
+                      PluginDeserializeFunc deserialize_func,
+                      PluginConstructFunc construct_func);
+  // True when op_name is in plugin_registry_; instance_m_ is not taken here.
+  bool IsPlugin(const string& op_name) {
+    return plugin_registry_.find(op_name) != plugin_registry_.end();
+  }
+  // Size of owned_plugins_; instance_m_ is not taken here either.
+  size_t CountOwnedPlugins() { return owned_plugins_.size(); }
+
+  void DestroyPlugins();
+
+ protected:
+  std::unordered_map<string,
+                     std::pair<PluginDeserializeFunc, PluginConstructFunc>>
+      plugin_registry_;  // op name -> (deserialize, construct)
+
+  // TODO(jie): Owned plugin should be associated with different sessions;
+  //            should really hand ownership of plugins to resource management;
+  std::vector<std::unique_ptr<PluginTensorRT>> owned_plugins_;
+  tensorflow::mutex instance_m_;
+};
+
+class TrtPluginRegistrar {  // Registers a plugin when constructed.
+ public:
+  TrtPluginRegistrar(const string& name, PluginDeserializeFunc deserialize_func,
+                     PluginConstructFunc construct_func) {
+    QCHECK(PluginFactoryTensorRT::GetInstance()->RegisterPlugin(
+        name, deserialize_func, construct_func))  // must return true
+        << "Failed to register plugin: " << name;
+  }
+};
+// Defines a static TrtPluginRegistrar whose name is suffixed by __COUNTER__.
+#define REGISTER_TRT_PLUGIN(name, deserialize_func, construct_func)    \
+  REGISTER_TRT_PLUGIN_UNIQ_HELPER(__COUNTER__, name, deserialize_func, \
+                                  construct_func)
+#define REGISTER_TRT_PLUGIN_UNIQ_HELPER(ctr, name, deserialize_func, \
+                                        construct_func)              \
+  REGISTER_TRT_PLUGIN_UNIQ(ctr, name, deserialize_func, construct_func)
+#define REGISTER_TRT_PLUGIN_UNIQ(ctr, name, deserialize_func, construct_func) \
+  static ::tensorflow::tensorrt::TrtPluginRegistrar trt_plugin_registrar##ctr \
+      TF_ATTRIBUTE_UNUSED = ::tensorflow::tensorrt::TrtPluginRegistrar(       \
+          name, deserialize_func, construct_func)
+
+}  // namespace tensorrt
+}  // namespace tensorflow
+
+#endif  // GOOGLE_TENSORRT
+#endif  // GOOGLE_CUDA
+
+#endif  // TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_FACTORY_H_
diff --git a/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory_test.cc b/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory_test.cc
new file mode 100644
index 00000000000000..129bdcdbc2f8d9
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory_test.cc
@@ -0,0 +1,125 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h"
+
+#include "tensorflow/contrib/tensorrt/plugin/trt_plugin.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/types.h"
+
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
+#include "tensorrt/include/NvInfer.h"
+
+namespace tensorflow {
+namespace tensorrt {
+namespace test {
+
+class StubPlugin : public PluginTensorRT {
+ public:
+  static const char* kPluginName;
+
+  StubPlugin() : plugin_name_(kPluginName) {}
+  // Deserializing constructor; it names the plugin like the default one does.
+  StubPlugin(const void* serialized_data, size_t length)
+      : PluginTensorRT(serialized_data, length), plugin_name_(kPluginName) {}
+
+  const string& GetPluginName() const override { return plugin_name_; }
+  // The remaining overrides are stubs that do no real work.
+  bool Finalize() override { return true; }
+
+  bool SetAttribute(const string& key, const void* ptr,
+                    const size_t size) override {
+    return true;
+  }
+
+  bool GetAttribute(const string& key, const void** ptr,
+                    size_t* size) const override {
+    return true;
+  }
+
+  int getNbOutputs() const override { return 1; }
+  // The single output's dimensions mirror the first input.
+  nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs,
+                                     int nbInputDims) override {
+    return inputs[0];
+  }
+
+  int initialize() override { return 0; }
+
+  void terminate() override {}
+
+  size_t getWorkspaceSize(int maxBatchSize) const override { return 0; }
+
+  int enqueue(int batch_size, const void* const* inputs, void** outputs,
+              void* workspace, cudaStream_t stream) override {
+    return 0;
+  }
+
+ private:
+  const string plugin_name_;
+};
+
+const char* StubPlugin::kPluginName = "StubPlugin";
+// Construct / deserialize hooks passed to RegisterPlugin() by the fixture.
+StubPlugin* CreateStubPlugin() { return new StubPlugin(); }
+
+StubPlugin* CreateStubPluginDeserialize(const void* serialized_data,
+                                        size_t length) {
+  return new StubPlugin(serialized_data, length);
+}
+
+class TrtPluginFactoryTest : public ::testing::Test {
+ public:
+  // Ensures StubPlugin is registered; false only if a new registration fails.
+  bool RegisterStubPlugin() {
+    auto* factory = PluginFactoryTensorRT::GetInstance();
+    if (factory->IsPlugin(StubPlugin::kPluginName)) return true;
+    return factory->RegisterPlugin(StubPlugin::kPluginName,
+                                   CreateStubPluginDeserialize,
+                                   CreateStubPlugin);
+  }
+};
+
+TEST_F(TrtPluginFactoryTest, Registration) {
+  // The stub must be unknown before registration and known afterwards.
+  auto* factory = PluginFactoryTensorRT::GetInstance();
+  EXPECT_FALSE(factory->IsPlugin(StubPlugin::kPluginName));
+  EXPECT_TRUE(RegisterStubPlugin());
+
+  ASSERT_TRUE(factory->IsPlugin(StubPlugin::kPluginName));
+}
+
+TEST_F(TrtPluginFactoryTest, CreationDeletion) {
+  auto* factory = PluginFactoryTensorRT::GetInstance();
+  EXPECT_TRUE(RegisterStubPlugin());
+  ASSERT_TRUE(factory->IsPlugin(StubPlugin::kPluginName));
+
+  // Empty the owned-plugin list first so that the counts below are exact.
+  factory->DestroyPlugins();
+  ASSERT_TRUE(factory->CreatePlugin(StubPlugin::kPluginName));
+  ASSERT_EQ(1, factory->CountOwnedPlugins());
+  factory->DestroyPlugins();
+  ASSERT_EQ(0, factory->CountOwnedPlugins());
+}
+
+}  // namespace test
+}  // namespace tensorrt
+}  // namespace tensorflow
+
+#endif  // GOOGLE_TENSORRT
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.cc b/tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.cc
new file mode 100644
index 00000000000000..a8f60886c03c17
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.cc
@@ -0,0 +1,42 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.h"
+#include <cassert>
+
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
+
+namespace tensorflow {
+namespace tensorrt {
+
+string ExtractOpName(const void* serial_data, size_t serial_length,
+                     size_t* incremental) {
+  // Layout: [size_t name_length][name bytes]. Null or truncated input returns
+  // "" with *incremental == 0 (assert() alone is compiled out under NDEBUG).
+  *incremental = 0;
+  if (serial_data == nullptr || serial_length < sizeof(size_t)) return "";
+  size_t op_name_char_count = *static_cast<const size_t*>(serial_data);
+  if (op_name_char_count > serial_length - sizeof(size_t)) return "";
+  *incremental = sizeof(size_t) + op_name_char_count;
+  const char* buffer = static_cast<const char*>(serial_data) + sizeof(size_t);
+  return string(buffer, op_name_char_count);
+}
+
+}  // namespace tensorrt
+}  // namespace tensorflow
+
+#endif  // GOOGLE_TENSORRT
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.h b/tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.h
new file mode 100644
index 00000000000000..274ce42fec9283
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.h
@@ -0,0 +1,46 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_UTILS_H_
+#define TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_UTILS_H_
+
+#include <functional>
+
+#include "tensorflow/contrib/tensorrt/plugin/trt_plugin.h"
+#include "tensorflow/core/platform/types.h"
+
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
+#include "tensorrt/include/NvInfer.h"
+
+namespace tensorflow {
+namespace tensorrt {
+// Rebuilds a plugin from a serialized buffer and its length.
+typedef std::function<PluginTensorRT*(const void*, size_t)>
+    PluginDeserializeFunc;
+// Builds a fresh plugin; takes no arguments.
+typedef std::function<PluginTensorRT*(void)> PluginConstructFunc;
+// Reads the op name at the front of serial_data; *incremental = bytes read.
+// TODO(jie): work on error handling here
+string ExtractOpName(const void* serial_data, size_t serial_length,
+                     size_t* incremental);
+
+}  // namespace tensorrt
+}  // namespace tensorflow
+
+#endif  // GOOGLE_TENSORRT
+#endif  // GOOGLE_CUDA
+
+#endif  // TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_UTILS_H_
diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
new file mode 100644
index 00000000000000..0f0508331c1305
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc
@@ -0,0 +1,62 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h"
+
+#include "tensorflow/core/platform/logging.h"
+
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
+
+#if NV_TENSORRT_MAJOR > 2
+#include "cuda/include/cuda_runtime_api.h"
+
+namespace tensorflow {
+namespace tensorrt {
+void* TRTCudaAllocator::allocate(uint64_t size, uint64_t alignment,
+                                 uint32_t flags) {
+  assert((alignment & (alignment - 1)) == 0);  // zero or a power of 2.
+  void* memory = nullptr;  // Never hand back an indeterminate pointer.
+  const cudaError_t status = cudaMalloc(&memory, size);
+  return status == cudaSuccess ? memory : nullptr;  // nullptr signals failure.
+}
+// Hands memory to cudaFree(); the returned status is not checked.
+void TRTCudaAllocator::free(void* memory) { cudaFree(memory); }
+
+void* TRTDeviceAllocator::allocate(uint64_t size, uint64_t alignment,
+                                   uint32_t flags) {  // flags is not consulted
+  assert((alignment & (alignment - 1)) == 0);  // zero or a power of 2.
+  void* const buffer = allocator_->AllocateRaw(alignment, size);
+  VLOG(2) << "Allocated " << size << " bytes with alignment " << alignment
+          << " @ " << buffer;
+  return buffer;
+}
+// Stores the allocator pointer and logs its name at VLOG level 1.
+TRTDeviceAllocator::TRTDeviceAllocator(tensorflow::Allocator* allocator)
+    : allocator_(allocator) {
+  VLOG(1) << "Using " << allocator->Name() << " allocator from TensorFlow";
+}
+// Logs the pointer at VLOG level 2, then passes it to DeallocateRaw().
+void TRTDeviceAllocator::free(void* memory) {
+  VLOG(2) << "Deallocating " << memory;
+  allocator_->DeallocateRaw(memory);
+}
+
+}  // namespace tensorrt
+}  // namespace tensorflow
+
+#endif  // NV_TENSORRT_MAJOR > 2
+#endif  // GOOGLE_TENSORRT
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.h b/tensorflow/contrib/tensorrt/resources/trt_allocator.h
new file mode 100644
index 00000000000000..a0c2540a7698bc
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.h
@@ -0,0 +1,68 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_ALLOCATOR_H_
+#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_ALLOCATOR_H_
+
+
+#include "tensorflow/contrib/tensorrt/log/trt_logger.h"
+#include "tensorflow/core/framework/allocator.h"
+
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
+#include "tensorrt/include/NvInfer.h"
+
+#if NV_TENSORRT_MAJOR == 3
+// Temporary stand-in for TRT 3 builds, until TRT 4.0 provides this interface.
+namespace nvinfer1 {
+class IGpuAllocator {  // NOTE(review): no virtual destructor is declared here.
+ public:
+  virtual void* allocate(uint64_t size, uint64_t alignment, uint32_t flags) = 0;
+  virtual void free(void* memory) = 0;
+};
+}  // namespace nvinfer1
+#endif  // NV_TENSORRT_MAJOR == 3
+
+namespace tensorflow {
+namespace tensorrt {
+
+class TRTCudaAllocator : public nvinfer1::IGpuAllocator {
+  // Allocator implementation that uses the CUDA allocator instead of the
+  // device allocator, in case we can't get the device allocator from TF.
+ public:
+  TRTCudaAllocator() {}
+  virtual ~TRTCudaAllocator() {}
+  void* allocate(uint64_t size, uint64_t alignment, uint32_t flags) override;
+  void free(void* memory) override;
+};
+
+class TRTDeviceAllocator : public nvinfer1::IGpuAllocator {
+  // IGpuAllocator implementation that forwards to a TensorFlow Allocator.
+ public:
+  TRTDeviceAllocator(tensorflow::Allocator* allocator);
+  virtual ~TRTDeviceAllocator() {}
+  void* allocate(uint64_t size, uint64_t alignment, uint32_t flags) override;
+  void free(void* memory) override;
+  // allocator_ is not deleted by this class (the destructor is empty).
+ private:
+  tensorflow::Allocator* allocator_;
+};
+
+}  // namespace tensorrt
+}  // namespace tensorflow
+
+#endif  // GOOGLE_TENSORRT
+#endif  // GOOGLE_CUDA
+#endif  // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_ALLOCATOR_H_
diff --git a/tensorflow/contrib/tensorrt/resources/trt_resources.h b/tensorflow/contrib/tensorrt/resources/trt_resources.h
index 3c85968ae7acf5..e3469124acd4b9 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_resources.h
+++ b/tensorflow/contrib/tensorrt/resources/trt_resources.h
@@ -13,20 +13,23 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_
-#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_
+#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_RESOURCES_H_
+#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_RESOURCES_H_
 
 #include <list>
 #include <sstream>
 #include <string>
 #include <thread>
 #include <vector>
+
 #include "tensorflow/contrib/tensorrt/log/trt_logger.h"
+#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h"
+#include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h"
 #include "tensorflow/core/framework/resource_mgr.h"
 
 #if GOOGLE_CUDA
 #if GOOGLE_TENSORRT
-#include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h"
+
 #include "tensorrt/include/NvInfer.h"
 
 namespace tensorflow {
@@ -40,6 +43,11 @@ class TRTCalibrationResource : public tensorflow::ResourceBase {
         engine_(nullptr),
         logger_(nullptr),
         thr_(nullptr) {}
+
+  ~TRTCalibrationResource() {
+    VLOG(0) << "Destroying Calibration Resource " << std::endl << DebugString();
+  }
+
   string DebugString() override {
     std::stringstream oss;
     oss << " Calibrator = " << std::hex << calibrator_ << std::dec << std::endl
@@ -47,16 +55,17 @@ class TRTCalibrationResource : public tensorflow::ResourceBase {
         << " Network    = " << std::hex << network_ << std::dec << std::endl
         << " Engine     = " << std::hex << engine_ << std::dec << std::endl
         << " Logger     = " << std::hex << logger_ << std::dec << std::endl
+        << " Allocator  = " << std::hex << allocator_.get() << std::dec
+        << std::endl
         << " Thread     = " << std::hex << thr_ << std::dec << std::endl;
     return oss.str();
   }
-  ~TRTCalibrationResource() {
-    VLOG(0) << "Destroying Calibration Resource " << std::endl << DebugString();
-  }
+
   TRTInt8Calibrator* calibrator_;
   nvinfer1::IBuilder* builder_;
   nvinfer1::INetworkDefinition* network_;
   nvinfer1::ICudaEngine* engine_;
+  std::shared_ptr<nvinfer1::IGpuAllocator> allocator_;
   tensorflow::tensorrt::Logger* logger_;
   // TODO(sami): Use threadpool threads!
   std::thread* thr_;
@@ -65,31 +74,28 @@ class TRTCalibrationResource : public tensorflow::ResourceBase {
 class TRTWeightStore : public tensorflow::ResourceBase {
  public:
   TRTWeightStore() {}
-  std::list<std::vector<uint8_t>> store_;
+
+  virtual ~TRTWeightStore() { VLOG(1) << "Destroying store" << DebugString(); }
+
   string DebugString() override {
     std::stringstream oss;
-    size_t lenBytes = 0;
+    size_t len_bytes = 0;
     for (const auto& v : store_) {
-      lenBytes += v.size() * sizeof(uint8_t);
+      len_bytes += v.size() * sizeof(uint8_t);
     }
     oss << " Number of entries     = " << store_.size() << std::endl
         << " Total number of bytes = "
-        << store_.size() * sizeof(std::vector<uint8_t>) + lenBytes << std::endl;
+        << store_.size() * sizeof(std::vector<uint8_t>) + len_bytes
+        << std::endl;
     return oss.str();
   }
-  virtual ~TRTWeightStore() { VLOG(1) << "Destroying store" << DebugString(); }
-};
 
-class TRTEngineResource : public tensorflow::ResourceBase {
- public:
-  TRTEngineResource() : runtime_(nullptr), ctx_(nullptr){};
-  string DebugString() override { return string(""); }
-  nvinfer1::IRuntime* runtime_;
-  nvinfer1::IExecutionContext* ctx_;
+  std::list<std::vector<uint8_t>> store_;
 };
 
 }  // namespace tensorrt
 }  // namespace tensorflow
-#endif  // TENSORFLOW_CONTRIB_TENSORRT_RESOURCEMGR_TRTRESOURCES_H_
+
 #endif
 #endif
+#endif  // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_RESOURCES_H_
diff --git a/tensorflow/contrib/tensorrt/segment/segment.cc b/tensorflow/contrib/tensorrt/segment/segment.cc
index 8fc4697c513057..cc42913ecadc3e 100644
--- a/tensorflow/contrib/tensorrt/segment/segment.cc
+++ b/tensorflow/contrib/tensorrt/segment/segment.cc
@@ -25,18 +25,239 @@ limitations under the License.
 #include "tensorflow/core/graph/graph_constructor.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 namespace tensorrt {
 namespace segment {
+using ::tensorflow::strings::StrAppend;
+// A simple graph representation to mirror tensorflow::Graph. This structure
+// helps saving memory since segmenter modifies the graph in place, preventing
+// the need to create a copy of the graph. It is composed of edges and nodes.
+// Nodes keep pointers to original TF nodes.
+class SimpleNode;
+class SimpleGraph;
+class SimpleEdge {  // Edge from src:src_port to dst:dst_port.
+ public:
+  SimpleEdge(int id, SimpleNode* src, int src_port, SimpleNode* dst,
+             int dst_port, bool is_control = false)
+      : id_(id),
+        src_(src),
+        src_port_(src_port),
+        dst_(dst),
+        dst_port_(dst_port),
+        control_(is_control) {}
+  ~SimpleEdge() {}
+  // Read-only accessors; no member is modified after construction.
+  SimpleNode* src() const { return src_; }
+  SimpleNode* dst() const { return dst_; }
+  int src_output() const { return src_port_; }
+  int dst_input() const { return dst_port_; }
+  int id() const { return id_; }
+  bool IsControlEdge() const { return control_; }
+
+ private:
+  int id_;
+  SimpleNode* src_;
+  int src_port_;
+  SimpleNode* dst_;
+  int dst_port_;
+  bool control_;
+};
+
+class SimpleNode {
+ public:
+  SimpleNode(const tensorflow::Node* node, const int id);
+  // Adjacency accessors; in_nodes() skips null edge entries.
+  const std::vector<SimpleEdge*>& in_edges() const { return in_edges_; }
+  const std::vector<SimpleEdge*>& out_edges() const { return out_edges_; }
+  std::vector<SimpleNode*> in_nodes() const {
+    std::vector<SimpleNode*> sources;
+    sources.reserve(in_edges_.size());
+    for (const SimpleEdge* in_edge : in_edges_) {
+      if (in_edge != nullptr) sources.push_back(in_edge->src());
+    }
+    return sources;
+  }
+  const string& name() const { return node_->name(); }
+  const tensorflow::Node* tf_node() const { return node_; }
+  int id() const { return id_; }
+
+ private:
+  const tensorflow::Node* node_;
+  std::vector<SimpleEdge*> in_edges_;
+  std::vector<SimpleEdge*> out_edges_;
+  int id_;
+  // SimpleGraph edits the edge lists directly.
+  friend class SimpleGraph;
+};
+
+class SimpleGraph {
+ public:
+  explicit SimpleGraph(const tensorflow::Graph* g);
+  ~SimpleGraph();
+  // Edge mutators (defined out of line).
+  void AddControlEdge(SimpleNode* src, SimpleNode* dst);
+  void AddEdge(SimpleNode* src, int out_port, SimpleNode* dst, int in_port);
+  void RemoveEdge(const SimpleEdge*);
+  SimpleNode* FindNodeId(int node_id) {
+    // Ids outside [0, nodes_.size()) have no slot; size() itself is invalid.
+    const int n = static_cast<int>(nodes_.size());
+    if (node_id < 0 || node_id >= n) return nullptr;
+    return nodes_[node_id];
+  }
+  int num_node_ids() const { return nodes_.size(); }
+  const SimpleNode* source_node() const {
+    return nodes_[tensorflow::Graph::kSourceId];
+  }
+  const SimpleNode* sink_node() const {
+    return nodes_[tensorflow::Graph::kSinkId];
+  }
+
+ private:
+  const tensorflow::Graph* g_;
+  std::vector<SimpleNode*> nodes_;
+  std::vector<SimpleEdge*> edges_;
+  // free_edge_ids_ and free_node_ids_ contain freed indices.
+  std::set<int> free_edge_ids_;
+  std::set<int> free_node_ids_;
+};
+
+SimpleNode::SimpleNode(const tensorflow::Node* node, const int id)
+    : node_(node), id_(id) {
+  if (node_ == nullptr) return;  // nothing to pre-size without a TF node
+  // Pre-size the adjacency lists to the TF node's current edge counts.
+  in_edges_.reserve(node_->in_edges().size());
+  out_edges_.reserve(node_->out_edges().size());
+}
+
+SimpleGraph::SimpleGraph(const tensorflow::Graph* g) : g_(g) {
+  // Mirror the TF graph: node/edge ids index directly into nodes_/edges_.
+  const int node_count = g_->num_node_ids();
+  const int edge_count = g->num_edge_ids();
+  nodes_.resize(node_count, nullptr);
+  edges_.resize(edge_count, nullptr);
+  nodes_[g->kSourceId] = new SimpleNode(g->source_node(), g->kSourceId);
+  nodes_[g->kSinkId] = new SimpleNode(g->sink_node(), g->kSinkId);
+  // Starts at 2: source/sink were filled above (assumes they are ids 0 and 1).
+  for (int id = 2; id < node_count; ++id) {
+    const auto tf_node = g->FindNodeId(id);
+    if (tf_node == nullptr) {
+      free_node_ids_.insert(id);
+      continue;
+    }
+    nodes_[id] = new SimpleNode(tf_node, id);
+  }
+  for (int id = 0; id < edge_count; ++id) {
+    const auto tf_edge = g->FindEdgeId(id);
+    if (tf_edge == nullptr) {
+      free_edge_ids_.insert(id);
+      continue;
+    }
+    // Wire the mirrored edge into both endpoints' adjacency lists.
+    auto from = nodes_[tf_edge->src()->id()];
+    auto to = nodes_[tf_edge->dst()->id()];
+    auto edge = new SimpleEdge(id, from, tf_edge->src_output(), to,
+                               tf_edge->dst_input(), tf_edge->IsControlEdge());
+    edges_[id] = edge;
+    from->out_edges_.push_back(edge);
+    to->in_edges_.push_back(edge);
+  }
+}
+
+void SimpleGraph::AddEdge(SimpleNode* src, int out_port, SimpleNode* dst,
+                          int in_port) {
+  // Reuse the smallest free edge id if any; otherwise append a new slot.
+  int edge_id = edges_.size();
+  if (free_edge_ids_.empty()) {
+    edges_.push_back(nullptr);
+  } else {
+    edge_id = *free_edge_ids_.begin();
+    free_edge_ids_.erase(free_edge_ids_.begin());
+  }
+  const bool is_control = out_port == tensorflow::Graph::kControlSlot ||
+                          in_port == tensorflow::Graph::kControlSlot;
+  auto edge = new SimpleEdge(edge_id, src, out_port, dst, in_port, is_control);
+  edges_[edge_id] = edge;
+  src->out_edges_.push_back(edge);
+  dst->in_edges_.push_back(edge);
+}
+// Adds an edge that uses the control slot on both ends.
+void SimpleGraph::AddControlEdge(SimpleNode* src, SimpleNode* dst) {
+  AddEdge(src, tensorflow::Graph::kControlSlot, dst,
+          tensorflow::Graph::kControlSlot);
+}
+
+void SimpleGraph::RemoveEdge(const SimpleEdge* edge) {
+  // Unlinks edge from its endpoints' adjacency lists. Only the first matching
+  // entry of each list is erased; edges_ and free_edge_ids_ are not touched.
+  auto unlink = [edge](std::vector<SimpleEdge*>* list) {
+    for (auto it = list->begin(); it != list->end(); ++it) {
+      if (*it == edge) {
+        list->erase(it);
+        return;
+      }
+    }
+  };
+
+  // Source's out-list first, then the destination's in-list.
+  unlink(&edge->src()->out_edges_);
+  unlink(&edge->dst()->in_edges_);
+}
+// Deletes every node and edge object held in nodes_ and edges_.
+SimpleGraph::~SimpleGraph() {
+  for (auto x : nodes_) delete x;
+  for (auto x : edges_) delete x;
+}
 
 namespace {
 
-bool CanContractEdge(const tensorflow::Edge* edge,
-                     const tensorflow::Graph& graph) {
-  const tensorflow::Node* src = edge->src();
-  const tensorflow::Node* dst = edge->dst();
+bool CheckCycles(const std::unique_ptr<SimpleGraph>& g, const SimpleNode* src,
+                 const std::vector<SimpleNode*>& start) {
+  // Reverse DFS from `start` (copied from TF ReverseDFS); true if src is hit.
+  struct Work {
+    SimpleNode* node;
+    bool leave;  // Are we entering or leaving n?
+  };
+
+  std::vector<Work> stack(start.size());
+  for (int i = 0; i < start.size(); ++i) {
+    stack[i] = Work{start[i], false};
+  }
+
+  std::vector<bool> visited(g->num_node_ids(), false);
+  while (!stack.empty()) {
+    Work w = stack.back();
+    stack.pop_back();
+    // Leave entries only test for src; enter entries expand the node.
+    auto n = w.node;
+    if (w.leave) {
+      if (n == src) {
+        return true;
+      }
+      continue;
+    }
+
+    if (visited[n->id()]) continue;
+    visited[n->id()] = true;
+    // Arrange to call leave(n) when all done with descendants.
+    stack.push_back(Work{n, true});
+    // Queue every input node that has not been visited yet.
+    auto nodes = n->in_nodes();
+    for (const auto node : nodes) {
+      if (!visited[node->id()]) {
+        stack.push_back(Work{node, false});
+      }
+    }
+  }
+  return false;
+}
+
+bool CanContractEdge(const SimpleEdge* edge,
+                     const std::unique_ptr<SimpleGraph>& graph) {
+  const auto src = edge->src();
+  const auto dst = edge->dst();
 
   // Can't contract edge if doing so would cause a cycle in the
   // graph. So, if there is a directed path from 'src' to 'dst', other
@@ -48,46 +269,38 @@ bool CanContractEdge(const tensorflow::Edge* edge,
   //   1. Get all nodes incoming to 'dst', excluding 'src'
   //   2. Reverse DFS from those nodes
   //   3. If reverse DFS reaches 'src' then we have a cycle
-  std::vector<tensorflow::Node*> dfs_start_nodes;
-  for (tensorflow::Node* node : dst->in_nodes()) {
+  std::vector<SimpleNode*> dfs_start_nodes;
+  for (SimpleNode* node : dst->in_nodes()) {
     if (node != src) {
       dfs_start_nodes.push_back(node);
     }
   }
 
-  bool is_cycle = false;
-  if (!dfs_start_nodes.empty()) {
-    tensorflow::ReverseDFSFrom(graph, dfs_start_nodes, {},
-                               [&is_cycle, src](tensorflow::Node* node) {
-                                 if (node == src) {
-                                   is_cycle = true;
-                                 }
-                               });
-  }
-
+  bool is_cycle = CheckCycles(graph, src, dfs_start_nodes);
   return !is_cycle;
 }
+}  // namespace
 
-void ContractEdge(tensorflow::Edge* edge, tensorflow::Graph* graph,
-                  std::vector<const tensorflow::Edge*>* remove_edges) {
+void ContractEdge(SimpleEdge* edge, SimpleGraph* graph,
+                  std::vector<const SimpleEdge*>* remove_edges) {
   // Transfer all inputs and outputs of 'dst' to 'src' except edges
   // connecting the two.
-  tensorflow::Node* src = edge->src();
-  tensorflow::Node* dst = edge->dst();
+  auto src = edge->src();
+  auto dst = edge->dst();
 
   // We can use '0' for input/output index because we don't need them
   // to be accurate for the way we are using the graph.
-  std::vector<const tensorflow::Edge*> in_edges(dst->in_edges().begin(),
-                                                dst->in_edges().end());
-  for (const tensorflow::Edge* in_edge : in_edges) {
+  std::vector<const SimpleEdge*> in_edges(dst->in_edges().begin(),
+                                          dst->in_edges().end());
+  for (const SimpleEdge* in_edge : in_edges) {
     if (in_edge->IsControlEdge()) {
       if (in_edge->src() != src) {
-        tensorflow::Edge* e = const_cast<tensorflow::Edge*>(in_edge);
+        SimpleEdge* e = const_cast<SimpleEdge*>(in_edge);
         graph->AddControlEdge(e->src(), src);
       }
     } else {
       if (in_edge->src() != src) {
-        tensorflow::Edge* e = const_cast<tensorflow::Edge*>(in_edge);
+        SimpleEdge* e = const_cast<SimpleEdge*>(in_edge);
         if (e->src() == graph->source_node()) {
           graph->AddEdge(e->src(), e->src_output(), src,
                          tensorflow::Graph::kControlSlot);
@@ -98,14 +311,14 @@ void ContractEdge(tensorflow::Edge* edge, tensorflow::Graph* graph,
     }
   }
 
-  std::vector<const tensorflow::Edge*> out_edges(dst->out_edges().begin(),
-                                                 dst->out_edges().end());
-  for (const tensorflow::Edge* out_edge : out_edges) {
+  std::vector<const SimpleEdge*> out_edges(dst->out_edges().begin(),
+                                           dst->out_edges().end());
+  for (const SimpleEdge* out_edge : out_edges) {
     if (out_edge->IsControlEdge()) {
-      tensorflow::Edge* e = const_cast<tensorflow::Edge*>(out_edge);
+      SimpleEdge* e = const_cast<SimpleEdge*>(out_edge);
       graph->AddControlEdge(src, e->dst());
     } else {
-      tensorflow::Edge* e = const_cast<tensorflow::Edge*>(out_edge);
+      SimpleEdge* e = const_cast<SimpleEdge*>(out_edge);
       if (e->dst() == graph->sink_node()) {
         VLOG(1) << " edge to sink node " << src->name() << " -> "
                 << e->dst()->name();
@@ -128,8 +341,6 @@ void ContractEdge(tensorflow::Edge* edge, tensorflow::Graph* graph,
   }
 }
 
-}  // namespace
-
 tensorflow::Status SegmentGraph(
     const tensorflow::GraphDef& gdef,
     const std::function<bool(const tensorflow::Node*)>& candidate_fn,
@@ -140,17 +351,22 @@ tensorflow::Status SegmentGraph(
   tensorflow::Graph graph(flib);
   TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph(
       tensorflow::GraphConstructorOptions(), gdef, &graph));
+  return SegmentGraph(&graph, candidate_fn, options, segments);
+}
 
-  // tensorflow::DumpGraph("Pre-Segment", &graph);
-
+tensorflow::Status SegmentGraph(
+    tensorflow::Graph* tf_graph,
+    const std::function<bool(const tensorflow::Node*)>& candidate_fn,
+    const SegmentOptions& options, SegmentNodesVector* segments) {
+  auto graph = std::unique_ptr<SimpleGraph>(new SimpleGraph(tf_graph));
   // Use a union-find to collect the nodes that belong to the same
-  // segment. A node value of nullptr indicates that the node is not a
-  // candidate for TRT.
-  std::vector<UnionFind<tensorflow::Node*>> node_segments;
-  for (int i = 0; i < graph.num_node_ids(); ++i) {
-    tensorflow::Node* node = graph.FindNodeId(i);
+  // segment. A node value of nullptr indicates that the node is not a candidate
+  // for TRT.
+  std::vector<UnionFind<SimpleNode*>> node_segments;
+  for (int i = 0; i < graph->num_node_ids(); ++i) {
+    SimpleNode* node = graph->FindNodeId(i);
     if (options.exclude_node_list.count(node->name()) != 0 ||
-        !candidate_fn(node)) {
+        !candidate_fn(node->tf_node())) {
       node = nullptr;
     }
     node_segments.emplace_back(node);
@@ -164,10 +380,16 @@ tensorflow::Status SegmentGraph(
   // a measure of how beneficial it is to include a given node in a
   // TRT subgraph then we can revisit this algorithm to take advantage
   // of that information.
-  std::vector<tensorflow::Node*> order;
-  tensorflow::GetPostOrder(graph, &order);
-
-  for (const tensorflow::Node* node : order) {
+  std::vector<tensorflow::Node*> tforder;
+  tensorflow::GetPostOrder(*tf_graph, &tforder);
+  // use postorder implementation from tensorflow and construct mirror in
+  // internal format
+  std::vector<SimpleNode*> order;
+  order.reserve(tforder.size());
+  for (const auto tfnode : tforder) {
+    order.push_back(graph->FindNodeId(tfnode->id()));
+  }
+  for (const SimpleNode* node : order) {
     // All output nodes of 'node' have been visited...
     VLOG(2) << "Trying node " << node->name() << " id=" << node->id();
 
@@ -181,8 +403,8 @@ tensorflow::Status SegmentGraph(
     // nodes. Iterate since combining two nodes may unblock other
     // combining.
     while (true) {
-      std::set<const tensorflow::Edge*> contract_edges;
-      for (const tensorflow::Edge* out_edge : node->out_edges()) {
+      std::set<const SimpleEdge*> contract_edges;
+      for (const SimpleEdge* out_edge : node->out_edges()) {
         VLOG(2) << "... out node " << out_edge->dst()->name() << " ( "
                 << out_edge->dst()->id() << " <- " << node->id() << " )";
         if (out_edge->IsControlEdge()) {
@@ -210,9 +432,9 @@ tensorflow::Status SegmentGraph(
       // Contract edges and collect the adjacent nodes into the same
       // segment/subgraph.
       while (!contract_edges.empty()) {
-        const tensorflow::Edge* contract_edge = *contract_edges.begin();
-        const tensorflow::Node* src = contract_edge->src();
-        const tensorflow::Node* dst = contract_edge->dst();
+        const SimpleEdge* contract_edge = *contract_edges.begin();
+        const SimpleNode* src = contract_edge->src();
+        const SimpleNode* dst = contract_edge->dst();
 
         VLOG(2) << "Merge " << src->name() << " <- " << dst->name() << " ("
                 << src->id() << " <- " << dst->id();
@@ -221,13 +443,13 @@ tensorflow::Status SegmentGraph(
         // Contracting the edge leaves disconnected graph edges.
         // Remove these from the graph and from 'contract_edges' so we
         // don't visit them again.
-        tensorflow::Edge* e = const_cast<tensorflow::Edge*>(contract_edge);
-        std::vector<const tensorflow::Edge*> remove_edges;
-        ContractEdge(e, &graph, &remove_edges);
+        SimpleEdge* e = const_cast<SimpleEdge*>(contract_edge);
+        std::vector<const SimpleEdge*> remove_edges;
+        ContractEdge(e, graph.get(), &remove_edges);
 
-        for (const tensorflow::Edge* r : remove_edges) {
+        for (const SimpleEdge* r : remove_edges) {
           contract_edges.erase(r);
-          graph.RemoveEdge(r);
+          graph->RemoveEdge(r);
         }
       }
     }
@@ -236,9 +458,27 @@ tensorflow::Status SegmentGraph(
   // Collect the segments/subgraphs. Each subgraph is represented by a
   // set of the names of the nodes in that subgraph.
   std::unordered_map<string, std::set<string>> sg_map;
+  std::unordered_map<string, std::set<string>> device_maps;
   for (auto& u : node_segments) {
     if ((u.Value() != nullptr) && (u.ParentValue() != nullptr)) {
       sg_map[u.ParentValue()->name()].insert(u.Value()->name());
+      auto tf_node = u.Value()->tf_node();
+      // has_assigned_device_name() is expected to return true
+      // when called from optimization pass. However, since graph
+      // is converted back and forth between graph and graphdef,
+      // assigned devices are demoted to requested devices. If the graph
+      // is passed directly to this module, assigned devices will be set.
+      if (tf_node->has_assigned_device_name()) {
+        device_maps[u.ParentValue()->name()].insert(
+            tf_node->assigned_device_name());
+      } else if (!tf_node->requested_device().empty()) {
+        device_maps[u.ParentValue()->name()].insert(
+            tf_node->requested_device());
+      } else {
+        VLOG(1) << "Node " << tf_node->name()
+                << " has no device assigned requested device is: "
+                << tf_node->requested_device();
+      }
     }
   }
 
@@ -260,10 +500,35 @@ tensorflow::Status SegmentGraph(
               << segment_node_names.size() << " nodes, dropping";
       continue;
     }
-
-    segments->emplace_back(segment_node_names);
+    // TODO(sami): Make segmenter placement aware once trtscopes are in place
+    const auto& dev_itr = device_maps.find(itr.first);
+    if (dev_itr == device_maps.end() || dev_itr->second.empty()) {
+      VLOG(1) << "No device assigned to segment " << segments->size();
+      segments->emplace_back(std::make_pair(segment_node_names, string()));
+    } else if (dev_itr->second.size() > 1) {
+      string s("Segment ");
+      StrAppend(&s, segments->size(), " has multiple devices attached: ");
+      for (const auto& dev : dev_itr->second) {
+        StrAppend(&s, dev, ", ");
+      }
+      LOG(WARNING) << s << " choosing " << *(dev_itr->second.begin());
+      segments->emplace_back(
+          std::make_pair(segment_node_names, *(dev_itr->second.begin())));
+    } else {
+      segments->emplace_back(
+          std::make_pair(segment_node_names, *(dev_itr->second.begin())));
+    }
+  }
+  if (VLOG_IS_ON(1)) {
+    for (const auto& d : device_maps) {
+      string s("Segment ");
+      StrAppend(&s, ": '", d.first, "' ");
+      for (const auto& dd : d.second) {
+        StrAppend(&s, dd, ", ");
+      }
+      VLOG(1) << "Devices " << s;
+    }
   }
-
   return tensorflow::Status::OK();
 }
 
diff --git a/tensorflow/contrib/tensorrt/segment/segment.h b/tensorflow/contrib/tensorrt/segment/segment.h
index 7e8685f44a8c8a..1568dd915344e6 100644
--- a/tensorflow/contrib/tensorrt/segment/segment.h
+++ b/tensorflow/contrib/tensorrt/segment/segment.h
@@ -29,7 +29,9 @@ namespace tensorflow {
 namespace tensorrt {
 namespace segment {
 
-using SegmentNodesVector = std::vector<std::set<string>>;
+// vector of segments, each entry pairs the set of node names in a segment
+// with a device name (in that order)
+using SegmentNodesVector = std::vector<std::pair<std::set<string>, string>>;
 
 struct SegmentOptions {
   // Segment must contain at least this many nodes.
@@ -51,6 +53,20 @@ tensorflow::Status SegmentGraph(
     const std::function<bool(const tensorflow::Node*)>& candidate_fn,
     const SegmentOptions& options, SegmentNodesVector* segments);
 
+// Get the subgraphs of a graph that can be handled by TensorRT.
+//
+// @param tf_graph tensorflow::Graph of the network
+// @param candidate_fn A function that returns true for a Node* if
+// that node can be handled by TensorRT.
+// @param segments Returns the TensorRT segments/subgraphs. Each entry
+// in the vector describes a subgraph by pairing the set of names of
+// all the nodes in that subgraph with a device name.
+// @return the status.
+tensorflow::Status SegmentGraph(
+    tensorflow::Graph* tf_graph,
+    const std::function<bool(const tensorflow::Node*)>& candidate_fn,
+    const SegmentOptions& options, SegmentNodesVector* segments);
+
 }  // namespace segment
 }  // namespace tensorrt
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/tensorrt/segment/segment_test.cc b/tensorflow/contrib/tensorrt/segment/segment_test.cc
index 6f7655fcabeec0..2de3923b06a8dd 100644
--- a/tensorflow/contrib/tensorrt/segment/segment_test.cc
+++ b/tensorflow/contrib/tensorrt/segment/segment_test.cc
@@ -34,7 +34,7 @@ class SegmentTest : public ::testing::Test {
   TF_Operation* Add(TF_Operation* l, TF_Operation* r, TF_Graph* graph,
                     TF_Status* s, const char* name);
 
-  std::function<bool(const Node*)> MakeCandidateFn(
+  std::function<bool(const tensorflow::Node*)> MakeCandidateFn(
       const std::set<string>& node_names);
 
  protected:
@@ -59,9 +59,9 @@ bool SegmentTest::GetGraphDef(TF_Graph* graph,
   return ret;
 }
 
-std::function<bool(const Node*)> SegmentTest::MakeCandidateFn(
+std::function<bool(const tensorflow::Node*)> SegmentTest::MakeCandidateFn(
     const std::set<string>& node_names) {
-  return [node_names](const Node* node) -> bool {
+  return [node_names](const tensorflow::Node* node) -> bool {
     return node_names.find(node->name()) != node_names.end();
   };
 }
@@ -164,7 +164,7 @@ TEST_F(SegmentTest, Simple) {
   ASSERT_EQ(segments.size(), 1);
   std::vector<string> expected{"add0", "add1", "add2", "add3", "add4"};
   for (const auto& ex : expected) {
-    EXPECT_TRUE(segments[0].find(ex) != segments[0].end())
+    EXPECT_TRUE(segments[0].first.find(ex) != segments[0].first.end())
         << "Missing expected node " << ex;
   }
   TF_DeleteGraph(graph);
@@ -277,13 +277,13 @@ TEST_F(SegmentTest, Multiple) {
 
   std::vector<string> expected0{"add0", "add1", "add2", "add3"};
   for (const auto& ex : expected0) {
-    EXPECT_TRUE(segments[0].find(ex) != segments[0].end())
+    EXPECT_TRUE(segments[0].first.find(ex) != segments[0].first.end())
         << "Missing expected node " << ex;
   }
 
   std::vector<string> expected1{"add6", "add8"};
   for (const auto& ex : expected1) {
-    EXPECT_TRUE(segments[1].find(ex) != segments[1].end())
+    EXPECT_TRUE(segments[1].first.find(ex) != segments[1].first.end())
         << "Missing expected node " << ex;
   }
   TF_DeleteGraph(graph);
@@ -347,13 +347,13 @@ TEST_F(SegmentTest, BigIfElse) {
 
   std::vector<string> expected0{"add3", "add4", "add5", "add6", "add7"};
   for (const auto& ex : expected0) {
-    EXPECT_TRUE(segments[0].find(ex) != segments[0].end())
+    EXPECT_TRUE(segments[0].first.find(ex) != segments[0].first.end())
         << "Missing expected node " << ex;
   }
 
   std::vector<string> expected1{"add0", "add1"};
   for (const auto& ex : expected1) {
-    EXPECT_TRUE(segments[1].find(ex) != segments[1].end())
+    EXPECT_TRUE(segments[1].first.find(ex) != segments[1].first.end())
         << "Missing expected node " << ex;
   }
   TF_DeleteGraph(graph);
diff --git a/tensorflow/contrib/tensorrt/shape_fn/trt_shfn.cc b/tensorflow/contrib/tensorrt/shape_fn/trt_shfn.cc
index 8b475177bc670d..f36495f6b69ecb 100644
--- a/tensorflow/contrib/tensorrt/shape_fn/trt_shfn.cc
+++ b/tensorflow/contrib/tensorrt/shape_fn/trt_shfn.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/contrib/tensorrt/shape_fn/trt_shfn.h"
+#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h"
 
 #include <string>
 #include <vector>
@@ -33,7 +34,8 @@ tensorflow::Status TRTEngineOpShapeInference(InferenceContext* context) {
   TF_RETURN_IF_ERROR(context->GetAttr("serialized_engine", &serialized_engine));
   nvinfer1::IRuntime* infer = nvinfer1::createInferRuntime(logger);
   nvinfer1::ICudaEngine* trt_engine = infer->deserializeCudaEngine(
-      serialized_engine.c_str(), serialized_engine.size(), nullptr);
+      serialized_engine.c_str(), serialized_engine.size(),
+      tensorrt::PluginFactoryTensorRT::GetInstance());
 
   int num_batch = -1;
   std::vector<::tensorflow::DataType> input_type;
diff --git a/tensorflow/contrib/tensorrt/test/test_tftrt.py b/tensorflow/contrib/tensorrt/test/test_tftrt.py
index ad01bedd8fa066..175ccd80068625 100644
--- a/tensorflow/contrib/tensorrt/test/test_tftrt.py
+++ b/tensorflow/contrib/tensorrt/test/test_tftrt.py
@@ -18,7 +18,9 @@
 from __future__ import division
 from __future__ import print_function
 
+import argparse
 import numpy as np
+
 # normally we should do import tensorflow as tf and then
 # tf.placeholder, tf.constant, tf.nn.conv2d etc but
 # it looks like internal builds don't like it so
@@ -26,6 +28,7 @@
 
 from tensorflow.contrib import tensorrt as trt
 from tensorflow.core.protobuf import config_pb2 as cpb2
+from tensorflow.core.protobuf import rewriter_config_pb2 as rwpb2
 from tensorflow.python.client import session as csess
 from tensorflow.python.framework import constant_op as cop
 from tensorflow.python.framework import dtypes as dtypes
@@ -59,9 +62,11 @@ def get_simple_graph_def():
   return g.as_graph_def()
 
 
-def run_graph(gdef, dumm_inp):
+def execute_graph(gdef, dumm_inp):
   """Run given graphdef once."""
+  print("executing")
   gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50)
+  sessconfig = cpb2.ConfigProto(gpu_options=gpu_options)
   ops.reset_default_graph()
   g = ops.Graph()
   with g.as_default():
@@ -69,15 +74,14 @@ def run_graph(gdef, dumm_inp):
         graph_def=gdef, return_elements=["input", "output"])
     inp = inp.outputs[0]
     out = out.outputs[0]
-  with csess.Session(
-      config=cpb2.ConfigProto(gpu_options=gpu_options), graph=g) as sess:
+  with csess.Session(config=sessconfig, graph=g) as sess:
     val = sess.run(out, {inp: dumm_inp})
   return val
 
 
 # Use real data that is representative of the inference dataset
 # for calibration. For this test script it is random data.
-def run_calibration(gdef, dumm_inp):
+def execute_calibration(gdef, dumm_inp):
   """Run given calibration graph multiple times."""
   gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50)
   ops.reset_default_graph()
@@ -96,7 +100,9 @@ def run_calibration(gdef, dumm_inp):
   return val
 
 
-if "__main__" in __name__:
+def user(run_graph=execute_graph, run_calibration=execute_calibration):
+  """Example function that converts a graph to TFTRT graph."""
+
   inp_dims = (100, 24, 24, 2)
   dummy_input = np.random.random_sample(inp_dims)
   orig_graph = get_simple_graph_def()  # use a frozen graph for inference
@@ -137,3 +143,51 @@ def run_calibration(gdef, dumm_inp):
   assert np.allclose(o1, o4)
   assert np.allclose(o1, o5)
   print("Pass")
+
+
+def auto():
+  """Run the conversion as an optimization pass."""
+  inp_dims = (100, 24, 24, 2)
+  dummy_input = np.random.random_sample(inp_dims)
+  orig_graph = get_simple_graph_def()
+  opt_config = rwpb2.RewriterConfig()
+  opt_config.optimizers.extend(["constfold", "layout"])
+  custom_op = opt_config.custom_optimizers.add()
+  custom_op.name = "TensorRTOptimizer"
+  custom_op.parameter_map["minimum_segment_size"].i = 3
+  custom_op.parameter_map["precision_mode"].s = "FP32"
+  custom_op.parameter_map["max_batch_size"].i = inp_dims[0]
+  custom_op.parameter_map["max_workspace_size_bytes"].i = 1 << 25
+  print(custom_op)
+  gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50)
+  graph_options = cpb2.GraphOptions(rewrite_options=opt_config)
+  sessconfig = cpb2.ConfigProto(
+      gpu_options=gpu_options, graph_options=graph_options)
+  print(sessconfig)
+  g = ops.Graph()
+  ops.reset_default_graph()
+  with g.as_default():
+    inp, out = importer.import_graph_def(
+        graph_def=orig_graph, return_elements=["input", "output"])
+    inp = inp.outputs[0]
+    out = out.outputs[0]
+    with csess.Session(config=sessconfig, graph=g) as sess:
+      val = sess.run(out, {inp: dummy_input})
+  print(val.shape)
+
+
+if "__main__" in __name__:
+  P = argparse.ArgumentParser(
+      prog="tftrt_test",
+      description="Example utilization of TensorFlow-TensorRT integration")
+  P.add_argument(
+      "--automatic",
+      "-a",
+      action="store_true",
+      help="Do TRT conversion automatically",
+      default=False)
+  flags, unparsed = P.parse_known_args()
+  if flags.automatic:
+    auto()
+  else:
+    user()
diff --git a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py
index d426e9f12c574c..0403b652d72877 100644
--- a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py
+++ b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py
@@ -44,8 +44,7 @@ def setUp(self):
     inp_dims = (100, 24, 24, 2)
     self._input = np.random.random_sample(inp_dims)
     self._original_graph = self.get_simple_graph_def()
-    self._gpu_options = cpb2.GPUOptions(
-        per_process_gpu_memory_fraction=0.50)
+    self._gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50)
     self._config = cpb2.ConfigProto(gpu_options=self._gpu_options)
     self._reference = self.run_graph(self._original_graph, self._input)
 
@@ -60,11 +59,7 @@ def get_simple_graph_def(self):
           name="weights",
           dtype=dtypes.float32)
       conv = nn.conv2d(
-          input=a,
-          filter=e,
-          strides=[1, 2, 2, 1],
-          padding="SAME",
-          name="conv")
+          input=a, filter=e, strides=[1, 2, 2, 1], padding="SAME", name="conv")
       b = cop.constant(
           [4., 1.5, 2., 3., 5., 7.], name="bias", dtype=dtypes.float32)
       t = nn.bias_add(conv, b, name="biasAdd")
@@ -85,8 +80,7 @@ def run_graph(self, gdef, dumm_inp):
       inp = inp.outputs[0]
       out = out.outputs[0]
     with self.test_session(
-        graph=g, config=self._config, use_gpu=True,
-        force_gpu=True) as sess:
+        graph=g, config=self._config, use_gpu=True, force_gpu=True) as sess:
       val = sess.run(out, {inp: dumm_inp})
     return val
 
@@ -104,15 +98,14 @@ def run_calibration(self, gdef, dumm_inp):
       # run over real calibration data here, we are mimicking a calibration
       # set of 30 different batches. Use as much calibration data as you want
     with self.test_session(
-        graph=g, config=self._config, use_gpu=True,
-        force_gpu=True) as sess:
+        graph=g, config=self._config, use_gpu=True, force_gpu=True) as sess:
       for _ in range(30):
         val = sess.run(out, {inp: dumm_inp})
     return val
 
   def get_trt_graph(self, mode):
     """Return trt converted graph."""
-    if mode in  ["FP32", "FP16", "INT8"]:
+    if mode in ["FP32", "FP16", "INT8"]:
       return trt.create_inference_graph(
           input_graph_def=self._original_graph,
           outputs=["output"],
@@ -120,7 +113,7 @@ def get_trt_graph(self, mode):
           max_workspace_size_bytes=1 << 25,
           precision_mode=mode,  # TRT Engine precision "FP32","FP16" or "INT8"
           minimum_segment_size=2  # minimum number of nodes in an engine
-          )
+      )
     return None
 
   def testFP32(self):
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_context.py b/tensorflow/contrib/tpu/python/tpu/tpu_context.py
index 5dd7bde2058fdf..5b9aeaa8797b92 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_context.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_context.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ===================================================================
-"""TPU system metdata and associated tooling."""
+"""TPU system metadata and associated tooling."""
 
 from __future__ import absolute_import
 from __future__ import division
diff --git a/tensorflow/contrib/verbs/README.md b/tensorflow/contrib/verbs/README.md
index 4b6104a8b4d542..3137bfd03e3faa 100644
--- a/tensorflow/contrib/verbs/README.md
+++ b/tensorflow/contrib/verbs/README.md
@@ -159,7 +159,7 @@ When the receiver receives the RDMA write, it will locate the relevant **RdmaTen
 	* step_id - Step ID.
 	* request_index - Request index.
 	* remote_addr/rkey - Address/rkey of the reallocated result/proxy tensor.
-* **RDMA_MESSAGE_ERROR_STATUS** - (sender ==> receiver) Notify the receiver that an error had occured on the sender side, so it can propagate it to the upper levels.
+* **RDMA_MESSAGE_ERROR_STATUS** - (sender ==> receiver) Notify the receiver that an error had occurred on the sender side, so it can propagate it to the upper levels.
 	* type - The message type.
 	* name (name_size) - Name of the requested tensor.
 	* step_id - Step ID.
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 2dd8e6fb3155ad..3286f856db7345 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -2762,6 +2762,7 @@ cc_library(
     ],
     visibility = [
         "//tensorflow/compiler:__subpackages__",
+        "//tensorflow/core/kernels:__subpackages__",
         "//tensorflow/core/profiler:__subpackages__",
     ],
     deps = [":lib_internal"],
@@ -3683,7 +3684,11 @@ tf_cuda_only_cc_test(
         ":test",
         ":test_main",
         "//third_party/eigen3",
-    ],
+    ] + if_mkl(
+        [
+            "//third_party/mkl:intel_binary_blob",
+        ],
+    ),
 )
 
 tf_cc_test_gpu(
diff --git a/tensorflow/core/api_def/base_api/api_def_RegexFullMatch.pbtxt b/tensorflow/core/api_def/base_api/api_def_RegexFullMatch.pbtxt
new file mode 100644
index 00000000000000..8cef243aee3a9b
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RegexFullMatch.pbtxt
@@ -0,0 +1,30 @@
+op {
+  graph_op_name: "RegexFullMatch"
+  in_arg {
+    name: "input"
+    description: <<END
+A string tensor of the text to be processed.
+END
+  }
+  in_arg {
+    name: "pattern"
+    description: <<END
+A scalar string tensor of the regular expression to match the input.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+A bool tensor with the same shape as `input`.
+END
+  }
+  summary: "Check if the input matches the regex pattern."
+  description: <<END
+The input is a string tensor of any shape. The pattern is a scalar
+string tensor which is applied to every element of the input tensor.
+The boolean values (True or False) of the output tensor indicate
+if the input matches the regex pattern provided.
+
+The pattern follows the re2 syntax (https://github.com/google/re2/wiki/Syntax)
+END
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_RegexFullMatch.pbtxt b/tensorflow/core/api_def/python_api/api_def_RegexFullMatch.pbtxt
new file mode 100644
index 00000000000000..ec310c8aebdee9
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_RegexFullMatch.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "RegexFullMatch"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/common_runtime/broadcaster.cc b/tensorflow/core/common_runtime/broadcaster.cc
index 9ceff866787f26..9646a0856ed60b 100644
--- a/tensorflow/core/common_runtime/broadcaster.cc
+++ b/tensorflow/core/common_runtime/broadcaster.cc
@@ -80,7 +80,7 @@ void Broadcaster::Run(StatusCallback done) {
 // continuing to occupy its current position.  Hence we calculate as
 // though each device's rank is actually r+1, then subtract 1 again to
 // get the descendent ranks.  If the source is not rank 0 then its
-// decendents include both {0,1} and the descendents of its current
+// descendants include both {0,1} and the descendents of its current
 // position.  Where a non-0-rank source is a descendent of another
 // device, no send to it is necessary.
 
@@ -115,7 +115,7 @@ void Broadcaster::TreeSendTo(const CollectiveParams& cp,
   DCHECK_NE(successor_rank, my_rank);
   if (cp.is_source && source_rank != 0) {
     // The source sends to rank 0,1 in addition to its positional
-    // decendents.
+    // descendants.
     if (cp.group.group_size > 1) {
       targets->push_back(0);
     }
diff --git a/tensorflow/core/common_runtime/buf_rendezvous.h b/tensorflow/core/common_runtime/buf_rendezvous.h
index e94e88b323ec74..9eb9f060f6bac2 100644
--- a/tensorflow/core/common_runtime/buf_rendezvous.h
+++ b/tensorflow/core/common_runtime/buf_rendezvous.h
@@ -79,7 +79,7 @@ class BufRendezvous {
                   const ProducerCallback& done);
 
   // Called to request access to a Tensor value corresponding to key.
-  // Consumer is provide with a Hook as soon as availble.
+  // Consumer is provided with a Hook as soon as available.
   void ConsumeBuf(const string& key, const ConsumerCallback& done);
 
   // Consumer must call this function when it's done reading the Hook provided
diff --git a/tensorflow/core/common_runtime/ring_reducer.cc b/tensorflow/core/common_runtime/ring_reducer.cc
index 6b072f3cc9cc52..f8428f2fde3464 100644
--- a/tensorflow/core/common_runtime/ring_reducer.cc
+++ b/tensorflow/core/common_runtime/ring_reducer.cc
@@ -283,7 +283,7 @@ void RingReducer::InitRingField(RingField* rf, int chunk_idx, int subdiv_idx,
   // Note on field indexing: There are group_size_ devices in the
   // instance, implying the same number of chunks per tensor, where a
   // chunk is the unit of data transferred in a time step.  However, if
-  // a device can simultaenously send data by 2 or more independent
+  // a device can simultaneously send data by 2 or more independent
   // channels we can speed up the transfer by subdividing chunks and
   // processing multiple subdivisions at once.  So the actual number
   // of RingFields is group_size_ * num_subdivs_.
diff --git a/tensorflow/core/common_runtime/scoped_allocator_mgr.cc b/tensorflow/core/common_runtime/scoped_allocator_mgr.cc
index be79cc4507124f..c045596a69b60d 100644
--- a/tensorflow/core/common_runtime/scoped_allocator_mgr.cc
+++ b/tensorflow/core/common_runtime/scoped_allocator_mgr.cc
@@ -104,7 +104,7 @@ ScopedAllocatorContainer::~ScopedAllocatorContainer() {
   // contents deleted via Drop.  When when a step ends early
   // (e.g. through abnormal termination) we need to clean up
   // explicitly.  So long as graph execution of the associated step has
-  // completey terminated this should be safe.
+  // completely terminated this should be safe.
   for (auto& it : allocators_) {
     if (it.second.field_index == ScopedAllocator::kBackingIndex) {
       delete it.second.scoped_allocator;
diff --git a/tensorflow/core/debug/debug_io_utils.cc b/tensorflow/core/debug/debug_io_utils.cc
index 4998a7acfe23b4..03a011f79e1871 100644
--- a/tensorflow/core/debug/debug_io_utils.cc
+++ b/tensorflow/core/debug/debug_io_utils.cc
@@ -52,7 +52,7 @@ namespace {
 
 // Creates an Event proto representing a chunk of a Tensor. This method only
 // populates the field of the Event proto that represent the envelope
-// informaion (e.g., timestmap, device_name, num_chunks, chunk_index, dtype,
+// information (e.g., timestamp, device_name, num_chunks, chunk_index, dtype,
 // shape). It does not set the value.tensor field, which should be set by the
 // caller separately.
 Event PrepareChunkEventProto(const DebugNodeKey& debug_node_key,
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.cc b/tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.cc
index 18998bbccbb44d..b9f21ea211bdbd 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.cc
@@ -115,7 +115,7 @@ class GrpcWorkerCache : public WorkerCachePartial {
 
   size_t AssignWorkerToThread(const string& target) {
     // Round-robin target assignment, but keeps the same target on the same
-    // polling thread always, as this is important for gRPC performace
+    // polling thread always, as this is important for gRPC performance
     mutex_lock lock(assignment_mu_);
     auto it = target_assignments_.find(target);
     if (it == target_assignments_.end()) {
diff --git a/tensorflow/core/example/example.proto b/tensorflow/core/example/example.proto
index b2b723278b046b..e7142a4ef97132 100644
--- a/tensorflow/core/example/example.proto
+++ b/tensorflow/core/example/example.proto
@@ -7,7 +7,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "ExampleProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.example";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/example";
 package tensorflow;
 
 // An Example is a mostly-normalized data format for storing data for
diff --git a/tensorflow/core/example/example_parser_configuration.proto b/tensorflow/core/example/example_parser_configuration.proto
index 15846c0e302960..b2c115d80e35ec 100644
--- a/tensorflow/core/example/example_parser_configuration.proto
+++ b/tensorflow/core/example/example_parser_configuration.proto
@@ -6,6 +6,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "ExampleParserConfigurationProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.example";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/example";
 package tensorflow;
 
 import "tensorflow/core/framework/tensor_shape.proto";
diff --git a/tensorflow/core/example/feature.proto b/tensorflow/core/example/feature.proto
index da3dc59a120409..6d81974aac33b7 100644
--- a/tensorflow/core/example/feature.proto
+++ b/tensorflow/core/example/feature.proto
@@ -58,7 +58,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "FeatureProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.example";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/example";
 package tensorflow;
 
 // Containers to hold repeated fundamental values.
diff --git a/tensorflow/core/framework/allocation_description.proto b/tensorflow/core/framework/allocation_description.proto
index bb1037c2dfe46a..64133b05e18f90 100644
--- a/tensorflow/core/framework/allocation_description.proto
+++ b/tensorflow/core/framework/allocation_description.proto
@@ -5,6 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "AllocationDescriptionProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 
 message AllocationDescription {
   // Total number of bytes requested
diff --git a/tensorflow/core/framework/api_def.proto b/tensorflow/core/framework/api_def.proto
index e878ab620bfe3c..3f8dd272e7798a 100644
--- a/tensorflow/core/framework/api_def.proto
+++ b/tensorflow/core/framework/api_def.proto
@@ -8,6 +8,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "ApiDefProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 import "tensorflow/core/framework/attr_value.proto";
 
 // Used to specify and override the default API & behavior in the
diff --git a/tensorflow/core/framework/attr_value.proto b/tensorflow/core/framework/attr_value.proto
index 62f0a9050fb82c..054e3ec97cc144 100644
--- a/tensorflow/core/framework/attr_value.proto
+++ b/tensorflow/core/framework/attr_value.proto
@@ -5,7 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "AttrValueProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 import "tensorflow/core/framework/tensor.proto";
 import "tensorflow/core/framework/tensor_shape.proto";
 import "tensorflow/core/framework/types.proto";
diff --git a/tensorflow/core/framework/cost_graph.proto b/tensorflow/core/framework/cost_graph.proto
index 7885b0171a55a4..19d765cd32e05a 100644
--- a/tensorflow/core/framework/cost_graph.proto
+++ b/tensorflow/core/framework/cost_graph.proto
@@ -5,7 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "CostGraphProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 import "tensorflow/core/framework/tensor_shape.proto";
 import "tensorflow/core/framework/types.proto";
 
diff --git a/tensorflow/core/framework/device_attributes.proto b/tensorflow/core/framework/device_attributes.proto
index 0b3c0d5bdf9f3d..44236ca9798abc 100644
--- a/tensorflow/core/framework/device_attributes.proto
+++ b/tensorflow/core/framework/device_attributes.proto
@@ -5,6 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "DeviceAttributesProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 
 message InterconnectLink {
   int32 device_id = 1;
diff --git a/tensorflow/core/framework/function.proto b/tensorflow/core/framework/function.proto
index 72e3c438314bd8..e69d3938d93d10 100644
--- a/tensorflow/core/framework/function.proto
+++ b/tensorflow/core/framework/function.proto
@@ -5,7 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "FunctionProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 import "tensorflow/core/framework/attr_value.proto";
 import "tensorflow/core/framework/node_def.proto";
 import "tensorflow/core/framework/op_def.proto";
diff --git a/tensorflow/core/framework/graph.proto b/tensorflow/core/framework/graph.proto
index 7d6e16d5c129a0..76d358971d7d33 100644
--- a/tensorflow/core/framework/graph.proto
+++ b/tensorflow/core/framework/graph.proto
@@ -5,7 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "GraphProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 import "tensorflow/core/framework/node_def.proto";
 import "tensorflow/core/framework/function.proto";
 import "tensorflow/core/framework/versions.proto";
diff --git a/tensorflow/core/framework/graph_transfer_info.proto b/tensorflow/core/framework/graph_transfer_info.proto
index 41dd54d78c0395..232297d460dbe8 100644
--- a/tensorflow/core/framework/graph_transfer_info.proto
+++ b/tensorflow/core/framework/graph_transfer_info.proto
@@ -5,7 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "GraphTransferInfoProto";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 import "tensorflow/core/framework/types.proto";
 
 message GraphTransferNodeInput {
diff --git a/tensorflow/core/framework/iterator.proto b/tensorflow/core/framework/iterator.proto
index 7e5f5ea2e0c2f9..f015342e13313e 100644
--- a/tensorflow/core/framework/iterator.proto
+++ b/tensorflow/core/framework/iterator.proto
@@ -5,6 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "IteratorProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.util";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 
 // Protocol buffer representing the metadata for an iterator's state stored
 // as a Variant tensor.
diff --git a/tensorflow/core/framework/kernel_def.proto b/tensorflow/core/framework/kernel_def.proto
index 65e9ef04a06651..a17b9c8492b68c 100644
--- a/tensorflow/core/framework/kernel_def.proto
+++ b/tensorflow/core/framework/kernel_def.proto
@@ -5,7 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "KernelDefProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 import "tensorflow/core/framework/attr_value.proto";
 
 message KernelDef {
diff --git a/tensorflow/core/framework/log_memory.proto b/tensorflow/core/framework/log_memory.proto
index d1e126330d20b6..7f37eadc3bed0a 100644
--- a/tensorflow/core/framework/log_memory.proto
+++ b/tensorflow/core/framework/log_memory.proto
@@ -5,7 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "LogMemoryProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 import "tensorflow/core/framework/tensor_description.proto";
 
 message MemoryLogStep {
diff --git a/tensorflow/core/framework/node_def.proto b/tensorflow/core/framework/node_def.proto
index 8fcee32e298661..0a095f903f9f6b 100644
--- a/tensorflow/core/framework/node_def.proto
+++ b/tensorflow/core/framework/node_def.proto
@@ -5,7 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "NodeProto";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 import "tensorflow/core/framework/attr_value.proto";
 
 message NodeDef {
diff --git a/tensorflow/core/framework/op_def.proto b/tensorflow/core/framework/op_def.proto
index ca0e5e7133af61..aea2d2bb09a2c2 100644
--- a/tensorflow/core/framework/op_def.proto
+++ b/tensorflow/core/framework/op_def.proto
@@ -5,7 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "OpDefProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 import "tensorflow/core/framework/attr_value.proto";
 import "tensorflow/core/framework/types.proto";
 
diff --git a/tensorflow/core/framework/op_gen_lib.h b/tensorflow/core/framework/op_gen_lib.h
index ff38e4b22141a7..533dd64805c679 100644
--- a/tensorflow/core/framework/op_gen_lib.h
+++ b/tensorflow/core/framework/op_gen_lib.h
@@ -59,14 +59,14 @@ class ApiDefMap {
   // You can call this method multiple times to load multiple
   // sets of files. Api definitions are merged if the same
   // op definition is loaded multiple times. Later-loaded
-  // definitions take precedense.
+  // definitions take precedence.
   // ApiDefs loaded from files must contain a subset of ops defined
   // in the OpList passed to the constructor.
   Status LoadFileList(Env* env, const std::vector<string>& filenames);
 
   // Load a single file. Api definitions are merged if the same
   // op definition is loaded multiple times. Later-loaded
-  // definitions take precedense.
+  // definitions take precedence.
   // ApiDefs loaded from file must contain a subset of ops defined
   // in the OpList passed to the constructor.
   Status LoadFile(Env* env, const string& filename);
diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h
index 67943377b9f5cd..f577664709c064 100644
--- a/tensorflow/core/framework/op_kernel.h
+++ b/tensorflow/core/framework/op_kernel.h
@@ -534,7 +534,7 @@ class OpKernelContext {
     Rendezvous* rendezvous = nullptr;
 
     // Mechanism for executing a collective op that needs to coordinate
-    // with parallel instances runing on other devices.
+    // with parallel instances running on other devices.
     CollectiveExecutor* collective_executor = nullptr;
 
     // The session state for this op.
diff --git a/tensorflow/core/framework/reader_base.proto b/tensorflow/core/framework/reader_base.proto
index 1b8b965ee105fb..9e187cfa791f69 100644
--- a/tensorflow/core/framework/reader_base.proto
+++ b/tensorflow/core/framework/reader_base.proto
@@ -5,6 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "ReaderBaseProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 
 // For serializing and restoring the state of ReaderBase, see
 // reader_base.h for details.
diff --git a/tensorflow/core/framework/remote_fused_graph_execute_info.proto b/tensorflow/core/framework/remote_fused_graph_execute_info.proto
index 946da40d0e315a..eb689ec1e68ec8 100644
--- a/tensorflow/core/framework/remote_fused_graph_execute_info.proto
+++ b/tensorflow/core/framework/remote_fused_graph_execute_info.proto
@@ -5,7 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "RemoteFusedGraphExecuteInfoProto";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
-
+// Add go_package externally.
 import "tensorflow/core/framework/graph.proto";
 import "tensorflow/core/framework/tensor_shape.proto";
 import "tensorflow/core/framework/types.proto";
diff --git a/tensorflow/core/framework/resource_handle.proto b/tensorflow/core/framework/resource_handle.proto
index b1921337f5fd0b..a54d3d906ca985 100644
--- a/tensorflow/core/framework/resource_handle.proto
+++ b/tensorflow/core/framework/resource_handle.proto
@@ -5,6 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "ResourceHandle";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 
 // Protocol buffer representing a handle to a tensorflow resource. Handles are
 // not valid across executions, but can be serialized back and forth from within
diff --git a/tensorflow/core/framework/step_stats.proto b/tensorflow/core/framework/step_stats.proto
index 65c8089d51141b..d98999cb54bd84 100644
--- a/tensorflow/core/framework/step_stats.proto
+++ b/tensorflow/core/framework/step_stats.proto
@@ -5,7 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "StepStatsProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 import "tensorflow/core/framework/allocation_description.proto";
 import "tensorflow/core/framework/tensor_description.proto";
 
diff --git a/tensorflow/core/framework/summary.proto b/tensorflow/core/framework/summary.proto
index 55879f87831eb9..532e4fcd87b78e 100644
--- a/tensorflow/core/framework/summary.proto
+++ b/tensorflow/core/framework/summary.proto
@@ -5,7 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "SummaryProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 import "tensorflow/core/framework/tensor.proto";
 
 // Metadata associated with a series of Summary data
diff --git a/tensorflow/core/framework/tensor.proto b/tensorflow/core/framework/tensor.proto
index abbf16e8103326..55921af1d0f7ab 100644
--- a/tensorflow/core/framework/tensor.proto
+++ b/tensorflow/core/framework/tensor.proto
@@ -5,7 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "TensorProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 import "tensorflow/core/framework/resource_handle.proto";
 import "tensorflow/core/framework/tensor_shape.proto";
 import "tensorflow/core/framework/types.proto";
diff --git a/tensorflow/core/framework/tensor_description.proto b/tensorflow/core/framework/tensor_description.proto
index 6ac3c1b8810878..4c23c7e6205ada 100644
--- a/tensorflow/core/framework/tensor_description.proto
+++ b/tensorflow/core/framework/tensor_description.proto
@@ -5,7 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "TensorDescriptionProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 import "tensorflow/core/framework/types.proto";
 import "tensorflow/core/framework/tensor_shape.proto";
 import "tensorflow/core/framework/allocation_description.proto";
diff --git a/tensorflow/core/framework/tensor_shape.proto b/tensorflow/core/framework/tensor_shape.proto
index 1ec3c5323c2c73..286156a0123303 100644
--- a/tensorflow/core/framework/tensor_shape.proto
+++ b/tensorflow/core/framework/tensor_shape.proto
@@ -5,6 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "TensorShapeProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 
 package tensorflow;
 
diff --git a/tensorflow/core/framework/tensor_slice.proto b/tensorflow/core/framework/tensor_slice.proto
index 24b01661dc4691..a5c366ed6061f3 100644
--- a/tensorflow/core/framework/tensor_slice.proto
+++ b/tensorflow/core/framework/tensor_slice.proto
@@ -5,6 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "TensorSliceProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 
 package tensorflow;
 
diff --git a/tensorflow/core/framework/types.proto b/tensorflow/core/framework/types.proto
index e003fd00106fba..03835d1b923d4f 100644
--- a/tensorflow/core/framework/types.proto
+++ b/tensorflow/core/framework/types.proto
@@ -5,6 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "TypesProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 
 // LINT.IfChange
 enum DataType {
diff --git a/tensorflow/core/framework/variable.proto b/tensorflow/core/framework/variable.proto
index e0df01cc9b7758..93ae423babb937 100644
--- a/tensorflow/core/framework/variable.proto
+++ b/tensorflow/core/framework/variable.proto
@@ -5,6 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "VariableProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 
 // Protocol buffer representing a Variable.
 message VariableDef {
diff --git a/tensorflow/core/framework/versions.proto b/tensorflow/core/framework/versions.proto
index 7d5e58ae7d4230..dd2ec55238728f 100644
--- a/tensorflow/core/framework/versions.proto
+++ b/tensorflow/core/framework/versions.proto
@@ -5,6 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "VersionsProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
 
 // Version information for a piece of serialized data
 //
diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc
index 5e2a465e22c7cb..029cdcf94af01d 100644
--- a/tensorflow/core/graph/mkl_layout_pass_test.cc
+++ b/tensorflow/core/graph/mkl_layout_pass_test.cc
@@ -2022,6 +2022,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Positive) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A', 'B']}"
       "node { name: 'D' op: 'Input'}"
       "node { name: 'E' op: 'BiasAdd'"
@@ -2051,6 +2052,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_NoAddBias) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A', 'B']}");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Input);C(_MklConv2D);DMT/_0(Const);DMT/_1(Const)|"
@@ -2069,6 +2071,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_Dataflow1) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A', 'B']}"
       "node { name: 'D' op: 'Input'}"
       "node { name: 'E' op: 'Input'}"
@@ -2095,6 +2098,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_Dataflow2) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A', 'B']}"
       "node { name: 'D' op: 'Input'}"
       "node { name: 'E' op: 'Input'}"
@@ -2125,6 +2129,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_AttrMismatch) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A', 'B']}"
       "node { name: 'D' op: 'Input'}"
       "node { name: 'E' op: 'BiasAdd'"
@@ -2151,6 +2156,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Positive) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A', 'B', 'C'] }"
       "node { name: 'E' op: 'BiasAddGrad'"
       " attr { key: 'T'                value { type: DT_FLOAT } }"
@@ -2178,6 +2184,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Negative1) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A', 'B', 'C'] }"
       "node { name: 'E' op: 'BiasAddGrad'"
       " attr { key: 'T'                value { type: DT_FLOAT } }"
@@ -2204,6 +2211,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Negative2) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A', 'B', 'C'] }"
       "node { name: 'E' op: 'BiasAddGrad'"
       " attr { key: 'T'                value { type: DT_FLOAT } }"
@@ -2233,6 +2241,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackpropFilterFusion_Negative3) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
       "node { name: 'E' op: 'Zeta'"
       " attr {key: 'T'                 value { type: DT_FLOAT } }"
@@ -2272,6 +2281,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_ConvBpropInput_FilterFwd) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A', 'B']}"
       "node { name: 'D' op: 'Input'}"
       "node { name: 'E' op: 'BiasAdd'"
@@ -2289,6 +2299,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_ConvBpropInput_FilterFwd) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['F', 'B', 'E']}"
       "node { name: 'Z' op: 'Zeta'"
       " attr {key: 'T'                 value { type: DT_FLOAT } }"
@@ -2319,6 +2330,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Basic) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A', 'B']}"
       "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['B', 'C'] }");
@@ -2341,6 +2353,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Positive1) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A', 'B']}"
       "node { name: 'D' op: 'Conv2D'"
       " attr { key: 'T'                value { type: DT_FLOAT } }"
@@ -2348,6 +2361,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Positive1) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A', 'C']}"
       "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['C', 'D'] }");
@@ -2370,6 +2384,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Negative_UnsupportedType) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A', 'B']}"
       "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_HALF } }"
       " input: ['B', 'C'] }");
@@ -2389,6 +2404,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradFilter_Positive) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A', 'B', 'C']}"
       "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'D'] }");
@@ -2411,6 +2427,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradInput_Positive) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['B', 'A', 'C']}"
       "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'D'] }");
@@ -2477,6 +2494,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_BiasAddGrad_Positive2) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A', 'B', 'M', 'N']}"
       "node { name: 'D' op: 'Zeta'"
       " attr {key: 'T'                 value { type: DT_FLOAT } }"
@@ -2529,6 +2547,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_Mkl) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A', 'B']}"
       "node { name: 'F' op: 'Conv2D'"
       " attr { key: 'T'                value { type: DT_FLOAT } }"
@@ -2536,6 +2555,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_Mkl) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['C', 'D']}"
       "node { name: 'G' op: 'Const' "
       " attr { key: 'dtype' value { type: DT_INT32 } }"
@@ -2572,6 +2592,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_MixedMkl) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A', 'B']}"
       "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['C', 'D']}"
@@ -2634,6 +2655,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_Mkl) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A', 'B']}"
       "node { name: 'F' op: 'Conv2D'"
       " attr { key: 'T'                value { type: DT_FLOAT } }"
@@ -2641,6 +2663,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_Mkl) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['C', 'D']}"
       "node { name: 'G' op: 'Const' "
       " attr { key: 'dtype' value { type: DT_INT32 } }"
@@ -2678,6 +2701,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_MixedMkl) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A', 'B']}"
       "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['C', 'D']}"
@@ -3274,6 +3298,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_DeviceTest) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A', 'B']}"
       "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['B', 'C'] }",
@@ -3296,6 +3321,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_DeviceTest) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
       "node { name: 'E' op: 'Zeta'"
       " attr {key: 'T'                 value { type: DT_FLOAT } }"
@@ -3323,6 +3349,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradFilter_DeviceTest) {
       " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A', 'B', 'C']}"
       "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'D'] }",
diff --git a/tensorflow/core/graph/while_context.h b/tensorflow/core/graph/while_context.h
index 5944e368979ce0..2a83eb7bd8eb94 100644
--- a/tensorflow/core/graph/while_context.h
+++ b/tensorflow/core/graph/while_context.h
@@ -31,7 +31,7 @@ namespace tensorflow {
 // future to support these features.
 //
 // TODO(skyewm): de/serialize in MetaGraphDef so imported while loops will be
-// differentiable. Figure out backwards compatability story.
+// differentiable. Figure out backwards compatibility story.
 class WhileContext {
  public:
   WhileContext(StringPiece frame_name, std::vector<Node*> enter_nodes,
diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc
index 4941fb2b38b8fa..203f7b09e3ad58 100644
--- a/tensorflow/core/grappler/costs/graph_properties.cc
+++ b/tensorflow/core/grappler/costs/graph_properties.cc
@@ -1082,7 +1082,7 @@ Status GraphProperties::PropagateShapes(
     const std::unordered_map<const NodeDef*, const NodeDef*>& resource_handles,
     int num_loops) const {
   // Limit the number of iterations to prevent infinite loops in the presence of
-  // incorrect shape functions. The algoritm should converge in at most
+  // incorrect shape functions. The algorithm should converge in at most
   // num_nested_loops^2 * max_rank. We approximate max_rank with the constant 4.
   // The same applies to resources.
   VLOG(1) << "Propagating " << new_shapes->size() << " new shapes through "
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.h b/tensorflow/core/grappler/costs/virtual_scheduler.h
index 67bf1e6980e550..34d48819ac25ed 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.h
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.h
@@ -328,7 +328,7 @@ class VirtualScheduler {
   Costs graph_costs_;                   // Graph cost.
   std::map<string, Costs> op_to_cost_;  // Per-op cost.
 
-  // Auxilliary data structures for constructing NodeState and DeviceState.
+  // Auxiliary data structures for constructing NodeState and DeviceState.
   GraphProperties graph_properties_;
   Cluster* cluster_;  // Not owned.
 
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index 87ab4608627e7b..e08ab1eb673e12 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -2183,7 +2183,7 @@ Status LayoutOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
 
   TuningConfig config;
   config.no_gemm = true;
-  // TODO(yaozhang): Enable tuning with various TuningConfig choices wtih
+  // TODO(yaozhang): Enable tuning with various TuningConfig choices with
   // the measurement-based estimator.
   status = Tune(item, graph_properties, config, output);
   if (!status.ok()) {
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index b2b631a2225111..5948f8d39f9cd2 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -4248,6 +4248,7 @@ cc_library(
         ":as_string_op",
         ":base64_ops",
         ":reduce_join_op",
+        ":regex_full_match_op",
         ":regex_replace_op",
         ":string_join_op",
         ":string_split_op",
@@ -4284,6 +4285,12 @@ tf_kernel_library(
     deps = STRING_DEPS,
 )
 
+tf_kernel_library(
+    name = "regex_full_match_op",
+    prefix = "regex_full_match_op",
+    deps = STRING_DEPS + ["@com_googlesource_code_re2//:re2"],  # kernel includes re2/re2.h
+)
+
 tf_kernel_library(
     name = "regex_replace_op",
     prefix = "regex_replace_op",
@@ -5174,6 +5181,7 @@ filegroup(
             "debug_ops.*",
             "mutex_ops.*",
             "batch_kernels.*",
+            "regex_full_match_op.cc",
             "regex_replace_op.cc",
         ],
     ),
diff --git a/tensorflow/core/kernels/batch_matmul_op_impl.h b/tensorflow/core/kernels/batch_matmul_op_impl.h
index a1c03f99181a6c..475bda848db4a7 100644
--- a/tensorflow/core/kernels/batch_matmul_op_impl.h
+++ b/tensorflow/core/kernels/batch_matmul_op_impl.h
@@ -329,6 +329,8 @@ struct LaunchBatchMatMul<GPUDevice, Scalar> {
       c_ptrs.push_back(&c_device_memory.back());
     }
 
+    typedef Scalar Coefficient;
+
     // Cublas does
     // C = A x B
     // where A, B and C are assumed to be in column major.
@@ -352,9 +354,9 @@ struct LaunchBatchMatMul<GPUDevice, Scalar> {
         bool blas_launch_status =
             stream
                 ->ThenBlasGemv(gemv_trans_a, adj_x ? m : k, adj_x ? k : m,
-                               static_cast<Scalar>(1.0), *(a_ptrs[0]),
+                               static_cast<Coefficient>(1.0), *(a_ptrs[0]),
                                adj_x ? m : k, *(b_ptrs[0]), 1,
-                               static_cast<Scalar>(0.0), c_ptrs[0], 1)
+                               static_cast<Coefficient>(0.0), c_ptrs[0], 1)
                 .ok();
         if (!blas_launch_status) {
           context->SetStatus(errors::Internal(
@@ -366,9 +368,9 @@ struct LaunchBatchMatMul<GPUDevice, Scalar> {
         bool blas_launch_status =
             stream
                 ->ThenBlasGemm(blas_transpose_b, blas_transpose_a, n, m, k,
-                               static_cast<Scalar>(1.0), *(b_ptrs[0]),
+                               static_cast<Coefficient>(1.0), *(b_ptrs[0]),
                                adj_y ? k : n, *(a_ptrs[0]), adj_x ? m : k,
-                               static_cast<Scalar>(0.0), c_ptrs[0], n)
+                               static_cast<Coefficient>(0.0), c_ptrs[0], n)
                 .ok();
         if (!blas_launch_status) {
           context->SetStatus(errors::Internal(
@@ -383,8 +385,8 @@ struct LaunchBatchMatMul<GPUDevice, Scalar> {
           stream
               ->ThenBlasGemmBatchedWithScratch(
                   blas_transpose_b, blas_transpose_a, n, m, k,
-                  static_cast<Scalar>(1.0), b_ptrs, adj_y ? k : n, a_ptrs,
-                  adj_x ? m : k, static_cast<Scalar>(0.0), c_ptrs, n,
+                  static_cast<Coefficient>(1.0), b_ptrs, adj_y ? k : n, a_ptrs,
+                  adj_x ? m : k, static_cast<Coefficient>(0.0), c_ptrs, n,
                   batch_size, &scratch_allocator)
               .ok();
       if (!blas_launch_status) {
@@ -398,6 +400,98 @@ struct LaunchBatchMatMul<GPUDevice, Scalar> {
   }
 };
 
+template <>
+struct LaunchBatchMatMul<GPUDevice, Eigen::half> {  // fp16 GPU specialization
+  static void Launch(OpKernelContext* context, const Tensor& in_x,
+                     const Tensor& in_y, bool adj_x, bool adj_y, Tensor* out) {
+    typedef Eigen::half Scalar;  // element type of in_x, in_y and *out
+    constexpr perftools::gputools::blas::Transpose kTranspose =
+        is_complex<Scalar>::value  // NOTE(review): presumably false for half
+            ? perftools::gputools::blas::Transpose::kConjugateTranspose
+            : perftools::gputools::blas::Transpose::kTranspose;
+    perftools::gputools::blas::Transpose trans[] = {
+        perftools::gputools::blas::Transpose::kNoTranspose, kTranspose};
+    const uint64 m = in_x.dim_size(adj_x ? 2 : 1);  // x rows (after adj_x)
+    const uint64 k = in_x.dim_size(adj_x ? 1 : 2);  // shared inner dimension
+    const uint64 n = in_y.dim_size(adj_y ? 1 : 2);  // y cols (after adj_y)
+    const uint64 batch_size = in_x.dim_size(0);  // leading dim of in_x
+    auto blas_transpose_a = trans[adj_x];  // false -> kNoTranspose
+    auto blas_transpose_b = trans[adj_y];
+
+    auto* stream = context->op_device_context()->stream();
+    OP_REQUIRES(context, stream, errors::Internal("No GPU stream available."));
+
+    typedef perftools::gputools::DeviceMemory<Scalar> DeviceMemoryType;
+    std::vector<DeviceMemoryType> a_device_memory;
+    std::vector<DeviceMemoryType> b_device_memory;
+    std::vector<DeviceMemoryType> c_device_memory;
+    std::vector<DeviceMemoryType*> a_ptrs;
+    std::vector<DeviceMemoryType*> b_ptrs;
+    std::vector<DeviceMemoryType*> c_ptrs;
+    a_device_memory.reserve(batch_size);  // keeps &back() pointers stable
+    b_device_memory.reserve(batch_size);
+    c_device_memory.reserve(batch_size);
+    a_ptrs.reserve(batch_size);
+    b_ptrs.reserve(batch_size);
+    c_ptrs.reserve(batch_size);
+    auto* a_base_ptr = in_x.template flat<Scalar>().data();
+    auto* b_base_ptr = in_y.template flat<Scalar>().data();
+    auto* c_base_ptr = out->template flat<Scalar>().data();
+    for (int64 i = 0; i < batch_size; ++i) {  // one view per batch matrix
+      a_device_memory.push_back(AsDeviceMemory(a_base_ptr + i * m * k));
+      b_device_memory.push_back(AsDeviceMemory(b_base_ptr + i * k * n));
+      c_device_memory.push_back(AsDeviceMemory(c_base_ptr + i * m * n));
+      a_ptrs.push_back(&a_device_memory.back());
+      b_ptrs.push_back(&b_device_memory.back());
+      c_ptrs.push_back(&c_device_memory.back());
+    }
+
+    typedef float Coefficient;  // type of the 1.0/0.0 scalars given to BLAS
+
+    // Cublas does
+    // C = A x B
+    // where A, B and C are assumed to be in column major.
+    // We want the output to be in row-major, so we can compute
+    // C' = B' x A', where ' stands for transpose (not adjoint).
+    // TODO(yangzihao): Choose the best of the three strategies using autotune.
+    if (batch_size == 1) {
+      // This is a regular matrix*matrix or matrix*vector multiply. Avoid the
+      // overhead of the scratch allocator and the batch interface.
+      // TODO(benbarsdell): Use fp16 Gemv if it becomes supported by CUBLAS
+      bool blas_launch_status =
+          stream
+              ->ThenBlasGemm(blas_transpose_b, blas_transpose_a, n, m, k,
+                             static_cast<Coefficient>(1.0), *(b_ptrs[0]),
+                             adj_y ? k : n, *(a_ptrs[0]), adj_x ? m : k,
+                             static_cast<Coefficient>(0.0), c_ptrs[0], n)
+              .ok();
+      if (!blas_launch_status) {
+        context->SetStatus(errors::Internal(
+            "Blas xGEMM launch failed : a.shape=", in_x.shape().DebugString(),
+            ", b.shape=", in_y.shape().DebugString(), ", m=", m, ", n=", n,
+            ", k=", k));
+      }
+    } else {
+      CublasScratchAllocator scratch_allocator(context);  // batched path only
+      bool blas_launch_status =
+          stream
+              ->ThenBlasGemmBatchedWithScratch(
+                  blas_transpose_b, blas_transpose_a, n, m, k,
+                  static_cast<Coefficient>(1.0), b_ptrs, adj_y ? k : n, a_ptrs,
+                  adj_x ? m : k, static_cast<Coefficient>(0.0), c_ptrs, n,
+                  batch_size, &scratch_allocator)
+              .ok();
+      if (!blas_launch_status) {
+        context->SetStatus(
+            errors::Internal("Blas xGEMMBatched launch failed : a.shape=",
+                             in_x.shape().DebugString(), ", b.shape=",
+                             in_y.shape().DebugString(), ", m=", m, ", n=", n,
+                             ", k=", k, ", batch_size=", batch_size));
+      }
+    }
+  }
+};
+
 #endif  // GOOGLE_CUDA
 
 #ifdef TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/kernels/batch_matmul_op_real.cc b/tensorflow/core/kernels/batch_matmul_op_real.cc
index 97cec3a5cc4f67..87a0795f2fd6b4 100644
--- a/tensorflow/core/kernels/batch_matmul_op_real.cc
+++ b/tensorflow/core/kernels/batch_matmul_op_real.cc
@@ -15,6 +15,10 @@ limitations under the License.
 
 #include "tensorflow/core/kernels/batch_matmul_op_impl.h"
 
+#if GOOGLE_CUDA
+#include "cuda/include/cuda.h"
+#endif  // GOOGLE_CUDA
+
 namespace tensorflow {
 
 #if !defined(INTEL_MKL)
diff --git a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h
index c6119b5011ac6c..b77c14d0128431 100644
--- a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h
+++ b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h
@@ -76,7 +76,7 @@ class AdaptiveSharedBatchScheduler
           AdaptiveSharedBatchScheduler<TaskType>> {
  public:
   ~AdaptiveSharedBatchScheduler() {
-    // Finish processing batches before destorying other class members.
+    // Finish processing batches before destroying other class members.
     batch_thread_pool_.reset();
   }
 
diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc
index 9edc6d416e33d6..980b1063de9997 100644
--- a/tensorflow/core/kernels/conv_grad_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc
@@ -195,8 +195,8 @@ class Conv3DBackpropInputOp : public OpKernel {
     TensorShape input_shape;
     if (takes_shape_) {
       const Tensor& input_sizes = context->input(0);
-      OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
-                                  input_sizes.vec<int32>(), &input_shape));
+      // MakeShape is able to handle both DT_INT32 and DT_INT64 for input_sizes.
+      OP_REQUIRES_OK(context, MakeShape(input_sizes, &input_shape));
     } else {
       input_shape = context->input(0).shape();
     }
diff --git a/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc b/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc
index 180531b8c09d63..a2e7342b046ac0 100644
--- a/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc
+++ b/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc
@@ -595,7 +595,7 @@ constexpr bool TileSizeOnNonLongSideFrontier(int TileLongSide,
   // For a tile size combination (longside, shortside), lying on the frontier
   // implies that (longside, shortside) is on or within the frontier but
   // (longside*2, shortside) or (longside, shortside+1) is not. With the above
-  // critereon, we simply need to use !TileSizeOnLongSideFrontier to ensure that
+  // criterion, we simply need to use !TileSizeOnLongSideFrontier to ensure that
   // it is not on the long side frontier.
   return !TileSizeOutsideFrontier(TileLongSide, TileShortSide, size_of_t) &&
          (TileSizeOutsideFrontier(TileLongSide * 2, TileShortSide, size_of_t) ||
diff --git a/tensorflow/core/kernels/nth_element_op.cc b/tensorflow/core/kernels/nth_element_op.cc
index 7f12eb953a31ec..0e43cc19aae513 100644
--- a/tensorflow/core/kernels/nth_element_op.cc
+++ b/tensorflow/core/kernels/nth_element_op.cc
@@ -114,7 +114,7 @@ struct NthElementFunctor<CPUDevice, T> {
 
     auto worker_threads = *(context->device()->tensorflow_cpu_worker_threads());
     // The average time complexity of partition-based nth_element (BFPRT) is
-    // O(n), althought the worst time complexity could be O(n^2). Here, 20 is a
+    // O(n), although the worst time complexity could be O(n^2). Here, 20 is an
     // empirical factor of cost_per_unit.
     Shard(worker_threads.num_threads, worker_threads.workers, num_rows,
           20 * last_dim, SubNthElement);
diff --git a/tensorflow/core/kernels/regex_full_match_op.cc b/tensorflow/core/kernels/regex_full_match_op.cc
new file mode 100644
index 00000000000000..5863a2c8e46419
--- /dev/null
+++ b/tensorflow/core/kernels/regex_full_match_op.cc
@@ -0,0 +1,59 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <string>
+
+#include "re2/re2.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/status.h"
+
+namespace tensorflow {
+
+class RegexFullMatchOp : public OpKernel {  // elementwise RE2::FullMatch
+ public:
+  explicit RegexFullMatchOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor* input_tensor;
+    OP_REQUIRES_OK(ctx, ctx->input("input", &input_tensor));
+    const auto& input_flat = input_tensor->flat<string>();  // any input shape
+
+    const Tensor* pattern_tensor;
+    OP_REQUIRES_OK(ctx, ctx->input("pattern", &pattern_tensor));
+    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(pattern_tensor->shape()),
+                errors::InvalidArgument("Pattern must be scalar, but received ",
+                                        pattern_tensor->shape().DebugString()));
+    const string pattern = pattern_tensor->flat<string>()(0);
+    const RE2 match(pattern);  // recompiled on every Compute() call
+    OP_REQUIRES(ctx, match.ok(),
+                errors::InvalidArgument("Invalid pattern: ", pattern,
+                                        ", error: ", match.error()));
+
+    Tensor* output_tensor = nullptr;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output("output", input_tensor->shape(),
+                                             &output_tensor));  // same shape
+    auto output_flat = output_tensor->flat<bool>();
+    for (size_t i = 0; i < input_flat.size(); ++i) {
+      output_flat(i) = RE2::FullMatch(input_flat(i), match);  // whole string
+    }
+  }
+};
+
+REGISTER_KERNEL_BUILDER(Name("RegexFullMatch").Device(DEVICE_CPU),
+                        RegexFullMatchOp);  // no other device registered here
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/roll_op.cc b/tensorflow/core/kernels/roll_op.cc
index 96f94d80df9422..722116f86fd131 100644
--- a/tensorflow/core/kernels/roll_op.cc
+++ b/tensorflow/core/kernels/roll_op.cc
@@ -84,7 +84,7 @@ void DoRoll(OpKernelContext* context, const int64 num_elements,
   // Shard
   auto worker_threads = context->device()->tensorflow_cpu_worker_threads();
   // 15 - expiramentally determined with float and bool types
-  const int cost_per_element = 15 * sizeof(T);  // rough esitmate
+  const int cost_per_element = 15 * sizeof(T);  // rough estimate
   Shard(worker_threads->num_threads, worker_threads->workers, num_elements,
         cost_per_element, std::move(work));
 }
diff --git a/tensorflow/core/kernels/segment_reduction_ops.cc b/tensorflow/core/kernels/segment_reduction_ops.cc
index c87ce78e051a45..2328fc6afd8e7b 100644
--- a/tensorflow/core/kernels/segment_reduction_ops.cc
+++ b/tensorflow/core/kernels/segment_reduction_ops.cc
@@ -320,7 +320,9 @@ class SegmentSumGPUOp : public AsyncOpKernel {
   REGISTER_CPU_KERNEL_SEGMENT("SegmentSum", Eigen::internal::SumReducer<type>, \
                               type, index_type, 0);                            \
   REGISTER_CPU_KERNEL_SEGMENT(                                                 \
-      "SegmentProd", Eigen::internal::ProdReducer<type>, type, index_type, 1)
+      "SegmentMean", Eigen::internal::MeanReducer<type>, type, index_type, 0); \
+  REGISTER_CPU_KERNEL_SEGMENT(                                                 \
+      "SegmentProd", Eigen::internal::ProdReducer<type>, type, index_type, 1);
 
 #define REGISTER_REAL_CPU_KERNELS_ALL(type) \
   REGISTER_REAL_CPU_KERNELS(type, int32);   \
diff --git a/tensorflow/core/kernels/segment_reduction_ops.h b/tensorflow/core/kernels/segment_reduction_ops.h
index 4abfbfb1a66c37..7796bf3587912d 100644
--- a/tensorflow/core/kernels/segment_reduction_ops.h
+++ b/tensorflow/core/kernels/segment_reduction_ops.h
@@ -130,4 +130,4 @@ struct Highest {
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_
diff --git a/tensorflow/core/lib/core/error_codes.proto b/tensorflow/core/lib/core/error_codes.proto
index b82d3891460cb4..5ced65a97331cd 100644
--- a/tensorflow/core/lib/core/error_codes.proto
+++ b/tensorflow/core/lib/core/error_codes.proto
@@ -5,6 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "ErrorCodesProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/lib/core";
 
 // The canonical error codes for TensorFlow APIs.
 //
diff --git a/tensorflow/core/ops/image_ops.cc b/tensorflow/core/ops/image_ops.cc
index 82330ec9d1d9f0..d949e70c661467 100644
--- a/tensorflow/core/ops/image_ops.cc
+++ b/tensorflow/core/ops/image_ops.cc
@@ -435,6 +435,25 @@ REGISTER_OP("DrawBoundingBoxes")
     .Output("output: T")
     .Attr("T: {float, half} = DT_FLOAT")
     .SetShapeFn([](InferenceContext* c) {
+      // The rank of images should be 4.
+      ShapeHandle images;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &images));
+      // Channel depth should be either 1 (GRY), 3 (RGB), or 4 (RGBA).
+      if (c->ValueKnown(c->Dim(images, 3))) {
+        int64 depth = c->Value(c->Dim(images, 3));
+        if (!(depth == 1 || depth == 3 || depth == 4)) {
+          return errors::InvalidArgument("Channel depth should be either 1 (GRY), "
+                                         "3 (RGB), or 4 (RGBA)");
+        }
+      }
+
+      // The rank of boxes is 3: [batch, num_bounding_boxes, 4].
+      ShapeHandle boxes;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 3, &boxes));
+      // The last value of boxes shape is 4.
+      DimensionHandle unused;
+      TF_RETURN_IF_ERROR(c->WithValue(c->Dim(boxes, 2), 4, &unused));
+
       return shape_inference::UnchangedShapeWithRankAtLeast(c, 3);
     });
 
diff --git a/tensorflow/core/ops/image_ops_test.cc b/tensorflow/core/ops/image_ops_test.cc
index 5f0b391b0d14a9..517af26b44f53d 100644
--- a/tensorflow/core/ops/image_ops_test.cc
+++ b/tensorflow/core/ops/image_ops_test.cc
@@ -312,4 +312,23 @@ TEST(ImageOpsTest, QuantizedResizeBilinear_ShapeFn) {
   INFER_OK(op, "[1,?,3,?];[2];[];[]", "[d0_0,20,30,d0_3];[];[]");
 }
 
+TEST(ImageOpsTest, DrawBoundingBoxes_ShapeFn) {
+  ShapeInferenceTestOp op("DrawBoundingBoxes");
+  op.input_tensors.resize(2);  // inputs: images, boxes
+
+  // Check images: rank must be 4 and a known channel depth must be 1, 3 or 4.
+  INFER_ERROR("must be rank 4", op, "[1,?,3];?");
+  INFER_ERROR("should be either 1 (GRY), 3 (RGB), or 4 (RGBA)",
+      op, "[1,?,?,5];?");
+
+  // Check boxes: rank must be 3 and the last dimension must be 4.
+  INFER_ERROR("must be rank 3", op, "[1,?,?,4];[1,4]");
+  INFER_ERROR("Dimension must be 4", op, "[1,?,?,4];[1,2,2]");
+
+  // OK shapes: the output shape is that of input 0 ("in0").
+  INFER_OK(op, "[4,?,?,4];?", "in0");
+  INFER_OK(op, "[?,?,?,?];[?,?,?]", "in0");
+  INFER_OK(op, "[4,?,?,4];[?,?,?]", "in0");
+  INFER_OK(op, "[4,?,?,4];[?,?,4]", "in0");
+}
 }  // end namespace tensorflow
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index 8f8443a46cfa68..8c0b073ce46468 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -1017,7 +1017,7 @@ REGISTER_OP("SegmentMean")
     .Input("data: T")
     .Input("segment_ids: Tindices")
     .Output("output: T")
-    .Attr("T: realnumbertype")
+    .Attr("T: numbertype")
     .Attr("Tindices: {int32,int64}")
     .SetShapeFn(SegmentReductionShapeFn);
 
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index bb46dafd424fe6..fc60e807b90b01 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -547,7 +547,7 @@ REGISTER_OP("Conv3DBackpropFilter")
     });
 
 REGISTER_OP("Conv3DBackpropInputV2")
-    .Input("input_sizes: int32")
+    .Input("input_sizes: Tshape")
     .Input("filter: T")
     .Input("out_backprop: T")
     .Output("output: T")
@@ -556,6 +556,7 @@ REGISTER_OP("Conv3DBackpropInputV2")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnet3dDataFormatAttrString())
     .Attr("dilations: list(int) = [1, 1, 1, 1, 1]")
+    .Attr("Tshape: {int32, int64} = DT_INT32")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle s;
       TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s));
diff --git a/tensorflow/core/ops/random_ops.cc b/tensorflow/core/ops/random_ops.cc
index 416ce9c0d82ca0..80ffae579655d5 100644
--- a/tensorflow/core/ops/random_ops.cc
+++ b/tensorflow/core/ops/random_ops.cc
@@ -72,7 +72,15 @@ REGISTER_OP("ParameterizedTruncatedNormal")
     .Attr("seed2: int = 0")
     .Attr("dtype: {half,bfloat16,float,double}")
     .Attr("T: {int32, int64}")
-    .SetShapeFn(shape_inference::RandomShape);
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle unused;
+      // Parameters must be 0-d or 1-d.
+      TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(1), 1, &unused));
+      TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(2), 1, &unused));
+      TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(3), 1, &unused));
+      TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(4), 1, &unused));
+      return shape_inference::RandomShape(c);
+    });
 
 REGISTER_OP("TruncatedNormal")
     .Input("shape: T")
diff --git a/tensorflow/core/ops/string_ops.cc b/tensorflow/core/ops/string_ops.cc
index 469f193cf41775..1d5c743a56cec3 100644
--- a/tensorflow/core/ops/string_ops.cc
+++ b/tensorflow/core/ops/string_ops.cc
@@ -37,6 +37,17 @@ REGISTER_OP("RegexReplace")
       return Status::OK();
     });
 
+REGISTER_OP("RegexFullMatch")
+    .Input("input: string")
+    .Input("pattern: string")  // must be a scalar (checked below)
+    .Output("output: bool")
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));  // scalar
+      c->set_output(0, c->input(0));  // output shape = input shape
+      return Status::OK();
+    });
+
 REGISTER_OP("StringToHashBucketFast")
     .Input("input: string")
     .Output("output: int64")
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc
index 8307758936e7ac..dc12c78a4b5a09 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system.cc
@@ -103,7 +103,7 @@ constexpr char kResolveCacheSecs[] = "GCS_RESOLVE_REFRESH_SECS";
 // The environment variable to configure the http request's connection timeout.
 constexpr char kRequestConnectionTimeout[] =
     "GCS_REQUEST_CONNECTION_TIMEOUT_SECS";
-// The environment varaible to configure the http request's idle timeout.
+// The environment variable to configure the http request's idle timeout.
 constexpr char kRequestIdleTimeout[] = "GCS_REQUEST_IDLE_TIMEOUT_SECS";
 // The environment variable to configure the overall request timeout for
 // metadata requests.
diff --git a/tensorflow/core/platform/cloud/gcs_throttle.h b/tensorflow/core/platform/cloud/gcs_throttle.h
index 97a858e3fecfbb..8c9e2e074cbc20 100644
--- a/tensorflow/core/platform/cloud/gcs_throttle.h
+++ b/tensorflow/core/platform/cloud/gcs_throttle.h
@@ -132,7 +132,7 @@ class GcsThrottle {
    * UpdateState updates the available_tokens_ and last_updated_secs_ variables.
    *
    * UpdateState should be called in order to mark the passage of time, and
-   * therefore add tokens to the availble_tokens_ pool.
+   * therefore add tokens to the available_tokens_ pool.
    */
   void UpdateState() EXCLUSIVE_LOCKS_REQUIRED(mu_);
 
diff --git a/tensorflow/core/profiler/g3doc/command_line.md b/tensorflow/core/profiler/g3doc/command_line.md
index bbaf55e613f6f3..cc6d9def4724aa 100644
--- a/tensorflow/core/profiler/g3doc/command_line.md
+++ b/tensorflow/core/profiler/g3doc/command_line.md
@@ -82,7 +82,7 @@ bazel-bin/tensorflow/core/profiler/profiler \
 #
 # Alternatively, user can pass separate files.
 #
-# --graph_path contains the model architecutre and tensor shapes.
+# --graph_path contains the model architecture and tensor shapes.
 # --run_meta_path contains the memory and time information.
 # --op_log_path contains float operation and code traces.
 # --checkpoint_path contains the model checkpoint data.
diff --git a/tensorflow/core/protobuf/cluster.proto b/tensorflow/core/protobuf/cluster.proto
index 33c87eefe022ee..c696d345e0cfb9 100644
--- a/tensorflow/core/protobuf/cluster.proto
+++ b/tensorflow/core/protobuf/cluster.proto
@@ -20,6 +20,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "ClusterProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.distruntime";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf";
 
 // This file contains protos to be used when defining a TensorFlow
 // cluster.
diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto
index 410ad227e96387..9a48f43a63abad 100644
--- a/tensorflow/core/protobuf/config.proto
+++ b/tensorflow/core/protobuf/config.proto
@@ -5,7 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "ConfigProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf";
 import "tensorflow/core/framework/cost_graph.proto";
 import "tensorflow/core/framework/graph.proto";
 import "tensorflow/core/framework/step_stats.proto";
diff --git a/tensorflow/core/protobuf/control_flow.proto b/tensorflow/core/protobuf/control_flow.proto
index 3c05b4f0e22e5c..5f44878c44c90b 100644
--- a/tensorflow/core/protobuf/control_flow.proto
+++ b/tensorflow/core/protobuf/control_flow.proto
@@ -5,6 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "ControlFlowProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf";
 
 // Control flow context related protocol buffers.
 
diff --git a/tensorflow/core/protobuf/critical_section.proto b/tensorflow/core/protobuf/critical_section.proto
index 0b3f531e6d9f59..7954e7ba87c1b9 100644
--- a/tensorflow/core/protobuf/critical_section.proto
+++ b/tensorflow/core/protobuf/critical_section.proto
@@ -5,6 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "CriticalSectionProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf";
 
 // Protocol buffer representing a CriticalSection.
 message CriticalSectionDef {
diff --git a/tensorflow/core/protobuf/debug.proto b/tensorflow/core/protobuf/debug.proto
index 56983f3b7d464f..499900f965ac2b 100644
--- a/tensorflow/core/protobuf/debug.proto
+++ b/tensorflow/core/protobuf/debug.proto
@@ -5,6 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "DebugProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf";
 
 // EXPERIMENTAL. Option for watching a node.
 message DebugTensorWatch {
diff --git a/tensorflow/core/protobuf/device_properties.proto b/tensorflow/core/protobuf/device_properties.proto
index 3bd30159003484..11e1258e75e6bb 100644
--- a/tensorflow/core/protobuf/device_properties.proto
+++ b/tensorflow/core/protobuf/device_properties.proto
@@ -18,6 +18,7 @@ syntax = "proto3";
 package tensorflow;
 option cc_enable_arenas = true;
 option java_outer_classname = "DevicePropertiesProtos";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf";
 
 message DeviceProperties {
   // Device type (CPU, GPU, ...)
diff --git a/tensorflow/core/protobuf/master.proto b/tensorflow/core/protobuf/master.proto
index 96c91536f73865..03022875e64ace 100644
--- a/tensorflow/core/protobuf/master.proto
+++ b/tensorflow/core/protobuf/master.proto
@@ -20,7 +20,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "DistributedRuntimeProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.distruntime";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf";
 import "tensorflow/core/framework/device_attributes.proto";
 import "tensorflow/core/framework/graph.proto";
 import "tensorflow/core/framework/tensor.proto";
diff --git a/tensorflow/core/protobuf/master_service.proto b/tensorflow/core/protobuf/master_service.proto
index 1170611f372327..ce0e4f643544ee 100644
--- a/tensorflow/core/protobuf/master_service.proto
+++ b/tensorflow/core/protobuf/master_service.proto
@@ -19,7 +19,7 @@ package tensorflow.grpc;
 option java_outer_classname = "MasterServiceProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.distruntime";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf";
 import "tensorflow/core/protobuf/master.proto";
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/tensorflow/core/protobuf/meta_graph.proto b/tensorflow/core/protobuf/meta_graph.proto
index fd86c0da12b26c..75a2a88ed72cd9 100644
--- a/tensorflow/core/protobuf/meta_graph.proto
+++ b/tensorflow/core/protobuf/meta_graph.proto
@@ -5,7 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "MetaGraphProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf";
 import "google/protobuf/any.proto";
 
 import "tensorflow/core/framework/graph.proto";
diff --git a/tensorflow/core/protobuf/named_tensor.proto b/tensorflow/core/protobuf/named_tensor.proto
index dd4976e3546268..6e2f7feee29f2f 100644
--- a/tensorflow/core/protobuf/named_tensor.proto
+++ b/tensorflow/core/protobuf/named_tensor.proto
@@ -5,7 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "NamedTensorProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf";
 import "tensorflow/core/framework/tensor.proto";
 
 // A pair of tensor name and tensor values.
diff --git a/tensorflow/core/protobuf/queue_runner.proto b/tensorflow/core/protobuf/queue_runner.proto
index 05a48d0acf7581..f4df649f7d6548 100644
--- a/tensorflow/core/protobuf/queue_runner.proto
+++ b/tensorflow/core/protobuf/queue_runner.proto
@@ -5,7 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "QueueRunnerProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf";
 import "tensorflow/core/lib/core/error_codes.proto";
 
 // Protocol buffer representing a QueueRunner.
diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto
index 10bfe3034de280..45e57594e4d282 100644
--- a/tensorflow/core/protobuf/rewriter_config.proto
+++ b/tensorflow/core/protobuf/rewriter_config.proto
@@ -5,6 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "RewriterConfigProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf";
 
 import "tensorflow/core/framework/attr_value.proto";
 
@@ -31,7 +32,7 @@ message RewriterConfig {
     AGGRESSIVE = 3;
   }
 
-  // Enum controling the number of times to run optimizers. The default is to
+  // Enum controlling the number of times to run optimizers. The default is to
   // run them once.
   enum NumIterationsType {
     DEFAULT_NUM_ITERS = 0;
diff --git a/tensorflow/core/protobuf/saved_model.proto b/tensorflow/core/protobuf/saved_model.proto
index c2595ddf884b08..03789d3df72f2a 100644
--- a/tensorflow/core/protobuf/saved_model.proto
+++ b/tensorflow/core/protobuf/saved_model.proto
@@ -5,7 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "SavedModelProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.framework";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf";
 import "tensorflow/core/protobuf/meta_graph.proto";
 
 // SavedModel is the high level serialization format for TensorFlow Models.
diff --git a/tensorflow/core/protobuf/saver.proto b/tensorflow/core/protobuf/saver.proto
index a757d3f756ab73..4245386145907f 100644
--- a/tensorflow/core/protobuf/saver.proto
+++ b/tensorflow/core/protobuf/saver.proto
@@ -5,6 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "SaverProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.util";
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf";
 
 // Protocol buffer representing the configuration of a Saver.
 message SaverDef {
diff --git a/tensorflow/core/protobuf/tensor_bundle.proto b/tensorflow/core/protobuf/tensor_bundle.proto
index 80e87f14f941b9..681c01bbbd40fd 100644
--- a/tensorflow/core/protobuf/tensor_bundle.proto
+++ b/tensorflow/core/protobuf/tensor_bundle.proto
@@ -5,7 +5,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "TensorBundleProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.util";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf";
 import "tensorflow/core/framework/tensor_shape.proto";
 import "tensorflow/core/framework/tensor_slice.proto";
 import "tensorflow/core/framework/types.proto";
diff --git a/tensorflow/core/protobuf/tensorflow_server.proto b/tensorflow/core/protobuf/tensorflow_server.proto
index 6199e707e5ad03..be25804a1b4a94 100644
--- a/tensorflow/core/protobuf/tensorflow_server.proto
+++ b/tensorflow/core/protobuf/tensorflow_server.proto
@@ -23,7 +23,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "ServerProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.distruntime";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf";
 // Defines the configuration of a single TensorFlow server.
 message ServerDef {
   // The cluster of which this server is a member.
diff --git a/tensorflow/core/protobuf/worker.proto b/tensorflow/core/protobuf/worker.proto
index b400638df103f4..a3bc2f422e776a 100644
--- a/tensorflow/core/protobuf/worker.proto
+++ b/tensorflow/core/protobuf/worker.proto
@@ -20,7 +20,7 @@ option cc_enable_arenas = true;
 option java_outer_classname = "WorkerProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.distruntime";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf";
 import "google/protobuf/any.proto";
 import "tensorflow/core/framework/cost_graph.proto";
 import "tensorflow/core/framework/step_stats.proto";
diff --git a/tensorflow/core/protobuf/worker_service.proto b/tensorflow/core/protobuf/worker_service.proto
index e0c27f394a9ca1..9ebbd553f2181b 100644
--- a/tensorflow/core/protobuf/worker_service.proto
+++ b/tensorflow/core/protobuf/worker_service.proto
@@ -19,7 +19,7 @@ package tensorflow.grpc;
 option java_outer_classname = "WorkerServiceProtos";
 option java_multiple_files = true;
 option java_package = "org.tensorflow.distruntime";
-
+option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf";
 import "tensorflow/core/protobuf/worker.proto";
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index ba69efb289a42a..522a9d84fddd2e 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -24,7 +24,7 @@ limitations under the License.
 
 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
 // "-beta", "-rc", "-rc.1")
-#define TF_VERSION_SUFFIX "-rc1"
+#define TF_VERSION_SUFFIX ""
 
 #define TF_STR_HELPER(x) #x
 #define TF_STR(x) TF_STR_HELPER(x)
diff --git a/tensorflow/core/util/cuda_device_functions.h b/tensorflow/core/util/cuda_device_functions.h
index f2d4e470c82d9a..b91f8bb8ef0c36 100644
--- a/tensorflow/core/util/cuda_device_functions.h
+++ b/tensorflow/core/util/cuda_device_functions.h
@@ -537,7 +537,7 @@ __device__ detail::ToTypeIfConvertible<U, T> CudaAtomicSub(T* ptr, U value) {
   return atomicSub(ptr, value);
 }
 
-// Specializations of substraction which add the negative value.
+// Specializations of subtraction which add the negative value.
 __device__ inline float CudaAtomicSub(float* ptr, float value) {
   return CudaAtomicAdd(ptr, -value);
 }
diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
index 50a8e305749eec..8105121e7ce809 100644
--- a/tensorflow/core/util/mkl_util.h
+++ b/tensorflow/core/util/mkl_util.h
@@ -1359,7 +1359,7 @@ inline memory::dims MklDnnDimsInNCHW(const memory::dims& in_dims,
 /// Map MklDnn memory::dims object into TensorShape object.
 ///
 /// This function will simply map input shape in MKL-DNN memory::dims format
-/// in Tensorflow's TensorShape object by perserving dimension order.
+/// in Tensorflow's TensorShape object by preserving dimension order.
 ///
 /// @input MKL-DNN memory::dims object
 /// @output TensorShape corresponding to memory::dims
diff --git a/tensorflow/core/util/tensor_format.h b/tensorflow/core/util/tensor_format.h
index 58bc79aebf8466..d3d5602f924541 100644
--- a/tensorflow/core/util/tensor_format.h
+++ b/tensorflow/core/util/tensor_format.h
@@ -75,7 +75,7 @@ enum FilterTensorFormat {
   FORMAT_OIHW = 1,
 
   // OIHW_VECT_I is the most performant tensor format for cudnn6's quantized
-  // int8 convolution and fused convolution. It is analagous to the NCHW_VECT_C
+  // int8 convolution and fused convolution. It is analogous to the NCHW_VECT_C
   // data format. It is laid out in the same order as OIHW, except that the size
   // of the Input Channels dimension is divided by 4, and a new dimension of
   // size 4 is appended, which packs 4 adjacent input channel weights into an
diff --git a/tensorflow/docs_src/api_guides/python/reading_data.md b/tensorflow/docs_src/api_guides/python/reading_data.md
index b3ca9583704eb3..5bbbfd32160f71 100644
--- a/tensorflow/docs_src/api_guides/python/reading_data.md
+++ b/tensorflow/docs_src/api_guides/python/reading_data.md
@@ -184,7 +184,7 @@ The recommended way to read a TFRecord file is with a @{tf.data.TFRecordDataset}
     dataset = dataset.map(decode)
 ```
 
-To acomplish the same task with a queue based input pipeline requires the following code 
+To accomplish the same task with a queue based input pipeline requires the following code
 (using the same `decode` function from the above example): 
 
 ``` python
diff --git a/tensorflow/docs_src/community/benchmarks.md b/tensorflow/docs_src/community/benchmarks.md
index 67856ce8698aec..153ef4a015d475 100644
--- a/tensorflow/docs_src/community/benchmarks.md
+++ b/tensorflow/docs_src/community/benchmarks.md
@@ -1,14 +1,14 @@
 # Defining and Running Benchmarks
 
-This guide contains instructions for defining and running a TensorFlow benchmark. These benchmarks store output in [TestResults](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/test_log.proto) format. If these benchmarks are added to TensorFlow github repo, then we will run them daily with our continuous build and display a graph on our dashboard: https://benchmarks-dot-tensorflow-testing.appspot.com/.
+This guide contains instructions for defining and running a TensorFlow benchmark. These benchmarks store output in [TestResults](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/test_log.proto) format. If these benchmarks are added to the TensorFlow GitHub repo, we will run them daily with our continuous build and display a graph on our dashboard: https://benchmarks-dot-tensorflow-testing.appspot.com/.
 
 [TOC]
 
 
 ## Defining a Benchmark
 
-Defining a TensorFlow benchmark requires extending from `tf.test.Benchmark`
-class and calling `self.report_benchmark` method. For example, take a look at the sample benchmark code below:
+Defining a TensorFlow benchmark requires extending the `tf.test.Benchmark`
+class and calling the `self.report_benchmark` method. Below, you'll find an example of benchmark code:
 
 ```python
 import time
@@ -54,20 +54,20 @@ Key points to note in the example above:
 
 ## Running with Python
 
-Use the `--benchmarks` flag to run the benchmark with python. A [BenchmarkEntries](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/util/test_log.proto) proto will be printed.
+Use the `--benchmarks` flag to run the benchmark with Python. A [BenchmarkEntries](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/util/test_log.proto) proto will be printed.
 
 ```
 python sample_benchmark.py --benchmarks=SampleBenchmark
 ```
 
-Setting the flag as `--benchmarks=.` or `--benchmarks=all` would work as well.
+Setting the flag as `--benchmarks=.` or `--benchmarks=all` works as well.
 
-(Please ensure that Tensorflow is installed to successfully import the package in the line `import tensorflow as tf`. For installation instructions, see [Installing TensorFlow](https://www.tensorflow.org/install/). This step is not necessary when running with bazel.)
+(Please ensure that TensorFlow is installed to successfully import the package in the line `import tensorflow as tf`. For installation instructions, see [Installing TensorFlow](https://www.tensorflow.org/install/). This step is not necessary when running with Bazel.)
 
 
 ## Adding a `bazel` Target
 
-We have a special target called `tf_py_logged_benchmark` for benchmarks defined under TensorFlow github repo. `tf_py_logged_benchmark` should wrap around a regular `py_test` target. Running a `tf_py_logged_benchmark` would print a [TestResults](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/test_log.proto) proto. Defining a `tf_py_logged_benchmark` also lets us run it with TensorFlow continuous build.
+We have a special target called `tf_py_logged_benchmark` for benchmarks defined under the TensorFlow GitHub repo. `tf_py_logged_benchmark` should wrap around a regular `py_test` target. Running a `tf_py_logged_benchmark` would print a [TestResults](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/test_log.proto) proto. Defining a `tf_py_logged_benchmark` also lets us run it with TensorFlow continuous build.
 
 First, define a regular `py_test` target. See example below:
 
@@ -82,7 +82,7 @@ py_test(
 )
 ```
 
-You can run benchmarks in a `py_test` target by passing `--benchmarks` flag. The benchmark should just print out a [BenchmarkEntries](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/util/test_log.proto) proto.
+You can run benchmarks in a `py_test` target by passing the `--benchmarks` flag. The benchmark should just print out a [BenchmarkEntries](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/util/test_log.proto) proto.
 
 ```shell
 bazel test :sample_benchmark --test_arg=--benchmarks=all
@@ -90,7 +90,7 @@ bazel test :sample_benchmark --test_arg=--benchmarks=all
 
 
 Now, add the `tf_py_logged_benchmark` target (if available). This target would
-pass in `--benchmarks=all` to the wrapped `py_test` target and provide a way to store output for our TensorFlow continuous build. `tf_py_logged_benchmark` target should be available in TensorFlow repository.
+pass in `--benchmarks=all` to the wrapped `py_test` target and provide a way to store output for our TensorFlow continuous build. The target `tf_py_logged_benchmark` should be available in the TensorFlow repository.
 
 ```build
 load("//tensorflow/tools/test:performance.bzl", "tf_py_logged_benchmark")
diff --git a/tensorflow/docs_src/community/swift.md b/tensorflow/docs_src/community/swift.md
index e5e9e8e0a4fb01..d1625d3b93e2a9 100644
--- a/tensorflow/docs_src/community/swift.md
+++ b/tensorflow/docs_src/community/swift.md
@@ -8,7 +8,7 @@ Welcome to the Swift for TensorFlow development community!
 
 Swift for TensorFlow is a new way to develop machine learning models. It
 gives you the power of
-[TensorFlow](programmers_guide/eager) directly
+[TensorFlow](https://www.tensorflow.org) directly
 integrated into the [Swift programming language](https://swift.org/about).
 With Swift, you can write the following imperative code, and Swift
 automatically turns it into **a single TensorFlow Graph** and runs it
diff --git a/tensorflow/docs_src/deploy/s3.md b/tensorflow/docs_src/deploy/s3.md
index ef3b030e3277c1..9ef9674338a905 100644
--- a/tensorflow/docs_src/deploy/s3.md
+++ b/tensorflow/docs_src/deploy/s3.md
@@ -1,6 +1,6 @@
 # How to run TensorFlow on S3
 
-Tensorflow supports reading and writing data to S3. S3 is an object storage API which is nearly ubiquitious, and can help in situations where data must accessed by multiple actors, such as in distributed training.
+TensorFlow supports reading and writing data to S3. S3 is an object storage API which is nearly ubiquitous, and can help in situations where data must be accessed by multiple actors, such as in distributed training.
 
 This document guides you through the required setup, and provides examples on usage.
 
diff --git a/tensorflow/docs_src/extend/adding_an_op.md b/tensorflow/docs_src/extend/adding_an_op.md
index c3795492cef7d6..1b028be4ea16af 100644
--- a/tensorflow/docs_src/extend/adding_an_op.md
+++ b/tensorflow/docs_src/extend/adding_an_op.md
@@ -863,48 +863,53 @@ REGISTER_OP("ZeroOut")
 Instead of writing another `OpKernel` with redundant code as above, often you
 will be able to use a C++ template instead.  You will still have one kernel
 registration (`REGISTER_KERNEL_BUILDER` call) per overload.
-<pre class="prettyprint"><code class="lang-cpp">
-<b>template &lt;typename T&gt;</b>
+```c++
+template <typename T>
 class ZeroOutOp : public OpKernel {
  public:
-  explicit ZeroOutOp(OpKernelConstruction\* context) : OpKernel(context) {}<br/>
-  void Compute(OpKernelContext\* context) override {
+  explicit ZeroOutOp(OpKernelConstruction* context) : OpKernel(context) {}
+  
+  void Compute(OpKernelContext* context) override {
     // Grab the input tensor
-    const Tensor& input\_tensor = context-&gt;input(0);
-    auto input = input\_tensor.flat<b>&lt;T&gt;</b>();<br/>
+    const Tensor& input_tensor = context->input(0);
+    auto input = input_tensor.flat<T>();
+    
     // Create an output tensor
     Tensor* output = NULL;
-    OP\_REQUIRES\_OK(context,
-                   context-&gt;allocate\_output(0, input_tensor.shape(), &output));
-    auto output\_flat = output-&gt;template flat<b>&lt;T&gt;</b>();<br/>
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, input_tensor.shape(), &output));
+    auto output_flat = output->template flat<T>();
+    
     // Set all the elements of the output tensor to 0
     const int N = input.size();
-    for (int i = 0; i &lt; N; i++) {
-      output\_flat(i) = 0;
-    }<br/>
+    for (int i = 0; i < N; i++) {
+      output_flat(i) = 0;
+    }
+    
     // Preserve the first input value
-    if (N &gt; 0) output\_flat(0) = input(0);
+    if (N > 0) output_flat(0) = input(0);
   }
-};<br/>
-// Note that TypeConstraint&lt;int32&gt;("T") means that attr "T" (defined
+};
+
+// Note that TypeConstraint<int32>("T") means that attr "T" (defined
 // in the op registration above) must be "int32" to use this template
-// instantiation.</b>
-REGISTER\_KERNEL\_BUILDER(
+// instantiation.
+REGISTER_KERNEL_BUILDER(
     Name("ZeroOut")
-    .Device(DEVICE\_CPU)
-    .TypeConstraint&lt;int32&gt;("T"),
-    <b>ZeroOutOp&lt;int32&gt;</b>);
-REGISTER\_KERNEL\_BUILDER(
+    .Device(DEVICE_CPU)
+    .TypeConstraint<int32>("T"),
+    ZeroOutOp<int32>);
+REGISTER_KERNEL_BUILDER(
     Name("ZeroOut")
-    .Device(DEVICE\_CPU)
-    .TypeConstraint&lt;float&gt;("T"),
-    <b>ZeroOutOp&lt;float&gt;</b>);
-<b>REGISTER\_KERNEL\_BUILDER(
+    .Device(DEVICE_CPU)
+    .TypeConstraint<float>("T"),
+    ZeroOutOp<float>);
+REGISTER_KERNEL_BUILDER(
     Name("ZeroOut")
-    .Device(DEVICE\_CPU)
-    .TypeConstraint&lt;double&gt;("T"),
-    ZeroOutOp&lt;double&gt;);
-</b></code></pre>
+    .Device(DEVICE_CPU)
+    .TypeConstraint<double>("T"),
+    ZeroOutOp<double>);
+```
 
 If you have more than a couple overloads, you can put the registration in a
 macro.
diff --git a/tensorflow/docs_src/extend/architecture.md b/tensorflow/docs_src/extend/architecture.md
index c0fc714a4405d6..c8f522a03ab0c1 100644
--- a/tensorflow/docs_src/extend/architecture.md
+++ b/tensorflow/docs_src/extend/architecture.md
@@ -4,8 +4,8 @@ We designed TensorFlow for large-scale distributed training and inference, but
 it is also flexible enough to support experimentation with new machine
 learning models and system-level optimizations.
 
-This document describes the system architecture that makes possible this
-combination of scale and flexibility. It assumes that you have basic familiarity
+This document describes the system architecture that makes this
+combination of scale and flexibility possible. It assumes that you have basic familiarity
 with TensorFlow programming concepts such as the computation graph, operations,
 and sessions. See @{$programmers_guide/low_level_intro$this document}
 for an introduction to these topics. Some familiarity
@@ -15,8 +15,8 @@ will also be helpful.
 This document is for developers who want to extend TensorFlow in some way not
 supported by current APIs, hardware engineers who want to optimize for
 TensorFlow, implementers of machine learning systems working on scaling and
-distribution, or anyone who wants to look under Tensorflow's hood. After
-reading it you should understand TensorFlow architecture well enough to read
+distribution, or anyone who wants to look under TensorFlow's hood. By the end of this document
+you should understand the TensorFlow architecture well enough to read
 and modify the core TensorFlow code.
 
 ## Overview
@@ -35,7 +35,7 @@ This document focuses on the following layers:
 *  **Client**:
    *  Defines the computation as a dataflow graph.
    *  Initiates graph execution using a [**session**](
-      https://www.tensorflow.org/code/tensorflow/python/client/session.py)
+      https://www.tensorflow.org/code/tensorflow/python/client/session.py).
 *  **Distributed Master**
    *  Prunes a specific subgraph from the graph, as defined by the arguments
       to Session.run().
@@ -55,7 +55,7 @@ Figure 2 illustrates the interaction of these components. "/job:worker/task:0" a
 server": a task responsible for storing and updating the model's parameters.
 Other tasks send updates to these parameters as they work on optimizing the
 parameters. This particular division of labor between tasks is not required, but
-it is common for distributed training.
+is common for distributed training.
 
 ![TensorFlow Architecture Diagram](https://www.tensorflow.org/images/diag1.svg){: width="500"}
 
@@ -193,7 +193,7 @@ https://www.tensorflow.org/code/tensorflow/contrib/nccl/python/ops/nccl_ops.py))
 
 ## Kernel Implementations
 
-The runtime contains over 200 standard operations, including mathematical, array
+The runtime contains over 200 standard operations including mathematical, array
 manipulation, control flow, and state management operations. Each of these
 operations can have kernel implementations optimized for a variety of devices.
 Many of the operation kernels are implemented using Eigen::Tensor, which uses
diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md
index 8c165aad52499a..1abd840ab3ca3f 100644
--- a/tensorflow/docs_src/install/install_c.md
+++ b/tensorflow/docs_src/install/install_c.md
@@ -38,7 +38,7 @@ enable TensorFlow for C:
          OS="linux" # Change to "darwin" for macOS
          TARGET_DIRECTORY="/usr/local"
          curl -L \
-           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.8.0-rc1.tar.gz" |
+           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.8.0.tar.gz" |
            sudo tar -C $TARGET_DIRECTORY -xz
 
      The `tar` command extracts the TensorFlow C library into the `lib`
diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md
index 26cbcc9a9b0a99..52a2a3f8a68dd5 100644
--- a/tensorflow/docs_src/install/install_go.md
+++ b/tensorflow/docs_src/install/install_go.md
@@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go:
          TF_TYPE="cpu" # Change to "gpu" for GPU support
          TARGET_DIRECTORY='/usr/local'
          curl -L \
-           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.8.0-rc1.tar.gz" |
+           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.8.0.tar.gz" |
          sudo tar -C $TARGET_DIRECTORY -xz
 
      The `tar` command extracts the TensorFlow C library into the `lib`
diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md
index 05b28787017487..1256fb99c4307c 100644
--- a/tensorflow/docs_src/install/install_java.md
+++ b/tensorflow/docs_src/install/install_java.md
@@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs:
 <dependency>
   <groupId>org.tensorflow</groupId>
   <artifactId>tensorflow</artifactId>
-  <version>1.8.0-rc1</version>
+  <version>1.8.0</version>
 </dependency>
 ```
 
@@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow:
                <dependency>
                  <groupId>org.tensorflow</groupId>
                  <artifactId>tensorflow</artifactId>
-                 <version>1.8.0-rc1</version>
+                 <version>1.8.0</version>
                </dependency>
              </dependencies>
          </project>
@@ -124,12 +124,12 @@ instead:
 <dependency>
   <groupId>org.tensorflow</groupId>
   <artifactId>libtensorflow</artifactId>
-  <version>1.8.0-rc1</version>
+  <version>1.8.0</version>
 </dependency>
 <dependency>
   <groupId>org.tensorflow</groupId>
   <artifactId>libtensorflow_jni_gpu</artifactId>
-  <version>1.8.0-rc1</version>
+  <version>1.8.0</version>
 </dependency>
 ```
 
@@ -148,7 +148,7 @@ refer to the simpler instructions above instead.
 Take the following steps to install TensorFlow for Java on Linux or macOS:
 
   1. Download
-     [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0-rc1.jar),
+     [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0.jar),
      which is the TensorFlow Java Archive (JAR).
 
   2. Decide whether you will run TensorFlow for Java on CPU(s) only or with
@@ -167,7 +167,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS:
          OS=$(uname -s | tr '[:upper:]' '[:lower:]')
          mkdir -p ./jni
          curl -L \
-           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.8.0-rc1.tar.gz" |
+           "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.8.0.tar.gz" |
            tar -xz -C ./jni
 
 ### Install on Windows
@@ -175,10 +175,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS:
 Take the following steps to install TensorFlow for Java on Windows:
 
   1. Download
-     [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0-rc1.jar),
+     [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0.jar),
      which is the TensorFlow Java Archive (JAR).
   2. Download the following Java Native Interface (JNI) file appropriate for
-     [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.8.0-rc1.zip).
+     [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.8.0.zip).
   3. Extract this .zip file.
 
 
@@ -227,7 +227,7 @@ must be part of your `classpath`. For example, you can include the
 downloaded `.jar` in your `classpath` by using the `-cp` compilation flag
 as follows:
 
-<pre><b>javac -cp libtensorflow-1.8.0-rc1.jar HelloTF.java</b></pre>
+<pre><b>javac -cp libtensorflow-1.8.0.jar HelloTF.java</b></pre>
 
 
 ### Running
@@ -241,11 +241,11 @@ two files are available to the JVM:
 For example, the following command line executes the `HelloTF` program on Linux
 and macOS X:
 
-<pre><b>java -cp libtensorflow-1.8.0-rc1.jar:. -Djava.library.path=./jni HelloTF</b></pre>
+<pre><b>java -cp libtensorflow-1.8.0.jar:. -Djava.library.path=./jni HelloTF</b></pre>
 
 And the following command line executes the `HelloTF` program on Windows:
 
-<pre><b>java -cp libtensorflow-1.8.0-rc1.jar;. -Djava.library.path=jni HelloTF</b></pre>
+<pre><b>java -cp libtensorflow-1.8.0.jar;. -Djava.library.path=jni HelloTF</b></pre>
 
 If the program prints <tt>Hello from <i>version</i></tt>, you've successfully
 installed TensorFlow for Java and are ready to use the API.  If the program
diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md
index 9d9322dbb59e2b..0ed81600279b17 100644
--- a/tensorflow/docs_src/install/install_linux.md
+++ b/tensorflow/docs_src/install/install_linux.md
@@ -438,7 +438,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
 
      <pre>
      (tensorflow)$ <b>pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp34-cp34m-linux_x86_64.whl</b></pre>
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp34-cp34m-linux_x86_64.whl</b></pre>
 
 <a name="ValidateYourInstallation"></a>
 ## Validate your installation
@@ -684,14 +684,14 @@ This section documents the relevant values for Linux installations.
 CPU only:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp27-none-linux_x86_64.whl
 </pre>
 
 
 GPU support:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp27-none-linux_x86_64.whl
 </pre>
 
 Note that GPU support requires the NVIDIA hardware and software described in
@@ -703,14 +703,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
 CPU only:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp34-cp34m-linux_x86_64.whl
 </pre>
 
 
 GPU support:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp34-cp34m-linux_x86_64.whl
 </pre>
 
 Note that GPU support requires the NVIDIA hardware and software described in
@@ -722,14 +722,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
 CPU only:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp35-cp35m-linux_x86_64.whl
 </pre>
 
 
 GPU support:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp35-cp35m-linux_x86_64.whl
 </pre>
 
 
@@ -741,14 +741,14 @@ Note that GPU support requires the NVIDIA hardware and software described in
 CPU only:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp36-cp36m-linux_x86_64.whl
 </pre>
 
 
 GPU support:
 
 <pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp36-cp36m-linux_x86_64.whl
 </pre>
 
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md
index 0906b550086c8e..29a867a9e300b7 100644
--- a/tensorflow/docs_src/install/install_mac.md
+++ b/tensorflow/docs_src/install/install_mac.md
@@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv:
      TensorFlow in the active Virtualenv is as follows:
 
      <pre> $ <b>pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl</b></pre>
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl</b></pre>
 
 If you encounter installation problems, see
 [Common Installation Problems](#common-installation-problems).
@@ -242,7 +242,7 @@ take the following steps:
      issue the following command:
 
      <pre> $ <b>sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl</b> </pre>
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl</b> </pre>
 
      If the preceding command fails, see
      [installation problems](#common-installation-problems).
@@ -350,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      TensorFlow for Python 2.7:
 
      <pre> (<i>targetDirectory</i>)$ <b>pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py2-none-any.whl</b></pre>
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py2-none-any.whl</b></pre>
 
 
 <a name="ValidateYourInstallation"></a>
@@ -522,7 +522,7 @@ The value you specify depends on your Python version.
 
 
 <pre>
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py2-none-any.whl
 </pre>
 
 
@@ -530,5 +530,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py2-none-a
 
 
 <pre>
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl
 </pre>
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md
index 8bbdf013ca4a7d..5ba522b436137b 100644
--- a/tensorflow/docs_src/install/install_sources.md
+++ b/tensorflow/docs_src/install/install_sources.md
@@ -328,10 +328,10 @@ Invoke `pip install` to install that pip package.
 The filename of the `.whl` file depends on your platform.
 For example, the following command will install the pip package
 
-for TensorFlow 1.8.0rc1 on Linux:
+for TensorFlow 1.8.0 on Linux:
 
 <pre>
-$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.8.0rc1-py2-none-any.whl</b>
+$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.8.0-py2-none-any.whl</b>
 </pre>
 
 ## Validate your installation
diff --git a/tensorflow/docs_src/mobile/mobile_intro.md b/tensorflow/docs_src/mobile/mobile_intro.md
index 1b0b9b44b469af..241f01d460ae35 100644
--- a/tensorflow/docs_src/mobile/mobile_intro.md
+++ b/tensorflow/docs_src/mobile/mobile_intro.md
@@ -212,7 +212,7 @@ handle the task then it will be difficult to train a computer to do better.
 
 After you’ve solved any fundamental issues with your use case, you need to
 create a labeled dataset to define what problem you’re trying to solve. This
-step is extremely important, moreso than picking which model to use. You want it
+step is extremely important, more so than picking which model to use. You want it
 to be as representative as possible of your actual use case, since the model
 will only be effective at the task you teach it. It’s also worth investing in
 tools to make labeling the data as efficient and accurate as possible. For
diff --git a/tensorflow/docs_src/mobile/tflite/index.md b/tensorflow/docs_src/mobile/tflite/index.md
index 01881ccf3bb15b..56220348276399 100644
--- a/tensorflow/docs_src/mobile/tflite/index.md
+++ b/tensorflow/docs_src/mobile/tflite/index.md
@@ -155,7 +155,7 @@ retraining for both floating point and quantized inference.
 
 The following diagram shows the architectural design of TensorFlow Lite:
 
-<img src="/images/tflite-architecture.jpg"
+<img src="https://www.tensorflow.org/images/tflite-architecture.jpg"
      alt="TensorFlow Lite architecture diagram"
      style="max-width:600px;">
 
diff --git a/tensorflow/docs_src/programmers_guide/faq.md b/tensorflow/docs_src/programmers_guide/faq.md
index 51c1a1e032baae..b6291a9fface40 100644
--- a/tensorflow/docs_src/programmers_guide/faq.md
+++ b/tensorflow/docs_src/programmers_guide/faq.md
@@ -72,7 +72,7 @@ tensors in the execution of a step.
 
 If `t` is a @{tf.Tensor} object,
 @{tf.Tensor.eval} is shorthand for
-@{tf.Session.run} (where `sess` is the
+@{tf.Session.run}, where `sess` is the
 current @{tf.get_default_session}. The
 two following snippets of code are equivalent:
 
@@ -101,9 +101,8 @@ sessions, it may be more straightforward to make explicit calls to
 Sessions can own resources, such as
 @{tf.Variable},
 @{tf.QueueBase}, and
-@{tf.ReaderBase}; and these resources can use
-a significant amount of memory. These resources (and the associated memory) are
-released when the session is closed, by calling
+@{tf.ReaderBase}. These resources can sometimes use
+a significant amount of memory, and can be released when the session is closed by calling
 @{tf.Session.close}.
 
 The intermediate tensors that are created as part of a call to
@@ -137,7 +136,7 @@ TensorFlow also has a
 to help build support for more client languages.  We invite contributions of new
 language bindings.
 
-Bindings for various other languages (such as [C#](https://github.com/migueldeicaza/TensorFlowSharp), [Julia](https://github.com/malmaud/TensorFlow.jl), [Ruby](https://github.com/somaticio/tensorflow.rb) and [Scala](https://github.com/eaplatanios/tensorflow_scala)) created and supported by the opensource community build on top of the C API supported by the TensorFlow maintainers.
+Bindings for various other languages (such as [C#](https://github.com/migueldeicaza/TensorFlowSharp), [Julia](https://github.com/malmaud/TensorFlow.jl), [Ruby](https://github.com/somaticio/tensorflow.rb) and [Scala](https://github.com/eaplatanios/tensorflow_scala)) created and supported by the open source community build on top of the C API supported by the TensorFlow maintainers.
 
 #### Does TensorFlow make use of all the devices (GPUs and CPUs) available on my machine?
 
@@ -210,8 +209,8 @@ a new tensor with a different dynamic shape.
 
 #### How do I build a graph that works with variable batch sizes?
 
-It is often useful to build a graph that works with variable batch sizes, for
-example so that the same code can be used for (mini-)batch training, and
+It is often useful to build a graph that works with variable batch sizes
+so that the same code can be used for (mini-)batch training, and
 single-instance inference. The resulting graph can be
 @{tf.Graph.as_graph_def$saved as a protocol buffer}
 and
@@ -260,7 +259,7 @@ See the how-to documentation for
 There are three main options for dealing with data in a custom format.
 
 The easiest option is to write parsing code in Python that transforms the data
-into a numpy array. Then use @{tf.data.Dataset.from_tensor_slices} to
+into a numpy array. Then, use @{tf.data.Dataset.from_tensor_slices} to
 create an input pipeline from the in-memory data.
 
 If your data doesn't fit in memory, try doing the parsing in the Dataset
@@ -274,7 +273,7 @@ If your data is not easily parsable with the built-in TensorFlow operations,
 consider converting it, offline, to a format that is easily parsable, such
 as @{tf.python_io.TFRecordWriter$`TFRecord`} format.
 
-The more efficient method to customize the parsing behavior is to
+The most efficient method to customize the parsing behavior is to
 @{$adding_an_op$add a new op written in C++} that parses your
 data format. The @{$new_data_formats$guide to handling new data formats} has
 more information about the steps for doing this.
diff --git a/tensorflow/docs_src/programmers_guide/tensors.md b/tensorflow/docs_src/programmers_guide/tensors.md
index 58a80d533927e4..1248c3cabe23c8 100644
--- a/tensorflow/docs_src/programmers_guide/tensors.md
+++ b/tensorflow/docs_src/programmers_guide/tensors.md
@@ -265,7 +265,7 @@ example:
 ```python
 constant = tf.constant([1, 2, 3])
 tensor = constant * constant
-print tensor.eval()
+print(tensor.eval())
 ```
 
 The `eval` method only works when a default `tf.Session` is active (see
@@ -306,8 +306,8 @@ Note that you rarely want to use the following pattern when printing a
 
 ``` python
 t = <<some tensorflow operation>>
-print t  # This will print the symbolic tensor when the graph is being built.
-         # This tensor does not have a value in this context.
+print(t)  # This will print the symbolic tensor when the graph is being built.
+          # This tensor does not have a value in this context.
 ```
 
 This code prints the `tf.Tensor` object (which represents deferred computation)
diff --git a/tensorflow/docs_src/programmers_guide/variables.md b/tensorflow/docs_src/programmers_guide/variables.md
index e8cf7711552f4c..cd8c4b5b9a026f 100644
--- a/tensorflow/docs_src/programmers_guide/variables.md
+++ b/tensorflow/docs_src/programmers_guide/variables.md
@@ -237,7 +237,7 @@ TensorFlow supports two ways of sharing variables:
 While code which explicitly passes variables around is very clear, it is
 sometimes convenient to write TensorFlow functions that implicitly use
 variables in their implementations. Most of the functional layers from
-`tf.layer` use this approach, as well as all `tf.metrics`, and a few other
+`tf.layers` use this approach, as well as all `tf.metrics`, and a few other
 library utilities.
 
 Variable scopes allow you to control variable reuse when calling functions which
diff --git a/tensorflow/docs_src/tutorials/layers.md b/tensorflow/docs_src/tutorials/layers.md
index ead5a636b99a02..0f17899dae7ccd 100644
--- a/tensorflow/docs_src/tutorials/layers.md
+++ b/tensorflow/docs_src/tutorials/layers.md
@@ -209,7 +209,6 @@ for two-dimensional image data expect input tensors to have a shape of
 *   _`channels`_. Number of color channels in the example images. For color
     images, the number of channels is 3 (red, green, blue). For monochrome
     images, there is just 1 channel (black).
-*   _`image_height`_. Height of the example images.
 *   _`data_format`_. A string, one of `channels_last` (default) or `channels_first`.
       `channels_last` corresponds to inputs with shape
       `(batch, ..., channels)` while `channels_first` corresponds to
diff --git a/tensorflow/examples/learn/text_classification_cnn.py b/tensorflow/examples/learn/text_classification_cnn.py
index 9e21aee87f6298..a40a9eaecbd9bb 100644
--- a/tensorflow/examples/learn/text_classification_cnn.py
+++ b/tensorflow/examples/learn/text_classification_cnn.py
@@ -73,7 +73,7 @@ def cnn_model(features, labels, mode):
         kernel_size=FILTER_SHAPE2,
         padding='VALID')
     # Max across each filter to get useful features for classification.
-    pool2 = tf.squeeze(tf.reduce_max(conv2, 1), squeeze_dims=[1])
+    pool2 = tf.squeeze(tf.reduce_max(conv2, 1), axis=[1])
 
   # Apply regular WX + B and classification.
   logits = tf.layers.dense(pool2, MAX_LABEL, activation=None)
diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index a503b3b00af71f..36db3dda6bcf07 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -21321,7 +21321,7 @@ func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr {
 //    generated sequentially as '*tag*/image/0', '*tag*/image/1', etc.
 //
 // The `bad_color` argument is the color to use in the generated images for
-// non-finite input values.  It is a `unit8` 1-D tensor of length `channels`.
+// non-finite input values.  It is a `uint8` 1-D tensor of length `channels`.
 // Each element must be in the range `[0, 255]` (It represents the value of a
 // pixel in the output image).  Non-finite values in the input tensor are
 // replaced by this tensor in the output image.  The default value is the color
diff --git a/tensorflow/python/data/util/nest.py b/tensorflow/python/data/util/nest.py
index 9af2e9b8b62abe..32e08021dc80d1 100644
--- a/tensorflow/python/data/util/nest.py
+++ b/tensorflow/python/data/util/nest.py
@@ -103,7 +103,7 @@ def is_sequence(seq):
   NOTE(mrry): This differs from `tensorflow.python.util.nest.is_sequence()`,
   which *does* treat a Python list as a sequence. For ergonomic
   reasons, `tf.data` users would prefer to treat lists as
-  implict `tf.Tensor` objects, and dicts as (nested) sequences.
+  implicit `tf.Tensor` objects, and dicts as (nested) sequences.
 
   Args:
     seq: an input sequence.
diff --git a/tensorflow/python/debug/cli/curses_ui.py b/tensorflow/python/debug/cli/curses_ui.py
index f66cefb427c9cc..7b87972d694981 100644
--- a/tensorflow/python/debug/cli/curses_ui.py
+++ b/tensorflow/python/debug/cli/curses_ui.py
@@ -190,8 +190,6 @@ def layout(self):
     return layout
 
   def get_click_command(self, mouse_y):
-    # TODO(cais): Support continuous scrolling when the mouse button is held
-    # down.
     if self._output_num_rows <= 1:
       return None
     elif mouse_y == self._min_y:
@@ -271,6 +269,10 @@ class CursesUI(base_ui.BaseUI):
 
   _UI_WAIT_MESSAGE = "Processing..."
 
+  # The delay (in ms) between each update of the scroll bar when the mouse
+  # button is held down on the scroll bar. Controls how fast the screen scrolls.
+  _MOUSE_SCROLL_DELAY_MS = 100
+
   _single_instance_lock = threading.Lock()
 
   def __init__(self, on_ui_exit=None, config=None):
@@ -855,7 +857,30 @@ def _on_textbox_keypress(self, x):
       except curses.error:
         mouse_event_type = None
 
-      if mouse_event_type == curses.BUTTON1_RELEASED:
+      if mouse_event_type == curses.BUTTON1_PRESSED:
+        # Logic for held mouse-triggered scrolling.
+        if mouse_x >= self._max_x - 2:
+          # Disable blocking on checking for user input.
+          self._command_window.nodelay(True)
+
+          # Loop while mouse button is pressed.
+          while mouse_event_type == curses.BUTTON1_PRESSED:
+            # Sleep for a bit.
+            curses.napms(self._MOUSE_SCROLL_DELAY_MS)
+            scroll_command = self._scroll_bar.get_click_command(mouse_y)
+            if scroll_command in (_SCROLL_UP_A_LINE, _SCROLL_DOWN_A_LINE):
+              self._scroll_output(scroll_command)
+
+            # Check to see if different mouse event is in queue.
+            self._command_window.getch()
+            try:
+              _, _, _, _, mouse_event_type = self._screen_getmouse()
+            except curses.error:
+              pass
+
+          self._command_window.nodelay(False)
+          return x
+      elif mouse_event_type == curses.BUTTON1_RELEASED:
         # Logic for mouse-triggered scrolling.
         if mouse_x >= self._max_x - 2:
           scroll_command = self._scroll_bar.get_click_command(mouse_y)
@@ -1677,4 +1702,7 @@ def _set_mouse_enabled(self, enabled):
       self._redraw_output()
 
   def _screen_set_mousemask(self):
-    curses.mousemask(self._mouse_enabled)
+    if self._mouse_enabled:
+      curses.mousemask(curses.BUTTON1_RELEASED | curses.BUTTON1_PRESSED)
+    else:
+      curses.mousemask(0)
diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index 9b4b8666974aff..347a760333a1bd 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -1163,7 +1163,7 @@ def _train_model_distributed(self, input_fn, hooks, saving_listeners):
             model_fn_lib.ModeKeys.TRAIN,
             self.config)
 
-        # TODO(anjalisridhar): Figure out how to resolve the folowing scaffold
+        # TODO(anjalisridhar): Figure out how to resolve the following scaffold
         # parameters: init_feed_dict, init_fn.
         scaffold_list = self._distribution.unwrap(
             grouped_estimator_spec.scaffold)
diff --git a/tensorflow/python/estimator/inputs/queues/feeding_functions.py b/tensorflow/python/estimator/inputs/queues/feeding_functions.py
index 8e5d8141a1a15d..8e2ec83020abc5 100644
--- a/tensorflow/python/estimator/inputs/queues/feeding_functions.py
+++ b/tensorflow/python/estimator/inputs/queues/feeding_functions.py
@@ -52,7 +52,7 @@ def _fill_array(arr, seq, fillvalue=0):
   If length of seq is less than arr padded length, fillvalue used.
   Args:
     arr: Padded tensor of shape [batch_size, ..., max_padded_dim_len].
-    seq: Non-padded list of data sampels of shape
+    seq: Non-padded list of data samples of shape
       [batch_size, ..., padded_dim(None)]
     fillvalue: Default fillvalue to use.
   """
diff --git a/tensorflow/python/estimator/keras.py b/tensorflow/python/estimator/keras.py
index 9961fa74c2184d..7bcf3d84bb4218 100644
--- a/tensorflow/python/estimator/keras.py
+++ b/tensorflow/python/estimator/keras.py
@@ -74,7 +74,7 @@ def _any_variable_initalized():
   """Check if any variable has been initialized in the Keras model.
 
   Returns:
-    boolean, True if at least one variable has been initalized, else False.
+    boolean, True if at least one variable has been initialized, else False.
   """
   variables = variables_module.global_variables()
   for v in variables:
diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py
index 08fff3ba647bb5..522662cd328d7b 100644
--- a/tensorflow/python/estimator/training.py
+++ b/tensorflow/python/estimator/training.py
@@ -597,7 +597,7 @@ def after_save(self, session, global_step_value):
     # max_steps, the evaluator will send the final export signal. There is a
     # small chance that the Estimator.train stopping logic sees a different
     # global_step value (due to global step race condition and the fact the
-    # saver sees a larger value for checkpoing saving), which does not end
+    # saver sees a larger value for checkpoint saving), which does not end
     # the training. When the training ends, a new checkpoint is generated, which
     # triggers the listener again. So, it could be the case the final export is
     # triggered twice.
diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py
index ede6e0d15957da..ffcb9990d52c2e 100644
--- a/tensorflow/python/feature_column/feature_column.py
+++ b/tensorflow/python/feature_column/feature_column.py
@@ -48,7 +48,7 @@
 
       embedded_dept_column = embedding_column(
           categorical_column_with_vocabulary_list(
-              "department", ["math", "philosphy", ...]), dimension=10)
+              "department", ["math", "philosophy", ...]), dimension=10)
 
   * Wide (aka linear) models (`LinearClassifier`, `LinearRegressor`).
 
@@ -280,7 +280,7 @@ def input_layer(features,
 # TODO(akshayka): InputLayer should be a subclass of Layer, and it
 # should implement the logic in input_layer using Layer's build-and-call
 # paradigm; input_layer should create an instance of InputLayer and
-# return the result of inovking its apply method, just as functional layers do.
+# return the result of invoking its apply method, just as functional layers do.
 class InputLayer(object):
   """An object-oriented version of `input_layer` that reuses variables."""
 
@@ -834,7 +834,7 @@ def shared_embedding_columns(
     tensor_name_in_ckpt=None, max_norm=None, trainable=True):
   """List of dense columns that convert from sparse, categorical input.
 
-  This is similar to `embedding_column`, except that that it produces a list of
+  This is similar to `embedding_column`, except that it produces a list of
   embedding columns that share the same embedding weights.
 
   Use this when your inputs are sparse and of the same type (e.g. watched and
diff --git a/tensorflow/python/framework/fast_tensor_util.pyx b/tensorflow/python/framework/fast_tensor_util.pyx
index 19928314efe143..17d112a1ece9ae 100644
--- a/tensorflow/python/framework/fast_tensor_util.pyx
+++ b/tensorflow/python/framework/fast_tensor_util.pyx
@@ -7,6 +7,18 @@ cimport numpy as np
 from tensorflow.python.util import compat
 
 
+def AppendFloat16ArrayToTensorProto(
+    # For numpy, npy_half is a typedef for npy_uint16,
+    # see: https://github.com/numpy/numpy/blob/master/doc/source/reference/c-api.coremath.rst#half-precision-functions
+    # Because np.float16_t doesn't exist in Cython, we use uint16_t here.
+    # TODO: Use np.float16_t when cython supports it.
+    tensor_proto, np.ndarray[np.uint16_t, ndim=1] nparray):
+  cdef long i, n
+  n = nparray.size
+  for i in range(n):
+    tensor_proto.half_val.append(nparray[i])
+
+
 def AppendFloat32ArrayToTensorProto(
     tensor_proto, np.ndarray[np.float32_t, ndim=1] nparray):
   cdef long i, n
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 80140e4063d9a4..9fc813634838b6 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -2582,7 +2582,7 @@ def set_shape_and_handle_data_for_outputs(op):
 
   When _USE_C_API = True, this is lazily called when a tensor's shape is first
   requested. Usually this should work automatically, but some edge cases may
-  require manaully calling this first to make sure Tensor._shape_val and
+  require manually calling this first to make sure Tensor._shape_val and
   Tensor._handle_data are set (e.g. manually overriding _handle_data, copying a
   Tensor).
   """
@@ -5426,36 +5426,30 @@ def enable_eager_execution(config=None, device_policy=None,
       in which operations are executed. Note that @{tf.ConfigProto} is also
       used to configure graph execution (via @{tf.Session}) and many options
       within `tf.ConfigProto` are not implemented (or are irrelevant) when
-     eager execution is enabled.
+      eager execution is enabled.
     device_policy: (Optional.) Policy controlling how operations requiring
-     inputs on a specific device (e.g., a GPU 0) handle inputs on a different
-     device  (e.g. GPU 1 or CPU). When set to None, an appropriate value will be
-     picked automatically. The value picked may change between TensorFlow
-     releases.
-     Valid values:
-
+      inputs on a specific device (e.g., GPU 0) handle inputs on a different
+      device (e.g., GPU 1 or CPU). When set to None, an appropriate value will
+      be picked automatically. The value picked may change between TensorFlow
+      releases.
+      Valid values:
       - tf.contrib.eager.DEVICE_PLACEMENT_EXPLICIT: raises an error if the
         placement is not correct.
-
       - tf.contrib.eager.DEVICE_PLACEMENT_WARN: copies the tensors which are not
         on the right device but logs a warning.
-
       - tf.contrib.eager.DEVICE_PLACEMENT_SILENT: silently copies the tensors.
         Note that this may hide performance problems as there is no notification
         provided when operations are blocked on the tensor being copied between
         devices.
-
       - tf.contrib.eager.DEVICE_PLACEMENT_SILENT_FOR_INT32: silently copies
         int32 tensors, raising errors on the other ones.
     execution_mode: (Optional.) Policy controlling how operations dispatched are
       actually executed. When set to None, an appropriate value will be picked
       automatically. The value picked may change between TensorFlow releases.
       Valid values:
-
-        - tf.contrib.eager.SYNC: executes each operation synchronously.
-
-        - tf.contrib.eager.ASYNC: executes each operation asynchronously. These
-          operations may return "non-ready" handles.
+      - tf.contrib.eager.SYNC: executes each operation synchronously.
+      - tf.contrib.eager.ASYNC: executes each operation asynchronously. These
+        operations may return "non-ready" handles.
 
   Raises:
     ValueError: If eager execution is enabled after creating/executing a
diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py
index 8cf24206edab8b..ca63efbc84dab2 100644
--- a/tensorflow/python/framework/tensor_util.py
+++ b/tensorflow/python/framework/tensor_util.py
@@ -50,6 +50,13 @@ def SlowAppendFloat16ArrayToTensorProto(tensor_proto, proto_values):
       [ExtractBitsFromFloat16(x) for x in proto_values])
 
 
+def _MediumAppendFloat16ArrayToTensorProto(tensor_proto, proto_values):
+  # TODO: Remove the conversion if cython supports np.float16_t
+  fast_tensor_util.AppendFloat16ArrayToTensorProto(
+      tensor_proto,
+      np.asarray(proto_values, dtype=np.float16).view(np.uint16))
+
+
 def ExtractBitsFromBFloat16(x):
   return np.asscalar(
       np.asarray(x, dtype=dtypes.bfloat16.as_numpy_dtype).view(np.uint16))
@@ -64,11 +71,8 @@ def SlowAppendBFloat16ArrayToTensorProto(tensor_proto, proto_values):
   _NP_TO_APPEND_FN = {
       dtypes.bfloat16.as_numpy_dtype:
           SlowAppendBFloat16ArrayToTensorProto,
-      # TODO(sesse): We should have a
-      # fast_tensor_util.AppendFloat16ArrayToTensorProto,
-      # but it seems np.float16_t doesn't exist?
       np.float16:
-          SlowAppendFloat16ArrayToTensorProto,
+          _MediumAppendFloat16ArrayToTensorProto,
       np.float32:
           fast_tensor_util.AppendFloat32ArrayToTensorProto,
       np.float64:
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 97cd22e47a0959..5b01df48fea19f 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -682,7 +682,7 @@ def test_foo(self):
 
 
   Args:
-    __unused__: Prevents sliently skipping tests.
+    __unused__: Prevents silently skipping tests.
     config: An optional config_pb2.ConfigProto to use to configure the
       session when executing graphs.
     use_gpu: If True, attempt to run as many operations as possible on GPU.
diff --git a/tensorflow/python/keras/utils/__init__.py b/tensorflow/python/keras/utils/__init__.py
index 7b5eecc153fb19..69337b6a8d52ab 100644
--- a/tensorflow/python/keras/utils/__init__.py
+++ b/tensorflow/python/keras/utils/__init__.py
@@ -20,6 +20,7 @@
 
 from tensorflow.python.keras.utils.data_utils import GeneratorEnqueuer
 from tensorflow.python.keras.utils.data_utils import get_file
+from tensorflow.python.keras.utils.data_utils import OrderedEnqueuer
 from tensorflow.python.keras.utils.data_utils import Sequence
 from tensorflow.python.keras.utils.data_utils import SequenceEnqueuer
 from tensorflow.python.keras.utils.generic_utils import custom_object_scope
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 72cc357c71dbdb..3dfad9c130ca12 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -741,6 +741,18 @@ tf_py_test(
     ],
 )
 
+tf_py_test(
+    name = "regex_full_match_op_test",
+    size = "small",
+    srcs = ["regex_full_match_op_test.py"],
+    additional_deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:string_ops",
+    ],
+)
+
 tf_py_test(
     name = "save_restore_ops_test",
     size = "small",
diff --git a/tensorflow/python/kernel_tests/conv1d_test.py b/tensorflow/python/kernel_tests/conv1d_test.py
index e2e6205911caa0..fcba456004407b 100644
--- a/tensorflow/python/kernel_tests/conv1d_test.py
+++ b/tensorflow/python/kernel_tests/conv1d_test.py
@@ -31,9 +31,7 @@ class Conv1DTest(test.TestCase):
 
   def testBasic(self):
     """Test that argument passing to conv1d is handled properly."""
-    # TODO(yongtang): dtypes.float64 can only be enabled once conv2d support
-    # dtypes.float64, as conv1d implicitly calls conv2d after expand_dims.
-    for dtype in [dtypes.float16, dtypes.float32]:
+    for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
       x = constant_op.constant([1, 2, 3, 4], dtype=dtype)
       x = array_ops.expand_dims(x, 0)  # Add batch dimension
       x = array_ops.expand_dims(x, 2)  # And depth dimension
diff --git a/tensorflow/python/kernel_tests/conv3d_transpose_test.py b/tensorflow/python/kernel_tests/conv3d_transpose_test.py
index 8973a450fa246e..289ae29fcec724 100644
--- a/tensorflow/python/kernel_tests/conv3d_transpose_test.py
+++ b/tensorflow/python/kernel_tests/conv3d_transpose_test.py
@@ -131,6 +131,23 @@ def testConv3DTransposeShapeMismatch(self):
     nn_ops.conv3d_transpose(
         x_value, f_value, y_shape, strides, data_format='NCDHW')
 
+  def testConv3DTransposeOutputShapeType(self):
+    # Test case for GitHub issue 18887
+    for dtype in [dtypes.int32, dtypes.int64]:
+      with self.test_session():
+        x_shape = [2, 5, 6, 4, 3]
+        y_shape = [2, 5, 6, 4, 2]
+        f_shape = [3, 3, 3, 2, 3]
+        strides = [1, 1, 1, 1, 1]
+        x_value = constant_op.constant(
+            1.0, shape=x_shape, name="x", dtype=dtypes.float32)
+        f_value = constant_op.constant(
+            1.0, shape=f_shape, name="filter", dtype=dtypes.float32)
+        output = nn_ops.conv3d_transpose(
+            x_value, f_value, constant_op.constant(y_shape, dtype=dtype),
+            strides=strides, padding="SAME")
+        output.eval()
+
   def testConv3DTransposeValid(self):
     with self.test_session():
       strides = [1, 2, 2, 2, 1]
diff --git a/tensorflow/python/kernel_tests/distributions/util_test.py b/tensorflow/python/kernel_tests/distributions/util_test.py
index 8e5556d0a0421d..63d19c15cfdc8c 100644
--- a/tensorflow/python/kernel_tests/distributions/util_test.py
+++ b/tensorflow/python/kernel_tests/distributions/util_test.py
@@ -735,7 +735,7 @@ def _fill_triangular(self, x, upper=False):
       raise ValueError("Invalid shape.")
     n = np.int32(n)
     # We can't do: `x[..., -(n**2-m):]` because this doesn't correctly handle
-    # `m == n == 1`. Hence, we do absoulte indexing.
+    # `m == n == 1`. Hence, we do absolute indexing.
     x_tail = x[..., (m - (n * n - m)):]
     y = np.concatenate(
         [x, x_tail[..., ::-1]] if upper else [x_tail, x[..., ::-1]],
diff --git a/tensorflow/python/kernel_tests/manip_ops_test.py b/tensorflow/python/kernel_tests/manip_ops_test.py
index f31426713c49ba..dc3ea386714c98 100644
--- a/tensorflow/python/kernel_tests/manip_ops_test.py
+++ b/tensorflow/python/kernel_tests/manip_ops_test.py
@@ -93,7 +93,7 @@ def testComplexTypes(self):
   def testNegativeAxis(self):
     self._testAll(np.random.randint(-100, 100, (5)).astype(np.int32), 3, -1)
     self._testAll(np.random.randint(-100, 100, (4, 4)).astype(np.int32), 3, -2)
-    # Make sure negative axis shoudl be 0 <= axis + dims < dims
+    # Make sure negative axis should be 0 <= axis + dims < dims
     with self.test_session():
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                    "is out of range"):
diff --git a/tensorflow/python/kernel_tests/regex_full_match_op_test.py b/tensorflow/python/kernel_tests/regex_full_match_op_test.py
new file mode 100644
index 00000000000000..5daae1b79bf493
--- /dev/null
+++ b/tensorflow/python/kernel_tests/regex_full_match_op_test.py
@@ -0,0 +1,54 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for RegexFullMatch op from string_ops."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import string_ops
+from tensorflow.python.platform import test
+
+
+class RegexFullMatchOpTest(test.TestCase):
+
+  def testRegexFullMatch(self):
+    values = ["abaaba", "abcdabcde"]
+    with self.test_session():
+      input_vector = constant_op.constant(values, dtypes.string)
+      matched = string_ops.regex_full_match(input_vector, "a.*a").eval()
+      self.assertAllEqual([True, False], matched)
+
+  def testEmptyMatch(self):
+    values = ["abc", "1"]
+    with self.test_session():
+      input_vector = constant_op.constant(values, dtypes.string)
+      matched = string_ops.regex_full_match(input_vector, "").eval()
+      self.assertAllEqual([False, False], matched)
+
+  def testInvalidPattern(self):
+    values = ["abc", "1"]
+    with self.test_session():
+      input_vector = constant_op.constant(values, dtypes.string)
+      invalid_pattern = "A["
+      matched = string_ops.regex_full_match(input_vector, invalid_pattern)
+      with self.assertRaisesOpError("Invalid pattern"):
+        matched.eval()
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
index 3bca5fadc42693..794be096b7309a 100644
--- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
+++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
@@ -91,16 +91,18 @@ def testValues(self):
     ]
 
     # Each item is np_op1, np_op2, tf_op
-    ops_list = [(np.add, None, math_ops.segment_sum), (self._mean_cum_op,
-                                                       self._mean_reduce_op,
-                                                       math_ops.segment_mean),
+    ops_list = [(np.add, None, math_ops.segment_sum),
+                (self._mean_cum_op, self._mean_reduce_op,
+                 math_ops.segment_mean),
                 (np.ndarray.__mul__, None, math_ops.segment_prod),
                 (np.minimum, None, math_ops.segment_min),
                 (np.maximum, None, math_ops.segment_max)]
 
     # A subset of ops has been enabled for complex numbers
     complex_ops_list = [(np.add, None, math_ops.segment_sum),
-                        (np.ndarray.__mul__, None, math_ops.segment_prod)]
+                        (np.ndarray.__mul__, None, math_ops.segment_prod),
+                        (self._mean_cum_op, self._mean_reduce_op,
+                         math_ops.segment_mean)]
 
     n = 10
     shape = [n, 2]
diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py
index 340c34fc5e6bf0..eda036ece4a7d7 100644
--- a/tensorflow/python/layers/base.py
+++ b/tensorflow/python/layers/base.py
@@ -191,6 +191,16 @@ def add_weight(self, name, shape, dtype=None,
       RuntimeError: If called with partioned variable regularization and
         eager execution is enabled.
     """
+    
+    def _should_add_regularizer(variable, existing_variable_set):
+      if isinstance(variable, tf_variables.PartitionedVariable):
+        for var in variable:
+          if var in existing_variable_set:
+            return False
+        return True
+      else:
+        return variable not in existing_variable_set
+
     init_graph = None
     if not context.executing_eagerly():
       default_graph = ops.get_default_graph()
@@ -233,7 +243,8 @@ def add_weight(self, name, shape, dtype=None,
             getter=vs.get_variable)
 
         if regularizer:
-          if context.executing_eagerly() or variable not in existing_variables:
+          if context.executing_eagerly() or _should_add_regularizer(
+              variable, existing_variables):
             self._handle_weight_regularization(name, variable, regularizer)
 
         if init_graph is not None:
@@ -353,4 +364,3 @@ def _add_elements_to_collection(elements, collection_list):
     for element in elements:
       if element not in collection_set:
         collection.append(element)
-
diff --git a/tensorflow/python/layers/base_test.py b/tensorflow/python/layers/base_test.py
index f08b552840f5ff..ab49e37b90e183 100644
--- a/tensorflow/python/layers/base_test.py
+++ b/tensorflow/python/layers/base_test.py
@@ -30,6 +30,7 @@
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import partitioned_variables
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
@@ -95,6 +96,21 @@ def testAddWeight(self):
           regularizer=regularizer)
       self.assertEqual(len(layer.losses), 1)
 
+  def testReusePartitionedVaraiblesAndRegularizers(self):
+    regularizer = lambda x: math_ops.reduce_sum(x) * 1e-3
+    partitioner = partitioned_variables.fixed_size_partitioner(3)
+    for reuse in [False, True]:
+      with variable_scope.variable_scope(variable_scope.get_variable_scope(),
+                                         partitioner=partitioner,
+                                         reuse=reuse):
+        layer = base_layers.Layer(name='my_layer')
+        variable = layer.add_variable(
+            'reg_part_var', [4, 4],
+            initializer=init_ops.zeros_initializer(),
+            regularizer=regularizer)
+    self.assertEqual(
+        len(ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)), 3)
+
   def testNoEagerActivityRegularizer(self):
     with context.eager_mode():
       with self.assertRaisesRegexp(ValueError, 'activity_regularizer'):
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index ab5997e85c6030..3a31ef7f881490 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -1285,7 +1285,7 @@ def reduce_sum(input_tensor,
     The reduced tensor, of the same dtype as the input_tensor.
 
   @compatibility(numpy)
-  Equivalent to np.sum appart the fact that numpy upcast uint8 and int32 to
+  Equivalent to np.sum apart from the fact that numpy upcasts uint8 and int32 to
   int64 while tensorflow returns the same dtype as the input.
   @end_compatibility
   """
diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py
index 1271ee5108fc47..ae79c0194954a0 100644
--- a/tensorflow/python/ops/string_ops.py
+++ b/tensorflow/python/ops/string_ops.py
@@ -39,6 +39,8 @@
 from tensorflow.python.util.tf_export import tf_export
 # pylint: enable=wildcard-import
 
+# Expose regex_full_match in strings namespace
+tf_export("strings.regex_full_match")(regex_full_match)
 
 @tf_export("string_split")
 def string_split(source, delimiter=" ", skip_empty=True):  # pylint: disable=invalid-name
diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py
index 75580fc6308345..9e49188c1ef353 100644
--- a/tensorflow/python/profiler/model_analyzer_test.py
+++ b/tensorflow/python/profiler/model_analyzer_test.py
@@ -232,7 +232,12 @@ def testComplexCodeView(self):
 
         self.assertLess(0, tfprof_node.total_exec_micros)
         self.assertEqual(2844, tfprof_node.total_parameters)
-        self.assertLess(145660, tfprof_node.total_float_ops)
+        # The graph is modified when MKL is enabled, so total_float_ops will
+        # be different.
+        if test_util.IsMklEnabled():
+          self.assertLess(101600, tfprof_node.total_float_ops)
+        else:
+          self.assertLess(145660, tfprof_node.total_float_ops)
         self.assertEqual(8, len(tfprof_node.children))
         self.assertEqual('_TFProfRoot', tfprof_node.name)
         self.assertEqual(
diff --git a/tensorflow/python/saved_model/builder_impl.py b/tensorflow/python/saved_model/builder_impl.py
index 4b3982677fbd85..24a13c0f336aa9 100644
--- a/tensorflow/python/saved_model/builder_impl.py
+++ b/tensorflow/python/saved_model/builder_impl.py
@@ -130,7 +130,8 @@ def _save_and_write_assets(self, assets_collection_to_add=None):
       if not file_io.file_exists(asset_destination_filepath):
         file_io.copy(asset_source_filepath, asset_destination_filepath)
 
-    tf_logging.info("Assets written to: %s", assets_destination_dir)
+    tf_logging.info("Assets written to: %s",
+                    compat.as_text(assets_destination_dir))
 
   def _maybe_add_legacy_init_op(self, legacy_init_op=None):
     """Add legacy init op to the SavedModel.
@@ -461,7 +462,7 @@ def save(self, as_text=False):
           compat.as_bytes(self._export_dir),
           compat.as_bytes(constants.SAVED_MODEL_FILENAME_PB))
       file_io.write_string_to_file(path, self._saved_model.SerializeToString())
-    tf_logging.info("SavedModel written to: %s", path)
+    tf_logging.info("SavedModel written to: %s", compat.as_text(path))
 
     return path
 
diff --git a/tensorflow/python/training/distribute.py b/tensorflow/python/training/distribute.py
index 6d05a2ee29ada7..ab8b37bb655bfc 100644
--- a/tensorflow/python/training/distribute.py
+++ b/tensorflow/python/training/distribute.py
@@ -750,7 +750,7 @@ def call_for_each_tower(self, fn, *args, **kwargs):
     `fn` may call `tf.get_tower_context()` to access methods such as
     `tower_id()` and `merge_call()`.
 
-    `merge_call()` is used to communicate betwen the towers and
+    `merge_call()` is used to communicate between the towers and
     re-enter the cross-tower context. All towers pause their execution
     having encountered a `merge_call()` call. After that the
     `merge_fn`-function is executed. Its results are then unwrapped and
diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py
index fc89f88063e4b9..4d464135fd0333 100644
--- a/tensorflow/python/training/saver.py
+++ b/tensorflow/python/training/saver.py
@@ -1743,7 +1743,7 @@ def restore(self, sess, save_path):
       return
     if save_path is None:
       raise ValueError("Can't load save_path when it is None.")
-    logging.info("Restoring parameters from %s", save_path)
+    logging.info("Restoring parameters from %s", compat.as_text(save_path))
     try:
       if context.executing_eagerly():
         self._build_eager(save_path, build_save=False, build_restore=True)
diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py
index 5faf644c91e91b..fbd65617670b15 100644
--- a/tensorflow/python/util/tf_inspect.py
+++ b/tensorflow/python/util/tf_inspect.py
@@ -232,7 +232,7 @@ def getcallargs(func, *positional, **named):
   it. If no attached decorators modify argspec, the final unwrapped target's
   argspec will be used.
   """
-  argspec = getargspec(func)
+  argspec = getfullargspec(func)
   call_args = named.copy()
   this = getattr(func, 'im_self', None) or getattr(func, '__self__', None)
   if ismethod(func) and this:
diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc
index 2b33d106bc8abe..0f465eda4fc44f 100644
--- a/tensorflow/python/util/util.cc
+++ b/tensorflow/python/util/util.cc
@@ -320,7 +320,7 @@ void SetDifferentKeysError(PyObject* dict1, PyObject* dict2, string* error_msg,
 
 // Returns true iff there were no "internal" errors. In other words,
 // errors that has nothing to do with structure checking.
-// If an "internal" error occured, the appropriate Python error will be
+// If an "internal" error occurred, the appropriate Python error will be
 // set and the caller can propage it directly to the user.
 //
 // Both `error_msg` and `is_type_error` must be non-null. `error_msg` must
diff --git a/tensorflow/python/util/util.h b/tensorflow/python/util/util.h
index 9851c11c2efda4..70efc10c9abe7c 100644
--- a/tensorflow/python/util/util.h
+++ b/tensorflow/python/util/util.h
@@ -97,7 +97,7 @@ PyObject* AssertSameStructure(PyObject* o1, PyObject* o2, bool check_types);
 // used instead. The same convention is followed in `pack_sequence_as`. This
 // correctly repacks dicts and `OrderedDict`s after they have been flattened,
 // and also allows flattening an `OrderedDict` and then repacking it back using
-// a correponding plain dict, or vice-versa.
+// a corresponding plain dict, or vice-versa.
 // Dictionaries with non-sortable keys cannot be flattened.
 //
 // Args:
diff --git a/tensorflow/stream_executor/blas.h b/tensorflow/stream_executor/blas.h
index be0b0bf5fb20b2..ea87744b225215 100644
--- a/tensorflow/stream_executor/blas.h
+++ b/tensorflow/stream_executor/blas.h
@@ -1083,6 +1083,13 @@ class BlasSupport {
   // This is a batched version of DoBlasGemm.
   // The batched GEMM computes matrix product for each input/output in a, b,
   // and c, which contain batch_count DeviceMemory objects.
+  virtual bool DoBlasGemmBatched(
+      Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
+      uint64 n, uint64 k, float alpha,
+      const port::ArraySlice<DeviceMemory<Eigen::half> *> &a, int lda,
+      const port::ArraySlice<DeviceMemory<Eigen::half> *> &b, int ldb,
+      float beta, const port::ArraySlice<DeviceMemory<Eigen::half> *> &c,
+      int ldc, int batch_count, ScratchAllocator *scratch_allocator) = 0;
   virtual bool DoBlasGemmBatched(
       Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
       uint64 n, uint64 k, float alpha,
@@ -1945,6 +1952,13 @@ class BlasSupport {
       DeviceMemory<std::complex<double>> *c, int ldc,                          \
       blas::ComputationType computation_type, blas::AlgorithmType algorithm,   \
       blas::ProfileResult *output_profile_result) override;                    \
+  bool DoBlasGemmBatched(                                                      \
+      Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
+      uint64 m, uint64 n, uint64 k, float alpha,                               \
+      const port::ArraySlice<DeviceMemory<Eigen::half> *> &a, int lda,         \
+      const port::ArraySlice<DeviceMemory<Eigen::half> *> &b, int ldb,         \
+      float beta, const port::ArraySlice<DeviceMemory<Eigen::half> *> &c,      \
+      int ldc, int batch_count, ScratchAllocator *scratch_allocator) override; \
   bool DoBlasGemmBatched(                                                      \
       Stream *stream, blas::Transpose transa, blas::Transpose transb,          \
       uint64 m, uint64 n, uint64 k, float alpha,                               \
diff --git a/tensorflow/stream_executor/cuda/cuda_blas.cc b/tensorflow/stream_executor/cuda/cuda_blas.cc
index 3e9a23c6589b3d..08fe153b5909d3 100644
--- a/tensorflow/stream_executor/cuda/cuda_blas.cc
+++ b/tensorflow/stream_executor/cuda/cuda_blas.cc
@@ -286,6 +286,10 @@ STREAM_EXECUTOR_CUBLAS_WRAP(cublasGetMathMode)
 STREAM_EXECUTOR_CUBLAS_WRAP(cublasSetMathMode)
 #endif
 
+#if CUDA_VERSION >= 9010
+STREAM_EXECUTOR_CUBLAS_WRAP(cublasGemmBatchedEx)
+#endif
+
 }  // namespace wrap
 
 static string ToString(cublasStatus_t status) {
@@ -2330,13 +2334,23 @@ bool CUDABlas::DoBlasGemmWithAlgorithm(
       computation_type, algorithm, output_profile_result);
 }
 
-template <typename T, typename FuncT>
+template <typename T>
+struct HalfAsFloat {
+  typedef T type;
+};
+
+template <>
+struct HalfAsFloat<Eigen::half> {
+  typedef float type;
+};
+
+template <typename T, typename Scalar, typename FuncT>
 port::Status CUDABlas::DoBlasGemmBatchedInternal(
     FuncT cublas_func, Stream *stream, blas::Transpose transa,
-    blas::Transpose transb, uint64 m, uint64 n, uint64 k, T alpha,
+    blas::Transpose transb, uint64 m, uint64 n, uint64 k, Scalar alpha,
     const port::ArraySlice<DeviceMemory<T> *> &a_ptrs_to_wrappers, int lda,
     const port::ArraySlice<DeviceMemory<T> *> &b_ptrs_to_wrappers, int ldb,
-    T beta, const port::ArraySlice<DeviceMemory<T> *> &c_ptrs_to_wrappers,
+    Scalar beta, const port::ArraySlice<DeviceMemory<T> *> &c_ptrs_to_wrappers,
     int ldc, int batch_count, ScratchAllocator *scratch_allocator) {
   std::vector<T *> a_raw_ptrs, b_raw_ptrs, c_raw_ptrs;
   for (int i = 0; i < batch_count; ++i) {
@@ -2345,7 +2359,7 @@ port::Status CUDABlas::DoBlasGemmBatchedInternal(
     c_raw_ptrs.push_back(static_cast<T *>(c_ptrs_to_wrappers[i]->opaque()));
   }
 
-  typedef typename CUDAComplexT<T>::type CUDA_T;
+  typedef typename HalfAsFloat<typename CUDAComplexT<T>::type>::type CUDA_T;
 
   const size_t size = batch_count * sizeof(CUDA_T *);
 
@@ -2397,18 +2411,84 @@ port::Status CUDABlas::DoBlasGemmBatchedInternal(
                         "CUDABlas::DoBlasGemmBatched");
   }
 
-  bool ok = DoBlasInternal(
-      cublas_func, stream, true /* = pointer_mode_host */,
-      CUDABlasTranspose(transa), CUDABlasTranspose(transb), m, n, k,
-      CUDAComplex(&alpha), const_cast<const CUDA_T **>(CUDAMemory(a)), lda,
-      const_cast<const CUDA_T **>(CUDAMemory(b)), ldb, CUDAComplex(&beta),
-      const_cast<CUDA_T **>(CUDAMemory(c)), ldc, batch_count);
+  cudaDataType_t data_type = CUDADataType<T>::type;
 
-  if (ok) {
+#if CUDA_VERSION >= 9010
+  int cc_major, cc_minor;
+  if (stream->parent()->GetDeviceDescription().cuda_compute_capability(
+          &cc_major, &cc_minor) &&
+      cc_major >= 5) {
+    bool use_tensor_ops = TensorOpMathEnabled() && data_type == CUDA_R_16F;
+    cublasGemmAlgo_t algo =
+        (use_tensor_ops ? CUBLAS_GEMM_DFALT_TENSOR_OP : CUBLAS_GEMM_DFALT);
+    cudaDataType_t compute_type =
+        (data_type == CUDA_R_16F ? CUDA_R_32F : data_type);
+    const void **a_void_ptrs = reinterpret_cast<const void **>(
+        const_cast<const CUDA_T **>(CUDAMemory(a)));
+    const void **b_void_ptrs = reinterpret_cast<const void **>(
+        const_cast<const CUDA_T **>(CUDAMemory(b)));
+    void **c_void_ptrs =
+        reinterpret_cast<void **>(const_cast<CUDA_T **>(CUDAMemory(c)));
+    bool ok;
+    ok = DoBlasInternalImpl(
+        wrap::cublasGemmBatchedEx, stream, true /* = pointer_mode_host */,
+        true /* = err_on_failure */, use_tensor_ops, CUDABlasTranspose(transa),
+        CUDABlasTranspose(transb), m, n, k, &alpha, a_void_ptrs, data_type, lda,
+        b_void_ptrs, data_type, ldb, &beta, c_void_ptrs, data_type, ldc,
+        batch_count, compute_type, algo);
+    if (ok) {
+      return port::Status::OK();
+    }
+    return port::Status(port::error::INTERNAL,
+                        "failed BLAS call, see log for details");
+  }
+#endif
+  // either CUDA_VERSION < 9.1 or SM < 5.0
+  if (data_type != CUDA_R_16F) {
+    bool ok = DoBlasInternal(
+        cublas_func, stream, true /* = pointer_mode_host */,
+        CUDABlasTranspose(transa), CUDABlasTranspose(transb), m, n, k,
+        CUDAComplex(&alpha), const_cast<const CUDA_T **>(CUDAMemory(a)), lda,
+        const_cast<const CUDA_T **>(CUDAMemory(b)), ldb, CUDAComplex(&beta),
+        const_cast<CUDA_T **>(CUDAMemory(c)), ldc, batch_count);
+    if (ok) {
+      return port::Status::OK();
+    }
+    return port::Status(port::error::INTERNAL,
+                        "failed BLAS call, see log for details");
+  } else {
+    // Fall back to a loop for fp16
+    for (int b = 0; b < batch_count; ++b) {
+      const DeviceMemory<T> &a_matrix = *a_ptrs_to_wrappers[b];
+      const DeviceMemory<T> &b_matrix = *b_ptrs_to_wrappers[b];
+      DeviceMemory<T> *c_matrix = c_ptrs_to_wrappers[b];
+      bool ok = DoBlasGemm(stream, transa, transb, m, n, k, alpha, a_matrix,
+                           lda, b_matrix, ldb, beta, c_matrix, ldc);
+      if (!ok) {
+        return port::Status(port::error::INTERNAL,
+                            "failed BLAS call, see log for details");
+      }
+    }
     return port::Status::OK();
   }
-  return port::Status(port::error::INTERNAL,
-                      "failed BLAS call, see log for details");
+}
+
+bool CUDABlas::DoBlasGemmBatched(
+    Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m,
+    uint64 n, uint64 k, float alpha,
+    const port::ArraySlice<DeviceMemory<Eigen::half> *> &a_array, int lda,
+    const port::ArraySlice<DeviceMemory<Eigen::half> *> &b_array, int ldb,
+    float beta, const port::ArraySlice<DeviceMemory<Eigen::half> *> &c_array,
+    int ldc, int batch_count, ScratchAllocator *scratch_allocator) {
+  // Note: The func passed here (cublasSgemmBatched) is not actually called,
+  // due to special handling of fp16 inside DoBlasGemmBatchedInternal.
+  port::Status status = DoBlasGemmBatchedInternal(
+      wrap::cublasSgemmBatched, stream, transa, transb, m, n, k, alpha, a_array,
+      lda, b_array, ldb, beta, c_array, ldc, batch_count, scratch_allocator);
+  if (!status.ok()) {
+    LOG(ERROR) << status;
+  }
+  return status.ok();
 }
 
 bool CUDABlas::DoBlasGemmBatched(
diff --git a/tensorflow/stream_executor/cuda/cuda_blas.h b/tensorflow/stream_executor/cuda/cuda_blas.h
index 12dc5e47fd1b9d..42b3fde5b0816f 100644
--- a/tensorflow/stream_executor/cuda/cuda_blas.h
+++ b/tensorflow/stream_executor/cuda/cuda_blas.h
@@ -107,12 +107,12 @@ class CUDABlas : public blas::BlasSupport {
 
   // A helper function to implement DoBlasGemmBatched interfaces for generic
   // types.
-  template <typename T, typename FuncT>
+  template <typename T, typename Scalar, typename FuncT>
   port::Status DoBlasGemmBatchedInternal(
       FuncT cublas_func, Stream *stream, blas::Transpose transa,
-      blas::Transpose transb, uint64 m, uint64 n, uint64 k, T alpha,
+      blas::Transpose transb, uint64 m, uint64 n, uint64 k, Scalar alpha,
       const port::ArraySlice<DeviceMemory<T> *> &a_array, int lda,
-      const port::ArraySlice<DeviceMemory<T> *> &b_array, int ldb, T beta,
+      const port::ArraySlice<DeviceMemory<T> *> &b_array, int ldb, Scalar beta,
       const port::ArraySlice<DeviceMemory<T> *> &c_array, int ldc,
       int batch_count, ScratchAllocator *scratch_allocator);
 
diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc
index 2bc9b6b798865c..4a98cfe16460ff 100644
--- a/tensorflow/stream_executor/stream.cc
+++ b/tensorflow/stream_executor/stream.cc
@@ -4480,6 +4480,40 @@ Stream &Stream::ThenBlasTrsm(blas::Side side, blas::UpperLower uplo,
               n, alpha, a, lda, b, ldb);
 }
 
+Stream &Stream::ThenBlasGemmBatched(
+    blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n,
+    uint64 k, float alpha,
+    const port::ArraySlice<DeviceMemory<Eigen::half> *> &a, int lda,
+    const port::ArraySlice<DeviceMemory<Eigen::half> *> &b, int ldb, float beta,
+    const port::ArraySlice<DeviceMemory<Eigen::half> *> &c, int ldc,
+    int batch_count) {
+  return ThenBlasGemmBatchedWithScratch(transa, transb, m, n, k, alpha, a, lda,
+                                        b, ldb, beta, c, ldc, batch_count,
+                                        /*scratch_allocator=*/nullptr);
+}
+
+Stream &Stream::ThenBlasGemmBatchedWithScratch(
+    blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n,
+    uint64 k, float alpha,
+    const port::ArraySlice<DeviceMemory<Eigen::half> *> &a, int lda,
+    const port::ArraySlice<DeviceMemory<Eigen::half> *> &b, int ldb, float beta,
+    const port::ArraySlice<DeviceMemory<Eigen::half> *> &c, int ldc,
+    int batch_count, ScratchAllocator *scratch_allocator) {
+  VLOG_CALL(PARAM(transa), PARAM(transb), PARAM(m), PARAM(n), PARAM(k),
+            PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb),
+            PARAM(beta), PARAM(c), PARAM(ldc), PARAM(batch_count));
+
+  ThenBlasImpl<blas::Transpose, blas::Transpose, uint64, uint64, uint64, float,
+               const port::ArraySlice<DeviceMemory<Eigen::half> *> &, int,
+               const port::ArraySlice<DeviceMemory<Eigen::half> *> &, int,
+               float, const port::ArraySlice<DeviceMemory<Eigen::half> *> &,
+               int, int, ScratchAllocator *>
+      impl;
+  return impl(this, &blas::BlasSupport::DoBlasGemmBatched, transa, transb, m, n,
+              k, alpha, a, lda, b, ldb, beta, c, ldc, batch_count,
+              scratch_allocator);
+}
+
 Stream &Stream::ThenBlasGemmBatched(
     blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n,
     uint64 k, float alpha, const port::ArraySlice<DeviceMemory<float> *> &a,
diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h
index c6e37da6d14f63..3da1b856d6a41f 100644
--- a/tensorflow/stream_executor/stream.h
+++ b/tensorflow/stream_executor/stream.h
@@ -1471,6 +1471,13 @@ class Stream {
       blas::ProfileResult *output_profile_result);
 
   // See BlasSupport::DoBlasGemmBatched.
+  Stream &ThenBlasGemmBatched(
+      blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n,
+      uint64 k, float alpha,
+      const port::ArraySlice<DeviceMemory<Eigen::half> *> &a, int lda,
+      const port::ArraySlice<DeviceMemory<Eigen::half> *> &b, int ldb,
+      float beta, const port::ArraySlice<DeviceMemory<Eigen::half> *> &c,
+      int ldc, int batch_count);
   Stream &ThenBlasGemmBatched(blas::Transpose transa, blas::Transpose transb,
                               uint64 m, uint64 n, uint64 k, float alpha,
                               const port::ArraySlice<DeviceMemory<float> *> &a,
@@ -1503,6 +1510,13 @@ class Stream {
       std::complex<double> beta,
       const port::ArraySlice<DeviceMemory<std::complex<double>> *> &c, int ldc,
       int batch_count);
+  Stream &ThenBlasGemmBatchedWithScratch(
+      blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n,
+      uint64 k, float alpha,
+      const port::ArraySlice<DeviceMemory<Eigen::half> *> &a, int lda,
+      const port::ArraySlice<DeviceMemory<Eigen::half> *> &b, int ldb,
+      float beta, const port::ArraySlice<DeviceMemory<Eigen::half> *> &c,
+      int ldc, int batch_count, ScratchAllocator *scratch_allocator);
   Stream &ThenBlasGemmBatchedWithScratch(
       blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n,
       uint64 k, float alpha, const port::ArraySlice<DeviceMemory<float> *> &a,
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 880ec0523d397a..d71fd71bbd83ad 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -1300,7 +1300,7 @@ def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[], linkopts=[]):
     native.cc_library(
         name=basename + "_gpu",
         srcs=gpu_srcs,
-        copts=_cuda_copts(),
+        copts=_cuda_copts() + if_tensorrt(["-DGOOGLE_TENSORRT=1"]),
         deps=deps + if_cuda(cuda_deps))
     cuda_deps.extend([":" + basename + "_gpu"])
 
@@ -1483,7 +1483,7 @@ def tf_py_wrap_cc(name,
 # This macro is for running python tests against system installed pip package
 # on Windows.
 #
-# py_test is built as an exectuable python zip file on Windows, which contains all
+# py_test is built as an executable python zip file on Windows, which contains all
 # dependencies of the target. Because of the C++ extensions, it would be very
 # inefficient if the py_test zips all runfiles, plus we don't need them when running
 # tests against system installed pip package. So we'd like to get rid of the deps
diff --git a/tensorflow/tools/api/generator/BUILD b/tensorflow/tools/api/generator/BUILD
index e58de5b63ea72a..f46bb4b5fcc5d6 100644
--- a/tensorflow/tools/api/generator/BUILD
+++ b/tensorflow/tools/api/generator/BUILD
@@ -101,6 +101,7 @@ genrule(
         "api/profiler/__init__.py",
         "api/python_io/__init__.py",
         "api/resource_loader/__init__.py",
+        "api/strings/__init__.py",
         "api/saved_model/__init__.py",
         "api/saved_model/builder/__init__.py",
         "api/saved_model/constants/__init__.py",
diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt
index 74b1b39d9faea1..dc2bd40096a695 100644
--- a/tensorflow/tools/api/golden/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.pbtxt
@@ -500,6 +500,10 @@ tf_module {
     name: "string"
     mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
   }
+  member {
+    name: "strings"
+    mtype: "<type \'module\'>"
+  }
   member {
     name: "summary"
     mtype: "<type \'module\'>"
diff --git a/tensorflow/tools/api/golden/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/tensorflow.strings.pbtxt
new file mode 100644
index 00000000000000..a3fbe95bbad4b8
--- /dev/null
+++ b/tensorflow/tools/api/golden/tensorflow.strings.pbtxt
@@ -0,0 +1,7 @@
+path: "tensorflow.strings"
+tf_module {
+  member_method {
+    name: "regex_full_match"
+    argspec: "args=[\'input\', \'pattern\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh
index 5aaf544afdcb88..982161cefeefdd 100755
--- a/tensorflow/tools/ci_build/install/install_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh
@@ -17,14 +17,9 @@
 set -e
 
 # We don't apt-get install so that we can install a newer version of pip.
-# Only needed for Ubuntu 14.04 ,and not needed for Ubuntu 16.04 / Debian 8,9
-if $(cat /etc/*-release | grep -q 14.04); then
-  easy_install -U pip==9.0.3
-  easy_install3 -U pip==9.0.3
-else
-  pip2 install --upgrade pip==9.0.3
-  pip3 install --upgrade pip==9.0.3
-fi
+# Only needed for Ubuntu 14.04 and 16.04; not needed for 18.04 and Debian 8,9?
+easy_install -U pip==9.0.3
+easy_install3 -U pip==9.0.3
 
 # Install pip packages from whl files to avoid the time-consuming process of
 # building from source.
diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel
index b9996395d02bfb..406d134699ff18 100644
--- a/tensorflow/tools/docker/Dockerfile.devel
+++ b/tensorflow/tools/docker/Dockerfile.devel
@@ -85,7 +85,7 @@ RUN git clone --branch=r1.8 --depth=1 https://github.com/tensorflow/tensorflow.g
 ENV CI_BUILD_PYTHON python
 
 RUN tensorflow/tools/ci_build/builds/configured CPU \
-    bazel build -c opt --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \
+    bazel build -c opt --copt=-mavx --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \
         # For optimized builds appropriate for the hardware platform of your choosing, uncomment below...
         # For ivy-bridge or sandy-bridge
         # --copt=-march="ivybridge" \
diff --git a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl
index c65e0b72bc582d..a6cd44ced1d546 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl
+++ b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl
@@ -35,10 +35,10 @@ ENV CI_BUILD_PYTHON=python \
     PYTHON_LIB_PATH=/usr/local/lib/python2.7/dist-packages \
     CC_OPT_FLAGS='-march=native' \
     TF_NEED_JEMALLOC=0 \
-    TF_NEED_GCP=0 \
+    TF_NEED_GCP=1 \
     TF_NEED_CUDA=0 \
     TF_NEED_HDFS=0 \
-    TF_NEED_S3=0 \
+    TF_NEED_S3=1 \
     TF_NEED_OPENCL=0 \
     TF_NEED_GDR=0 \
     TF_ENABLE_XLA=0 \
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu
index 7e5e6ef2d5b024..2fe47f3356ce26 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu
@@ -98,7 +98,7 @@ ENV TF_CUDNN_VERSION=7
 RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
     LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:${LD_LIBRARY_PATH} \
     tensorflow/tools/ci_build/builds/configured GPU \
-    bazel build -c opt --config=cuda \
+    bazel build -c opt --copt=-mavx --config=cuda \
 	--cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \
         tensorflow/tools/pip_package:build_pip_package && \
     rm /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
diff --git a/tensorflow/tools/graph_transforms/README.md b/tensorflow/tools/graph_transforms/README.md
index 67badb4869029b..9f6f553ba1e4c6 100644
--- a/tensorflow/tools/graph_transforms/README.md
+++ b/tensorflow/tools/graph_transforms/README.md
@@ -388,7 +388,7 @@ input is collapsed down into a simple constant.
 Args:
 
 *   clear_output_shapes: Clears tensor shape information saved as attributes.
-    Some older graphs containes out-of-date information and may cause import
+    Some older graphs contain out-of-date information and may cause import
     errors. Defaults to true.
 
 Prerequisites: None
diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh
index b66d5bdd37c03e..1a83c6e7578fed 100755
--- a/tensorflow/tools/pip_package/build_pip_package.sh
+++ b/tensorflow/tools/pip_package/build_pip_package.sh
@@ -24,7 +24,7 @@ function real_path() {
 function cp_external() {
   local src_dir=$1
   local dest_dir=$2
-  for f in `find "$src_dir" -maxdepth 1 -mindepth 1 ! -name '*local_config_cuda*' ! -name '*org_tensorflow*'`; do
+  for f in `find "$src_dir" -maxdepth 1 -mindepth 1 ! -name '*local_config_cuda*' ! -name '*local_config_tensorrt*' ! -name '*org_tensorflow*'`; do
     cp -R "$f" "$dest_dir"
   done
   mkdir -p "${dest_dir}/local_config_cuda/cuda/cuda/"
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index f7385e59912fa0..319878e1b5ae9f 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -31,7 +31,7 @@
 # This version string is semver compatible, but incompatible with pip.
 # For pip, we will remove all '-' characters from this string, and use the
 # result for pip.
-_VERSION = '1.8.0-rc1'
+_VERSION = '1.8.0'
 
 _SHORT_DESCRIPTION = ('TensorFlow is an open source machine learning framework '
                       'for everyone.')
@@ -55,7 +55,7 @@
     'numpy >= 1.13.3',
     'six >= 1.10.0',
     'protobuf >= 3.4.0',
-    'tensorboard >= 1.7.0, < 1.8.0',
+    'tensorboard >= 1.8.0, < 1.9.0',
     'termcolor >= 1.1.0',
 ]
 
diff --git a/third_party/examples/eager/spinn/README.md b/third_party/examples/eager/spinn/README.md
index 7f477d19208257..fbb1fde837b92b 100644
--- a/third_party/examples/eager/spinn/README.md
+++ b/third_party/examples/eager/spinn/README.md
@@ -70,7 +70,7 @@ Other eager execution examples can be found under [tensorflow/contrib/eager/pyth
 - After training, you may use the model to perform inference on input data in
   the SNLI data format. The premise and hypotheses sentences are specified with
   the command-line flags `--inference_premise` and `--inference_hypothesis`,
-  respecitvely. Each sentence should include the words, as well as parentheses
+  respectively. Each sentence should include the words, as well as parentheses
   representing a binary parsing of the sentence. The words and parentheses
   should all be separated by spaces. For instance,
 
diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl
index 50d1b778d9e14b..c90c66912d959a 100644
--- a/third_party/gpus/cuda_configure.bzl
+++ b/third_party/gpus/cuda_configure.bzl
@@ -604,7 +604,7 @@ def _find_cupti_header_dir(repository_ctx, cuda_config):
   for relative_path in CUPTI_HEADER_PATHS:
     if repository_ctx.path("%s/%scupti.h" % (cuda_toolkit_path, relative_path)).exists:
         return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
-  auto_configure_fail("Cannot find cupti.h under %s" % cuda_toolkit_path)
+  auto_configure_fail("Cannot find cupti.h under %s" % ", ".join([cuda_toolkit_path + "/" + s for s in CUPTI_HEADER_PATHS]))
 
 
 def _find_cupti_lib(repository_ctx, cuda_config):
diff --git a/third_party/mkl/BUILD b/third_party/mkl/BUILD
index c2adf578c703f5..a058c46cc42439 100644
--- a/third_party/mkl/BUILD
+++ b/third_party/mkl/BUILD
@@ -34,6 +34,7 @@ filegroup(
         "@org_tensorflow//tensorflow:windows": [
             "@mkl_windows//:LICENSE",
         ],
+        "//conditions:default": [],
     }),
     visibility = ["//visibility:public"],
 )
@@ -54,5 +55,6 @@ cc_library(
             "@mkl_windows//:mkl_headers",
             "@mkl_windows//:mkl_libs_windows",
         ],
+        "//conditions:default": [],
     }),
 )