From b59833c3fd91511b33255369016868e4ae6cda2e Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Thu, 24 May 2018 19:12:26 -0700 Subject: [PATCH] Merge changes from github. Revert #18413. Too many internal test failures due to the name scope change caused by this change. Revert #18192. Cannot use re2::StringPiece internally. Need alternative for set call. Will pull and clean this up in a separate change. PiperOrigin-RevId: 197991247 --- CONTRIBUTING.md | 11 + README.md | 39 +- RELEASE.md | 7 +- SECURITY.md | 2 +- configure.py | 3 + .../jit/encapsulate_subgraphs_pass.cc | 2 +- tensorflow/compiler/xla/README.md | 8 +- .../xla/service/conditional_simplifier.cc | 2 +- .../compiler/xla/service/copy_insertion.cc | 2 +- .../compiler/xla/service/cpu/ir_function.h | 4 +- .../xla/service/cpu/shape_partition.h | 2 +- .../compiler/xla/service/despecializer.h | 2 +- .../xla/service/gpu/ir_emitter_unnested.h | 2 +- .../compiler/xla/service/hlo_evaluator.cc | 1 + .../xla/service/interpreter/README.md | 2 +- .../compiler/xla/service/layout_assignment.h | 4 +- .../xla/service/reduce_precision_insertion.cc | 2 +- .../compiler/xla/service/source_map_util.h | 2 +- tensorflow/compiler/xla/shape_util.h | 2 +- .../compiler/xla/tests/dot_operation_test.cc | 18 +- tensorflow/compiler/xla/tests/tuple_test.cc | 2 +- tensorflow/compiler/xla/xlalogo.png | Bin 0 -> 46785 bytes tensorflow/contrib/autograph/impl/config.py | 2 +- .../autograph/operators/control_flow.py | 2 +- .../python/training/functions/gbdt_batch.py | 2 +- tensorflow/contrib/cmake/CMakeLists.txt | 29 +- tensorflow/contrib/cmake/external/zlib.cmake | 3 +- tensorflow/contrib/cmake/tf_tests.cmake | 2 + .../contrib/data/python/kernel_tests/BUILD | 4 + .../data/python/kernel_tests/resample_test.py | 109 +++- tensorflow/contrib/data/python/ops/BUILD | 2 + .../contrib/data/python/ops/resampling.py | 265 ++++++--- .../ops/bijectors/cholesky_outer_product.py | 2 +- tensorflow/contrib/eager/README.md | 2 +- 
tensorflow/contrib/ffmpeg/ffmpeg_lib.h | 2 +- .../python/ops/critical_section_ops.py | 2 +- .../estimator/python/gan_estimator_impl.py | 7 +- .../estimator/python/gan_estimator_test.py | 11 + .../gan/python/estimator/python/head_impl.py | 45 +- .../gan/python/estimator/python/head_test.py | 7 +- .../features/python/conditioning_utils.py | 2 +- tensorflow/contrib/graph_editor/transform.py | 2 +- .../hvx_ops_support_checker_main.cc | 2 +- tensorflow/contrib/image/__init__.py | 2 +- tensorflow/contrib/kfac/examples/convnet.py | 2 +- .../contrib/kfac/python/ops/optimizer.py | 6 +- .../contrib/kfac/python/ops/placement.py | 2 +- .../contrib/layers/python/layers/layers.py | 142 ++++- .../layers/python/layers/layers_test.py | 15 +- .../learn/utils/saved_model_export_utils.py | 3 +- tensorflow/contrib/lite/BUILD | 2 - tensorflow/contrib/lite/Makefile | 19 +- .../contrib/lite/examples/minimal/minimal.cc | 71 +++ tensorflow/contrib/lite/g3doc/rpi.md | 2 +- .../internal/optimized/optimized_ops.h | 2 +- .../internal/reference/reference_ops.h | 4 +- tensorflow/contrib/lite/schema/schema.fbs | 2 +- tensorflow/contrib/lite/schema/schema_v0.fbs | 2 +- tensorflow/contrib/lite/schema/schema_v1.fbs | 2 +- tensorflow/contrib/lite/schema/schema_v2.fbs | 2 +- tensorflow/contrib/lite/schema/schema_v3.fbs | 4 +- .../contrib/lite/testing/generate_examples.py | 4 +- .../contrib/lite/testing/tflite_driver.cc | 4 +- .../lite/toco/g3doc/cmdline_examples.md | 4 +- .../contrib/lite/toco/tflite/operator.h | 4 +- tensorflow/contrib/lite/toco/toco_flags.proto | 2 +- .../elastic_average_optimizer_test.py | 2 +- .../training/model_average_optimizer_test.py | 4 +- .../contrib/signal/python/ops/window_ops.py | 4 +- .../python/slim/data/tfexample_decoder.py | 2 +- .../contrib/slim/python/slim/learning.py | 2 +- .../tensorboard/db/summary_db_writer.cc | 22 +- .../tensorboard/db/summary_db_writer_test.cc | 50 ++ tensorflow/contrib/tensorrt/BUILD | 55 +- .../contrib/tensorrt/convert/convert_graph.cc | 123 
+++-- .../contrib/tensorrt/convert/convert_graph.h | 10 + .../contrib/tensorrt/convert/convert_nodes.cc | 501 ++++++++---------- .../contrib/tensorrt/convert/convert_nodes.h | 14 +- .../tensorrt/convert/trt_optimization_pass.cc | 246 +++++++++ .../tensorrt/convert/trt_optimization_pass.h | 73 +++ .../tensorrt/custom_plugin_examples/BUILD | 118 +++++ .../custom_plugin_examples/__init__.py | 24 + .../tensorrt/custom_plugin_examples/inc_op.py | 32 ++ .../inc_op_kernel.cu.cc | 84 +++ .../custom_plugin_examples/inc_op_kernel.h | 35 ++ .../custom_plugin_examples/inc_op_plugin.cc | 86 +++ .../custom_plugin_examples/inc_op_plugin.h | 102 ++++ .../custom_plugin_examples/ops/inc_op.cc | 36 ++ .../custom_plugin_examples/plugin_test.py | 95 ++++ .../contrib/tensorrt/kernels/trt_engine_op.cc | 54 +- .../contrib/tensorrt/kernels/trt_engine_op.h | 11 +- tensorflow/contrib/tensorrt/log/trt_logger.h | 2 +- .../contrib/tensorrt/plugin/trt_plugin.cc | 106 ++++ .../contrib/tensorrt/plugin/trt_plugin.h | 74 +++ .../tensorrt/plugin/trt_plugin_factory.cc | 78 +++ .../tensorrt/plugin/trt_plugin_factory.h | 102 ++++ .../plugin/trt_plugin_factory_test.cc | 125 +++++ .../tensorrt/plugin/trt_plugin_utils.cc | 42 ++ .../tensorrt/plugin/trt_plugin_utils.h | 46 ++ .../tensorrt/resources/trt_allocator.cc | 62 +++ .../tensorrt/resources/trt_allocator.h | 68 +++ .../tensorrt/resources/trt_resources.h | 44 +- .../contrib/tensorrt/segment/segment.cc | 379 +++++++++++-- tensorflow/contrib/tensorrt/segment/segment.h | 18 +- .../contrib/tensorrt/segment/segment_test.cc | 16 +- .../contrib/tensorrt/shape_fn/trt_shfn.cc | 4 +- .../contrib/tensorrt/test/test_tftrt.py | 64 ++- .../tensorrt/test/tf_trt_integration_test.py | 19 +- .../contrib/tpu/python/tpu/tpu_context.py | 2 +- tensorflow/contrib/verbs/README.md | 2 +- tensorflow/core/BUILD | 7 +- .../base_api/api_def_RegexFullMatch.pbtxt | 30 ++ .../python_api/api_def_RegexFullMatch.pbtxt | 4 + tensorflow/core/common_runtime/broadcaster.cc | 4 +- 
.../core/common_runtime/buf_rendezvous.h | 2 +- .../core/common_runtime/ring_reducer.cc | 2 +- .../common_runtime/scoped_allocator_mgr.cc | 2 +- tensorflow/core/debug/debug_io_utils.cc | 2 +- .../rpc/grpc_worker_cache.cc | 2 +- tensorflow/core/example/example.proto | 2 +- .../example_parser_configuration.proto | 1 + tensorflow/core/example/feature.proto | 2 +- .../framework/allocation_description.proto | 1 + tensorflow/core/framework/api_def.proto | 1 + tensorflow/core/framework/attr_value.proto | 2 +- tensorflow/core/framework/cost_graph.proto | 2 +- .../core/framework/device_attributes.proto | 1 + tensorflow/core/framework/function.proto | 2 +- tensorflow/core/framework/graph.proto | 2 +- .../core/framework/graph_transfer_info.proto | 2 +- tensorflow/core/framework/iterator.proto | 1 + tensorflow/core/framework/kernel_def.proto | 2 +- tensorflow/core/framework/log_memory.proto | 2 +- tensorflow/core/framework/node_def.proto | 2 +- tensorflow/core/framework/op_def.proto | 2 +- tensorflow/core/framework/op_gen_lib.h | 4 +- tensorflow/core/framework/op_kernel.h | 2 +- tensorflow/core/framework/reader_base.proto | 1 + .../remote_fused_graph_execute_info.proto | 2 +- .../core/framework/resource_handle.proto | 1 + tensorflow/core/framework/step_stats.proto | 2 +- tensorflow/core/framework/summary.proto | 2 +- tensorflow/core/framework/tensor.proto | 2 +- .../core/framework/tensor_description.proto | 2 +- tensorflow/core/framework/tensor_shape.proto | 1 + tensorflow/core/framework/tensor_slice.proto | 1 + tensorflow/core/framework/types.proto | 1 + tensorflow/core/framework/variable.proto | 1 + tensorflow/core/framework/versions.proto | 1 + tensorflow/core/graph/mkl_layout_pass_test.cc | 27 + tensorflow/core/graph/while_context.h | 2 +- .../core/grappler/costs/graph_properties.cc | 2 +- .../core/grappler/costs/virtual_scheduler.h | 2 +- .../grappler/optimizers/layout_optimizer.cc | 2 +- tensorflow/core/kernels/BUILD | 8 + .../core/kernels/batch_matmul_op_impl.h | 106 
+++- .../core/kernels/batch_matmul_op_real.cc | 4 + .../adaptive_shared_batch_scheduler.h | 2 +- tensorflow/core/kernels/conv_grad_ops_3d.cc | 4 +- tensorflow/core/kernels/conv_ops_gpu_3.cu.cc | 2 +- tensorflow/core/kernels/nth_element_op.cc | 2 +- .../core/kernels/regex_full_match_op.cc | 59 +++ tensorflow/core/kernels/roll_op.cc | 2 +- .../core/kernels/segment_reduction_ops.cc | 4 +- .../core/kernels/segment_reduction_ops.h | 2 +- tensorflow/core/lib/core/error_codes.proto | 1 + tensorflow/core/ops/image_ops.cc | 19 + tensorflow/core/ops/image_ops_test.cc | 19 + tensorflow/core/ops/math_ops.cc | 2 +- tensorflow/core/ops/nn_ops.cc | 3 +- tensorflow/core/ops/random_ops.cc | 10 +- tensorflow/core/ops/string_ops.cc | 11 + .../core/platform/cloud/gcs_file_system.cc | 2 +- tensorflow/core/platform/cloud/gcs_throttle.h | 2 +- .../core/profiler/g3doc/command_line.md | 2 +- tensorflow/core/protobuf/cluster.proto | 1 + tensorflow/core/protobuf/config.proto | 2 +- tensorflow/core/protobuf/control_flow.proto | 1 + .../core/protobuf/critical_section.proto | 1 + tensorflow/core/protobuf/debug.proto | 1 + .../core/protobuf/device_properties.proto | 1 + tensorflow/core/protobuf/master.proto | 2 +- tensorflow/core/protobuf/master_service.proto | 2 +- tensorflow/core/protobuf/meta_graph.proto | 2 +- tensorflow/core/protobuf/named_tensor.proto | 2 +- tensorflow/core/protobuf/queue_runner.proto | 2 +- .../core/protobuf/rewriter_config.proto | 3 +- tensorflow/core/protobuf/saved_model.proto | 2 +- tensorflow/core/protobuf/saver.proto | 1 + tensorflow/core/protobuf/tensor_bundle.proto | 2 +- .../core/protobuf/tensorflow_server.proto | 2 +- tensorflow/core/protobuf/worker.proto | 2 +- tensorflow/core/protobuf/worker_service.proto | 2 +- tensorflow/core/public/version.h | 2 +- tensorflow/core/util/cuda_device_functions.h | 2 +- tensorflow/core/util/mkl_util.h | 2 +- tensorflow/core/util/tensor_format.h | 2 +- .../api_guides/python/reading_data.md | 2 +- 
tensorflow/docs_src/community/benchmarks.md | 18 +- tensorflow/docs_src/community/swift.md | 2 +- tensorflow/docs_src/deploy/s3.md | 2 +- tensorflow/docs_src/extend/adding_an_op.md | 63 ++- tensorflow/docs_src/extend/architecture.md | 14 +- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 22 +- tensorflow/docs_src/install/install_linux.md | 18 +- tensorflow/docs_src/install/install_mac.md | 10 +- .../docs_src/install/install_sources.md | 4 +- tensorflow/docs_src/mobile/mobile_intro.md | 2 +- tensorflow/docs_src/mobile/tflite/index.md | 2 +- tensorflow/docs_src/programmers_guide/faq.md | 17 +- .../docs_src/programmers_guide/tensors.md | 6 +- .../docs_src/programmers_guide/variables.md | 2 +- tensorflow/docs_src/tutorials/layers.md | 1 - .../examples/learn/text_classification_cnn.py | 2 +- tensorflow/go/op/wrappers.go | 2 +- tensorflow/python/data/util/nest.py | 2 +- tensorflow/python/debug/cli/curses_ui.py | 36 +- tensorflow/python/estimator/estimator.py | 2 +- .../inputs/queues/feeding_functions.py | 2 +- tensorflow/python/estimator/keras.py | 2 +- tensorflow/python/estimator/training.py | 2 +- .../python/feature_column/feature_column.py | 6 +- .../python/framework/fast_tensor_util.pyx | 12 + tensorflow/python/framework/ops.py | 26 +- tensorflow/python/framework/tensor_util.py | 12 +- tensorflow/python/framework/test_util.py | 2 +- tensorflow/python/keras/utils/__init__.py | 1 + tensorflow/python/kernel_tests/BUILD | 12 + tensorflow/python/kernel_tests/conv1d_test.py | 4 +- .../kernel_tests/conv3d_transpose_test.py | 17 + .../kernel_tests/distributions/util_test.py | 2 +- .../python/kernel_tests/manip_ops_test.py | 2 +- .../kernel_tests/regex_full_match_op_test.py | 54 ++ .../segment_reduction_ops_test.py | 10 +- tensorflow/python/layers/base.py | 14 +- tensorflow/python/layers/base_test.py | 16 + tensorflow/python/ops/math_ops.py | 2 +- tensorflow/python/ops/string_ops.py 
| 2 + .../python/profiler/model_analyzer_test.py | 7 +- tensorflow/python/saved_model/builder_impl.py | 5 +- tensorflow/python/training/distribute.py | 2 +- tensorflow/python/training/saver.py | 2 +- tensorflow/python/util/tf_inspect.py | 2 +- tensorflow/python/util/util.cc | 2 +- tensorflow/python/util/util.h | 2 +- tensorflow/stream_executor/blas.h | 14 + tensorflow/stream_executor/cuda/cuda_blas.cc | 106 +++- tensorflow/stream_executor/cuda/cuda_blas.h | 6 +- tensorflow/stream_executor/stream.cc | 34 ++ tensorflow/stream_executor/stream.h | 14 + tensorflow/tensorflow.bzl | 4 +- tensorflow/tools/api/generator/BUILD | 1 + tensorflow/tools/api/golden/tensorflow.pbtxt | 4 + .../tools/api/golden/tensorflow.strings.pbtxt | 7 + .../ci_build/install/install_pip_packages.sh | 11 +- tensorflow/tools/docker/Dockerfile.devel | 2 +- .../tools/docker/Dockerfile.devel-cpu-mkl | 4 +- tensorflow/tools/docker/Dockerfile.devel-gpu | 2 +- tensorflow/tools/graph_transforms/README.md | 2 +- .../tools/pip_package/build_pip_package.sh | 2 +- tensorflow/tools/pip_package/setup.py | 4 +- third_party/examples/eager/spinn/README.md | 2 +- third_party/gpus/cuda_configure.bzl | 2 +- third_party/mkl/BUILD | 2 + 266 files changed, 4146 insertions(+), 940 deletions(-) create mode 100644 tensorflow/compiler/xla/xlalogo.png create mode 100644 tensorflow/contrib/lite/examples/minimal/minimal.cc create mode 100644 tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc create mode 100644 tensorflow/contrib/tensorrt/convert/trt_optimization_pass.h create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/BUILD create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/__init__.py create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op.py create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.cu.cc create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.h create mode 100644 
tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.cc create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.h create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/ops/inc_op.cc create mode 100644 tensorflow/contrib/tensorrt/custom_plugin_examples/plugin_test.py create mode 100644 tensorflow/contrib/tensorrt/plugin/trt_plugin.cc create mode 100644 tensorflow/contrib/tensorrt/plugin/trt_plugin.h create mode 100644 tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.cc create mode 100644 tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h create mode 100644 tensorflow/contrib/tensorrt/plugin/trt_plugin_factory_test.cc create mode 100644 tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.cc create mode 100644 tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.h create mode 100644 tensorflow/contrib/tensorrt/resources/trt_allocator.cc create mode 100644 tensorflow/contrib/tensorrt/resources/trt_allocator.h create mode 100644 tensorflow/core/api_def/base_api/api_def_RegexFullMatch.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RegexFullMatch.pbtxt create mode 100644 tensorflow/core/kernels/regex_full_match_op.cc create mode 100644 tensorflow/python/kernel_tests/regex_full_match_op_test.py create mode 100644 tensorflow/tools/api/golden/tensorflow.strings.pbtxt diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3dad41a88c8212..8669c25c452b53 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,5 +1,16 @@ # Contributing guidelines +## Pull Request Checklist + +Before sending your pull requests, make sure you followed this list. + +- Read [contributing guidelines](CONTRIBUTING.md). +- Read [Code of Conduct](CODE_OF_CONDUCT.md). +- Ensure you have signed the [Contributor License Agreement (CLA)](https://cla.developers.google.com/). 
+- Check if my changes are consistent with the [guidelines](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md#general-guidelines-and-philosophy-for-contribution). +- Changes are consistent with the [Coding Style](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md#c-coding-style). +- Run [Unit Tests](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md#running-unit-tests). + ## How to become a contributor and submit your own code ### Contributor License Agreements diff --git a/README.md b/README.md index e1a50c87e26d49..6fb4486d0de9ff 100644 --- a/README.md +++ b/README.md @@ -5,9 +5,9 @@ ----------------- -| **`Documentation`** | **`Linux CPU`** | **`Linux GPU`** | **`Mac OS CPU`** | **`Windows CPU`** | **`Android`** | -|-----------------|---------------------|------------------|-------------------|---------------|---------------| -| [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://www.tensorflow.org/api_docs/) | ![Build Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-cc.png) | ![Build Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-gpu-cc.png) | ![Build Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/macos-py2-cc.png) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-win-cmake-py)](https://ci.tensorflow.org/job/tensorflow-master-win-cmake-py) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-android)](https://ci.tensorflow.org/job/tensorflow-master-android) [ ![Download](https://api.bintray.com/packages/google/tensorflow/tensorflow/images/download.svg) ](https://bintray.com/google/tensorflow/tensorflow/_latestVersion) +| **`Documentation`** | +|-----------------| +| [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://www.tensorflow.org/api_docs/) | **TensorFlow** is an open source software library for 
numerical computation using data flow graphs. The graph nodes represent mathematical operations, while @@ -40,15 +40,6 @@ environment to install the nightly TensorFlow build. We support CPU and GPU packages on Linux, Mac, and Windows. -**Individual whl files** -* Linux CPU-only: [Python 2](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/)) / [Python 3.4](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=cpu-slave/)) / [Python 3.6](http://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.6,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp36-cp36m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.6,label=cpu-slave/)) -* Linux GPU: 
[Python 2](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/42/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/)) / [Python 3.6](http://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.6,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp36-cp36m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.6,label=gpu-linux/)) -* Mac CPU-only: [Python 2](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-py2-none-any.whl) ([build 
history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/)) / [Python 3](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/)) -* Windows CPU-only: [Python 3.5 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=35/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly-1.head-cp35-cp35m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=35/)) / [Python 3.6 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=36/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly-1.head-cp36-cp36m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=36/)) -* Windows GPU: [Python 3.5 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=35/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly_gpu-1.head-cp35-cp35m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=35/)) / [Python 3.6 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=36/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly_gpu-1.head-cp36-cp36m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=36/)) -* Android: [demo APK](https://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/tensorflow_demo.apk), [native 
libs](https://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/native/) -([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-android/)) - #### *Try your first TensorFlow program* ```shell $ python @@ -82,6 +73,30 @@ The TensorFlow project strives to abide by generally accepted best practices in [![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/1486/badge)](https://bestpractices.coreinfrastructure.org/projects/1486) + +## Continuous build status + +### Official Builds + +| Build Type | Status | Artifacts | +| --- | --- | --- | +| **Linux CPU** | ![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-cc.png) | [pypi](https://pypi.org/project/tf-nightly/) | +| **Linux GPU** | ![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-gpu-cc.png) | [pypi](https://pypi.org/project/tf-nightly-gpu/) | +| **Linux XLA** | TBA | TBA | +| **MacOS** | ![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/macos-py2-cc.png) | [pypi](https://pypi.org/project/tf-nightly/) | +| **Windows CPU** | [![Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-win-cmake-py)](https://ci.tensorflow.org/job/tensorflow-master-win-cmake-py) | [pypi](https://pypi.org/project/tf-nightly/) | +| **Windows GPU** | [![Status](http://ci.tensorflow.org/job/tf-master-win-gpu-cmake/badge/icon)](http://ci.tensorflow.org/job/tf-master-win-gpu-cmake/) | [pypi](https://pypi.org/project/tf-nightly-gpu/) | +| **Android** | [![Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-android)](https://ci.tensorflow.org/job/tensorflow-master-android) | [![Download](https://api.bintray.com/packages/google/tensorflow/tensorflow/images/download.svg)](https://bintray.com/google/tensorflow/tensorflow/_latestVersion) [demo APK](https://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/tensorflow_demo.apk), [native 
libs](https://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/native/) [build history](https://ci.tensorflow.org/view/Nightly/job/nightly-android/) | + + +### Community Supported Builds + +| Build Type | Status | Artifacts | +| --- | --- | --- | +| **IBM s390x** | [![Build Status](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/badge/icon)](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/) | TBA | +| **IBM ppc64le CPU** | [![Build Status](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/badge/icon)](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/) | TBA | + + ## For more information * [TensorFlow Website](https://www.tensorflow.org) diff --git a/RELEASE.md b/RELEASE.md index 2717c75740aeea..84d9d52868ecd5 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -6,7 +6,7 @@ * Added Gradient Boosted Trees as pre-made Estimators: BoostedTreesClassifier, BoostedTreesRegressor. * Add 3rd generation pipeline config for Cloud TPUs which improves performance and usability. * `tf.contrib.bayesflow` is moving out to it's own repo. -* Added `tf.contrib.{proto,rpc}` to allow generic proto parsing and RPC communication. +* Added `tf.contrib.{proto,rpc}` to allow generic proto parsing and RPC communication<sup>[1](#rpc-issue)</sup>. ## Bug Fixes and Other Changes * `tf.data`: @@ -49,13 +49,14 @@ * Fix non-uniformity of orthogonal matrices. * Fix bug where multi-image Estimator eval summaries were not displayed correctly. +<a name="rpc-issue"><sup>1</sup></a> The cancellation logic of the RPC op contains a concurrency error. A fix has been submitted to master and will be part of the next release. 
+ ## Thanks to our Contributors This release contains contributions from many people at Google, as well as: 4d55397500, Aghasy, Alan Du, Alan Lee, Alan Yee, Alex Wiltschko, Animesh Karnewar, Ankit Gupta, Anton Matosov, Aris L, Ben Barsdell, Brent Yi, Brett Koonce, Carl Thomé, cbockman, Chikanaga Tomoyuki, Chris Tava, CéDric Deltheil, Dahan Gong, Dalmo Cirne, Daniel Erenrich, David Norman, DavidNorman, Edd Wilder-James, Fanjin Zeng, Felix Abecassis, fo40225, George Sterpu, Giovanni Terlingen, Gor Baghdasaryan, Guillaume Klein, Hanchen Li, Ilya Polenov, Jakub Kolodziejczyk, Jason Sadler, Jayaram Bobba, Jerry Liu, jinghuangintel, Jiongyan Zhang (张炯衍), Joel Shor, Jong Wook Kim, Julian Eisenschlos, Karl Lessard, Krish Ravindranath, Loo Rong Jie, Lukas Geiger, Luke Iwanski, Mahmoud Abuzaina, ManHyuk, Marvin Richter, Maximilian Mitchell, Mohammad Ashraf Bhuiyan, msofka, Mustafa Kasap, Nathan Burnham, Nathan Luehr, Naveen Marri, ngc92, nio1814, Oleg Zabluda, Ou Changkun, Panos Ipeirotis, Paul Van Eck, Peter Lee, Piotr Czapla, qjivy, Rholais Lii, Rodrigo Formigone, Russell Klopfer, ryantimjohn, Sang Han, SebastiáN RamíRez, shengfuintel, Siby Jose Plathottam, Silver Chan, Stanislaw Antol, Taehoon Lee, Tarang Chugh, Ted Chang, Thomas Bastiani, Xian Xu, Xiaoming (Jason) Cui, Yan Facai (颜发才), yaox12, Yashal Shakti Kanungo, Yong Tang, Yuan (Terry) Tang, Yuxin Wu, Ziyue(Louis) Lu - # Release 1.7.0 ## Major Features And Improvements @@ -235,7 +236,7 @@ Yoni Tsafir, yordun, Yuan (Terry) Tang, Yuxin Wu, zhengdi, Zhengsheng Wei, 田 * Add `complex64` support to XLA compiler. * `bfloat` support is now added to XLA infrastructure. * Make `ClusterSpec` propagation work with XLA devices. - * Use a determinisitic executor to generate XLA graph. + * Use a deterministic executor to generate XLA graph. * `tf.contrib`: * `tf.contrib.distributions`: * Add `tf.contrib.distributions.Autoregressive`. 
diff --git a/SECURITY.md b/SECURITY.md index a5ce3a62ee202f..01886b613e5d93 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -173,7 +173,7 @@ the progress being made towards a fix and announcement. In addition, please include the following information along with your report: * Your name and affiliation (if any). -* A description the technical details of the vulnerabilities. It is very +* A description of the technical details of the vulnerabilities. It is very important to let us know how we can reproduce your findings. * An explanation who can exploit this vulnerability, and what they gain when doing so -- write an attack scenario. This will help us evaluate your report diff --git a/configure.py b/configure.py index 3a7f7b3de28fe7..b6c32543cf7079 100644 --- a/configure.py +++ b/configure.py @@ -1222,6 +1222,9 @@ def set_tf_cuda_compute_capabilities(environ_cp): ask_cuda_compute_capabilities, default_cuda_compute_capabilities) # Check whether all capabilities from the input is valid all_valid = True + # Remove all whitespace characters before splitting the string + # that users may insert by accident, as this will result in error + tf_cuda_compute_capabilities = ''.join(tf_cuda_compute_capabilities.split()) for compute_capability in tf_cuda_compute_capabilities.split(','): m = re.match('[0-9]+.[0-9]+', compute_capability) if not m: diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc index f06debaf316c01..6d1e3325ebd35b 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc @@ -240,7 +240,7 @@ class Encapsulator { // Once edges between compiled and outside_compilation clusters have been // replaced by send/recv ops, some dependencies may no longer be apparent. // A clustering pass finds all the dependencies between HC nodes that are only - // present as a result of edges between nodes in outside_compilaton clusters. 
+ // present as a result of edges between nodes in outside_compilation clusters. // Suppose there is a path from outside_compilation cluster C in subgraph S // to outside_compilation cluster D in subgraph T. If S != T then a control // edge is added from the call node for S to the call node for T, which diff --git a/tensorflow/compiler/xla/README.md b/tensorflow/compiler/xla/README.md index c93c39e180655e..39f8caaa961dc7 100644 --- a/tensorflow/compiler/xla/README.md +++ b/tensorflow/compiler/xla/README.md @@ -1 +1,7 @@ -This is the home of XLA. +<p align="center">
+  <img width="200" src="./xlalogo.png"/>
+</p>
+
+XLA (Accelerated Linear Algebra) is a domain-specific compiler for linear +algebra that optimizes TensorFlow computations. See the +[documentation](https://www.tensorflow.org/performance/xla/) for more details. diff --git a/tensorflow/compiler/xla/service/conditional_simplifier.cc b/tensorflow/compiler/xla/service/conditional_simplifier.cc index e560abc87f8456..e9ec796121fff2 100644 --- a/tensorflow/compiler/xla/service/conditional_simplifier.cc +++ b/tensorflow/compiler/xla/service/conditional_simplifier.cc @@ -35,7 +35,7 @@ namespace xla { // Tries to replace a conditional with a call operation of the corresponding // computation. If the given conditional has a constant predicate, tries to -// replace it with a call to its true/false computation as appropirate and then +// replace it with a call to its true/false computation as appropriate and then // inline that computation. // // Returns true if it made a change to the graph. diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc index dce201456479c1..33d8338809d4e8 100644 --- a/tensorflow/compiler/xla/service/copy_insertion.cc +++ b/tensorflow/compiler/xla/service/copy_insertion.cc @@ -64,7 +64,7 @@ struct SpecialCaseCopyPolicy { // output tuple. bool copy_root_replicated_buffers = false; // If true, insert a copy if a buffer coming from a constant or a parameter - // is found wihtin the output tuple. + // is found within the output tuple. bool copy_parameters_and_constants = false; }; diff --git a/tensorflow/compiler/xla/service/cpu/ir_function.h b/tensorflow/compiler/xla/service/cpu/ir_function.h index 557aa4a6bfc2ef..2e55181eed867a 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_function.h +++ b/tensorflow/compiler/xla/service/cpu/ir_function.h @@ -33,8 +33,8 @@ namespace cpu { // emitters for function and function argument access. 
// The llvm::Function is created with the standard function signature // used in the XLA CPU backend (see ir_function.cc for argument details). -// In addtion IrFunction saves the callers IR insert point during contruction, -// and restores it after desctruction. +// In addition IrFunction saves the callers IR insert point during construction, +// and restores it after destruction. // // Example usage: // diff --git a/tensorflow/compiler/xla/service/cpu/shape_partition.h b/tensorflow/compiler/xla/service/cpu/shape_partition.h index 33d02b70e61e33..db2cda2936c834 100644 --- a/tensorflow/compiler/xla/service/cpu/shape_partition.h +++ b/tensorflow/compiler/xla/service/cpu/shape_partition.h @@ -38,7 +38,7 @@ namespace cpu { // // [0, 1), [1, 2), [2, 3), [3, 4), [4, 5) [5, 8) // -// Note that the last partition has residule because the dimension size is +// Note that the last partition has residual because the dimension size is // not a multiple of the partition count. // // diff --git a/tensorflow/compiler/xla/service/despecializer.h b/tensorflow/compiler/xla/service/despecializer.h index af48f4ab6e506d..cc1695b7f86380 100644 --- a/tensorflow/compiler/xla/service/despecializer.h +++ b/tensorflow/compiler/xla/service/despecializer.h @@ -25,7 +25,7 @@ namespace xla { // Creates an HloPassPipeline containing multiple HloPasses that can // despecialize an optimized HloModule. This is useful to run an HloModule -// optimized for one specfic platform on a different platform (undoing platform +// optimized for one specific platform on a different platform (undoing platform // specific passes) with matching numerics for comparison. 
// // Current despecialization passes are Defuser, ImplicitBroadcastRemover, diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h index a1d4dca5e0fc52..b41eaa303b0aad 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h @@ -38,7 +38,7 @@ namespace gpu { // // Examples of things that are not unnested computations: // -// - The reducer of a kReduce HLO. This is emited using IrEmitterNested. +// - The reducer of a kReduce HLO. This is emitted using IrEmitterNested. // - The body of a fusion node. IrEmitterUnenested emits the relevant code // within a kernel function using FusedIrEmitter. (FusedIrEmitter is not // really an IrEmitter, but is more an "IR generator generator".) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 2beac3227e4537..fa59a5fb2030b2 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -135,6 +135,7 @@ StatusOr> Compare( } // namespace + HloEvaluator::HloEvaluator(int64 max_loop_iterations) : max_loop_iterations_(max_loop_iterations) { typed_visitors_[PRED] = MakeUnique>(this); diff --git a/tensorflow/compiler/xla/service/interpreter/README.md b/tensorflow/compiler/xla/service/interpreter/README.md index 4c19a1b916d421..0b21b251c3f663 100644 --- a/tensorflow/compiler/xla/service/interpreter/README.md +++ b/tensorflow/compiler/xla/service/interpreter/README.md @@ -5,7 +5,7 @@ evaluating the result of the HLO graph directly with HloEvaluator, without lowering it further (to LLVM IR for example) before execution as other backends (CPU and GPU for example) do. 
-Its key componenets are: +Its key components are: * [`InterpreterCompiler`] despite the inherited naming of "compiler", all `InterpreterCompiler` really does is the following: diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h index 8b4e07995afffa..c287cca0c54ba1 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.h +++ b/tensorflow/compiler/xla/service/layout_assignment.h @@ -282,8 +282,8 @@ class LayoutAssignment : public HloPassInterface { // the case that no particular layout is requested. // // channel_constraints is both an input and output. Any sends or recvs that - // are present in channel_constraints will be layed out as constrained. Any - // unconstrained sends or recvs will be layed out as locally optimal and their + // are present in channel_constraints will be laid out as constrained. Any + // unconstrained sends or recvs will be laid out as locally optimal and their // layout will be added as a constraint to channel_constraints. // // If channel_constraints is nullptr, no kSend or kRecvs must be contained diff --git a/tensorflow/compiler/xla/service/reduce_precision_insertion.cc b/tensorflow/compiler/xla/service/reduce_precision_insertion.cc index e2c07e38271df8..688cceff0cd10d 100644 --- a/tensorflow/compiler/xla/service/reduce_precision_insertion.cc +++ b/tensorflow/compiler/xla/service/reduce_precision_insertion.cc @@ -75,7 +75,7 @@ StatusOr ReducePrecisionInsertion::insert_after( return false; } - // Check that we haven't already inserted an equivalant reduce-precision + // Check that we haven't already inserted an equivalent reduce-precision // operation after this instruction. (The zero-user case occurs when this is // the root instruction.) 
if (instruction->user_count() > 0) { diff --git a/tensorflow/compiler/xla/service/source_map_util.h b/tensorflow/compiler/xla/service/source_map_util.h index a776d745f4e56c..18e2651abb1600 100644 --- a/tensorflow/compiler/xla/service/source_map_util.h +++ b/tensorflow/compiler/xla/service/source_map_util.h @@ -23,7 +23,7 @@ limitations under the License. namespace xla { namespace source_map_util { -// Creates an INVALID_ARUGMENT status with the given format string. +// Creates an INVALID_ARGUMENT status with the given format string. // // Also, attempts to extract the OpMetadata for parameter_number on executable // and append it to the status message for source mapping to user code. diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 73e014805f563b..6f5765849ad314 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -234,7 +234,7 @@ class ShapeUtil { } // Returns the higher-precision element type if a and b are both floating - // point types; otherwise, checks that that they have the same element type + // point types; otherwise, checks that they have the same element type // and returns it. static PrimitiveType HigherPrecisionElementType(const Shape& a, const Shape& b) { diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index efa5aed2d1af8e..0fd846cef8095a 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -61,7 +61,7 @@ using TypesF16F32F64CF64 = ::testing::Types; #endif // Check that we can safely pass an input tuple's elements to a dot operation. 
-TEST_F(DotOperationTest, DotOfInputTupleElem) { +XLA_TEST_F(DotOperationTest, DotOfInputTupleElem) { XlaBuilder builder(TestName()); XlaOp param; @@ -798,7 +798,7 @@ XLA_TYPED_TEST(DotOperationTest_F16F32F64, this->error_spec_); } -TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstRHSClassicMM) { +XLA_TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstRHSClassicMM) { std::unique_ptr> constant_lhs_array(new Array2D( {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); std::unique_ptr> constant_rhs_array( @@ -826,7 +826,7 @@ TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstRHSClassicMM) { ComputeAndCompareR2(&builder, expected, {}, error_spec_); } -TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstLHSClassicMM) { +XLA_TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstLHSClassicMM) { std::unique_ptr> constant_lhs_array(new Array2D( {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); std::unique_ptr> constant_rhs_array( @@ -855,7 +855,7 @@ TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstLHSClassicMM) { } // TODO (b/69062148) Enable when Dot implements general contracting dimensions. -TEST_F(DotOperationTest, +XLA_TEST_F(DotOperationTest, DISABLED_ON_CPU(DISABLED_ON_GPU(DISABLED_ON_INTERPRETER( DotOfGatherOptimizationWithConstRHSReverseMM)))) { std::unique_ptr> constant_lhs_array( @@ -886,7 +886,7 @@ TEST_F(DotOperationTest, } // TODO (b/69062148) Enable when Dot implements general contracting dimensions. -TEST_F(DotOperationTest, +XLA_TEST_F(DotOperationTest, DISABLED_ON_CPU(DISABLED_ON_GPU(DISABLED_ON_INTERPRETER( DotOfGatherOptimizationWithConstLHSReverseMM)))) { std::unique_ptr> constant_lhs_array( @@ -917,7 +917,7 @@ TEST_F(DotOperationTest, } // TODO (b/69062148) Enable when Dot implements general contracting dimensions. 
-TEST_F(DotOperationTest, +XLA_TEST_F(DotOperationTest, DISABLED_ON_CPU(DISABLED_ON_GPU( DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstRHSRows)))) { std::unique_ptr> constant_lhs_array( @@ -953,7 +953,7 @@ TEST_F(DotOperationTest, } // TODO (b/69062148) Enable when Dot implements general contracting dimensions. -TEST_F(DotOperationTest, +XLA_TEST_F(DotOperationTest, DISABLED_ON_CPU(DISABLED_ON_GPU( DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstLHSRows)))) { std::unique_ptr> constant_lhs_array( @@ -989,7 +989,7 @@ TEST_F(DotOperationTest, } // TODO (b/69062148) Enable when Dot implements general contracting dimensions. -TEST_F(DotOperationTest, +XLA_TEST_F(DotOperationTest, DISABLED_ON_CPU(DISABLED_ON_GPU( DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstRHSCols)))) { std::unique_ptr> constant_lhs_array(new Array2D( @@ -1017,7 +1017,7 @@ TEST_F(DotOperationTest, } // TODO (b/69062148) Enable when Dot implements general contracting dimensions. -TEST_F(DotOperationTest, +XLA_TEST_F(DotOperationTest, DISABLED_ON_CPU(DISABLED_ON_GPU( DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstLHSCols)))) { std::unique_ptr> constant_lhs_array(new Array2D( diff --git a/tensorflow/compiler/xla/tests/tuple_test.cc b/tensorflow/compiler/xla/tests/tuple_test.cc index 098443824e4ed6..41189231b90e84 100644 --- a/tensorflow/compiler/xla/tests/tuple_test.cc +++ b/tensorflow/compiler/xla/tests/tuple_test.cc @@ -514,7 +514,7 @@ XLA_TEST_F(TupleTest, ComplexTuples) { class TupleHloTest : public HloTestBase {}; // Disabled on the interpreter because bitcast doesn't exist on the interpreter. 
-TEST_F(TupleHloTest, DISABLED_ON_INTERPRETER(BitcastAfterGTE)) { +XLA_TEST_F(TupleHloTest, DISABLED_ON_INTERPRETER(BitcastAfterGTE)) { const char* testcase = R"( HloModule m diff --git a/tensorflow/compiler/xla/xlalogo.png b/tensorflow/compiler/xla/xlalogo.png new file mode 100644 index 0000000000000000000000000000000000000000..7a0a295953d0c47b23718197dcbab1677b337455 GIT binary patch literal 46785 zcmY&<1yEaE^k?wk?hd7RvEuGf+$j|I;_hxm1I68?xVux_t+>0p+lFudyE{8fhRMrJ zUf#X;ob!`&!W88tkr4d-)m6d&pv{EkgaLqR5aNpg z4EQ~nk(9C=0O0W%0PqU}0G_~?{0;yBXJ!E4NFM;;Nd^FL?b2J6_`n~)8p=qD0p36U zvfGQ|z*peyq`o@>06!!?UJzLEqTsIvz&T0FiNkF{VgunB%+w9b zb^9?_f45$*kn$y}g+V*DddDWPTR;4e@di3gaheRx!-IS%DJ>|?VMfZq*1<7u5)q;9GxE#F&)cKGW&`QOLgLp04>;@O zd;pjJIz*}Wy~VI0j|PcJToRIun9!E*jmGAEnnyLbxt&SrKxsFjL6e9B{VB|A9uB5Dxgt8Cy*!I6_!1X_OPPRgW}e+X*$ci`aWIJ=a<-Wc zoDbTSoOYeZ?tG`~!ByV8T-reHg*|VlR2urPH_o&7#rdi&%b^BAozOdELInPjHWo5y zkB%4TmzyPpIuaAS`Fq-7VdyHFo31M$n~?Uew6D@RKksI#g;>SB#ol&P*<5}+`uZ-{ z)SL9nS}oL7eB8Vj>*jhMCxTKssCN)pY?~=lom(TXuxs&^!JEh=1 zxmtiV}H6j{SaB`Lb$8f~9{wr_iNNUFu+1VCmEW&KbV z17<xs;V7{UlQ4` z?PeZ465<(WJm$Ci9)#QKeLkbR)7X+h1_4Fy)yOw|flTGHs_!?xQ%T=&J&fa`byNbM zQ3QYcw42`6a5ucxf7?R<;HhGvZ4{IO0{m68{MKNh^VWt);Qc7FX%SKlkq<+C?jKg$ zQ_n$CpPv!=>uCADbq5$}GoSz-oE&TK^xFAx*}u6f`F#(H`t&CSTCn=UZQ+gC-NXR^ z0Kx$Jl;-N-B`4fSX7)#pexZ6py#|vk=1*Z{g`FPrTxj|Fj=BGt)v(O}v1b$l86zB2 zhnA1+5#ug~MyBE?47s_8)V@JT`Y4JHKqn{_S@G>1_7!x#!|0XID=e#grwvaESdC8; zJ2?3fN@}yXqIJ$iioje7$VS_2(jYCQ)O(?p84yhf_e{`ugSw!LKONdh0%7ELfA~=2`2lT5u#`^aEJ{e zO59sa*Xe%X38lqz_p;7=Qsx*|1q&0V`D)3~J}4g=hhZAT5s8hy2Q|0ZP9;(wn~O-& z5>?{)ueNbm9fF2n19d|5Wuw%7!2J@kx?(af6$?WoyEBh?{lu-bps%44g;mnQ zAO;|otVByC>sD%|hL%l*$4f%>DYVVmC>uz2vi;yc_RoZx(LLYcf|L-=HX7GMQ^hSN z0D1rkUHAOxuR^1Biq1Y4>4i-54A(E{M5hj+bB`-s(-E2*?qNYK08lL>)YbP==ix{? 
zh;_x)Dx%Nc&jQkM|8Xs0=!a&vS5Sw{WmMuAiJGa~7tc>2k35sl;eNXXyifpvC?)Kj zT|F*Jdp=_OrUT;CVUnUjLHEuh@YDabCQRm?@J%*I$2HZhDzZ+9G1^{{JkL%96D$M2k-#wk0Esn}ExuH$`GNjE?Yy)WBLkg?IdNqN>t10y!SDq z++q=Rem%Y7UMjM&s8<9;%ZBzuKU2FxnUEM*#t?gz$K8t@zGwKeWjfNVd_rq`$-?9pc_S6M zq_?Mkl}*!`>&l{V}Jn2WO}< z9Zh>Bq&r;&E@tyc$T`PR^F5>rNZt+o_VOe$r&Q=UF8YMls2bNt4FzK1p;n@RMvnw{ zf)@kKEWvB^bca7W?N8|O)31(}4AGUSH`|Z6S*YFyb{8|?RUALCjroCP%+IBx&24J} zg3f;isww~#%z&tUQfdG(EZcI&e|4Ev56BYtQP)7<;hu20V> ze||fR#!+m1<5lN#p{}>mJmt3t)lP?k&coe=TIa=a^??FPfD0qbR5?x4$-(x4@3RY$ z66NmmfDw&6fG%Gwc-zh9JQnbPB(a_s^cO`5dTj5??F2f=sp8ihmxlLk!1y6lJF%3z z)w^@}3)^7|xGY4k-#8iF?#W@3N`Dg6qs-XuFIb=y`MM~HkKyLBHU(|{l{e8OsCAwo zAa`!FfB8M?-kbmX>qJ(P*xf?3t)`vhbF1JKei`VWhNF3ceRG3Yx*J7~Y`aV?_)$HlRErwQN3a>U7`o9Uh> ztVdJj$_nr~;3wqV*cnh)A8#m&5FNdJzJ&{$jE{3cx^=U*XFpEIoJFf>hShsJhIlyl z%`KlaT7nZ5$u0ty0G)Ys+lJFpk=zM?EYp@ z{x8(5eg(5*w-?IXI_x9;$J53U5P1dzGHywpZYDQsisOrpG111cD2R?-PW|>k>cCsvNGTXeb^5^R};I*=KfQ?0fvd6SK3gBf_icjId z@?Q52ZxWB4+zAaRtSdmUDn2Akn%wtr>P*_l>}&4aPgHTLfgqd6G)d=5~bdv|p=0 z%rnu@p+vhY0P~(FZcHjop()ZOb2}0MumD)7kg;avBsDPoO(uhcCO?_y<}$zKzqv`h z{j+QnW~5)$IVqZ!JDME73|{(`F#-jC(I1(zMB22wf1Yb6>ga9eqjB4p>qVykdc_#n zw^{x5u47&M?^;faMtnE}`jWizPXl`Ns9(!RD3!<44|uE7%hRDy!8_jThJ2uN+MWp# zNI*xvI3(rfa3+`{|3<-BnC@7ZZ1XJ)wIUw)NOavTbRQ$+^)R9O&QolQ=vUbKia|8@ACrobyh4B`S7{SjDJSzLDI*7Xd z7PbaO;&Ul`Bqxi#FUw*tdoVZqAG4TL8lX9sE0C{vz+31) z@JZL#T;p*O2N;k}=wj3gIvlihO8$G(;53z&<}FMn`&=hMK@epKKIESwWoN=dE^mwD z9Y42*Po6GShb&&&CUi4_Rosn2Hvs%a+{elMDu!hUhEuO_GA$d zJXw`*r}LAPc2$c2Q~{sSe~SnOHvXo8AZr#C0zu_yO*o$d(6_GB()UrHdVagW1zHgl zY!I@GSF;)Xmr^0fg=A1Q*5l-}aAF-6^G&0A(MqDQ#(fVs+-fZ0Vi?Ea86@OJ<{p-B z3BJ5%1t8@~D%ou*4TK**KPApdxo^6?s0>78vcoF!nhK3#!zjL_8LJ+MwQ-j|lyIsZD)VGX zct=2UTw>;B-|i$l`l#qVvV&Ornj;ARGUY&b-hWI~)Y;5zS`Q4VC%s0_RRK7DLAbS& z4xR#ZQ9)91aJ7=zCLRe^AY#t@U8%vCTlOj1^*P$R2>79o!<{Ua9f+!=AXhXI&PfRoq5>2@8GulW8lTB z_3K`c6UTHx3~#t)&&?tn3)&bKSWKq+w|(xKrS*P66MO58kmT((?p)V!!D3d{1f&v~ z_B#IYm{I_zF8`&llu=KL(a;@U%h4P*yOyL@7aD?WIQEBL(mq#pV!ybGRHyxPa8aRG 
zru~H(4J1k$@~R4Om(E%9?P_0+;6Zs&_W0t7WmiDVOG1t0p}2%TsW~gTBkt7K?;)mnDfg572m(n*gbm#V z3vzwq2*)-^2sE-!u4pf8{)$nJHt4HcMBx)QReGtnIf3&$6y2t#-v9*7L>k%7dw7=HhCKIlCRyx6>0 zFm)&9yDFlAMXJ(|XI|z6k`cZ^<>@*H-8=ZmNCjEduju}f0|+Y65ags9_5Zjvd~F`J zXJDUsnAyEne89T*o+?QnIqJ$6tcUAHk`2Ndr`kBd3l2RPn12D$v}Y z{-O^KwYS?W@3c6qI?*zskEPdM=oJdE_KQM&iKm3}j_w?g}Y{bh~bNF9whMmiA7q_ba z((>hF4xB)++C+olChhaB!)kayLmmiw{p4J*aLN7)O(8{kJNj=7r2&7Ka}$t&;UHtz zo!U-wCMX;rB9K|F-{ReOm6uscaBwVS^>M0b@Ew1sl$Kr|gfiu6le!K2My;t_GtWK21&xliq2E&%`O6V4!G{Kwx9Pe624m2S)(SmaGIfOud!ni}1 zaaUNpIkN>3@+>$L3z7ppyugluRPio$LHME(GUuo5DoHUUS+1sKMs4A3P+w*3?>&6v z1eI}am6JG@zIt$skP;Gzy=XKB<9z=IC!&lq`q}E{qbTWIOj_?ORuJ3Rqhuf98jMd3 z|t=1qsa~cXoj|%d+Nn7BpZ}UJJhV?O5$XD)(G0 ztg%4={bDtdZz?Ve>+&ebKs=t+m%6!+9!6px2w;IaPyPk0Zmcxr;Z|0jmo@Rlwlzrb z4B!S54x3LE1f~)bUe2zV9DwAnZS13s`m)WJFEWD_)FhK~YxB7hF z+1yrz-Up8Jy=?ttY{S(y&bU$N5U^zR*=8_=0Co&y8CbD*MR8e8LW8ngCUMA^NP*;jsmZ%;+k){?^hAVC^)s#Y$dh?z8Je z0S1B-dP_gOo62{iFBoxAv+JpJ4-Dj#{zlS$(EH1y#4$II=evVYa6bMg<*rJLWq9IJ z#VgHgiV>US02fI!UR@%*BYFPhyLn@o-r>96Hm?`fXKLta2KVpgw3u5k;ptmcykx?3 zcz4Clmj`n3p{E!RI0!#tvN?rcqh=31|2_7q1l1)}+Ns%iv(F(9;VX002cP3-D{0Qr z0IIfo!g#xwUknz5bwJ;TS``NNfz(tc=Y3&r$;oDzPW-FtigG=5;(az z5?_iTS|m}xSGuE)bQkmhe)3QYu(=7okZAmtp-Fx7Lu9IBi}xogZb>5o4@ z!tylD>aGUk*7X(?2461|wNh(6BI63$-B#;h;6VEmGx6h=;CCMNoKO|KIzaYMbeg65 zg=&MPwK|YA1yWsSux(wXb=cy#8u^|ScMv9{B|SBm#8Gdg*4*>U9q66^A3kA`L=wl7 zaezjZlJjRPo%U{!x4-#=I*Rzo^Q!!al_DZexSCa-APOlUMqQW4JY6CWvXuQ2Pq~4p z5sl7O?|$z>F1jdH7I28&rLY4l!a>1MAT0 z5|moHo3Q@05u$0U!26pdu2GhSXN?6+VcwjHA2uQ;G$!g!WdcU09GW7*nqirzzr6c2 z7#uWmg_6zE+bx-)h|W3Hc^@kD3ZCI0sAoEUS?#c1hCq&Xv``$bnI7M-k&Ie&94W1+ zgTnKGKky#YF5CfR(?%k6^1bLpL?ZgTcW_7Dsort**_^O*wS3QmF|zw-=O?lyeQ#pb=g_cf-is%cVV0sHTXIX0r*hqtPXn6eV=r8^heql7qRKVas<(J)_+Y>Ad21v7Z8{f{Ltm0gXaba|&qeLE(6wbOHz( zciO~PwO*#ZA(EMI-$3@~GI9r5H3?}HBP4+z0*L_mMITH={JHhg#TkC|bv0V8t_>+k zuzyJAt(!%mWC1aNOtp(ejk&Q~>J2e`sqr?t(ugJ@)#cmg>oGc7R0i?IadV{<@=XG$ z;tO?1DuC3$>5@{l-zFlqhuEXIy|>2pb=t{3uJ2on3t=E1)oC)t0QkmV3HtWNo$qw8wl-;jIE3$@UTEn 
zh|##lhZ!ge7W|j6#Jg;|?YS(|c@-Yd635On&bTySQtBOEM^9$!^L#$T(<5u*cd@R# zFH^qRrk%YLvpv`EdN^?7kXd&P3FL3TY4cPZX?^pP6u=X@dvAsVxb9p2J)6&@y#1R; z(|kEzot*WYweBUy@7L#=J32FBUuoaiJMreLs1%Z<)h=?TiOCDhSrn=Y=dXt-ZTgv* zSV*^s&TDfvvYAbOY?-H`1J3pQT(w>F-1olEr!0E`wN5T5;|!NBej)VdRpr@sC-}Jh*G1gbvm5vw7&i*UsL(<%G2|jz zz~XIql;&%$L)4{_d-o#G@@hV4C<%lrB9UFb5Hxq5cnUW(0L8Rw_smul*qnJ@Xa}1! z_aEkr1EahW-$|X;64;;;hSqW@ep*g8*!KG=4By>_|3cs_)#cW!~i>)l*=AAXk^g&~ZQQ{cK? zt<1W0i&NL zTiK!X$|0EQ&-4K$0aOEHY=VH--19ceoD?4lG)Yelvz$;4afn4)t|hUBrDl}(1iYV_ zdtA71$uf%)j#2Y~og#qf^{RCxdvQaug%;-Ewr-vCpPIZJDUen~nAxd1`^o{q`b^=W(;O?n* z#rAxj>IS7#)^OcF#r&LUODbd=U~C0dw3GvE?#yGP#ez^GOvHWNGxC0K+XyWZ_^xzo z>~01FhC-Vg3*ekrEO~KsS(>c(9{rpQXZ*UoL zJhPt+A5V(M?-1aE7LFqk%mENyy97}HEP}V-MytLES`6h?uKF$~zNMJ;O*QA#e#l`$ zNb4koVhe2*Q0wb;>~|wFPt0sKyQf;;++a%$Y1;Pf;ZZ<9;PPV+ zoQ=q}KB|S?{d(ILkx_3uP90{E{e~YMc``Y$RWN2XZ)rsIWLsGLVgWOR*5Eq^1jnWZ zu&_Aru!;0t1jL!O-&e#^65utCJ1#|}5CKBt7d0H_9_R&jjS3*x#t!@)>R`l$v% zi*F$+VgLuB348*GBa&0)T}<{s0PbOFcD76Y@$4U2CUvq~7gs{zj}UVHQaKsFkiGb+ zLG4p#ZD|R1S((X9zT0x{)<{5fb&eoy)K?IXMm|}edUcC7D$mM7k(rMIjrk?nOfvaX z#RiKZJ8IJwaS>zZTL5OVA*alcR>J;46hH`_F7=d zgMq-NEJiaAV+tE%=!c_ff3Q=?+KIeCJYZidkzoCBc^8$fM{S-;YN6VDr+Fs@2;g~6 zrOO!c_2_F@JS7wW`Nub^V;0&e0kU}r`d%b8w)PX$pw4AJzZmRcdX1X6i%S9E$%S@s z(oYV_!hrQq%jVkosTz%pRLaBGKEgGB(>b%^vmSaI|B$oS#W_(nN+u3vmO8LzOc?mZ zAp9h1^Fo2!lN3)`hdY%Rj$vjDVR-@%Rv*#%hViZNL>`7{px=P$Jx#&mP(b+};{iys zyp{eH=u(~)IAWClkZ{G*RhD!))Po-kCVQJAV{wEQ8- zR+d6Mqct10+2vZv$^Y&-EP`A&U?tgk7eNRVUy*MoA5mAe)TdDzkkX(2wtJaBJBmc+ z%4slrvc*z}aQ{H11@N);Fo{o1c zCp=*i6|#%7`LGxy@f__E4hRr{(EcObPix6J#GhLl8Sck-RMyw;8Mb%KPR^ac?-8_W z+5+B<7lnLC{&^qT+n@>8=NPooQw%+MM?b4Zm=N31N79@nsm+&A8&Nq0jUgG{*&8x- zz8A#IcJI~XRNm*592QE(m9$9ZpiR>#Cq-m9kmV=3w0gfO^2Jq4)%-vJ=uKjx2>mO+ z-`DxBFIlU>N|>|X)6@@txQwu`UhBCZd)-jUJj2?DPgM_;H(Del6nng&zqB+ooCkD6 zpc@ECn|)b?DXv3f{)J}P?{?2x5dLI$p$mt*Ts^T;G=hJ>o&A|@2@-gXo8dUwoazMZOc%6=A(&^3g!S4_j$ z2!!iGgQqEymE1{BMk~0zJAJ>Dk)UAA;o_iR9$1NzMTZW>#^Vs)B`@yLcEu@K#^MGm 
ziLayF^Q0}~IRbbo9U`cf;nK#HZ24LEvHv){Y~KRLj#Y2>rE}kuS3nAg5b%-imG+!c zU=QL2y7j-yug!J0OX3?WAJ zDbD6E`*`%GahCW5m!S3rHAG>dg@Gu?qD2iIuj!*D{|=P8(O)0dnk;~s%W+e?-!#2-fgD;rL^PG7;N)0k27m6k*%w+CER3;gF> zEjl)^(m+SPu7(&CNwLA}2>Yx}nMvPMO{{TIS&jTo0u8pj)cpJ^dX$g!!FmMj#h*xX zKZVwth1o-~P@1rt_`1Aw_W|t#XPt~P}1j`ik{y&Mq%ds zfo^pHQiiz-URmnaUDmT;Z=OU6HN;7O`}Laq^IEh57`m_Sa4>sV&_EzSj)eH;elqf> zb!#5J1V>`HbQ*;;G_^E?zVo#kxth5BREmJ4Et{`Lz-*Xjp!@mknOaX%`(C6Gty30J z34;`#zLm4kjB)aFczWITAnyBjjA@i1jO(SW#B-l5U4r@!M^a$ZNGZlyjWdf;fc}#j z3InKGcmF;f69tgWL22~uU^+-31T22#0LeBm?Pj&|tkH;4{ z8wr{Be=KT6XNY<(e_E2$C|ckC_;Hm*2TIO)jteH>OPPJzkb-8h$1e1V%07lLRzxn1 zPyGG)SN8Zf(crcnjjRLyjZNI~{LGxK4&ih|ugD{vD5C$Z@AZA z*}i`_LZR@d%3zvq&1?#F7rejtjCrWZwN>DI6;`>@sqsE*p9OXDBV8qOUKAF(bZMg7 z(I_8wZi{H=lc5$yKOT7FgMq*&V3|6l30%9&1jK(f6c3lF@6P0-FR1Np%x2W;KYLe} zId4hrobW)>`fK>x{QTg#s+z69AS*6x@;;6ctss;X0z@d#U{5-Z(0p2WH9E?zMXCT? zN}TU`-e&pVr5#&aKO0xtYhVFb!5ITEiQqj+X$SZuCB$DEOj)FGpXvbh+h1%6!U_%@ zFZlp7Ce9_9qh>lL+6)jsc}Gk3Mk=wM<9TD9it~gz4h8kUTAeeJ-&a_iFd4>Vz7b zF~j)?NV2?4c5pzljM-0K9(;S88$QV6#oApnq~A4b@{H;-K4Cu_|5ZUOPIVf?fUftB zk@b8)jOFmTF;msQwe1KXg)MC#4I?6W6Dr%DVc7Zma_kATD`0DR!skAKHs%x3?}m@MtJfyhC*c@ zHf^GK-=kbQluWMjL7Vj}(SGBF z2Kv=xtBC6{t>Z5izHce>kWSPJtfu!>WSDNJ4)pTB;3rdDmKW^yuxPBy@) zqOGA{C8Lq#B(pOw-%nJ5(70f-rnbaP%IZ8+fo~zX0oz5{)y1H}h_g4%bz#?XzSI`D z2qRB}@K>WB=Z3#N@Y==hzwgKMrNa{{VqqfkqRnMWU8|O(oVGH&XN<*lf!0ve0@rZ5 z4R=G*j6I8l{^)YCh%AzY!d1twL_E~6b9yy$#{E(33=fSPX+nqn9bjh+Qn56)21+-< zTUtsm*?haerGu#Z^|KI>}^ZgGZcm!#qijq-Gw4zGwn#m{obiWV35 z=enE@6k~>SozUICO=eTz4DL(noh+!X_NrQm=30mt!%wg%&DF)nBN-?yX0~wy=>(1= zmiIC0-9?4h=+C*&gKshP;|4pGTm#VL+bSE{Bu08hU1=QlA<*0$0Wfs|8o~6ZRpP|c z>N$c@kCGbr2@vit;i7CKVK^2oA5eq!*%{^@`DXjD-(taMcc3gV^GAKbqJ8uw)a~Q2 z_9wp}G%|{yf|X96C`<_S9*N(xvXBZ4d?XEme20yQNMy`S8I_N(oc0SXxC3*4-E#2{ z)7ci~V)2C_p#{ox)xD~suvR@S@(iT&Sur3|{5!k6A+Un=rI8Cwgm4eruwk&FpoP#C zvHbBJP4Q4yFDLg~uwU`>mBH3V!JyCj8oQc$?%iUMy{DYqdvIXjx8z4LQb%1ILN>8M zpnpvK#qKt0G^DW5YqZ>rx+pzQD(-KzI8VBKqs@mRp8?!;-#DN&VY1V4;8YHC?_fjS 
zm0T{lC>Wm=fC4qf#q~|a+~iuwshHy(gc^c@R~k7joWE8e>m$n{@6fzT8izH4E$KD% z&^P|tX8M}UQ7e4YIu1RW<))otYYesnHMtK*f zQIjZ0n_H{R>jI)6KkHmWeL{6cK9oZ{HFa<9OJ=!-M#x zu{l_NE{CNZNSlt10huwF*HmiS&pN6>c@u+s?l6o+Oyj}8V3r5vp*msQSK~iami2kY zb768c3TS#?|5B2>>rEYR!UfV_OBh*Q%vFV!gx z_vaHAPaO?vI_>2PkdeZHUimTS=O9hN@Aow+WrKW*sw01R*>Wx6;zuWgCRVgcveus` z%U70a$t&f#jKn86#5q1sx+Xy6LtM1cgvzJrTCMh61?)1jh6J4@Utl)@&1|8;$+8zd zPwIUAGxGl>Riz)K>f~5x_+}$mt4G${>W?Fl*F(AGKK5yQEQOz+USRnw#pzT9F*bzz?DvSM>tB1v^#))fm57E3?s#qGxoi{yytH-} zk={P~vWwmHh#_{*P{pvLhz1pQhn+5U1L$0>w(j}u*p5`7#536U6jRV+Fx1u8ss)=q z5LsKMsN^g<_9ODNvAz~kURS{T)Yd#OwGz_eJAA8KgZQ$uNqj(OUepSsIRNDRh}hO5labsdRY2?C_!r+#qFTO8|`&T%7?DQyTEF9 zrP8_GyYA6K_V#C+DyIHU=G;tW2MMSil?6p_m=CK4#?PbNXT}ZGbPYJ2*Ew!zqy|)bhRd5_t-XQ7`?=pLL&%RJhhZ@f^!E&x1`O zP4=rRTz0#>N3BLb?0>3M&t>nxXk46=Z5X4`V1Ub)VtjgC?`3|SC^i6s4P3smzQ4VS z&U4SgIeuGGx{)0=DPf}o6Z0Y`E88CO=jy>t*XAkc|J6*mxEh%?&PmW-_%|!}TkMXeLYL##$k7|Xb0N7HDQ476+T~4a5^8()VdJMDH_NvBmSzzY{^iS@ zPeU%J;MH&;DXj~HfCkI@!Kf>dhk^W5_;`-ylwiT8fh%+qXLEAvCq7RHi-OBuUtiV6 zItTioQDuLaaeiGd?U*PYrse6Zv? 
z)*_%g^58@dJ&x9YMYF7=a_2!IM5}#{C9kUZM*# z-tF?*79vSoLau4cq$D#r4IqGFGcuR92HclR6Kx@M5ks&kPEE9IkS_<9YSg2#Gaco< zIS*`M3P956qRX(Jfv5ccY_@_Ku+1iR?!?!X-^m7U;9oY~e6MeCwswke{GIpe#07eA$Oz%2EL%Fc~IDutO)6#*0x9~i^!w_8EkMc6Gk zparQpao8Ju&?L1g^@zFxcN=sebTCt|MT!1S%5D7CNnzQ!$`8)R%AdU(4#BliRGjK* zzHNA3ZQe@vXR0IAD58Xlsu4Ol6|~nVFwx^5S0w#uG0{SZ8Y0M7(jwG2jN4YAFYDz$ z8B}xHGpaQU=6-zCEp&^BP)I~6yMuE)7Lx!bVT6hqmLV-xig&UcDIlC7LPiMP4+=TN zL{uC@m!3}@T1e~?M(z#~&euS?Rqn%cqY=1r_+A1KDYtI&KHTKDHD5x8CvP~j6!bCv zhewmvbs+M|&Te1E=|~`-*rNtC;d_A-eZHWo?r;C_P}6@Y;d1DuF- z9n@9+J!z~!ntAol?_br@Rg8B|Dd)j?B4H-AYBwB|DwFA9C{w#lv=4W5hn0a}fr)@g zu8crZvhDW^P3}%9sR6HC@vp~Ix~LXtF_9h0P9ne`uQe$&!6F=01UDV^xGeo zZyd^XtR1`yh+x_}3_M-!SX>Q-u} zI|b0awCyyx@4;p3;d43Xg#G(-Qf)qXjJ?P?iQ4|Z0h~>7*$Ur!L()TvSu~N@7fWdL z9_2)ne_Ti(s8^M;`+uWe+0n?vilhgX&SzLT(j!{h4_N`WO(%W3OGg>o?5ks63R_T8 zO&F0fD~1I;t87rP&OiJF3Lu9ivn&*k3EzHe2{4B2FN$R?$@`HGwbgG!wn%WtK`z;O zlA#|ycDeP#Ub=f45B9B@bZ&QSn13we>3veM+=}L6y=7=B$=~o$-)_eS=lENPqlz6q zM>*J&Jv6xSpPysp2-*|53gtqq7lB<2&-%j9(B5>}a}*{Pnb$gMD;31xR*v!!1+6PL zllMHGUz`IUU8OPmqv&?E1BciN2cEkoZ%)>yiFrtQ>|I{juo@Q`Xpmw-Qq5*jy=|-Y z+f%^sdDiP$Bcf{0&2O8mythP)J`n33Rn&6@!IRYHIfw=Cp6%Nz3abbpb_yl5T& zvgPXw9ht*eUx5j^Dr~@eHMlV2nIcc82p|0HR~e`Pi(en!SIGpUt?$#*y6x&$BQPqk zLvVp_0C(o`$MrMj*j-#&Yj^VX+IN^keS#3<$035ym|5ILtU1$EwM@z6^#Knx#Pq*S zkal4YaG)&zMTTp!ieGtLw|)28aUcS{;9uKJEI#fpYV2m{9iLXZ@6WGOJ)GTWJrmZ! 
z>xT&cF)sX8ORv@^iVi6>t^X;sp?BZPUwMne-6$A;aKYGWA;;@%@IBdH)Y7qsVsm(- zkOm3n^=59|N_+yTG^WJu@{P*t_jenImm?Y7R_F(RR20F15 zmW+sPy^RyC-(7>_eQdoyt$6=aZQz+79@flNPAqur&XfvdR>j@8@Fj`7=%H~RV+{r} z%S!n6kSwei`CL>8+PWQLnrXbYbSQJHtF&KQR1r(~DnpVPV?7?8u!0%s`};~@=F2++kB0xr4A3VE?WT1)I={f(8Xe-l z9>8)q^OaM=XHphklSk*T=NWECEeT3v$M|u58e41!l`kIT(5|EpphbJwAi(Eo zhz&1+qy`zwhDFlqt#8rVV&b;Cu%%FWg(^K&?i zAw{598K{{i;NCdqvxo!`x**6GRZwHoG*P`F%cMC2h#90BJnm1wPqBc5t{WK@ZTv}j z`cPqEp6Pk;)%Og4_F`Cj^?>kTK;f*e%Er5>*~&o2RVBeAR}WWhxA4{R6ZkACf=6y} zc`XQGAjq)sRRWB2Pg;JTAc_OXWWlpniT}-B6?8k%(UlZNLR?8rjW&Zf&rN;CzuwlE z zXI%O1M_TRn1^*$vGZ0{K?H_Qzbf4RJx<6-MA4h)oS<0-V-IOBBo*}Wmdf^ei)}TCv zHb{C_o+bg0kFcxrkT&n8o;_AJbdt7*U8TZ>XXNrM8zdInSoh+Zj-M@d9H?0{L-W35$^t6NB_kX2s-5IC`4S6H$$Y-G7pUp-p`7LJV*0tMb zCO$attTwFQ*kMAY=^26SwM>JGKo;x>7$%2Qw1{(gilCw09P8KP(3Tb#mb7JqtH^ay zgI3aL^eT48>2II9hQEmX3=@i~Y${;xc$BAfQ|YeeQ{Linom0rYKEc-sc7RRQTQ-d( z%t@7_Vpr*g#g{e#QZxv&*Jcho3$VhwZ0n#fF$s!ThquRrh`{yLi=L-Z(3x)gvLnII zk_}YVtwvN^Laz4(Iwf>0>c_8ik02T`Z=SHnF{GBt%am6O^b}2i9}UBawFsSrCRklpG+)6?dCG--F(8J#sb3U+?9M{ z+RuaT2}BJ0&LX4IG330Y2FIp0$>tcHSCXL@&41 z30ID4YNZ~*4{|L?0gwP=iLdq~5zlhXQoIaktilQ2BSt5s_ntLE>}Hrrvb>$0+w03D zZ&SV0DSRaImG=IrY+64-v=C_}V?LVG z593WJ)8(-G&rBGKQrsbUJp^c4Hh|zN8}ajJ^_^(ar_8C_Wekxv^`oq?9|lo9kzII} z|BI}*V9V-#!ge1*O1itHmG16RIs~Mo8$`NW8l*uw1*N+~0qJh(?(Wz({{HW~kA3Vf z0MAdEN3{lFR2si^r5CAco44lOwjj`&s{^!eUTEvVrqep!zoJ0ou-w3fCC74@50c zrbPo?WvZ6=9)vQ5gq+!E2@vHvCWbd z(O_B}K|?uPtM0oAL0UD|AUA>6Ih(=*fFJEQ1++ zq_inHD9;D>HV*Xrs=50RVf&*j^pXM0!PHL#`nFH@0-$(JW#*cdvG7Y2jWB$R)nR(gPw;bttBJ2z=d$gpW(%+JGG;jW^SXzMc%h*$n?6j-%j?q z?>PK%i=?^{I~jiS|9YVQf3r_C*W)17=bnoO?2B0xmfRd0+*U4+!|pedH(FoK%O?r~ z1#duCgGz#Po@lv|A!^3t+c1rZq=mc{87T&;gU3L}vnJuD(?Q3lrEQ=~f{*m=LSJXJ zqLuw-U#FWHi=d%4J`wL-TI`?O!rSgy3*+~5*^`tuZo}MS|4T$?K_a@?Mn$spH!1~4 zld`!*X2s9nnjiH#wC7u<6y?()-fCHX?N(w5+r`mpCLif1_0slvsJfX7s65UWx3jPJ zGcnGsr;=Im>BURkPX>PpDn6`o{BU>po2;KpNoPN>%GJ6KuQO%6=VGgvR!MzIJA}Ie z5gC!bn=DY)gwoH#gp)Q%bf@U#Ikt+}jqyC<)<7oJ^_o?w@G=0M3YB!b%LtHkP(B7C 
z?NPK)sa<5j-(>8MMzRA4t{%L~P~9H_tGMOf&BdBRlPSFtr&T!n@wv+xpT0}>67{|C z-La4N=l(Ee0shiyi=$nMJ7=QysR0<~LLc_z&8;7pEO5Q=ksZ-MX5ww0Aj8N)B2SL}`oXdQ}|l*kqD(_#DAOI#@#1lX^^| zJQO_!6YKlJ8yt%eL@}D23Li;i_XIt{?}yYeT+}KAH)VI3J9j-ttu9ntHtgLG+ED3A zI`jU_3(cigs%2h|eamlIGc}|exg5gm85L-uK`GCVg(<9)ZilVqB>SIZYv(~aZDR2_&S(%G0UHMp(9AC9r+dAy$!iX5LN;{_^g#PfcboEcSfn8`B zh(Je&REv@AlNvw}(XRt(N_n{7G|!ccRuYKQ7n*fBuZ5G{@2-@+7PuXTV6J($wEB5Z zs7tf5)a%0eC+MKsUXb8VSk$WIS-8!`urQJD%^Edc zQ#BQg|1A3YGB2yS_Y1uNLSEXxGQ4dMkLg5Ycej(#&{+0qc?^N}ChU(#(N?uC)-|?r zruAj`Cb`a2!s@DsLN*_BlPG;f$RX0-FN>;yx%lks+GYxwueZ~3{LLmXZ+FDMwYNUn zg@szy)T{KyLwoKF-Th*jc5^{Z@xE-C@ef zmeuz5%r3h3!_}E)g|ODYeKsMD!=k@)zpMy@cJ(T~4O?eeI#?&L3*vu!okw)nS%&c$ zO`ac`F#PsQ^Y&r%CCTTWE@{k3%}KE^kS|rYOGQKlHdA zeAJvytD|Q7WW>r@!}fYwc(M@l9B~NfW>BJSXg?;&P2APyR<@qm8ymDFmBp1$`A%T7 zBj)85l!DDZ0XF*z{6-xk1KA-~A6eMXkK?^?#qIbHsy*VYa{NpXg0{|9N9~y%$HB(*)row^B2i#dP|w`>b1;a`6ea zZ&#Pb)Z59vbV|S$z=u0}GUPJ!Zh5Xm{6d_leC`eMi&q1E>A)lMrMA{^<4+-KYn~N1 z8e&E1dkwWu^4}QP@7)?BnQ1KGyKwH$C~AlW8nb0OogQPq#&YE~7>%@^bUwLO8?6K> zLJ6>}_!NTsTliX4*xh5QFvFATtR%bogDP@2gdon4xAe0S)3cPm-0yS%`kbkr{g>MR za?s4g?IYin4#Al}S?m5(5Kx=ZShzxa6tQ^-|0{N5qWy(38S`=p6ri!gtGi%3zTD2V z2}rpIZBEX7iTEQ^kSK?VL}0U?&L;9d0_f&gfIU6Z!LDh*>3=sfa^} z_OB97D$9SBU5xjXVG;0EpbK%W-Vtq(iB>!BI|nelBgnDwfST?q>tFbVI<~WzC6^xZ z(W;2`WK6zg+jvb43!gmn)V^-I z*P2ln&U$JZOJ8b8La-S@j8RRaJrQYXU=H=SkSetH1skczb#xqRB$DAp?`0s6K&&5J@ zq>`?-H>JrAs&*?y1m})NidZ6p!{&?OXQu z#ATXKu-%R$2dcimuK_uN^mnPg^To=XVhDyXnf(OQ+3Iepj(;W(D^Q$4X_`25TM0ijb9q`fxi1+JJhtV&ik~lye>IR1GVSv&sJXv+7-_Y$4ShewdAmk33I+**1i>! 
z$=ph{3mG$1l|Gr0BW@;7eUdmHtw-8Y`+st7E4RPFLqTievb6z*O@ zUPsGKvm(2-X5-4Vxmdb=x0u2l)X%8|XV-nwcNikQJ>z&BvzkA6c7uw6vFl%ps>UCc zv5pF`%p;=$Xi0q}yeSBEYs8fg|6FCv%REhpd7R*g%^5<;gm6qH^(>d2xYZ)`R02Wa zY;*2nM@QfDVPL>#sRn9~+iBzAs1Dc%Sc`oJtJb*>4?L`;J8#5W^GJAif+-hT+Qf27 ze1-9YIt=mk_UE$MazuspjA&^UDfm9jvVygW&-{u6XfnuNrn-Vt-&_n=wRGE@WWI2p zUZj+V#^6HFd2_I4n4ba}66BZLU-(c-^tQBif~~N&dRr;(k{E@aW=d$uPMF`?A#ZpQ zgGj6><&rJKwZZFnL*Z;nLdtm~m*-QsWJV&3beZG5)G*B5S*t*(3s6L04)8RZXch zqzuC4@&p+UiV%8D%wBU1!QgIlK9?7x0zRgW_q+o!As0Q;@f_h==3~2HfS3U5Y-@=r z>2oJLlFO4LTRsV9sZTLaerUiMgWz5!gKk>=z@^Y$cYo`Rt64t<a%0v%>BEw&9=StK)H6P4sOc_E-pK3xlsEl-h_ZB@?MH9XgHQh%L3X+0T>UJHpcJv5skvu0?U4X(+8zCg_F?z zn_gsUHe}Q3{6hRYSq7vILz3WFUn$&XQ~xp~U*8n?q4G?vlZX60rt=PtX9&;N>>qEE zO8<$ZD;ZSLb0`1VSMB*^#|@?odjVC<$J z#b@DmnPf=-cYGJA?pq^cs$5Y7*P4z?YlVeZ-Se6EOG2-dAlz^cxlLgttU8tx3IWd3 zS0f;P18{A~0UIm0VSJyFS!gF=F8lF+4$fENS}&v_MW9Z;7aMZDyPIm}9#~KHjw72a zbOPaN;-j!z9K`_(lrh8S%h%Bzc$0!UfkylhYErhAOYxwy$Y+7?^PlM@k?HPE?3Mk{ z=cDgAiR)=NY3;bXjXTXO5M;GWr20z)BS~%t?nOyeUy*X8=Ps^iZb=6Zmk~jMI_Dg2ZS2uyfSN#+e8uM83oGG)=c=}LaefLYq@dxJ<^>BPL zmyJ9w_F-6t3juq*H2Qv8XIQUd^%IBUOI&xECJKb2qFdNQ_vJ_uJ8LdGIpg)qP}lyk zg22~)S&H$$R7~0n#ZJtcoLztLgdiZ?p#Q^(Y>5-;xC5LBM;QA!Gnd@*7%9Ip{pe|W z*;;A=fmIz{-#dCrN1v_0kH?B6%vi4uSVUL~U+^Ah;Evs!ROt$uzJ0EOm9;En+-^)A zDjFX)ml2Wd@dI)HuQ^B#yz>29J)#ozZ!e-~ovGLDuBP$ZKjEkR@F^A^ettO^BIHU? 
zY0dNpBiK?w`lQaMV*el`dI11366Ifs{G3Y0KwK~Sr^x&}me3F?KKbO6z+nssaDqdS zCdVoSHbw<@VmuAw8_vlO9|rcAA(^JarPhN0eYgD!alA+3$eL%{aM^S3!4&6S#qFR| z=oWv3KJ{3oS93Q6c^WZ9ndfQZWOe#QwaDwSTGaV~))LY5gkU7MKL`JepGgW?i`kXm zZfWBy1;vF-CBRkn(ELJY>ih2SR|&7R!BpbrgG@rYQKyuLgMUG0;n#p8ONP+=>OY;; z)WKszT|?PlGfVD;~%fXlz?;Y z(-^8oN$jZsyTL#Ir#5DlQ03>2uGpa?yB?s)^zf_Hj(-4$;Vv=!eQ($4t;}z6FR^=v zW)E8cL%^bV4-4L_iBF^R3}g%{;o>>QL_fC1h%zQmiT(bwWivAKaV3t(vg9$Mu}EXv z@E{JX;O6Q@=<-)*$0a*WA!GNQ?<1jr2}r`WtXoB58llyI6S?Q;&nj_ScM~iE?f(_)d}6W*JAfjh;ec>wQ(hf=ZsA#PhFd9*e@p za3;7ItYJ zIS{tQ;j+=v-!_a)1#jj0Gc=3&!(r%pCcMh04fA|psbo}B4??GleRe9-+fJim85#?` z|CamzNhMR^-kC&uWbX$Ufp0{5eCO9Dn5?&RwZtlOes5ky!>$vk8u9#2n8r}yVrZZo zK4QV+sk>=6#_k~Fo8U~FY~eaFty$L|}zz;hAMB)>L9{WkeFbu(1nB>K4s<~-H5n9#bh zq}VLRRpxFew3B0vzM= z7oSgPF_e(OK2j84T+x?f6S;e=g z_ZufQURGS%+_Y?;`WJCwd2|9{;xnG_U`{AOrSOQK9j96U*JIjRaFjJq#uw%eFNlHE z9cVW6r8`0ilr&nemk;Ye(ukOI*kbaIORfh*t@|Zb(y54_loOCMcp~MhA3utff9QDH zHJhULHX+bqdveVwGY+%AlmK7Zk^%ev_9grot=*1Wfs4!|O%+}^GSyaQF-A4ppXZFR z!vL7NDNNk$7;hs-+pHqjz$3nz@u8(&|4+Xd<>6puF7)*%7q}Q8ZC|ar4b;Smv9^&;DPR z;AByV-%@YG<7?!rsCFRK6Y!8+b^8~4)--w3$c;X~6WO`gGsWMP?x?SSrjR$Cj{T}8 z5SKNZY|GevA~}2KJh%RJ2*i3A*zX&&5`<5?-dWXN@VC89`&9TMu=Tg_NrWmXVhrPh&1%^8>@b?D~+k68|b2)WvemlVwM{82g%=BB)( zl*L1NlWEMaI8!ci)5Ou~YXs&PoSVmcfpe+2_iZ|JOUr%{oR4*+IVCO+3RhN`cc?|3s@I>S|HwPrSwdHUh~3{{4}OHos`nxRfjZL^>H zWemcK7_WX<+bh+7eooXB=3-$xY)h7xci4;G6U}VG(EAx{JGa%aI*!};csFq4KfdJS z##RJ|rRN2b4e{eOT)NpFS%OnJ?U0b_?7b;WtY7x2Mj!-XPm99ge=cyO9p6mvu?BkL8_hO!A2P~M52_8%KO!8do%vOsqRHKWhK0rp zmgJ8LrqpA>65aL&$*6C~{Y-evUlY}uh|kT27zL0J?eiGyK&Z&}AxxrFWXmlAEcbNO z1galrw zJDP;o^+;2TMP5!&u9>qOvj+b_M$IxsD10rEgb~6^o$` zqW(@z(T@J>tpNfwEUQV|7Mwz#UNtOt*5UclKHlsWia1rP@O_J5TYoLgIqDoqbTiNA z4*1s7-l{y#fS)KYyj`XaH|}tK`4R9t=j(9be18V641Vkr8pgk|gT@R@XCh}}vgTww zy79cYvSj#`FDlsdx4FOXHUoFg4NV8VdH(36RX^M<>4??&iF4>taBfH4nGEKL4X*o; zx%SMF8g^y@%JAxKV*z`a;{e@La(uf%>f!y^5rfdlT@TmgkacLTqqv!nw6+?6D~nJd z%7PgtH%Q;7Vf%hhNO!P_ISW zPVQn#;jab>Hb8`uwZ+{w{o20`X~XE2%N#m-0lDNFe`U-;+l^bImNo>AO 
zhr}C?GV>$GI0zp}wut&gUe|+{c$=5!q*cqb?##%3A{{4rR#h>*9jz)XO(t9dLYJg` zm_ar@zfZyE1a?NFv{hC zJ8fhLk|?czFvh4d#(viqbxKM)B$Ow;Lk~g>#;{p^!~D)E10+JTrK5DiePToWCUu*G zGndO#4-c=!!{ONn4}NhE*P+8q8&bC=*cc>fyTF6%EOHnpOw}#VguBT6&te!6VUVHQ zUw{6zp_4e3?%ULo$B{DjJC^3xS0&F^fQj7EVa{1?Fq*Jg*){O}gs z^VHlT0pzmHBzgHn^&~~>G0-8EP^mBSa@+lozzfF~F5C}SS@QAgk{4j8+$Nv1VwxJ^mqK3b& znBZk}N`Jo>neW0@@W|k}Ot2W$Yi%e){Bn8QpX>UdEMPGWu?#0f#`lh?!5Q`aR_&kn zG2M_Ut=voINAtr;`|l^WtJ>0?ckC5z{f%O1CFHJLj{7)BbDG~2>mF7Mf&g|zOGb|K z6|9Ox(Xo>K06`3VayE=P;chZ;%-CzSDePc|Fc-?q z2==T`!qDzmm>LKKRtO!kbGC1SuRIjD82~XTDZOFFm3j33Np~iZ4Tf}(v_-=M8e$&# z`=oQ~wQH7@?s}@k>RC;cqjn5|0+&+Jakp*&EDJU47lc^r-^Juv-aRzc+ym>En?ckP zJNlGVFTK88o@edDkV_i8R@o}>eL>6Q!dq-s@zhGA$R^pyBwZQBweqxJzAqTWi{nBvfL}B4uE@Bwv88PBsq`qjksg zqRE;xfq=_5Fh|5h$~B1yic)N_-FX*P^JKTS>kGEsAgUO8ZLXi?Pn2c^=D1zfLE2ak zROB>rf@6!{34b?Xg5L5_B6SUjs1is2C5IG4U|G)EP47~0rkQoka?lc2e(sN|+PMLG zIT7TwkW3SKg6A`d>R|2{&vXt$O_D9n1^{rdT-uhB{@3;ch%`v@Xa^meMCPbDNPN4TuO1EG#>R8}hmU?3h2`jE ztrWVvN~ol1@+gT1nVbSP83AL{MS$xWNQ2@^--@rZrxV(%`Vz%gsX|Mk>*F1wtx9d| z@q3eCa!uK|3SGf44F$45Ws2;tfuRW2j3}9#KZ#%|MKJP8kO&JW3rO$xREciS+oNOO zp5wWjyLGT9HL0rx_>fgZ9J*d4m^fPBSz*vKUUE%hEg-Y02LI+Ipv(&rG2n>&_8*-l z`1n^Rmcd<@UMi6YEVbmT6x_3wdxRHic5a8zwwDIHu#9Pf?t72E)I&_JBCnP%&oQdrNnaNW#tOPq@^pW6us-?_GJShzIV7Los!g>~>GdZG8=}DF z!Gs5jSKL;_&n9^-3-}uD0SVwS=ne!OWHy`f<~Vx-DT4kWa3g1y(q zUk!=^_M`mVfV9tXA$zUJff$YM<8NLQ;c3aQNi!f&{Kg3h zCYYRuE_$CeJ;63G|JQcSAn4ekWGg2r!6n_2wUCwyv}i|NLD&+NQ0oFcdi@m1fPHy9 zv&$zchVUXdHz0pU-9p-_cldM%C5_DOu#t3a5ihMz$b;!_xkytz8<2#bF7lu7g!@?9 za7=ius;6sxo1~L4prP|$AC!NIL5F5w7c)XWmk6Bmv#A#RFjz95=&mwXff6}k;A{ZZ zvir^g9x8Z)@Y3xy55)G;Tot`wM+6J7v*`d3?)skQE(oSfFi$XmB8oH7#X&) z;?8ohxU?R{zuoi#94dUP3kzQY2KKR6Oa}(_T5J5WKnLlqa5yw_QL*l_1aFP$CuWOL~{nqFQEAl?8BR>E2uqyWeS1 z^~|-Fe#BQg|L~(QJ4*QD?sp@1tZIn)?DIaN{yVc3N-h!SWemCBzc>0KNOv$J>V#eG zenT#>#%lrJF-XeBYXosR_paLT7vrM#oZZ!(cbsgRUH!KCf4D#$z7#~3xh83Xi3juZ zgyBZ=Ai1AuY&fknUVX_K<=E$1c=o{|O4)x5HFOk+JZ_};MMt{PNo(P5;h=L=rD%qd 
z$RXx|hnwMV2IG)&;QZNeIbyNz)@L6`Rt$_PoWVTq=VoOwL2E~eDR^f;6W$H+B}b%uL7)TGaq{7!wCUx$W&Gk|v7<&*1O9u3R6?WnD7OR+F}zEWzyCw6v4=cU@u~RgJ7pw`Vax-*(XbQ(`WnDk5J(uF*JjmY;YJq zOw?S@>Oa*l8%zt=SmdC7nsHG_sAILxM!O{GmE}Hnkm0`k4O)r836Rh9iGUy6JRb?) zaWDP)aFR-!PZG}wM_xtCSgjlmb7U4xAERuEoL+Wlr>xkF*XApK>d`flh z4}T-TcV|XLUHy%h@zTrTe|&fEqpAacXzt@=F%fBpf<@jZHCmlOb^^zwHzX5>GhYff z--!EE|8eYe)qx#3P+POV6P1|E+ltc1@I8POh>r+NpRwwMh#+?J$QE}`f z3T|$7SFap5wAM~{9?N76OpBT#h7}QW_gS7BlgFn>D1?+BtiGl0Wm~!Aa8&>`IRpNk zmPJoQuv|m-O}!Sgn#TLe(ne44Y0sRySJ^APbm95n;CB-XRj1YE-L8#aU((Yp0;ui| zT=#M&-{INvNhyhnIoKo>zr0~DzFA}>l*iDDQ0V`?H z{f(fz%SG`1#uoIHel8XWa}2Z4BjX0=4qRY6p4p&=PqWEc13U3nT(&Yu(?dYdRMzU5 z?w~BUn99d7#|D5b$H!KOW$X$$U0c@-x5F`1i6+z1;hldyaFW9+@&@138VcIO_KDMj zOi!O+d0lY0t$7z9M}MM(HdgkM_EYj$PEKyS?c#V|rA-eCjIp%hnAoQ8!}QDYvq>l^ z5x16bIVak#8)c;K&SigO=QeD;VbcC3HEaS%p2oFXxtftsY8~bIz7}0^cV++CZ5{Uh zSlRM@YI!Hf(l0Wnd}{YhhBHEj;n_8Kl1&+aryP*45jcQ>%M0v`z{UwV+LhUndR%-D zhKh6oP)l$sDYG0uzSk}C^JZut}f(e zmJx#1!De27c071;z;TD%g{!It;3@K?9C|*^if_$MaWmxq7}7LA@2%mWM8}|!Q{*Lm zOGyoC!C0@srPxqP@)y*(Ma2vlxs}5&_=Yj8WKWW-*ADW3|ESJgH%q8k)84~it8IGP z<+KQ4fS~_2o8q<7R(yY-Do$Yg#=$~bR==Cw$l;m6*W=Nci${y4Uz~6n=xK$fj@`;b zOZztl3#<-3z}kls;M|fqEIUoC4uHI;^d&9Cl%7G$QF-UKd+k@}A*9XaHspAI;MhL< zpn0UVhXIAuQAl4@QY<6ny5zVA58Xj#L^lc+eq`F$mNt-8Rp=^<^bpgQk`M}1$n`lQ zQIwF^_a`Gy^A@5vw$+}%(QLuP;(7x8ckuOT2DErAIieM0VBy$VDI7m=3 zl1txy5(fYREHPYy*f>&INaR`UOg)@3$Apc2|4Nu|xE5bBjaZ3e+EjpukJDC;%Mn>a zJsXTEiumk)8!EM#HmUN4Y2u?Epi^r>medgqWDt}{fsQAD26(B=zwxTL^GWz>d*O&b zmPw?KZ2<{hr?DH-2>((Q!Yz`@)zN1fP1*#Tg+Lc3a+Xtn();eo{YhhNkq&6%vT!|s z9w`LLOcoMvYpuImwE!tnOhZ?7=lzQ_%|O5OG$b>ROUvREoC^dJQ0Lf1TCbe z@Y{PYJSUpXkXgpG(d;)~>xtF(=;I-lJ1p((k=~8o!4Gb6amTMO8p*;*7ngQ-hYS0s zkzP;dz`WkmNf?L>YL&(5b!J3|hQ5h)p3!8qYLZg&xX1*#Y!4 z!4yHhvB3;AG~9v+0r+6?5kd2)EF=*K=xVIU)N*(ja{|Wqu-1}S+>SNftMw+*%o*-$ zHOFSb!9qHFJOJyf8nT_ch0^d0pxf zk{GpTPUj*jYD(R>JOgg{qLU}uHmLE2Mvx<%0toNYC!y*Z;FDnsxlaWASe`mM8#3ERs+DLHO` z$*-KKnIUye&gFPb&cG$Q!!fQsXtVknuuNdvwTvK=u2oyP;7%;%6`OM5ocgf#Ba*Cb 
zSzVITSk=qIB>L4wVo;SNPg-#B?$A(Po5rh+?B84mQRljoth}^5w11bar`|`{{{$cRqAc#ka>`?5kIk};(W=HE2Z z+?SaZZMM%|ct=Pe%JR#ZayA31X)wa)V~SnL@9VS5^op;$qwqmE#Nsv<^_J`U_9X~> zw?fgATf9ipt4@=C7zZN>rZ(pybTYQ){%*6UAtH?jAs~V!vW~mzR1817-#)L5$1(AL z?TGwt*`OV9n!Bxo9{6Ve!1jnzjCh@uodjH@{~Wiu>mzr}de6-LAI^$}=Gu7_Dy9#2 z1=iT1&TyK5XKhw^JcpY!Nd-5cLr=(XMjO^=;!S~bQwm9;jNL~XjLgki2RR+srh4W{ z*{)1}_yX=>Q6;ndY%#sl62)`U;}LnwlyyV^x~ni~e7xHBhPQ(3i6+AQ2GoPVA1UxbdAOv-lCH58*s{?QBjR{9Wo&DyTO*%W3femw<9t#82+Oc|LV@ zS7TB=4S6G=G~rkN)LLReX&fk`B!?YNG%}+IOOd)u8vq&+eUK|O>}hA3DZi9Q7AObI zcAQnDa@5q*28Rki{%FaOL|=br1QFGWL!TFfci-8Me2Z|nNJW?{N?N}XOPe9EWzeON zU0`0-v)Up7h6WsdyBhl$E&OR2D)Hi{9Q|H6-=G&fHsmV;0VN%N+EFnKc{ZI=G6wx~ ztuZ6SdOGUARHDh5r!)FM;WcU%nOFkbdc!JlnadBz;-N>%-UrI&3k+XM5r1@H&%~PD z5*M;2<*=VDY`7_nrd@r?CmjddC303+ks+++c7giIEddXlc!lgMx8%>F~ni zJkM=7iw9HVax%zJE>~ZzCSGzYrdrHDVHlz&+)^{d=&Jm=)SHm55U@09vfUm)NN}Cv z@g)5;7Ud3#E(I&(dk!372N?sTHfUxF?Fbej()>yFzj?7UC!ZVc1m&l+zpZ}UpHu$K zt~w-K!=GZ#Llt-9UK0RGwHxSAcrtf~u91srO+))4FgRvDGzTe)_i4h) z$l)MlzP7FGk3%UUThhCoP8r~o)YbbtUqJw%0$H%2^+5oQ-|2uSLgr)!IVHh)peAE0 zdHXc4nktFcZ5Cg|z1~q4q(iwB4BQ-K^Os`k`LC=>awHQ3aVJe?nPF=mLC@MrFpAAM z$z7iTq6L^IFodi><>F*F7WM73I21b5C*Vq8V>*~werM5>B@}t6%q&?)aPqA>01R*X z;v2uQ`_wEWVpO!42yp!WVIVJgb%}(7AJvqDa##P&&1e=*_{E6!$p3#y;}v%x79N%1 zSCL=|6p@jOcc&Ozd|u%Dg~%Af{72^gAXU||-~$=WI|YSD*a8 zf~@Bv0ZL)<;eJ#)BzUeysR=M;AcQ^Xq$U^v;42f=Kj?;d??KSJ8Vd zTLXRDhsIEe0s~i_|I*=rhst3Y8L5H-;mw%_njF9NigcZ7K-WGzhB_8?bM;AJx`&Ya z&m}|DI*~tuXsMkUIv+J)WdLU+Qb*Byf0JrzegdXSCXYlCgV^!$yVbL7qX=Z1*Bi?D zz)7#CR-b6EF@^c&Dg`jG7(uQ5O^}SotLYl5HLxaHCPY9IL)JSSyTul=pO%jG+rwrF zrB{|8WHXL2isD*J=tpw3R??CpW=o0dQ6YM+Mqal`b={--u&pI8+MZ{0>?>j4xAC6E zXIjr)X8=}n3QJYl0&~oMSlE0!FXI6Va5#g){DwEGFGw*|M$07?x;mz)itq$8;+)qj zrTb56S$(@oMg8&eNs1vN*lAi@%~S53er2+;0H+_6K5YNMH4qm0FJ`ejl z{WAJxtv@JMYE@i|mROjb11%i}LXOE(zxT>v7wd|JDR-pn&(jGb1L<*k_|`Y^nNx!% z*UY{Vb*XO3_X^q4s`zRv8fDr;^O3PFfuR2z`qOMmc;prI%k$JMK>ODlVa5535^iOA z+}$*kHJ|ByniZ6IF$j)7q>R4J6p(r%`Xfn6^mP`Vy%0%4we;X#{mv`>$wxh|Fw7A_ 
zL|aX%Z}i&!jv|m_UPll#7un8cM7wKU@-JMWlrccY9n!@27zFBm{Xhr2e79#_{8W0 z`!ym(UAyp9O$4hQa=i|s^$hHg0Wvo{Y(rUFO-P6Q7uM)l(b*+}{v%dbWS@Tj61UH6 zG@iz8)^Siuu$#>37P@1&cV|gu!agh~4IZo8+q+D??~y*)jRB#)B!`?JJlsd%YCsD< zb@Qh(y;XA@V{_#1b`dZ1>jkKQsU_~xGEdiWe{wfAH`wF~A(bxu=KDfdhf54<TQy?N_GB-k| z@I@IkHN|AYnf~mwRJJVQ7oAM(tU3QNSfMY_Gp@6dVR7$-(to%F!9ic}@5fK>~aN z5p9I!JQ@PtqDD%BQU5=9?zOuCQgJ=Sw@?sN3G6 zuJL7@6nh@IFDsz~xexNsa|b*{CZT*>6^F4S?tk(g5hKueebq25fUu)ni}@asdGs~X z$}!cJ8|5i3kwC`udu*(eD463Dg>~E2gwqa9oQWib5&xpQz%vmwCFIGnQdE^>kl<(I_ zZxFg1As4H9n)Thh#0(y2g+nrJ z=XeUi4%z0Go9* zhOU=hmP#o3941ws1^nArz68+6&K_?_o#6;)YmDe)RW9{J`Khp{Bj@`dVY@=jfl81b zXyS2Qy-THyQx<9i@!uLvgwlqua{AEaY`t8Cdz}x=i&6VCh>o6qk4q16k({b;P2!W= z{X51?B|DZ9NfqkaS~s_6YrS-X;&B*0(2LW|2X($2&w?>1>g?$=apYwy{8$)f6GYDP zx`;;e%0{Cu--;8Bga0iSMY_!}PVeHYy)n@PKuokeWILBm>s6hDYezw^MU){L@!Tk~ znc;6Jh}P(SsM~EIfxF_MgKkm45pyO(RnG{TZgx=z%D;AEbxQQ(vSOi)UnObE=f^w} zx0g{kqjZh@)7f5GZO)Q>PP#Sm((BOKo%>u+i@(jPKgjwSq%Zu6y{e8WI4m_)@uE!r zj(Gk)+9qH)Hv5wh85ahon24d~8m}i?Fd%xmR#^4e*BYBFh-4==%^$x(co7rTbXB~2 z*wzAvK}|nnJ-w#n4Lv?Vs(2u1>ZMsC?wvSnJ#*Td`n1b+=U5K ztcuXSIy~(I- z?UuIyCOK{XcQJhQ7Kow8AE%>MHYDd^Iig=PV>a|y5&6d3UT#+mAVOpiko#DO=R4R= z@Hv!Q9oI6Hp1~+L0E}`bT$T5CnZnj$Ao|EsPsj!lbKA=2<&xGkhSh%q4|SOxeWuI+ zG_2&UFf1~8KGO080r58+3ipt`g(Iw$_Mo4!#tC*+y4!w=n5I*zTwAP7L=vFln|(&s z{K79ggRRwM?j<#tW0knbq@e61a@GVj#`RJ#wNnK4^o4yJ50>J4d8jwlm)va#(49i1 z>3^3$T#T|W6)B=tcU7NZkS&%h8wfS&8t;h@j@{ zQia>f;ve5T`f0yOxClJ$Q=HlC5hWZg2gaQd45EulLbV*wtO-D5VATr3%^Zy#CW|I? 
z!pJ3u5FB=QGHhFxis%bXPv|KO&?%^2c^)NWVcyq<{*yuivB}89-mS&cvX;WiN$GTG202wxV&VC+=1zZFb5zl8s#+!%SD(oT9 z)*7>jdc()y5`9fvHRlX<4j)T) z&(sX?gJG?=XKd9C2{b5+saJqpkzK*i3&Snoz7)rpi0}N>ipJ^4VeEMxb<{za|I_== zS=TqWrQ;uKAA(pM4fO4{>>Z-e(dp1ELIy>0=7x81&d^f`Fgb$NF+|BzaYwMqP;5uI zX!qoG=VHe>_+}};jVhc`H9EJ0ks4!#hWiMU&m+<_4^cDW{74H4F`D2*=!V@Cz+?ab zRYA)%`FXpm+hY^|@nyuox-37XX`5RB;FEM?VgaJnM6%=9|lZtR4Qhzf4 zIH;J+MypQ!5cyG)hE{&B#(a;hy`&5BEPwq+gt z>^g@ZBQ@B)YAosyv(Jv#BWUnH>xN}nX6WUnBz5!VH?}ce zsZoJV;i~bK}=YS z4hE|k0^#(Ll@L_}nJ7Q1ki`A!Aa_){$-I7~3<2rUJeSIxvSn)tX1mgRGlw1G()UWd z6#Udkv7O`M@t(Fl8aC1erD}O3nC&A7MkfzV+W_6$L2nU43llia=u#Db_&`5b!1p(fswNFjAqBl~An>b4YDlXJ38ZscpgK!jAQS0E%xht?3eAEax&v zzerKh^|(xi#Ux6ryP`f42uH|qzF;%%63rxbu1n{TsR{jm=nExvvioD*j^e=P1p5k~ zOeqyMQj99?kD;a^h>JbDMxc|r3mctXc|>A*YtPvhw8pHsrMn76MWayuf7d^=>e)AL!_m{{?k;~6C-mVKp6om;Lb?^b`B_V}x zx0V;=QX@WAiim{X3XNYCK!Iv`I|g2(X2Om1n38`5&)j~`IVM&o=!n5@xo zv~C~3cIUW}Myn^NN^!n2XhFar*2nkByIOH7L0PZpUJiqF3S~UAP)%I+UC+UxG_O1~$ac0$eqB`ab<<$3kZ5G%gSV5#v_z9ZfHpNevH zv=F%(=5O0oe7~$7oLVUpoblHtIikLP_@^g5k2}QHoLYGgX8Ivtd7ft-vbzul`a@|! 
z%>^Ug81+J{Ia`VyL3qhg4CWo6XI z3tbZkVkVNOIj%J7_ml}fG07O3G;!12lLP+qJatv;(NVv>Kye9#Xz_6#hA+>PhD_kx zCZ8t_b-gMtWvS>n+9_xkb4W_C!BT3`XlOcfn=$4#@H-P&-Gx*9OZ|VUI;C&_tLof$ zkWs~MFC9%U9+l9Oh8yW@@d-5L7VZZ8Sx>Uc6R@p3TnPiGxFLB}wF2DIl9b;v&ky@< z2$YhzHud)}Zv@7%PW-%ne-`@Gy=q9$nxa|@l=X5k~m~{R^;8!jBWvNb4UOw8r1kVt=1I+IMIZk_GD(4D;1bZBx402}Uui z<+-d`k>%(ax;?|oKPv8OSDJVZK`~Nr{)9Iqh@qP!F@)kdTaS3KMgFD)(n0s?EiB&3 zK7s!d*LQ$x_0XJ)b&4{)1moE1x#1i7bd@8lVoB#MMsN(&O zOyGXp^O~3OFlx&uxpRV3mB6?3d8zMq7P;fl)5`bj1Me(ZKZIS)6c}k(adPuQ$)nSQ zmsfY2fwzdE+WjI&>)XiW!S8aC%)K5&gSKCfMt(BO%cV)bzAG+tb;(EQi4n|V&(NKl zpCUK>=9RE@P(W0xQtSh#8i78_VYlRr@uXYnRxX2YdFB?`l%*~k^I z4)`4ogqEUG$Dsk3R4_R1X7Ra-G+Igan|Cck#s$F-Z^(n}Z>3YeKM;JG=u8$%W%My_ zO4)ql8GOgh?2DY9fZslRiIxF|%={%S!#PUOK!FW_hy!Ln+prrG;=tiy-Ug!&A}9O% zYyJXH_E!h?0eLCTF!^{C{<+e}@b1Ek<);1u&ZL#CB`A(W2cY%rCSlIlhyTZe(oF>8 zI2f_%@bte?lPAAMYJd$;N@#EWQ)|g@@>Enb$>|?IC8WAVFfQ&+jU6;M{~(Q!usv52=8lwrO@y(@Gv(C}b?rCNQf zIa$MNW&kYAGJw_M+59KYj{Z-a{bn(*AH@=KN`iw8`CtG_kK`B=_+%M_IPhvX)m`5x zWi~d;<37}QHQDw4=yw^+NtxuhtxE0gTzG}vDHT>dFcM!41A4u?Dw^FKSuiC#Xqqry zrki1?a=YyUXIhwVDZ>QMPp*r0{mg}tt(OssbF7=Z9AKFH-wa|)9`^slTRh7keZ%0C z*8`(3mzz>e+j)v(#kU5j+sevc-ouI@7RScxAAD8(f9m?GsJMcr+rbA3fk1GAOOW91 zEftGcUPLY@*d zIXdXyRTViHM-Kjt0cw>uD9Z5F$@>d9Kk0Y#;0eBgl?jZk<6!8g00P$-Q>w(t0$l69 zD>EUJti)U8ADnKvixd?Ax5Z0Zx8$LZpBJhIr({4IXYa}GN?Rd zGGCfO6&qhxeL43{=k!LQ?Prd?pKcsU95`=fwBQkx0ZU71ca60V$iKE+hz2r6GT ze%Knzn9T-5y1(ToW&U&g>whp)YF`N}S9BZ}JXpUR$ zqIYSz?|a-hsKA#6)aai|7|MQr!c#xKi7*cfG+2v&J0wG|lU2r|C_NMv<_7?BBIRI~ z2Dl;nMS$Uy6!!YZV`CaDKeg4jM|N<*4ex4HMD0@9O=bhdaXatvX6j8`;njJ}_49^0 zP-kf9mLT8T7+8hhhuHdB_lAG0;hss#6D~W#fJS3X6cJ8%XO~B&Z%2(udFsD=r;HbV zsjMzJ`-uQ5oW~_t&h+_gMFN?k`Q?_h^{JS5>G!O0yn!N6OSTSZbW}(@4Z|=~xqYMk zxtgJ>AsWdI#NZhg48XtaB>@VIf&=piL zBu9SSZ-?ym0ws_soe?g*>$x4jWzphjq{o$@+@I{(lV1(blNms(vMUZ!(@X(b^Bw5Y zCuAo5$LT5>*4lmLJ0rSu3vPMRg}F}gRtDUVCl?Y==q8w0=R_Mwd4+p4PO9va>11L7 zS+3a8&48GHS*~XiRU`WvU)64*GIutBKRow&JEptyANo{}=;xF_=-C8K-(Tu+;7OC{ z(g%=aO7#BO1(lFP 
z7r#RI(ekUOo!WW*Ib8i)(sEP?`KJLqT=+ptWA?LODT65;9fSNP@2=#GaeSA2+{42B z$Rk4YjCFhTwBQpZT!W2%tqsDNfp1gf%(C+(@)j_Rh5I{1OX`s6_Xt@2BAR}`uZd0iO95BcUE%uE zV%K>p2bG7qw}#>|(Pz{M`{*h-ESe?rAhBjDWEKD*A*I}QJvJuuIaA2s-@GG^ zCW0puahn7%N&Yl?>1qVLOl!o{^Gq*FAn?F2rkLdIIVPSmg~)9bM?~!UCRMzR;el!= zKJw^7>WVR&O4+Yid=XicueHYc9>nv98p3F<)0fAeI0*ak$XD! zvel5^A+0Di_=c;ND#wP`uB_;pS|)UCD?9Z-Gt-1;2H_S26C~E%8X8|U_B(m2b#{?T z71d4BDeFy~OJlm;xhJL?E!4_6e2atw+KVUVp(9{zUpT$)U|=ZMbTp_C)k;;qeo}A` zp3#y8L-EMRqNy2?*#@*J%gKgk@%%DIzkXDp|A<6*ym0o|!uqsIFHx-Lc{1;oh>eq5 z^gz2_DVTn}=Jz^u80@en-BO8|cMeJP>*4um+U*PLRHQAb3eAT3GHrw06)cS)G?i|0 z4A&@TfhIVSi$NAQ5bDvOP~tk~*HK0suWkn|B@Pb!K1Dp<^PW8v&GbOZ;nM~qaiiF8 zP)|7Y_d)y68ha-80e$B9TRukDw{A_GIOi;jaNX}=HmGA`dhOFL@t%F;4=)H3Gf4J3)zVv6@JfcV z?SXrv10~d|;fzq34P_~iapeB3%&}-TYc@yr!_q|u;u_~K&;-Z0l_T@?A4rpkGRV~>#;ir#W-mhhT>ZgTN_@2C#z3kR~ELFuh~|3&5I!-ue0mld!0k$ z^npV&U@$gi{LRXKO<={=oKR!)CR(HgU*=0dR{%uKFfu92z3T@HqR@WLJ2IodA{D;; z0LW0{q#EC@%IO!AC>TmPMz^|v?*tTtR1boY8t4Hs>Y4fW%o9q^Msc@?e{;GzSVxbP zV%J)^i`RIPad2`amHylg8kqjPJ#Y8wsCp$=3N489IleABr-K1&V&gR@ec3Kd(&9l$qdi7NgVm|18pVK$qx5tW z?4z<>US~eNVu=f69;zTVh>be-lj$n_@9Lp4y9u_)S7*!{&-$ZWQN@QU6p&9!(4R`= zM0Tp6n6Zc18L_P~a+G&@y>D)An*5U2=B6<07^*PHj8Zvq&asTkh%x<|q^ z;R?yYA#Ux}kU#AdXMJ9JzEd(51{EyI_D$Xz{qP5DbtCQ$h=d3n+N$F;JC*UA?&7-v8*DE=?Y3Qsh9-? 
znQ=xZ_ns}BGX7%NBpl&7 zfF0+Y(0Dd_o_CpD%MpSUw)l`CzCg0zh27>ABtx^bl^PeI?9Bv~kv{!0KEX3Mb9qzA z#qV$t2iE@aba|OJ+97-H64dsR08c(-$rT1GJTM*X06~KJvw|JUH>9*Zi0|c0qsk|y zmMX?1ZTY6U^5$c@^jk-Cmsxv)N%Sl=zE9?IMcde$@Qq@qaJrljDF(%&pz(9Hk(XNs zHfGX3J$1+#icT!(;Su$Xy z*YW(EqIpLpoAw5=yKuuzf8`syPJCpLW}^$Q%4|_=#407iyTLS}p#a%~VTAU<%!mRb zvZETPn~{YoNW5v%#a=UW7PF_f&PUWV>vkK#7>^+dZ)IpmknA=0cK4+g=03>r?+N*; z;%+&IxXwinZrdH+Q$Fx{m^!@lFvCZ9v!Dr!k6+Uft~L${%1b`7MaS4R(!^>34g15`Md`7?s|hKbHdn!w*t7ds9+%sBJ2v>il#i%QWX97XbXy~Mr7(lj6r8~@wsNpW>K`J&q}wy{a9O`u7C&w{Uzm)cx~$&rO2rJW zA0O0c+KA0>313{~Kfj_2F+0DU%%1XQ4$16eM1qOzW;Py81omE4)mTr1FZ7Bj|Mntd zl`WParu*~{;d*)iMw&ka|oQepwO3zEzSDkf-o4mnOys51@Ry>(fu@Oi5K`qoHQ@&x38vtr+5Du zr+xE6fDS9GlIx@1%Vfp8gy01Ib*|tH3?dAemtkNZCzfr6>~sfX4*U{Z>nSB+sN_A< zOkt-H=m+#glAu2-b(9A&LsIi%M~r7FK{7WTeO6^2<13Fr&moGRaH_MLgmnNCBb zPU#zOx(Eg}uk}H)C|CW;u45z!BOFEC#i68B-uJfl5p_TyhL~AHM;EA;|FlWDC=T9W zSdfFZ4&`YAq0^d|B8Iv*myl!rUtz!a&q<*;I5z*cGueVZugcTG|7;%o+Sl? zi&kXPbb2ynl7G3!S@_$l_TaM5cRb5n$(FVCmsrhj-Fj?N!rHFnF}YH8%=O*3$0>P` zq>>UjQBKEOIhBkjah6p#@TDr{n^$p+0yPA=F*)dPN+aw8$?TQtG<0(eMk?M(qUMGQ ztLE#yVSK2w{aih$_}Ya4QkXsD4U5%Q4$pa?qW(c`4k4cd0(L;$DP!5q%w!#wtJ;DS z!+>wPrAsk9QB%%svod*loiq4nBfu&6a+uFa8{FIqWv$3v9R{%=|eL#ySuD-jS z9qsORYSJu|Cz@<;~3@8ggo` zKeFC*i4qbPfpnZ_WgHr@hFOc1`B!3WwJJ|PKTS+@Z)Gi9ceCg3otoE@T95l#{DCMv zW|D8B-j|vA_+32w!sdWyL}8{OnSf2PQ5!v-at7`Iqo=rwqN-Q={&d^ed0$dhdeShC zS&h_OY_Yl7+_kd-Y5a8Q)aBdo-s&Fv{iI@7a&!SMiy%tYE53+nQ-q1`G|F7b(v@(Z z%RG0eA46l>8N+%EHj>O=3`+tsqW8JN!`~qDnI{jx5napTNxpk905jeGo}wG;0Yw#M zPGXI_pZP({m}I1^jPi?JiE!n*rz$HzcL|Cd56ze0`zY&{a5g zfh+F)aNR9fC4LAtNs?!PU1wt%dbN+{I>0;(+qWL}V&*uvK0D~It_jNy_5~?b?_hG_ z=`S)+HpgnB-nnf!iF3gCPAxWB#hkn6A>~`UHT0g|IJZ`=9E%X||9F1PD<_UlS>fKm zBYvMuF}tMKV7g2MQ5cT>oL#mC@1UO_sg%E(Il&!Bnz(p;*p!E|S2yw(P22^5OGDE` zl<%sCt@?4)+mxI-`IMO;RWkega5HBE_1kq12`%<%_BCA10r<^~N46t9?K6&JY? 
z&BbAiQYS>}5Q4GhS{?q=*{9HhisLLWGS^Nr_%*KDnmE2~06=QHc{Qq&y) z$QP6d-@gnaxjv$hnCM8-HTIK|Chkz)KwPBCVl^pgcvSR4x|aM`s~JlX#D;bcl6#(VX*>zX$7eZK2cLrFoMtlB;j z9%}SJ30Z+X-okF2lUJS%J#a)5O_bDTmRJzS=g5xUmD5bG8UH3q&7!e-;aJ2`=A(Z5 z_UImKgqeTNRXznr6vh|q4(3C8XWIHihY|83?$q4DJjr`!)WSVys4$2KyMQm^R!Lu3 zmA9`uR3a)j^xHiNy)`G3S3S;}6@&AmbAt+BlkKmR7G1mMXkpP7N#Sq0?|{oa{U`4x zVz-Abr!A6_EV%GDKrrDY}9J?3>kd(xVb4WvLsI2)r zeI`=U8m#3h$8I>QhNTQhTY0ih&6&}3lRW~fKIn`y;M`M#<4T0N>G*Gk8ZCUTI>}F^ ze3w>Fn~HDn93Vq^sGlJGCE7Ufc!Vso=&%?K<$;8#!)f$(ud4DicVd!D6X8Aeze93R zpN+IQ=*l||aXJy?YhXKn{X+YtV z9A9_QlGnkiJfE2dFJ5dB`#jA+&E`UZ!P)R&{(RU@vQp#wP~aJtu+-j|Y)ba!a_a4$ zfR3jCj_^9tVNKJ5z!(DXIgY+KAqtamXL3HlA*qq$V+*;#uDGdwd*WOx@tEO3%ct`1 zpGLlJhMUg50fD8Ew@PTszlJibHE+vagcR~Awm;)D?g9X}Rp)fw@M!mKAAG3rh5y0_ z57UdCu_VMXNLOqT*!t%Lm4KwXx%J*G?{$j)IDbuBklEj=Uxq;=}cv)5L?nr4zp-jWg#<_T#eS z=wCg@pbK8k#<86_sn3L{k}*3CI<_(KdD^Xkryfw*5T8G{?s9V_l{!Yjonm~)0Ef+& zwH>I$2sYkm8uy{F6ALXDo2Yw6la>}>`)2c3-qw^ZJ9xDeF9k&I6owfT)m0!*<81ly z4zoJk&l9AP(HCFAw_pyAo7~RKUUN=R<$nd4H~P@hHMN%a({nPlq1#n43hfA5m{9@q zLzydZ^|Gw5#+%UgMTE)a$JN@~WeX9T&>{x~<^`&EzdUSc%y)FVj5H?3&k6+?w;B1( zS7Ua|%j&O;C252>#o=HYd786y@?OwM;<$hQyQ7pJY23x?#Jj`XNML z_qv~6y5{M-nhB*8|7}yhb$p=2eQ)xd*=pNfk!<0WN%En}DsTsfFj=898O8Re?b*Fq8$j=I_RZn&r20e@WwwPo__FzpQn+)BwqDpN6`D_xx208 zaNXdP|Jd8pw354VM=N2OMW{!!-(Jg!97I+e#N6zcsda0#*M#&|DQfdA0iWQ~v5xFR z$8cE`Be(M5;lc(Wr2d4BBdQXCeNCGg?M!S|{>av2(Pfo3I4|446OHsAjUm2gjkq5L zP?`ZMQs2>&c+^zLqs~&NFqDkyt@-9QZozmi@OtMu%S9{%jANH*XEu86pG2334Am%| zhGJlJQbrKrTCi%}zpxxhwiJEjeeI>;B>qJ7C}vuOKGqXU3tuNIdtt8>wtv9DF359xQ*uHu>=goov+i|9H?75KP-j`=h)8WIJK(3O=0K5G;-61>EypBFxRB4(2B_qU`WVR z$}h;AXhAmCj&ggSLN@m?N8O|Y@K&ZBw__O`gg&ncounwY2Y8+7%7rNY^aeiuXXB&m zDne)_#AFkOx&q0W3GrzKUcPt$BqN{{Lw&DzGj+soKJDw2lnVhp#ycen_rdX>WiKMr z%yYPi25NN})ETmPBch9TM|1)M5(d3~*9DAaPPpO`d>!Ty#x)-)o&yi$K|{GcDp($_ z^MnjAUM>upZ|?9q>(T+rY%PODx|I@aSTZ02BDD&_E!<%h3sGjS$UhTzz!LE3E7vK$ zT|bU?sABXK@T~&aOcc1#Z0^5R-Z{vQLaBqQ@kZI(MFL$+io^bXW-^nTcA*A2{?{5r zz@oMC`jwJw5O%<_nATs9FHeQMoZo^pVq#{j?Y7B9wE^Yuk?!`NiWY0!!v@Mi^W5)_ 
zImx!wVVvjC(YX?%@OQdTioE`P{nj${tl}9#q5rn(=a}fy^Tvg*dAJG5RLB5IAjIMw zV>Ty}phbGc1*|FVRZ`jveh3|T3rBb4gv`}vVEU<9y`q>7Q+mYuC+^3Nr*=#qKvVNO zJJ-vOU#pSA`gEPNAyqt`LW`#Z0AEae#%)UD8KXN?rhoxU2Ch$42zc&KHlKH1KV1w@ z;H`?V$b)qo>;=yzN7gioX;H;8-?sTi`fW8jq|8Lust7z4G@)yl5=)^qnoIy@HLrN2DJ=Q&D5c@M3hILBUV47Y-GJ{0E>cu|}KI zs&Dn!ZfWbKxoM%4r48meU4Jr!9v7YNn}x*erb`{Jm%alNW2tQPJXGl2yAA6t zuQ@}2ZZSD`6>#igy1$D|tEM9ZzB26tix>?uPnI7(b8!u4TlqH<)^}*s`EUA^L`fM0 zmvr|$U0#3Imp)0K{~OKEYjSz8m-}y&HZFSWF91Y?`ddD|otPFcEhT)3bvO}Lvs|y6 z^NeZls)3n%o&^#De>?=F={^pQey-;3+A!AI-0$!(q>&BO_JsXsq>NQSE|V~Lz_{yl zuJy8tBR+Sh?IeJmqTQc9+|<{%;j=OVp-uzB+n28@<74MOh$3&idCj}Ir2x)+E<+)r zjZxu4tjJ2G_WKiPTIXYicJl5M_YfuuDg-i@;{E6r()4t2h8mkC$UmRT`UXz1;xV=> zBe|I7^k>V@FYb{)_b_-x9a#*Dq~zJ8TP9Fat{KSND%NJay~#{D=n{31wVSPoww5HDoRXJH0g219#l zv$Uf#BG9~N0VNb#lw`%Wbab;sX}v%b$*8*U`_b`3_%ZTCC!)sd#y&A+2nVeB>;Wo4 zIZkDn&f|4{w}iR&YvANFWE^18w5wt+BIP-sLRT>CcPrOzC)8B7CGt~yykGFza1YB1 z^CZ4W2TW~3qGIYmLU~pHt!SkG?aBPhq4P_9(pBR#kJ3BwJ9balPM$4UL>Z((Zd05q z0i`T@yIH&i2t$%;ISUTQy!yOEw$?S&`vob7+b_Kh@cai0#UB+;x$AtQ?s9e$qlVs_ znfWU5^+k%jUg9|_$QZ?6K9j)%tYNMwze}RX@KV9qd*eBKI}?FCy`NMSU!t6N@ulOODwXhHaj7+aop=Fa0;b;9 z+`8x6?;%AGP9f)oE4+l6D^7>C+zrN~TMuRd8r+rc2bT?n2p1#YFMMdQq6o+Z`Z+^H z=p*y0yre|$Ex*nfJg-e3HJZ_zD@iW@kuA=SWbX%6>QMOPq&5_#T`fEWEa$>}Yd4|>_YlZtw$zTa2TE$x?cMzNy*Vr zQySc*cpimyW5=86_;l-thg*eR>UB={+`qFjURlhtZuVTU6cdv z)%E2f&|2edHPvhmeF&zB@K+WCYRWz5zDz<>*@`(ZC|wd980p=Pj*v~$_F3mji$40p z@di~9jAxmyxHQwp!I+3}flf8xFwgfeoW51jy|LJoHq1F>TNsn$<N(Ri6Q!|D7DURXN)xq|mt!G%R75@@Scc)dJOo?7A6UMgohWC{Lx0nd!E~Eb8Bj`M`0EEP<)FMCQ54YK*_JF>D;0X?5*)pZ=Ca-S0Tkz zDi$hw%FzIu#_EhM=vpKt&LAhWDPhk*su^ikOvoP;8tb*2u}rGVvW zm@Cj2MkA?E%|qbF410k%XZ7-IN4Tqx9_>+27$aIs(3<|+qTT4;ri`x_r|fEcs}^ON zvj`Y;^3*>YP15wHtKI^kUW|0@8{C&2Ux5r9K@WVhs95sMvsfcM?hc=*Z|qbq{SFfD z_cqkBWYCLC9)rA29;`wf7DUqK|J~T&5n=aa1wEFKYAIAg@g`R^)-b#uxKNwtX!M(E zH^8rYMqeZ|*vafqg$!~%0qMY|%qQxK;A{fJGsIm$jP4Qukc8RF?ISMnr3dU=fkS-G z|L8du^plB|vunF%ZQKX$>M6x~0Zbze53)|yLkKlP{HGdczcJ%38t$xk$*5p|{L&9( z!3o|n`EEgTH{T1r+er>n_a+M#5<3!sqTCNB854uh=y 
z{HoopoyOggX56D^E5;&jAcz=^1(2Ul2JVghWxuO=mo{j3X7vFr`|l5Vk?q;&5Yz~M zsrFJ$O4#ooIgOow2nZyJ*};&!3=@b{4V80pc8g-q5@<(ZYW$ZI(CO)Fx*0CB*Itwf zcXC|SGEFdy#mhs0CBvyw6TYg|iTNIkFwVikEjH zqQ81vZsFr4aQtzu5^0|Sct%(J$LY%TZ-s@UDmomBNRf;*v1q56mVz`m3>g2|UmgY2 zPqRCdHsuO(V*x)|+9J2eMS+<%*@{q(m!g(QiP;Ow%%Avv;LM(eUlwC7p z_eaq{{@n1J%hUS?r?JGz2z!j`FHwL%;82SN)?uK5U?_2>H69>mVOl!-4td{;^`Wf(}~( zQ4Vvy+%Zj0`2`&mNG{6WaPZmAv%&bW+v`S`paOsT3)u|wpWWrRhe)MXI_ca7AB@p# z%MgAR=}M_tAvi|h!Nuy0adpkpQWRs|>6T+M4-G(ZPFwAs9X0NQ5{BO3ZFSS5h0(}j z!Pe^=$hcXUpIaGvg$eaSrdCDIh8x%<&9n8IG*oHsV4=}BeA+3K1PjYkM1sj)>@9=M{2>B4s6ZN6vv|(-TBQPq-GOzrQ#2V7ic(5}GWtIwE z8rqeTKw$4X^!A2tMV}lJF;BFB=n`fnoy`r%RAD8SFG?Tut>t|#p!94O%f$r>7f5JZ zB;i;D9?WFYe9IW#M;osVOIbN8pz^5vT!#`x>&-ZFc;(kS9WSO4{fi<!A$<4jR9@Qq}XlIPhvY z9eyoS+y3%>xmTHd)lmm6kXqlPFFDUj1YS=o^T#DCFQq462gI{>XTgtP;W)#Zq<@P1 z+uWoMSAf-D@_%+>gFzB_)_xyG@UF@x!9F(S>k5(ZAm&(5OIfV35+(_iVwXIg#t%(~ zA62`aV9-uMNix=Bvv1hxB)43bv|Bzil7{VpbkoT$0X|KU^UG5K8@!6m!Gu36wCAo= zpVTKSlFfK*J#46lm*fMD@A~~6D3$occ}{)|5t>*H0AOalKGeI^+1g5m*2B+bI^xpR`btuZ5R4j#shPVIDm~14mtIZrt))8cydM^iVUDv z_wxs>YSiJ@Ly7#UTjc1GX)1bq=#I%lNWML7HYJ)yBxf7rJMZ859;_4WUik+sfj}TC zX9-PbQzK_HUK2+%-~+_U%)-sc%*x2hqRPz8%gn>e&d&Jn3p1F~V)p-ez}C*x%G~|` z|A4#uh9&R-{=a`vu`_peGjcQoiJ00Mn~}=c8d;brn;Dt9JB*m|13v=ENGOO`i5h& samples, string* output_data); -// Reads an video file using ffmpeg adn converts it into a RGB24 in uint8 +// Reads an video file using ffmpeg and converts it into a RGB24 in uint8 // [frames, height, width, 3]. The w, h, and frames are obtained from ffmpeg. 
Status ReadVideoFile(const string& filename, std::vector* output_data, uint32* width, uint32* height, uint32* frames); diff --git a/tensorflow/contrib/framework/python/ops/critical_section_ops.py b/tensorflow/contrib/framework/python/ops/critical_section_ops.py index bd764ed57a6da0..72835c3ad86e63 100644 --- a/tensorflow/contrib/framework/python/ops/critical_section_ops.py +++ b/tensorflow/contrib/framework/python/ops/critical_section_ops.py @@ -202,7 +202,7 @@ def execute(self, fn, *args, **kwargs): or lazy way that may cause a deadlock. ValueError: If `exclusive_resource_access` is not provided (is `True`) and another `CriticalSection` has an execution requesting the same - resources as in `*args`, `**kwargs`, and any additionaly captured + resources as in `*args`, `**kwargs`, and any additionally captured inputs in `fn`. Note, even if `exclusive_resource_access` is `True`, if another execution in another `CriticalSection` was created without `exclusive_resource_access=True`, a `ValueError` will be raised. diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py index e3fc6bf0f03405..4092b320042162 100644 --- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py +++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py @@ -112,6 +112,7 @@ def __init__(self, generator_optimizer=None, discriminator_optimizer=None, get_hooks_fn=None, + get_eval_metric_ops_fn=None, add_summaries=None, use_loss_summaries=True, config=None): @@ -146,6 +147,9 @@ def __init__(self, list of hooks. These hooks are run on the generator and discriminator train ops, and can be used to implement the GAN training scheme. Defaults to `train.get_sequential_train_hooks()`. + get_eval_metric_ops_fn: A function that takes a `GANModel`, and returns a + dict of metric results keyed by name. 
The output of this function is + passed into `tf.estimator.EstimatorSpec` during evaluation. add_summaries: `None`, a single `SummaryType`, or a list of `SummaryType`. use_loss_summaries: If `True`, add loss summaries. If `False`, does not. If `None`, uses defaults. @@ -160,7 +164,8 @@ def _model_fn(features, labels, mode): else discriminator_optimizer) gan_head = head_lib.gan_head( generator_loss_fn, discriminator_loss_fn, gopt, dopt, - use_loss_summaries, get_hooks_fn=get_hooks_fn) + use_loss_summaries, get_hooks_fn=get_hooks_fn, + get_eval_metric_ops_fn=get_eval_metric_ops_fn) return _gan_model_fn( features, labels, mode, generator_fn, discriminator_fn, gan_head, add_summaries) diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py index 387a62bd741bd4..955482599b372b 100644 --- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py +++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py @@ -38,6 +38,7 @@ from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import metrics as metrics_lib from tensorflow.python.ops import parsing_ops from tensorflow.python.platform import test from tensorflow.python.summary.writer import writer_cache @@ -194,6 +195,12 @@ def make_opt(): lr = learning_rate_decay.exponential_decay(1.0, gstep, 10, 0.9) return training.GradientDescentOptimizer(lr) + def get_metrics(gan_model): + return { + 'mse_custom_metric': metrics_lib.mean_squared_error( + gan_model.real_data, gan_model.generated_data) + } + gopt = make_opt if lr_decay else training.GradientDescentOptimizer(1.0) dopt = make_opt if lr_decay else training.GradientDescentOptimizer(1.0) est = estimator.GANEstimator( @@ -203,6 +210,7 @@ def make_opt(): discriminator_loss_fn=losses.wasserstein_discriminator_loss, generator_optimizer=gopt, 
discriminator_optimizer=dopt, + get_eval_metric_ops_fn=get_metrics, model_dir=self._model_dir) # TRAIN @@ -213,6 +221,9 @@ def make_opt(): scores = est.evaluate(eval_input_fn) self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP]) self.assertIn('loss', six.iterkeys(scores)) + self.assertEqual(scores['discriminator_loss'] + scores['generator_loss'], + scores['loss']) + self.assertIn('mse_custom_metric', six.iterkeys(scores)) # PREDICT predictions = np.array([x for x in est.predict(predict_input_fn)]) diff --git a/tensorflow/contrib/gan/python/estimator/python/head_impl.py b/tensorflow/contrib/gan/python/estimator/python/head_impl.py index a21358c50bbdb4..ff903a78cc36c1 100644 --- a/tensorflow/contrib/gan/python/estimator/python/head_impl.py +++ b/tensorflow/contrib/gan/python/estimator/python/head_impl.py @@ -25,17 +25,21 @@ from tensorflow.python.estimator import model_fn as model_fn_lib from tensorflow.python.estimator.canned import head from tensorflow.python.framework import ops +from tensorflow.python.ops import metrics as metrics_lib __all__ = [ 'GANHead', 'gan_head', ] +def _summary_key(head_name, val): + return '%s/%s' % (val, head_name) if head_name else val + def gan_head(generator_loss_fn, discriminator_loss_fn, generator_optimizer, discriminator_optimizer, use_loss_summaries=True, get_hooks_fn=tfgan_train.get_sequential_train_hooks(), - name=None): + get_eval_metric_ops_fn=None, name=None): """Creates a `GANHead`. Args: @@ -47,9 +51,12 @@ def gan_head(generator_loss_fn, discriminator_loss_fn, generator_optimizer, discriminator_optimizer: Same as `generator_optimizer`, but for the discriminator updates. use_loss_summaries: If `True`, add loss summaries. If `False`, does not. - If `None`, uses defaults. - get_hooks_fn: A function that takes a GANTrainOps tuple and returns a list - of hooks. + If `None`, uses defaults. + get_hooks_fn: A function that takes a `GANTrainOps` tuple and returns a + list of hooks. 
+ get_eval_metric_ops_fn: A function that takes a `GANModel`, and returns a + dict of metric results keyed by name. The output of this function is + passed into `tf.estimator.EstimatorSpec` during evaluation. name: name of the head. If provided, summary and metrics keys will be suffixed by `"/" + name`. @@ -62,6 +69,7 @@ def gan_head(generator_loss_fn, discriminator_loss_fn, generator_optimizer, discriminator_optimizer=discriminator_optimizer, use_loss_summaries=use_loss_summaries, get_hooks_fn=get_hooks_fn, + get_eval_metric_ops_fn=get_eval_metric_ops_fn, name=name) @@ -72,6 +80,7 @@ def __init__(self, generator_loss_fn, discriminator_loss_fn, generator_optimizer, discriminator_optimizer, use_loss_summaries=True, get_hooks_fn=None, + get_eval_metric_ops_fn=None, name=None): """`Head` for GAN training. @@ -85,8 +94,11 @@ def __init__(self, generator_loss_fn, discriminator_loss_fn, discriminator updates. use_loss_summaries: If `True`, add loss summaries. If `False`, does not. If `None`, uses defaults. - get_hooks_fn: A function that takes a GANTrainOps tuple and returns a list - of hooks. Defaults to `train.get_sequential_train_hooks()` + get_hooks_fn: A function that takes a `GANTrainOps` tuple and returns a + list of hooks. Defaults to `train.get_sequential_train_hooks()` + get_eval_metric_ops_fn: A function that takes a `GANModel`, and returns a + dict of metric results keyed by name. The output of this function is + passed into `tf.estimator.EstimatorSpec` during evaluation. name: name of the head. If provided, summary and metrics keys will be suffixed by `"/" + name`. 
""" @@ -104,6 +116,8 @@ def __init__(self, generator_loss_fn, discriminator_loss_fn, self._generator_optimizer = generator_optimizer self._discriminator_optimizer = discriminator_optimizer self._get_hooks_fn = get_hooks_fn + self._get_eval_metric_ops_fn = get_eval_metric_ops_fn + self._name = name @property def name(self): @@ -173,13 +187,26 @@ def create_estimator_spec( gan_loss = self.create_loss( features=None, mode=mode, logits=gan_model, labels=None) scalar_loss = gan_loss.generator_loss + gan_loss.discriminator_loss + with ops.name_scope(None, 'metrics', + [gan_loss.generator_loss, + gan_loss.discriminator_loss]): + eval_metric_ops = { + _summary_key(self._name, 'generator_loss'): + metrics_lib.mean(gan_loss.generator_loss), + _summary_key(self._name, 'discriminator_loss'): + metrics_lib.mean(gan_loss.discriminator_loss) + } + if self._get_eval_metric_ops_fn is not None: + custom_eval_metric_ops = self._get_eval_metric_ops_fn(gan_model) + if not isinstance(custom_eval_metric_ops, dict): + raise TypeError('get_eval_metric_ops_fn must return a dict, ' + 'received: {}'.format(custom_eval_metric_ops)) + eval_metric_ops.update(custom_eval_metric_ops) return model_fn_lib.EstimatorSpec( mode=model_fn_lib.ModeKeys.EVAL, predictions=gan_model.generated_data, loss=scalar_loss, - # TODO(joelshor): Add metrics. If head name provided, append it to - # metric keys. 
- eval_metric_ops={}) + eval_metric_ops=eval_metric_ops) elif mode == model_fn_lib.ModeKeys.TRAIN: if train_op_fn is None: raise ValueError('train_op_fn can not be None.') diff --git a/tensorflow/contrib/gan/python/estimator/python/head_test.py b/tensorflow/contrib/gan/python/estimator/python/head_test.py index 8168f005cd1105..6587f1fc600b94 100644 --- a/tensorflow/contrib/gan/python/estimator/python/head_test.py +++ b/tensorflow/contrib/gan/python/estimator/python/head_test.py @@ -62,9 +62,14 @@ def setUp(self): generator_loss_fn=dummy_loss, discriminator_loss_fn=dummy_loss, generator_optimizer=training.GradientDescentOptimizer(1.0), - discriminator_optimizer=training.GradientDescentOptimizer(1.0)) + discriminator_optimizer=training.GradientDescentOptimizer(1.0), + get_eval_metric_ops_fn=self.get_metrics) self.assertTrue(isinstance(self.gan_head, head.GANHead)) + def get_metrics(self, gan_model): + self.assertTrue(isinstance(gan_model, tfgan_tuples.GANModel)) + return {} + def _test_modes_helper(self, mode): self.gan_head.create_estimator_spec( features=None, diff --git a/tensorflow/contrib/gan/python/features/python/conditioning_utils.py b/tensorflow/contrib/gan/python/features/python/conditioning_utils.py index df71187fbd98c8..a9b8faa7126253 100644 --- a/tensorflow/contrib/gan/python/features/python/conditioning_utils.py +++ b/tensorflow/contrib/gan/python/features/python/conditioning_utils.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Miscellanous utilities for TFGAN code and examples.""" +"""Miscellaneous utilities for TFGAN code and examples.""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/graph_editor/transform.py b/tensorflow/contrib/graph_editor/transform.py index a320a3f232fc1d..592d37b432ee60 100644 --- a/tensorflow/contrib/graph_editor/transform.py +++ b/tensorflow/contrib/graph_editor/transform.py @@ -677,7 +677,7 @@ def replace_t_with_replacement_handler(info, t): def _add_control_flow_ops(ops, control_ios): - """Complete `ops` so that the tranformed graph is valid. + """Complete `ops` so that the transformed graph is valid. Partially copying a graph can lead to a malformed graph. For instance, copying half of a while construct is likely to result in an invalid graph. diff --git a/tensorflow/contrib/hvx/hvx_ops_support_checker/hvx_ops_support_checker_main.cc b/tensorflow/contrib/hvx/hvx_ops_support_checker/hvx_ops_support_checker_main.cc index 60281951dda940..66939fbb0f0d3b 100644 --- a/tensorflow/contrib/hvx/hvx_ops_support_checker/hvx_ops_support_checker_main.cc +++ b/tensorflow/contrib/hvx/hvx_ops_support_checker/hvx_ops_support_checker_main.cc @@ -115,7 +115,7 @@ static void CheckOpsSupport(const GraphDef& graph_def, HexagonOpsDefinitions::getInstance(); LOG(INFO) << "Checking " << graph_def.node_size() << " nodes"; LOG(INFO) << "dump_all_nodes = " << dump_all_nodes - << ", dump_shape_and_tpye = " << dump_shape_and_type; + << ", dump_shape_and_type = " << dump_shape_and_type; std::unordered_set unsupported_ops; bool all_supported = true; diff --git a/tensorflow/contrib/image/__init__.py b/tensorflow/contrib/image/__init__.py index 8f406ace1d5dcc..f230d93da4a9c0 100755 --- a/tensorflow/contrib/image/__init__.py +++ b/tensorflow/contrib/image/__init__.py @@ -17,7 +17,7 @@ ### API This module provides functions for image manipulation; 
currently, chrominance -transformas (including changing saturation and hue) in YIQ space and +transforms (including changing saturation and hue) in YIQ space and projective transforms (including rotation) are supported. ## Image Transformation `Ops` diff --git a/tensorflow/contrib/kfac/examples/convnet.py b/tensorflow/contrib/kfac/examples/convnet.py index b261f41bf97db1..d6b1a61b716ab7 100644 --- a/tensorflow/contrib/kfac/examples/convnet.py +++ b/tensorflow/contrib/kfac/examples/convnet.py @@ -325,7 +325,7 @@ def distributed_grads_only_and_ops_chief_worker( All workers perform gradient computation. Chief worker applies gradient after averaging the gradients obtained from all the workers. All workers block - execution untill the update is applied. Chief worker runs covariance and + execution until the update is applied. Chief worker runs covariance and inverse update ops. Covariance and inverse matrices are placed on parameter servers in a round robin manner. For further details on synchronous distributed optimization check `tf.train.SyncReplicasOptimizer`. diff --git a/tensorflow/contrib/kfac/python/ops/optimizer.py b/tensorflow/contrib/kfac/python/ops/optimizer.py index 45a760c9f1013d..b7f63d8d94a7a4 100644 --- a/tensorflow/contrib/kfac/python/ops/optimizer.py +++ b/tensorflow/contrib/kfac/python/ops/optimizer.py @@ -66,7 +66,7 @@ def __init__(self, the local approximation with the Fisher information matrix, and to regularize the update direction by making it closer to the gradient. If damping is adapted during training then this value is used for - initializing damping varaible. + initializing damping variable. (Higher damping means the update looks more like a standard gradient update - see Tikhonov regularization.) 
layer_collection: The layer collection object, which holds the fisher @@ -114,7 +114,7 @@ def __init__(self, self._estimation_mode = estimation_mode self._colocate_gradients_with_ops = colocate_gradients_with_ops - # The below paramaters are required only if damping needs to be adapated. + # The below parameters are required only if damping needs to be adapated. # These parameters can be set by calling # set_damping_adaptation_params() explicitly. self._damping_adaptation_decay = 0.95 @@ -195,7 +195,7 @@ def set_damping_adaptation_params(self, min_damping: `float`(Optional), Minimum value the damping parameter can take. Default value 1e-5. damping_adaptation_decay: `float`(Optional), The `damping` parameter is - multipled by the `damping_adaptation_decay` every + multiplied by the `damping_adaptation_decay` every `damping_adaptation_interval` number of iterations. Default value 0.99. damping_adaptation_interval: `int`(Optional), Number of steps in between updating the `damping` parameter. Default value 5. diff --git a/tensorflow/contrib/kfac/python/ops/placement.py b/tensorflow/contrib/kfac/python/ops/placement.py index 8a20ebe19844e6..c4454325aebe13 100644 --- a/tensorflow/contrib/kfac/python/ops/placement.py +++ b/tensorflow/contrib/kfac/python/ops/placement.py @@ -51,7 +51,7 @@ def __init__(self, cov_devices=None, inv_devices=None, **kwargs): self._inv_devices = inv_devices def make_vars_and_create_op_thunks(self, scope=None): - """Make vars and create op thunks w/ a round-robin device placement strat. + """Make vars and create op thunks w/ a round-robin device placement start. For each factor, all of that factor's cov variables and their associated update ops will be placed on a particular device. 
A new device is chosen diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index f708da66937ed7..b7194ae3330450 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -932,7 +932,8 @@ def convolution(inputs, variables_collections=None, outputs_collections=None, trainable=True, - scope=None): + scope=None, + conv_dims=None): """Adds an N-D convolution followed by an optional batch_norm layer. It is required that 1 <= N <= 3. @@ -993,6 +994,10 @@ def convolution(inputs, trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). scope: Optional scope for `variable_scope`. + conv_dims: Optional convolution dimensionality, when set it would use the + corresponding convolution (e.g. 2 for Conv 2D, 3 for Conv 3D, ..). When + leaved to None it would select the convolution dimensionality based on + the input rank (i.e. Conv ND, with N = input_rank - 2). Returns: A tensor representing the output of the operation. 
@@ -1015,6 +1020,9 @@ def convolution(inputs, inputs = ops.convert_to_tensor(inputs) input_rank = inputs.get_shape().ndims + if conv_dims is not None and conv_dims + 2 != input_rank: + raise ValueError('Convolution expects input with rank %d, got %d' % + (conv_dims + 2, input_rank)) if input_rank == 3: layer_class = convolutional_layers.Convolution1D elif input_rank == 4: @@ -1061,10 +1069,134 @@ def convolution(inputs, outputs = activation_fn(outputs) return utils.collect_named_outputs(outputs_collections, sc.name, outputs) +@add_arg_scope +def convolution1d(inputs, + num_outputs, + kernel_size, + stride=1, + padding='SAME', + data_format=None, + rate=1, + activation_fn=nn.relu, + normalizer_fn=None, + normalizer_params=None, + weights_initializer=initializers.xavier_initializer(), + weights_regularizer=None, + biases_initializer=init_ops.zeros_initializer(), + biases_regularizer=None, + reuse=None, + variables_collections=None, + outputs_collections=None, + trainable=True, + scope=None): + return convolution(inputs, + num_outputs, + kernel_size, + stride, + padding, + data_format, + rate, + activation_fn, + normalizer_fn, + normalizer_params, + weights_initializer, + weights_regularizer, + biases_initializer, + biases_regularizer, + reuse, + variables_collections, + outputs_collections, + trainable, + scope, + conv_dims=1) + +convolution1d.__doc__ = convolution.__doc__ -convolution2d = convolution -convolution3d = convolution +@add_arg_scope +def convolution2d(inputs, + num_outputs, + kernel_size, + stride=1, + padding='SAME', + data_format=None, + rate=1, + activation_fn=nn.relu, + normalizer_fn=None, + normalizer_params=None, + weights_initializer=initializers.xavier_initializer(), + weights_regularizer=None, + biases_initializer=init_ops.zeros_initializer(), + biases_regularizer=None, + reuse=None, + variables_collections=None, + outputs_collections=None, + trainable=True, + scope=None): + return convolution(inputs, + num_outputs, + kernel_size, + stride, + 
padding, + data_format, + rate, + activation_fn, + normalizer_fn, + normalizer_params, + weights_initializer, + weights_regularizer, + biases_initializer, + biases_regularizer, + reuse, + variables_collections, + outputs_collections, + trainable, + scope, + conv_dims=2) + +convolution2d.__doc__ = convolution.__doc__ +@add_arg_scope +def convolution3d(inputs, + num_outputs, + kernel_size, + stride=1, + padding='SAME', + data_format=None, + rate=1, + activation_fn=nn.relu, + normalizer_fn=None, + normalizer_params=None, + weights_initializer=initializers.xavier_initializer(), + weights_regularizer=None, + biases_initializer=init_ops.zeros_initializer(), + biases_regularizer=None, + reuse=None, + variables_collections=None, + outputs_collections=None, + trainable=True, + scope=None): + return convolution(inputs, + num_outputs, + kernel_size, + stride, + padding, + data_format, + rate, + activation_fn, + normalizer_fn, + normalizer_params, + weights_initializer, + weights_regularizer, + biases_initializer, + biases_regularizer, + reuse, + variables_collections, + outputs_collections, + trainable, + scope, + conv_dims=3) + +convolution3d.__doc__ = convolution.__doc__ @add_arg_scope def convolution2d_in_plane( @@ -1411,7 +1543,7 @@ def dense_to_sparse(tensor, eos_token=0, outputs_collections=None, scope=None): Args: tensor: An `int` `Tensor` to be converted to a `Sparse`. eos_token: An integer. - It is part of the target label that signfies the end of a sentence. + It is part of the target label that signifies the end of a sentence. outputs_collections: Collection to add the outputs. scope: Optional scope for name_scope. """ @@ -1555,7 +1687,7 @@ def _inner_flatten(inputs, new_rank, output_collections=None, scope=None): output_collections: Collection to which the outputs will be added. scope: Optional scope for `name_scope`. 
Returns: - A `Tensor` or `SparseTensor` conataining the same values as `inputs`, but + A `Tensor` or `SparseTensor` containing the same values as `inputs`, but with innermost dimensions flattened to obtain rank `new_rank`. Raises: diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py index 997f910a2a9756..b01fd5d5c95ac1 100644 --- a/tensorflow/contrib/layers/python/layers/layers_test.py +++ b/tensorflow/contrib/layers/python/layers/layers_test.py @@ -310,6 +310,17 @@ def testCreateDimensions(self): class ConvolutionTest(test.TestCase): + def testInvalidShape(self): + with self.test_session(): + images_2d = random_ops.random_uniform((5, 7, 9, 3), seed=1) + with self.assertRaisesRegexp( + ValueError, 'Convolution expects input with rank 5, got 4'): + layers_lib.convolution3d(images_2d, 32, 3) + images_3d = random_ops.random_uniform((5, 6, 7, 9, 3), seed=1) + with self.assertRaisesRegexp( + ValueError, 'Convolution expects input with rank 4, got 5'): + layers_lib.convolution2d(images_3d, 32, 3) + def testInvalidDataFormat(self): height, width = 7, 9 with self.test_session(): @@ -3155,7 +3166,7 @@ def testRepeat(self): with self.test_session(): images = np.random.uniform(size=(5, height, width, 3)).astype(np.float32) output = _layers.repeat(images, 3, layers_lib.conv2d, 32, [3, 3]) - self.assertEqual(output.op.name, 'Repeat/convolution_3/Relu') + self.assertEqual(output.op.name, 'Repeat/convolution2d_3/Relu') self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 32]) def testRepeatWithScope(self): @@ -3749,7 +3760,7 @@ def testStackConvolution2d(self): layers_lib.convolution2d, [10, 20, 30], kernel_size=[3, 3], padding='SAME') - self.assertEqual(output.op.name, 'Stack/convolution_3/Relu') + self.assertEqual(output.op.name, 'Stack/convolution2d_3/Relu') self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 30]) def testStackWithScope(self): diff --git 
a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py index c7cdb4131215c3..f8106d1e4a7e79 100644 --- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py +++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py @@ -343,7 +343,8 @@ def get_temp_export_dir(timestamped_export_dir): """ (dirname, basename) = os.path.split(timestamped_export_dir) temp_export_dir = os.path.join( - compat.as_bytes(dirname), compat.as_bytes('temp-{}'.format(basename))) + compat.as_bytes(dirname), + compat.as_bytes('temp-{}'.format(compat.as_text(basename)))) return temp_export_dir diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD index 01c76b7a66e93a..55b984f260ec49 100644 --- a/tensorflow/contrib/lite/BUILD +++ b/tensorflow/contrib/lite/BUILD @@ -6,8 +6,6 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow/contrib/lite:build_def.bzl", "tflite_copts", "gen_selected_ops") -exports_files(["LICENSE"]) - exports_files(glob([ "testdata/*.bin", "testdata/*.pb", diff --git a/tensorflow/contrib/lite/Makefile b/tensorflow/contrib/lite/Makefile index 1053cce3857420..cc8a8035d1dade 100644 --- a/tensorflow/contrib/lite/Makefile +++ b/tensorflow/contrib/lite/Makefile @@ -1,4 +1,3 @@ - # Find where we're running from, so we can store generated files here. ifeq ($(origin MAKEFILE_DIR), undefined) MAKEFILE_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) @@ -69,12 +68,12 @@ LIB_NAME := libtensorflow-lite.a LIB_PATH := $(LIBDIR)$(LIB_NAME) # A small example program that shows how to link against the library. 
-BENCHMARK_PATH := $(BINDIR)benchmark_model +MINIMAL_PATH := $(BINDIR)minimal -BENCHMARK_SRCS := \ -tensorflow/contrib/lite/tools/benchmark_model.cc -BENCHMARK_OBJS := $(addprefix $(OBJDIR), \ -$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(BENCHMARK_SRCS)))) +MINIMAL_SRCS := \ +tensorflow/contrib/lite/examples/minimal/minimal.cc +MINIMAL_OBJS := $(addprefix $(OBJDIR), \ +$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(MINIMAL_SRCS)))) # What sources we want to compile, must be kept in sync with the main Bazel # build files. @@ -100,7 +99,7 @@ $(wildcard tensorflow/contrib/lite/*/*test.cc) \ $(wildcard tensorflow/contrib/lite/*/*/*test.cc) \ $(wildcard tensorflow/contrib/lite/*/*/*/*test.cc) \ $(wildcard tensorflow/contrib/lite/kernels/test_util.cc) \ -$(BENCHMARK_SRCS) +$(MINIMAL_SRCS) # Filter out all the excluded files. TF_LITE_CC_SRCS := $(filter-out $(CORE_CC_EXCLUDE_SRCS), $(CORE_CC_ALL_SRCS)) # File names of the intermediate files target compilation generates. @@ -119,17 +118,17 @@ $(OBJDIR)%.o: %.c $(CC) $(CCFLAGS) $(INCLUDES) -c $< -o $@ # The target that's compiled if there's no command-line arguments. -all: $(LIB_PATH) $(BENCHMARK_PATH) +all: $(LIB_PATH) $(MINIMAL_PATH) # Gathers together all the objects we've compiled into a single '.a' archive. $(LIB_PATH): $(LIB_OBJS) @mkdir -p $(dir $@) $(AR) $(ARFLAGS) $(LIB_PATH) $(LIB_OBJS) -$(BENCHMARK_PATH): $(BENCHMARK_OBJS) $(LIB_PATH) +$(MINIMAL_PATH): $(MINIMAL_OBJS) $(LIB_PATH) @mkdir -p $(dir $@) $(CXX) $(CXXFLAGS) $(INCLUDES) \ - -o $(BENCHMARK_PATH) $(BENCHMARK_OBJS) \ + -o $(MINIMAL_PATH) $(MINIMAL_OBJS) \ $(LIBFLAGS) $(LIB_PATH) $(LDFLAGS) $(LIBS) # Gets rid of all generated files. diff --git a/tensorflow/contrib/lite/examples/minimal/minimal.cc b/tensorflow/contrib/lite/examples/minimal/minimal.cc new file mode 100644 index 00000000000000..106e3b027055b6 --- /dev/null +++ b/tensorflow/contrib/lite/examples/minimal/minimal.cc @@ -0,0 +1,71 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include <cstdio> + +// This is a minimal example that reads a model from disk and +// performs inference. No input data is loaded; filling the input +// tensors is left to you as the user. +// +// NOTE: Do not add any dependencies to this that cannot be built with +// the minimal makefile. This example must remain trivial to build with +// the minimal build tool. +// +// Usage: minimal <tflite model> + +using namespace tflite; + +#define TFLITE_MINIMAL_CHECK(x) \ + if(!(x)) { \ + fprintf(stderr, "Error at %s:%d\n", __FILE__, __LINE__); \ + exit(1); \ + } + + +int main(int argc, char *argv[]) { + if(argc != 2) { + fprintf(stderr, "Usage: %s <tflite model>\n", argv[0]); + return 1; + } + const char* filename = argv[1]; + + // Load model + std::unique_ptr<tflite::FlatBufferModel> model + = tflite::FlatBufferModel::BuildFromFile(filename); + TFLITE_MINIMAL_CHECK(model != nullptr); + + // Build the interpreter + tflite::ops::builtin::BuiltinOpResolver resolver; + InterpreterBuilder builder(*model.get(), resolver); + std::unique_ptr<Interpreter> interpreter; + builder(&interpreter); + TFLITE_MINIMAL_CHECK(interpreter != nullptr); + + // Allocate tensor buffers. 
+ TFLITE_MINIMAL_CHECK(interpreter->AllocateTensors() == kTfLiteOk); + + // Fill input buffers + // TODO(user): Insert code to fill input tensors + + // Run inference + TFLITE_MINIMAL_CHECK(interpreter->Invoke() == kTfLiteOk); + + // Read output buffers + // TODO(user): Insert getting data out code. + + return 0; +} diff --git a/tensorflow/contrib/lite/g3doc/rpi.md b/tensorflow/contrib/lite/g3doc/rpi.md index 7a3a231626d0e1..ab507893074142 100644 --- a/tensorflow/contrib/lite/g3doc/rpi.md +++ b/tensorflow/contrib/lite/g3doc/rpi.md @@ -32,7 +32,7 @@ This has been tested on Raspberry Pi 3b, Raspbian GNU/Linux 9.1 (stretch), gcc v Log in to you RPI, install the toolchain. ```bash -sudo apt-get instal build-essential +sudo apt-get install build-essential ``` First, clone this TensorFlow repository. Run this at the root of the repository: diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index f23b90d9dce694..d48178d608b905 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -3387,7 +3387,7 @@ inline void Concatenation(int concat_dim, const uint8* const* input_data, const int32 output_zeropoint, const float output_scale) { // The arguments input_zeropoint and input_scale are expected to be an array - // that have the quantization paramaters for all the inputs to the concat + // that have the quantization parameters for all the inputs to the concat // operator. 
gemmlowp::ScopedProfilingLabel label("Concatenation"); TFLITE_DCHECK_GT(inputs_count, 1); diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index f6d8d3257b8795..62d6fe0bb300cf 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -1789,7 +1789,7 @@ inline void Concatenation(int concat_dim, const uint8* const* input_data, const int32 output_zeropoint, const float output_scale) { // The arguments input_zeropoint and input_scale are expected to be an array - // that have the quantization paramaters for all the inputs to the concat + // that have the quantization parameters for all the inputs to the concat // operator. TFLITE_DCHECK_GT(inputs_count, 1); int64_t concat_size = 0; @@ -1975,7 +1975,7 @@ inline void LstmCell(const float* input_data, const Dims<4>& input_dims, // requiring a power-of-two representation interval. Thus, we should right // away quantize this array to a power-of-two interval; otherwise, // implementation will need to rescale that, losing any benefit that a tighter -// representation interval might otherwise yield, while introducting some +// representation interval might otherwise yield, while introducing some // numerical error and computational overhead. // // Now, Logistic and Tanh diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs index e5b640fcee82d2..8bdeb035f5a778 100644 --- a/tensorflow/contrib/lite/schema/schema.fbs +++ b/tensorflow/contrib/lite/schema/schema.fbs @@ -65,7 +65,7 @@ table Tensor { quantization:QuantizationParameters; // Optional. } -// A list of builtin operators. Builtin operators a slighlty faster than custom +// A list of builtin operators. Builtin operators are slightly faster than custom // ones, but not by much. 
Moreover, while custom operators accept an opaque // object containing configuration parameters, builtins have a predetermined // set of acceptable options. diff --git a/tensorflow/contrib/lite/schema/schema_v0.fbs b/tensorflow/contrib/lite/schema/schema_v0.fbs index 852ea988f3ddc7..891d8366ccae35 100644 --- a/tensorflow/contrib/lite/schema/schema_v0.fbs +++ b/tensorflow/contrib/lite/schema/schema_v0.fbs @@ -48,7 +48,7 @@ table Tensor { quantization:QuantizationParameters; // Optional. } -// A list of builtin operators. Builtin operators a slighlty faster than custom +// A list of builtin operators. Builtin operators are slightly faster than custom // ones, but not by much. Moreover, while custom operators accept an opaque // object containing configuration parameters, builtins have a predetermined // set of acceptable options. diff --git a/tensorflow/contrib/lite/schema/schema_v1.fbs b/tensorflow/contrib/lite/schema/schema_v1.fbs index 06cd9408edb710..b438b569e67ac5 100644 --- a/tensorflow/contrib/lite/schema/schema_v1.fbs +++ b/tensorflow/contrib/lite/schema/schema_v1.fbs @@ -53,7 +53,7 @@ table Tensor { quantization:QuantizationParameters; // Optional. } -// A list of builtin operators. Builtin operators a slighlty faster than custom +// A list of builtin operators. Builtin operators are slightly faster than custom // ones, but not by much. Moreover, while custom operators accept an opaque // object containing configuration parameters, builtins have a predetermined // set of acceptable options. diff --git a/tensorflow/contrib/lite/schema/schema_v2.fbs b/tensorflow/contrib/lite/schema/schema_v2.fbs index 96731c8aaebf69..b90408ff6d09fd 100644 --- a/tensorflow/contrib/lite/schema/schema_v2.fbs +++ b/tensorflow/contrib/lite/schema/schema_v2.fbs @@ -54,7 +54,7 @@ table Tensor { quantization:QuantizationParameters; // Optional. } -// A list of builtin operators. Builtin operators a slighlty faster than custom +// A list of builtin operators. 
Builtin operators are slightly faster than custom // ones, but not by much. Moreover, while custom operators accept an opaque // object containing configuration parameters, builtins have a predetermined // set of acceptable options. diff --git a/tensorflow/contrib/lite/schema/schema_v3.fbs b/tensorflow/contrib/lite/schema/schema_v3.fbs index cedefe08f35cbb..020da38493980d 100644 --- a/tensorflow/contrib/lite/schema/schema_v3.fbs +++ b/tensorflow/contrib/lite/schema/schema_v3.fbs @@ -53,7 +53,7 @@ table Tensor { type:TensorType; // An index that refers to the buffers table at the root of the model. Or, // if there is no data buffer associated (i.e. intermediate results), then - // this is 0 (which refers to an always existant empty buffer). + // this is 0 (which refers to an always existent empty buffer). // // The data_buffer itself is an opaque container, with the assumption that the // target device is little-endian. In addition, all builtin operators assume @@ -64,7 +64,7 @@ table Tensor { quantization:QuantizationParameters; // Optional. } -// A list of builtin operators. Builtin operators a slighlty faster than custom +// A list of builtin operators. Builtin operators are slightly faster than custom // ones, but not by much. Moreover, while custom operators accept an opaque // object containing configuration parameters, builtins have a predetermined // set of acceptable options. diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index 07d2b28bbe1f1d..0e036bda92e4c4 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -109,7 +109,7 @@ class ExtraTocoOptions(object): - """Additonal toco options besides input, output, shape.""" + """Additional toco options besides input, output, shape.""" def __init__(self): # Whether to ignore control dependency nodes. 
@@ -2016,7 +2016,7 @@ def build_graph(parameters): return inputs_after_split, [out] def build_inputs(parameters, sess, inputs, outputs): - """Feed inputs, assign vairables, and freeze graph.""" + """Feed inputs, assign variables, and freeze graph.""" with tf.variable_scope("", reuse=True): kernel = tf.get_variable("rnn/basic_lstm_cell/kernel") diff --git a/tensorflow/contrib/lite/testing/tflite_driver.cc b/tensorflow/contrib/lite/testing/tflite_driver.cc index 1f07068aee2be4..8cab6cd8cdc41d 100644 --- a/tensorflow/contrib/lite/testing/tflite_driver.cc +++ b/tensorflow/contrib/lite/testing/tflite_driver.cc @@ -227,8 +227,8 @@ void TfLiteDriver::SetExpectation(int id, const string& csv_values) { if (!IsValid()) return; auto* tensor = interpreter_->tensor(id); if (expected_output_.count(id) != 0) { - fprintf(stderr, "Overriden expectation for tensor %d\n", id); - Invalidate("Overriden expectation"); + fprintf(stderr, "Overridden expectation for tensor %d\n", id); + Invalidate("Overridden expectation"); } expected_output_[id].reset(new Expectation); switch (tensor->type) { diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md index 495014c6fc67ab..7680cdd344814b 100644 --- a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md +++ b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md @@ -115,7 +115,7 @@ bazel run --config=opt \ In order to evaluate the possible benefit of generating a quantized graph, TOCO allows "dummy-quantization" on float graphs. The flags `--default_ranges_min` -and `--default_ranges_max` accept plausable values for the min-max ranges of the +and `--default_ranges_max` accept plausible values for the min-max ranges of the values in all arrays that do not have min-max information. "Dummy-quantization" will produce lower accuracy but will emulate the performance of a correctly quantized model. @@ -338,7 +338,7 @@ below outline the use cases for each. 
### Using `--output_format=GRAPHVIZ_DOT` The first way to get a graphviz rendering is to pass `GRAPHVIZ_DOT` into -`--output_format`. This results in a plausable visualization of the graph. This +`--output_format`. This results in a plausible visualization of the graph. This reduces the requirements that normally exist during conversion between other input and output formats. For example, this may be useful if conversion from TENSORFLOW_GRAPHDEF to TFLITE is failing. diff --git a/tensorflow/contrib/lite/toco/tflite/operator.h b/tensorflow/contrib/lite/toco/tflite/operator.h index 50f0620b3cce49..5e9c20e40dd627 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator.h +++ b/tensorflow/contrib/lite/toco/tflite/operator.h @@ -25,10 +25,10 @@ namespace tflite { class BaseOperator; -// Return a map contained all knwo TF Lite Operators, keyed by their names. +// Return a map containing all known TF Lite Operators, keyed by their names. std::map> BuildOperatorByNameMap(); -// Return a map contained all knwo TF Lite Operators, keyed by the type of +// Return a map containing all known TF Lite Operators, keyed by the type of // their tf.mini counterparts. std::map> BuildOperatorByTypeMap(); diff --git a/tensorflow/contrib/lite/toco/toco_flags.proto b/tensorflow/contrib/lite/toco/toco_flags.proto index 253f022e6b3ade..8589ca361dae25 100644 --- a/tensorflow/contrib/lite/toco/toco_flags.proto +++ b/tensorflow/contrib/lite/toco/toco_flags.proto @@ -127,7 +127,7 @@ message TocoFlags { // transformations that are necessary in order to generate inference // code for these graphs. Such graphs should be fixed, but as a // temporary work-around, setting this reorder_across_fake_quant flag - // allows toco to perform necessary graph transformaitons on them, + // allows toco to perform necessary graph transformations on them, // at the cost of no longer faithfully matching inference and training // arithmetic. 
optional bool reorder_across_fake_quant = 8; diff --git a/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py b/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py index 37539b959959b5..5ed8057b865cf4 100644 --- a/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py +++ b/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py @@ -58,7 +58,7 @@ def create_local_cluster(num_workers, num_ps, protocol="grpc"): # Creates the workers and return their sessions, graphs, train_ops. -# Cheif worker will update at last +# Chief worker will update at last def _get_workers(num_workers, period, workers, moving_rate): sessions = [] graphs = [] diff --git a/tensorflow/contrib/opt/python/training/model_average_optimizer_test.py b/tensorflow/contrib/opt/python/training/model_average_optimizer_test.py index 6cca0a8a009456..3acd9402684fa2 100644 --- a/tensorflow/contrib/opt/python/training/model_average_optimizer_test.py +++ b/tensorflow/contrib/opt/python/training/model_average_optimizer_test.py @@ -57,7 +57,7 @@ def create_local_cluster(num_workers, num_ps, protocol="grpc"): # Creates the workers and return their sessions, graphs, train_ops. 
-# Cheif worker will update at last +# Chief worker will update at last def _get_workers(num_workers, steps, workers): sessions = [] graphs = [] @@ -146,7 +146,7 @@ def test1Workers2Period(self): self.assertAllEqual(1.0, sessions[0].run(global_var_1)) self.assertAllEqual(0, sessions[0].run(global_step)) - # iteration 2, global varibale update + # iteration 2, global variable update thread_0 = self.checkedThread( target=self._run, args=(train_ops[0], sessions[0])) thread_1 = self.checkedThread( diff --git a/tensorflow/contrib/signal/python/ops/window_ops.py b/tensorflow/contrib/signal/python/ops/window_ops.py index 50094010dc75cf..59e67e8ba414df 100644 --- a/tensorflow/contrib/signal/python/ops/window_ops.py +++ b/tensorflow/contrib/signal/python/ops/window_ops.py @@ -47,7 +47,7 @@ def hann_window(window_length, periodic=True, dtype=dtypes.float32, name=None): Raises: ValueError: If `dtype` is not a floating point type. - [hann]: https://en.wikipedia.org/wiki/Window_function#Hann_window + [hann]: https://en.wikipedia.org/wiki/Window_function#Hann_and_Hamming_windows """ return _raised_cosine_window(name, 'hann_window', window_length, periodic, dtype, 0.5, 0.5) @@ -72,7 +72,7 @@ def hamming_window(window_length, periodic=True, dtype=dtypes.float32, Raises: ValueError: If `dtype` is not a floating point type. 
- [hamming]: https://en.wikipedia.org/wiki/Window_function#Hamming_window + [hamming]: https://en.wikipedia.org/wiki/Window_function#Hann_and_Hamming_windows """ return _raised_cosine_window(name, 'hamming_window', window_length, periodic, dtype, 0.54, 0.46) diff --git a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py index f2d31dc8db5688..d877831fce99a3 100644 --- a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py +++ b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py @@ -102,7 +102,7 @@ class BoundingBox(ItemHandler): """An ItemHandler that concatenates a set of parsed Tensors to Bounding Boxes. """ - def __init__(self, keys=None, prefix=None): + def __init__(self, keys=None, prefix=''): """Initialize the bounding box handler. Args: diff --git a/tensorflow/contrib/slim/python/slim/learning.py b/tensorflow/contrib/slim/python/slim/learning.py index 8a2c74742a8ebb..6e55b9407bce5c 100644 --- a/tensorflow/contrib/slim/python/slim/learning.py +++ b/tensorflow/contrib/slim/python/slim/learning.py @@ -571,7 +571,7 @@ def train(train_op, default, two `Boolean`, scalar ops called "should_stop" and "should_log" are provided. log_every_n_steps: The frequency, in terms of global steps, that the loss - and global step and logged. + and global step are logged. graph: The graph to pass to the supervisor. If no graph is supplied the default graph is used. master: The address of the tensorflow master. 
diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc index d5d8e4100f11ca..cfdc884277a025 100644 --- a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc +++ b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc @@ -1080,14 +1080,20 @@ class SummaryDbWriter : public SummaryWriterInterface { // See tensorboard/plugins/histogram/summary.py and data_compat.py Tensor t{DT_DOUBLE, {k, 3}}; auto data = t.flat(); - for (int i = 0; i < k; ++i) { - double left_edge = ((i - 1 >= 0) ? histo.bucket_limit(i - 1) - : std::numeric_limits::min()); - double right_edge = ((i + 1 < k) ? histo.bucket_limit(i + 1) - : std::numeric_limits::max()); - data(i + 0) = left_edge; - data(i + 1) = right_edge; - data(i + 2) = histo.bucket(i); + for (int i = 0, j = 0; i < k; ++i) { + // TODO(nickfelt): reconcile with TensorBoard's data_compat.py + // From summary.proto + // Parallel arrays encoding the bucket boundaries and the bucket values. + // bucket(i) is the count for the bucket i. The range for + // a bucket is: + // i == 0: -DBL_MAX .. bucket_limit(0) + // i != 0: bucket_limit(i-1) .. bucket_limit(i) + double left_edge = (i == 0) ? 
std::numeric_limits::min() + : histo.bucket_limit(i - 1); + + data(j++) = left_edge; + data(j++) = histo.bucket_limit(i); + data(j++) = histo.bucket(i); } int64 tag_id; PatchPluginName(s->mutable_metadata(), kHistogramPluginName); diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc index c34b6763a1420d..2e8d4109dd624a 100644 --- a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc +++ b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc @@ -100,6 +100,56 @@ class SummaryDbWriterTest : public ::testing::Test { SummaryWriterInterface* writer_ = nullptr; }; +TEST_F(SummaryDbWriterTest, WriteHistogram_VerifyTensorValues) { + TF_ASSERT_OK(CreateSummaryDbWriter(db_, "histtest", "test1", "user1", &env_, + &writer_)); + int step = 0; + std::unique_ptr e{new Event}; + e->set_step(step); + e->set_wall_time(123); + Summary::Value* s = e->mutable_summary()->add_value(); + s->set_tag("normal/myhisto"); + + double dummy_value = 10.123; + HistogramProto* proto = s->mutable_histo(); + proto->Clear(); + proto->set_min(dummy_value); + proto->set_max(dummy_value); + proto->set_num(dummy_value); + proto->set_sum(dummy_value); + proto->set_sum_squares(dummy_value); + + int size = 3; + double bucket_limits[] = {-30.5, -10.5, -5.5}; + double bucket[] = {-10, 10, 20}; + for (int i = 0; i < size; i++) { + proto->add_bucket_limit(bucket_limits[i]); + proto->add_bucket(bucket[i]); + } + TF_ASSERT_OK(writer_->WriteEvent(std::move(e))); + TF_ASSERT_OK(writer_->Flush()); + writer_->Unref(); + writer_ = nullptr; + + // TODO(nickfelt): implement QueryTensor() to encapsulate this + // Verify the data + string result = QueryString("SELECT data FROM Tensors"); + const double* val = reinterpret_cast(result.data()); + double histarray[] = {std::numeric_limits::min(), + -30.5, + -10, + -30.5, + -10.5, + 10, + -10.5, + -5.5, + 20}; + int histarray_size = 9; + for (int i = 0; i < histarray_size; i++) { + 
EXPECT_EQ(histarray[i], val[i]); + } +} + TEST_F(SummaryDbWriterTest, NothingWritten_NoRowsCreated) { TF_ASSERT_OK(CreateSummaryDbWriter(db_, "mad-science", "train", "jart", &env_, &writer_)); diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 6d6feb3c399996..a5d8b061b6b26f 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -67,6 +67,7 @@ tf_cuda_library( visibility = ["//visibility:public"], deps = [ ":trt_logging", + ":trt_plugins", ] + if_tensorrt([ "@local_config_tensorrt//:nv_infer", ]) + tf_custom_op_library_additional_deps(), @@ -86,6 +87,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":trt_logging", + ":trt_plugins", ":trt_resources", "//tensorflow/core:gpu_headers_lib", "//tensorflow/core:lib_proto_parsing", @@ -197,10 +199,12 @@ tf_py_wrap_cc( tf_cuda_library( name = "trt_resources", srcs = [ + "resources/trt_allocator.cc", "resources/trt_int8_calibrator.cc", "resources/trt_resource_manager.cc", ], hdrs = [ + "resources/trt_allocator.h", "resources/trt_int8_calibrator.h", "resources/trt_resource_manager.h", "resources/trt_resources.h", @@ -221,18 +225,25 @@ tf_cuda_library( srcs = [ "convert/convert_graph.cc", "convert/convert_nodes.cc", + "convert/trt_optimization_pass.cc", ], hdrs = [ "convert/convert_graph.h", "convert/convert_nodes.h", + "convert/trt_optimization_pass.h", ], deps = [ ":segment", + ":trt_plugins", ":trt_logging", ":trt_resources", + "//tensorflow/core/grappler/clusters:cluster", + "//tensorflow/core/grappler/optimizers:custom_graph_optimizer", + "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", "//tensorflow/core:framework", + "//tensorflow/core:gpu_runtime", "//tensorflow/core:framework_lite", "//tensorflow/core:graph", "//tensorflow/core:lib", @@ -241,8 +252,7 @@ tf_cuda_library( "//tensorflow/core/grappler:devices", 
"//tensorflow/core/grappler/clusters:virtual_cluster", "//tensorflow/core/grappler/costs:graph_properties", - "//tensorflow/core/grappler/optimizers:constant_folding", - "//tensorflow/core/grappler/optimizers:layout_optimizer", + "//tensorflow/core/grappler/optimizers:meta_optimizer", ] + if_tensorrt([ "@local_config_tensorrt//:nv_infer", ]) + tf_custom_op_library_additional_deps(), @@ -256,7 +266,6 @@ cc_library( "segment/segment.h", "segment/union_find.h", ], - linkstatic = 1, deps = [ "//tensorflow/core:graph", "//tensorflow/core:lib_proto_parsing", @@ -279,6 +288,46 @@ tf_cc_test( ], ) +# Library for the plugin factory +tf_cuda_library( + name = "trt_plugins", + srcs = [ + "plugin/trt_plugin.cc", + "plugin/trt_plugin_factory.cc", + "plugin/trt_plugin_utils.cc", + ], + hdrs = [ + "plugin/trt_plugin.h", + "plugin/trt_plugin_factory.h", + "plugin/trt_plugin_utils.h", + ], + deps = [ + "//tensorflow/core:framework_lite", + "//tensorflow/core:lib_proto_parsing", + ] + if_tensorrt([ + "@local_config_tensorrt//:nv_infer", + ]), +) + +tf_cuda_cc_test( + name = "trt_plugin_factory_test", + size = "small", + srcs = ["plugin/trt_plugin_factory_test.cc"], + tags = [ + "manual", + "notap", + ], + deps = [ + ":trt_plugins", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ] + if_tensorrt([ + "@local_config_cuda//cuda:cuda_headers", + "@local_config_tensorrt//:nv_infer", + ]), +) + py_test( name = "tf_trt_integration_test", srcs = ["test/tf_trt_integration_test.py"], diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 07740277115fe4..b7b26cfb1c05ae 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -14,6 +14,7 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/contrib/tensorrt/convert/convert_graph.h" +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h" #include #include @@ -24,6 +25,9 @@ limitations under the License. #include "tensorflow/contrib/tensorrt/convert/convert_nodes.h" #include "tensorflow/contrib/tensorrt/segment/segment.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" +#include "tensorflow/core/common_runtime/gpu/process_state.h" #include "tensorflow/core/graph/algorithm.h" #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/graph/graph_constructor.h" @@ -31,8 +35,7 @@ limitations under the License. #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/grappler/devices.h" #include "tensorflow/core/grappler/grappler_item.h" -#include "tensorflow/core/grappler/optimizers/constant_folding.h" -#include "tensorflow/core/grappler/optimizers/layout_optimizer.h" +#include "tensorflow/core/grappler/optimizers/meta_optimizer.h" #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" @@ -75,7 +78,8 @@ bool IsTensorRTCandidate(const tensorflow::Node* node) { // TODO(ben,jie): ... 
}; // LINT.ThenChange(//tensorflow/contrib/tensorrt/convert/convert_nodes.h) - return candidate_ops.count(node->type_string()); + return (candidate_ops.count(node->type_string()) || + PluginFactoryTensorRT::GetInstance()->IsPlugin(node->type_string())); } void GetSubGraphIncomingEdges(const tensorflow::Graph& graph, @@ -144,7 +148,8 @@ struct ConvertGraphParams { size_t max_supported_batch_size, size_t max_consumed_workspace_size_bytes, const tensorflow::grappler::GraphProperties& current_graph_properties, std::unordered_map>* output_edges, - int engine_precision_mode) + int engine_precision_mode, const string& device_name, + std::shared_ptr allocator, int cuda_gpu_id) : graph(inp_graph), output_names(output_node_names), subgraph_node_ids(subgraph_node_id_numbers), @@ -152,7 +157,10 @@ struct ConvertGraphParams { max_workspace_size_bytes(max_consumed_workspace_size_bytes), graph_properties(current_graph_properties), output_edge_map(output_edges), - precision_mode(engine_precision_mode) {} + precision_mode(engine_precision_mode), + device_name_(device_name), + allocator_(allocator), + cuda_gpu_id_(cuda_gpu_id) {} tensorflow::Graph& graph; const std::vector& output_names; const std::set& subgraph_node_ids; @@ -161,6 +169,9 @@ struct ConvertGraphParams { const tensorflow::grappler::GraphProperties& graph_properties; std::unordered_map>* output_edge_map; int precision_mode; + string device_name_; + std::shared_ptr allocator_; + int cuda_gpu_id_; std::vector> subgraph_inputs; std::vector> subgraph_outputs; tensorflow::EdgeSet subgraph_incoming_edges; @@ -194,7 +205,7 @@ static tensorflow::Status FillSubGraphEdgeSets(ConvertGraphParams* p) { subgraph_outputs_set.begin(), subgraph_outputs_set.end()); return tensorflow::Status::OK(); -}; +} tensorflow::Status GetCalibNode(ConvertGraphParams* params) { TF_RETURN_IF_ERROR(FillSubGraphEdgeSets(params)); @@ -203,7 +214,8 @@ tensorflow::Status GetCalibNode(ConvertGraphParams* params) { params->subgraph_inputs, 
params->subgraph_outputs, params->max_batch_size, params->max_workspace_size_bytes, params->graph_properties, params->output_edge_map, - &trt_node_def, params->precision_mode); + &trt_node_def, params->precision_mode, params->device_name_, + params->allocator_, params->cuda_gpu_id_); TF_RETURN_IF_ERROR(InjectCalibrationNode(s)); tensorflow::Status status; tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status); @@ -233,7 +245,8 @@ tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) { params->subgraph_inputs, params->subgraph_outputs, params->max_batch_size, params->max_workspace_size_bytes, params->graph_properties, params->output_edge_map, - &trt_node_def, params->precision_mode); + &trt_node_def, params->precision_mode, params->device_name_, + params->allocator_, params->cuda_gpu_id_); TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef(s)); tensorflow::Status status; tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status); @@ -331,19 +344,12 @@ tensorflow::Status ConvertGraphDefToTensorRT( // optimization pass tensorflow::grappler::GrapplerItem item; item.fetch = output_names; - tensorflow::GraphDef gdef; - - // Layout optimization item.graph = graph_def; - tensorflow::grappler::LayoutOptimizer optimizer; - tensorflow::grappler::Cluster* cluster; - // virtual cluster tensorflow::DeviceProperties device_properties; - device_properties.set_type("GPU"); device_properties.mutable_environment()->insert({"architecture", "6"}); - cluster = + tensorflow::grappler::Cluster* cluster = new tensorflow::grappler::VirtualCluster({{"/GPU:0", device_properties}}); // single machine @@ -351,27 +357,38 @@ tensorflow::Status ConvertGraphDefToTensorRT( int num_gpus = tensorflow::grappler::GetNumAvailableGPUs(); VLOG(2) << "cpu_cores: " << num_cpu_cores; VLOG(2) << "gpus: " << num_gpus; - - TF_RETURN_IF_ERROR(optimizer.Optimize(cluster, item, &gdef)); - - // constant folding + tensorflow::RewriterConfig rw_cfg; + 
tensorflow::grappler::MetaOptimizer meta_opt(nullptr, rw_cfg); + tensorflow::GraphDef gdef; + TF_RETURN_IF_ERROR(meta_opt.Optimize(cluster, item, &gdef)); item.graph = gdef; - tensorflow::grappler::ConstantFolding fold(nullptr); - TF_RETURN_IF_ERROR(fold.Optimize(nullptr, item, &gdef)); // AJ refactoring shape inference through grappler/GraphProperties. tensorflow::grappler::GraphProperties static_graph_properties(item); - TF_RETURN_IF_ERROR(static_graph_properties.InferStatically(false)); + TF_RETURN_IF_ERROR(static_graph_properties.InferStatically(true)); // Build full graph + + return ConvertAfterShapes(gdef, output_names, max_batch_size, + max_workspace_size_bytes, new_graph_def, + precision_mode, minimum_segment_size, + static_graph_properties, nullptr); +} + +tensorflow::Status ConvertAfterShapes( + const tensorflow::GraphDef& gdef, const std::vector& output_names, + size_t max_batch_size, size_t max_workspace_size_bytes, + tensorflow::GraphDef* new_graph_def, int precision_mode, + int minimum_segment_size, + const tensorflow::grappler::GraphProperties& graph_properties, + const tensorflow::grappler::Cluster* cluster) { + // Segment the graph into subgraphs that can be converted to TensorRT + tensorflow::tensorrt::segment::SegmentOptions segment_options; tensorflow::FunctionLibraryDefinition flib(tensorflow::OpRegistry::Global(), gdef.library()); tensorflow::Graph graph(flib); TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph( tensorflow::GraphConstructorOptions(), gdef, &graph)); - // Segment the graph into subgraphs that can be converted to TensorRT - tensorflow::tensorrt::segment::SegmentOptions segment_options; - // TODO(ben,jie,sami): exclude output nodes (DISCUSS IT) for (auto node : output_names) { segment_options.exclude_node_list.insert(node); @@ -381,7 +398,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( segment_options.minimum_segment_size = minimum_segment_size; tensorflow::tensorrt::segment::SegmentNodesVector segments; 
TF_RETURN_IF_ERROR(tensorrt::segment::SegmentGraph( - gdef, IsTensorRTCandidate, segment_options, &segments)); + &graph, IsTensorRTCandidate, segment_options, &segments)); if (segments.size() > 1) { VLOG(0) << "MULTIPLE tensorrt candidate conversion: " << segments.size(); } @@ -391,9 +408,21 @@ tensorflow::Status ConvertGraphDefToTensorRT( int count = 0; float total_num_nodes_in_segments = 0.; for (auto s : segments) { - total_num_nodes_in_segments += s.size(); + total_num_nodes_in_segments += s.first.size(); } - for (const std::set& subgraph_node_names : segments) { + // We create the map here since cluster may not be available in all cases. + std::map name_to_device_map; + if (cluster) { + // TODO(aaroey): consider using DeviceSet::FindDeviceByName(), as in a + // distributed environment, devices from different workers can have same + // short name. + for (const auto dm : cluster->GetDeviceSet()->devices()) { + name_to_device_map[dm->name()] = dm; + } + } + for (const auto& segment_nodes_and_device : segments) { + const std::set& subgraph_node_names = + segment_nodes_and_device.first; std::set subgraph_node_ids; size_t max_mem_per_engine = max_workspace_size_bytes * @@ -403,10 +432,40 @@ tensorflow::Status ConvertGraphDefToTensorRT( oss << " " << node_name; subgraph_node_ids.insert(node_map.at(node_name)->id()); } - VLOG(2) << "Subgraph nodes" << oss.str(); + VLOG(1) << "Subgraph nodes at device " << segment_nodes_and_device.second + << " : " << oss.str(); + auto target_device = + name_to_device_map.find(segment_nodes_and_device.second); + std::shared_ptr allocator(0); + + int cuda_device_id = 0; + if (target_device != name_to_device_map.end()) { + tensorflow::TfGpuId tf_gpu_id(target_device->second->parsed_name().id); + CudaGpuId cuda_gpu_id; + Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + if (!s.ok()) { + LOG(ERROR) + << "Cuda device identification failed, using device 0. 
Error= " + << s; + } else { + cuda_device_id = cuda_gpu_id.value(); + } + tensorflow::GPUOptions gpuoptions; + // we need to us PM here since in python path there is no way to get to + // allocators + auto pm = tensorflow::ProcessState::singleton(); + // this should be instantiated by now + auto dev_allocator = pm->GetGPUAllocator(gpuoptions, tf_gpu_id, 1); + VLOG(1) << "Got an allocator for device tf_device=" << tf_gpu_id.value() + << " cuda device= " << cuda_device_id << " at " << dev_allocator; + allocator = std::make_shared(dev_allocator); + } else { // device unknown or not available + allocator = std::make_shared(); + } ConvertGraphParams p(graph, output_names, subgraph_node_ids, max_batch_size, - max_mem_per_engine, static_graph_properties, - &output_edge_map, precision_mode); + max_mem_per_engine, graph_properties, &output_edge_map, + precision_mode, segment_nodes_and_device.second, + allocator, cuda_device_id); if (precision_mode == INT8MODE) { tensorflow::Status status = GetCalibNode(&p); if (status != tensorflow::Status::OK()) { diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.h b/tensorflow/contrib/tensorrt/convert/convert_graph.h index e01e4a5328061a..65a67d7e73e32f 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.h +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.h @@ -18,6 +18,8 @@ limitations under the License. 
#include #include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/grappler/clusters/cluster.h" +#include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/types.h" @@ -43,6 +45,14 @@ tensorflow::Status ConvertGraphDefToTensorRT( size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def, int precision_mode, int minimum_segment_size); +// Method to call from optimization pass +tensorflow::Status ConvertAfterShapes( + const tensorflow::GraphDef& graph, const std::vector& output_names, + size_t max_batch_size, size_t max_workspace_size_bytes, + tensorflow::GraphDef* new_graph_def, int precision_mode, + int minimum_segment_size, + const tensorflow::grappler::GraphProperties& graph_properties, + const tensorflow::grappler::Cluster* cluster); } // namespace convert } // namespace tensorrt } // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index b81ae9dc3eeed6..32b211dcd1e282 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/contrib/tensorrt/convert/convert_nodes.h" +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h" #include #include @@ -240,35 +241,49 @@ class TFAttrs { return attrs_.at(key); } template - T get(string key) const; + T get(const string& key) const; template - T get(string key, const T& default_value) const { + T get(const string& key, const T& default_value) const { return attrs_.count(key) ? 
this->get(key) : default_value; } + std::vector GetAllAttrKey() { + std::vector attr_list; + for (const auto& attr_item : attrs_) { + attr_list.emplace_back(attr_item.first); + } + return attr_list; + } + private: typedef std::map AttrMap; AttrMap attrs_; }; template <> -string TFAttrs::get(string key) const { +string TFAttrs::get(const string& key) const { return this->at(key)->s(); } template <> -std::vector TFAttrs::get>(string key) const { +std::vector TFAttrs::get>(const string& key) const { auto attr = this->at(key)->list().i(); return std::vector(attr.begin(), attr.end()); } template <> -std::vector TFAttrs::get>(string key) const { +std::vector TFAttrs::get>(const string& key) const { + auto attr = this->at(key)->list().f(); + return std::vector(attr.begin(), attr.end()); +} + +template <> +std::vector TFAttrs::get>(const string& key) const { auto attr = this->at(key)->list().s(); return std::vector(attr.begin(), attr.end()); } template <> -nvinfer1::Dims TFAttrs::get(string key) const { +nvinfer1::Dims TFAttrs::get(const string& key) const { auto values = this->get>(key); nvinfer1::Dims dims; dims.nbDims = values.size(); @@ -278,24 +293,25 @@ nvinfer1::Dims TFAttrs::get(string key) const { } template <> -nvinfer1::DataType TFAttrs::get(string key) const { +nvinfer1::DataType TFAttrs::get(const string& key) const { nvinfer1::DataType trt_dtype(nvinfer1::DataType::kFLOAT); TF_CHECK_OK(ConvertDType(this->at(key)->type(), &trt_dtype)); return trt_dtype; } template <> -tensorflow::DataType TFAttrs::get(string key) const { +tensorflow::DataType TFAttrs::get( + const string& key) const { return this->at(key)->type(); } template <> -float TFAttrs::get(string key) const { +float TFAttrs::get(const string& key) const { return this->at(key)->f(); } template <> -bool TFAttrs::get(string key) const { +bool TFAttrs::get(const string& key) const { return this->at(key)->b(); } @@ -424,6 +440,7 @@ using OpConverter = class Converter { std::unordered_map trt_tensors_; 
std::unordered_map op_registry_; + OpConverter plugin_converter_; nvinfer1::INetworkDefinition* trt_network_; std::list> temp_bufs_; tensorflow::tensorrt::TRTWeightStore* weight_store_; @@ -481,7 +498,7 @@ class Converter { weights.SetValues(weight_store_->store_.back().data()); return weights; } - bool isFP16() { return fp16_; }; + bool isFP16() { return fp16_; } TRT_ShapedWeights get_temp_weights_like(const TRT_ShapedWeights& weights) { return this->get_temp_weights(weights.type_, weights.shape_); } @@ -490,13 +507,17 @@ class Converter { std::vector inputs; TF_RETURN_IF_ERROR(this->get_inputs(node_def, &inputs)); string op = node_def.op(); - if (!op_registry_.count(op)) { - return tensorflow::errors::Unimplemented( - "No converter registered for op: " + op); - } - OpConverter op_converter = op_registry_.at(op); std::vector outputs; - TF_RETURN_IF_ERROR(op_converter(*this, node_def, inputs, &outputs)); + if (PluginFactoryTensorRT::GetInstance()->IsPlugin(op)) { + TF_RETURN_IF_ERROR(plugin_converter_(*this, node_def, inputs, &outputs)); + } else { + if (!op_registry_.count(op)) { + return tensorflow::errors::Unimplemented( + "No converter registered for op: " + op); + } + OpConverter op_converter = op_registry_.at(op); + TF_RETURN_IF_ERROR(op_converter(*this, node_def, inputs, &outputs)); + } for (size_t i = 0; i < outputs.size(); ++i) { TRT_TensorOrWeights output = outputs.at(i); // TODO(jie): tf protobuf seems to be omitting the :0 suffix @@ -672,7 +693,7 @@ std::function LambdaFactory::unary() { case OP_CATEGORY::RSQRT: { VLOG(2) << "RSQRT GETS DONE"; return [](Eigen::half t) -> Eigen::half { - return Eigen::half(1.0 / sqrt(float(t))); + return Eigen::half(1.0 / sqrt(static_cast(t))); }; } case OP_CATEGORY::NEG: @@ -1158,9 +1179,9 @@ tensorflow::Status BinaryTensorOpTensor( CHECK_EQ_TYPE(tensor_r->getType(), dtype); auto op_pair = ops.find(node_def.op()); if (op_pair == ops.end()) - return tensorflow::errors::Unimplemented( - "binary op: " + node_def.op() + - " 
not supported at: " + node_def.name()); + return tensorflow::errors::Unimplemented("binary op: " + node_def.op() + + " not supported at: " + + node_def.name()); nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( *const_cast(tensor_l), @@ -1173,6 +1194,45 @@ tensorflow::Status BinaryTensorOpTensor( return tensorflow::Status::OK(); } +tensorflow::Status ConvertPlugin(Converter& ctx, + const tensorflow::NodeDef& node_def, + const std::vector& inputs, + std::vector* outputs) { + // prepare input + std::vector all_inputs; + for (auto input : inputs) { + all_inputs.emplace_back(const_cast(input.tensor())); + } + + // plugin is owned by PluginFactory + // TODO(jie): destroy plugins later (resource management) + PluginTensorRT* plugin = + PluginFactoryTensorRT::GetInstance()->CreatePlugin(node_def.op()); + + // passing attributes + // TODO(jie): support more general attribute + TFAttrs attrs(node_def); + auto attr_key_vector = attrs.GetAllAttrKey(); + for (auto attr_key : attr_key_vector) { + // TODO(jie): support only list of float for toy example here. 
+ auto data = attrs.get>(attr_key); + size_t size_data = data.size() * sizeof(float); + if (!plugin->SetAttribute(attr_key, static_cast(data.data()), + size_data)) { + return tensorflow::errors::InvalidArgument("plugin SetAttribute failed"); + } + } + + nvinfer1::IPluginLayer* layer = ctx.network()->addPlugin( + &all_inputs[0], static_cast(inputs.size()), *plugin); + + for (int i = 0; i < layer->getNbOutputs(); i++) { + nvinfer1::ITensor* output_tensor = layer->getOutput(i); + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + } + return tensorflow::Status::OK(); +} + tensorflow::Status ConvertPlaceholder( Converter& ctx, const tensorflow::NodeDef& node_def, const std::vector& inputs, @@ -2073,6 +2133,8 @@ void Converter::register_op_converters() { op_registry_["Reshape"] = ConvertReshape; op_registry_["FusedBatchNorm"] = ConvertFusedBatchNorm; op_registry_["FusedBatchNormV2"] = ConvertFusedBatchNorm; + + plugin_converter_ = ConvertPlugin; } } // namespace @@ -2144,7 +2206,7 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( if (!status.ok() || !calib_res->calibrator_) { return tensorflow::errors::FailedPrecondition( "You must run calibration" - " and inference conversion in the same proces"); + " and inference conversion in the same process"); } calib_res->calibrator_->setDone(); @@ -2213,60 +2275,63 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( return tensorflow::Status::OK(); } -tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { - // Visit nodes in reverse topological order and construct the TRT network. 
- - // Toposort +tensorflow::Status ReverseTopologicalSort( + const tensorrt::convert::SubGraphParams& s, + std::list* order) { std::vector order_vec; tensorflow::GetPostOrder(s.graph, &order_vec); // Select just the subgraph - std::list order; for (tensorflow::Node* node : order_vec) { if (s.subgraph_node_ids.count(node->id())) { - order.push_front(node); // we want topological order to construct the + // We want topological order to contstruct the // network layer by layer + order->push_front(node); } } - // topological order is needed to build TRT network - static int static_id = 0; + return tensorflow::Status::OK(); +} + +tensorflow::Status SetInputList( + const tensorrt::convert::SubGraphParams& s, + tensorflow::NodeDefBuilder* op_builder, + const std::vector* input_names, + std::vector* input_dtypes) { + std::vector income_edges; + VLOG(2) << "input edge size: " << input_names->size(); + for (size_t i = 0; i < input_names->size(); ++i) { + VLOG(2) << "input edges: " << i << " " << input_names->at(i); + int output_idx = s.input_inds.at(i).second; + // we wired up the input here already, it is redundant to do it again in + // ConvertSubGraphToTensorRT(convert_graph.cc) + auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut( + input_names->at(i), output_idx, input_dtypes->at(i)); + income_edges.push_back(incoming_edge); + } + tensorflow::gtl::ArraySlice input_list( + income_edges); + op_builder->Input(input_list); + return tensorflow::Status::OK(); +} + +string SubgraphNameScopeGenerator(const std::list* order) { string subgraph_name_scope; - if (!order.empty()) { - subgraph_name_scope = order.front()->name(); + if (!order->empty()) { + subgraph_name_scope = order->front()->name(); } - for (const tensorflow::Node* node : order) { + for (const tensorflow::Node* node : *order) { subgraph_name_scope = GetCommonNameScope(subgraph_name_scope, node->name()); } // TODO(sami,ben,jie): proper naming! 
- string calib_op_name = - StrCat(subgraph_name_scope, "my_trt_calib_op_", static_id); - string engine_name = StrCat(subgraph_name_scope, "my_trt_op", static_id); - static_id++; - auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance(); - auto op_rmgr = trt_rmgr->getManager("TRTCalibOps"); - auto op_res = new tensorflow::tensorrt::TRTCalibrationResource(); - TF_CHECK_OK(op_rmgr->Create(calib_op_name, calib_op_name, op_res)); - op_res->logger_ = new tensorflow::tensorrt::Logger(); - op_res->builder_ = nvinfer1::createInferBuilder(*(op_res->logger_)); - - if (!op_res->builder_) { - return tensorflow::errors::Internal( - "failed to create TensorRT builder object"); - } - - op_res->network_ = op_res->builder_->createNetwork(); - if (!op_res->network_) { - return tensorflow::errors::Internal( - "failed to create TensorRT network object"); - } - - // Build the network - auto weight_rmgr = trt_rmgr->getManager("WeightStore"); - auto ws = new tensorflow::tensorrt::TRTWeightStore(); - TF_CHECK_OK(weight_rmgr->Create(calib_op_name, calib_op_name, ws)); - Converter converter(op_res->network_, ws, s.precision_mode == FP16MODE); + return subgraph_name_scope; +} - std::vector input_names; - std::vector input_dtypes; +tensorflow::Status ConvertSubgraph( + Converter& converter, tensorrt::convert::SubGraphParams& s, + std::list* order, std::vector* input_names, + std::vector* input_dtypes, + std::vector* output_names, + std::vector* output_dtypes, + const string& engine_name) { for (const std::pair& input : s.input_inds) { VLOG(2) << "parsing input. 
Node id= " << input.first; int node_id = input.first; @@ -2309,22 +2374,21 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { auto op_info = op_info_vec.at(shape_inference_output_idx); tensorflow::DataType tf_dtype = op_info.dtype(); - input_dtypes.push_back(tf_dtype); + input_dtypes->push_back(tf_dtype); nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); auto type_status = ConvertDType(tf_dtype, &dtype); if (type_status != tensorflow::Status::OK()) { - LOG(WARNING) << "Data type conversion for input '" << node_name - << "' failed"; + LOG(WARNING) << "Type conversion failed for " << node_name; return type_status; } - VLOG(2) << "accessing output index of: " << output_idx + VLOG(2) << "Accessing output index of: " << output_idx << ", at node: " << node_name - << "with output entry from shape_map: " << op_info_vec.size(); + << " with output entry from shape_map: " << op_info_vec.size(); // TODO(ben,jie): update TRT input format/dimension - nvinfer1::DimsCHW input_dim_psuedo_chw; - for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1; + nvinfer1::DimsCHW input_dim_pseudo_chw; + for (int i = 0; i < 3; i++) input_dim_pseudo_chw.d[i] = 1; // TODO(jie): TRT 3.x only support 4 dimensional input tensor. // update the code once TRT 4.0 comes out. @@ -2338,7 +2402,7 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { for (int i = 1; i < op_info.shape().dim_size(); i++) { VLOG(2) << "dimension: " << i << " , size: " << op_info.shape().dim(i).size(); - input_dim_psuedo_chw.d[i - 1] = op_info.shape().dim(i).size(); + input_dim_pseudo_chw.d[i - 1] = op_info.shape().dim(i).size(); } // TODO(ben,jie): proper way to restore input tensor name? 
@@ -2347,33 +2411,29 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { input_tensor_name = StrCat(node_name, ":", output_idx); } - input_names.push_back(input_tensor_name); + input_names->push_back(input_tensor_name); nvinfer1::ITensor* input_tensor = converter.network()->addInput( - input_tensor_name.c_str(), dtype, input_dim_psuedo_chw); + input_tensor_name.c_str(), dtype, input_dim_pseudo_chw); if (!input_tensor) return tensorflow::errors::InvalidArgument( "Failed to create Input layer"); - VLOG(2) << "input tensor name :" << input_tensor_name; + VLOG(2) << "Input tensor name :" << input_tensor_name; if (!converter.insert_input_tensor(input_tensor_name, input_tensor)) return tensorflow::errors::AlreadyExists( - "output tensor already exists for op: " + input_tensor_name); + "Output tensor already exists for op: " + input_tensor_name); } - VLOG(2) << "finished sorting"; - - for (const tensorflow::Node* node : order) { + for (const tensorflow::Node* node : *order) { const tensorflow::NodeDef& node_def = node->def(); - VLOG(2) << "converting node: " << node_def.name() << " , " << node_def.op(); + VLOG(2) << "Converting node: " << node_def.name() << " , " << node_def.op(); TF_RETURN_IF_ERROR(converter.convert_node(node_def)); } - VLOG(2) << "finished conversion"; + VLOG(2) << "Finished conversion"; // Gather output metadata - std::vector output_names; - std::vector output_dtypes; int trt_engine_op_output_idx = 0; for (const std::pair& output : s.output_inds) { int node_id = output.first; @@ -2388,14 +2448,13 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { : StrCat(engine_name, ":", trt_engine_op_output_idx), {output_idx, tensor_name}}); trt_engine_op_output_idx++; - if (output_idx != 0) { - tensor_name = StrCat(tensor_name, ":", output_idx); - } - VLOG(1) << "output tensor name: " << tensor_name; - output_names.push_back(tensor_name); + if (output_idx != 0) + 
tensorflow::strings::StrAppend(&tensor_name, ":", output_idx); + VLOG(2) << "Output tensor name: " << tensor_name; + output_names->push_back(tensor_name); auto tensor_or_weights = converter.get_tensor(tensor_name); if (!tensor_or_weights.is_tensor()) { - return tensorflow::errors::InvalidArgument("Output node'" + tensor_name + + return tensorflow::errors::InvalidArgument("Output node '" + tensor_name + "' is weights not tensor"); } nvinfer1::ITensor* tensor = tensor_or_weights.tensor(); @@ -2405,12 +2464,65 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { } converter.network()->markOutput(*tensor); tensorflow::DataType tf_dtype = node->output_type(output_idx); - output_dtypes.push_back(tf_dtype); + output_dtypes->push_back(tf_dtype); nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT; TF_RETURN_IF_ERROR(ConvertDType(tf_dtype, &trt_dtype)); tensor->setType(trt_dtype); } + return tensorflow::Status::OK(); +} + +tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { + // Visit nodes in reverse topological order and construct the TRT network. + // Toposort + std::list order; + TF_RETURN_IF_ERROR(ReverseTopologicalSort(s, &order)); + + static int static_id = 0; + string subgraph_name_scope = SubgraphNameScopeGenerator(&order); + // TODO(sami,ben,jie): proper naming! 
+ string calib_op_name = + StrCat(subgraph_name_scope, "my_trt_calib_op_", static_id); + string engine_name = StrCat(subgraph_name_scope, "my_trt_op", static_id); + static_id++; + + auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance(); + auto op_rmgr = trt_rmgr->getManager("TRTCalibOps"); + auto op_res = new tensorflow::tensorrt::TRTCalibrationResource(); + TF_CHECK_OK(op_rmgr->Create(calib_op_name, calib_op_name, op_res)); + op_res->logger_ = new tensorflow::tensorrt::Logger(); + cudaSetDevice(s.cuda_gpu_id_); + op_res->builder_ = nvinfer1::createInferBuilder(*(op_res->logger_)); + op_res->allocator_ = s.allocator_; +#if NV_TENSORRT_MAJOR > 3 + op_res->builder_->setGpuAllocator(s.allocator_.get()); +#endif + if (!op_res->builder_) { + return tensorflow::errors::Internal( + "failed to create TensorRT builder object"); + } + + op_res->network_ = op_res->builder_->createNetwork(); + if (!op_res->network_) { + return tensorflow::errors::Internal( + "failed to create TensorRT network object"); + } + + // Build the network + auto weight_rmgr = trt_rmgr->getManager("WeightStore"); + auto ws = new tensorflow::tensorrt::TRTWeightStore(); + TF_CHECK_OK(weight_rmgr->Create(calib_op_name, calib_op_name, ws)); + Converter converter(op_res->network_, ws, s.precision_mode == FP16MODE); + + std::vector input_names; + std::vector input_dtypes; + std::vector output_names; + std::vector output_dtypes; + TF_RETURN_IF_ERROR(ConvertSubgraph(converter, s, &order, &input_names, + &input_dtypes, &output_names, + &output_dtypes, engine_name)); + VLOG(2) << "Finished processing outputs"; // Build the engine @@ -2422,21 +2534,8 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { // Build the TRT op // TODO(sami,ben,jie): proper naming! 
tensorflow::NodeDefBuilder op_builder(calib_op_name, "TRTCalibOp"); - std::vector income_edges; - for (size_t i = 0; i < input_names.size(); ++i) { - int output_idx = s.input_inds.at(i).second; - // we wired up the input here already, it is redundant to do it again in - // ConvertSubGraphToTensorRT(convert_graph.cc) - auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut( - input_names.at(i), output_idx, input_dtypes.at(i)); - VLOG(1) << calib_op_name << " input " << i << " = " << input_names.at(i) - << ":" << output_idx - << " dType= " << tensorflow::DataTypeString(input_dtypes.at(i)); - income_edges.push_back(incoming_edge); - } - tensorflow::gtl::ArraySlice input_list( - income_edges); - op_builder.Input(input_list); + SetInputList(s, &op_builder, &input_names, &input_dtypes); + std::vector segment_names; segment_names.reserve(s.subgraph_node_ids.size()); for (int i : s.subgraph_node_ids) { @@ -2460,46 +2559,29 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { tensorflow::Status ConvertSubGraphToTensorRTNodeDef( tensorrt::convert::SubGraphParams& s) { // Visit nodes in reverse topological order and construct the TRT network. 
- - // Toposort - std::vector order_vec; - tensorflow::GetPostOrder(s.graph, &order_vec); - // Select just the subgraph std::list order; - for (tensorflow::Node* node : order_vec) { - if (s.subgraph_node_ids.count(node->id())) { - // We want topological order to contstruct the - // network layer by layer - order.push_front(node); - } - } - // Topological order is needed to build TRT network + TF_RETURN_IF_ERROR(ReverseTopologicalSort(s, &order)); - tensorflow::tensorrt::Logger trt_logger; + static int static_id = 0; + string subgraph_name_scope = SubgraphNameScopeGenerator(&order); + string engine_name = StrCat(subgraph_name_scope, "my_trt_op", static_id++); + tensorflow::tensorrt::Logger trt_logger; + cudaSetDevice(s.cuda_gpu_id_); auto trt_builder = infer_object(nvinfer1::createInferBuilder(trt_logger)); if (!trt_builder) { return tensorflow::errors::Internal( "Failed to create TensorRT builder object"); } - +#if NV_TENSORRT_MAJOR > 3 + trt_builder->setGpuAllocator(s.allocator_.get()); +#endif auto trt_network = infer_object(trt_builder->createNetwork()); if (!trt_network) { return tensorflow::errors::Internal( "Failed to create TensorRT network object"); } - string subgraph_name_scope; - if (!order.empty()) { - subgraph_name_scope = order.front()->name(); - } - for (const tensorflow::Node* node : order) { - subgraph_name_scope = GetCommonNameScope(subgraph_name_scope, node->name()); - } - static int static_id = 0; - // TODO(sami,ben,jie): proper naming! - string engine_name = StrCat(subgraph_name_scope, "my_trt_op"); - engine_name = StrCat(engine_name, static_id++); auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance(); auto weight_rmgr = trt_rmgr->getManager("WeightStore"); auto ws = new tensorflow::tensorrt::TRTWeightStore(); @@ -2510,147 +2592,11 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( std::vector input_names; std::vector input_dtypes; - for (const std::pair& input : s.input_inds) { - VLOG(2) << "parsing input. 
Node id= " << input.first; - int node_id = input.first; - int output_idx = input.second; - tensorflow::Node* node = s.graph.FindNodeId(node_id); - auto node_name = node->name(); - // input_names should use the node name in the graph - // here it should be the input tensor name -> matching the binding - // insert original node name without port - auto tensor_name = node_name; - if (output_idx != 0) { - tensor_name = StrCat(tensor_name, ":", output_idx); - } - - VLOG(2) << "input name: " << node_name << " tensor_name: " << tensor_name - << " idx: " << output_idx; - - auto shape_inference_node_name = node_name; - auto shape_inference_output_idx = output_idx; - // rewire the shape inference to original node in the graph - if (s.output_edge_map->count(tensor_name)) { - shape_inference_node_name = s.output_edge_map->at(tensor_name).second; - shape_inference_output_idx = s.output_edge_map->at(tensor_name).first; - } - if (shape_inference_output_idx < 0) continue; - VLOG(2) << "shapeinference name: " << shape_inference_node_name - << " idx: " << shape_inference_output_idx; - - if (!s.graph_properties.HasOutputProperties(shape_inference_node_name)) - return tensorflow::errors::Internal("failed to find input node: " + - shape_inference_node_name); - - auto op_info_vec = - s.graph_properties.GetOutputProperties(shape_inference_node_name); - if (static_cast(op_info_vec.size()) <= shape_inference_output_idx) - return tensorflow::errors::Internal( - "accessing output index of: ", shape_inference_output_idx, - ", at node: ", shape_inference_node_name, - " with output entry from shape_map: ", op_info_vec.size()); - - auto op_info = op_info_vec.at(shape_inference_output_idx); - tensorflow::DataType tf_dtype = op_info.dtype(); - input_dtypes.push_back(tf_dtype); - - nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); - auto type_status = ConvertDType(tf_dtype, &dtype); - if (type_status != tensorflow::Status::OK()) { - LOG(WARNING) << "Type conversion failed for " << node_name; - 
return type_status; - } - - VLOG(2) << "Accessing output index of: " << output_idx - << ", at node: " << node_name - << " with output entry from shape_map: " << op_info_vec.size(); - // TODO(ben,jie): update TRT input format/dimension - nvinfer1::DimsCHW input_dim_psuedo_chw; - for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1; - - // TODO(jie): TRT 3.x only support 4 dimensional input tensor. - // update the code once TRT 4.0 comes out. - if (op_info.shape().dim_size() != 4) { - string err_str = "Require 4 dimensional input."; - StrAppend(&err_str, " Got ", op_info.shape().dim_size(), " ", - shape_inference_node_name); - return tensorflow::errors::Unimplemented(err_str); - } - - for (int i = 1; i < op_info.shape().dim_size(); i++) { - VLOG(2) << "dimension: " << i - << " , size: " << op_info.shape().dim(i).size(); - input_dim_psuedo_chw.d[i - 1] = op_info.shape().dim(i).size(); - } - - // TODO(ben,jie): proper way to restore input tensor name? - auto input_tensor_name = node_name; - if (output_idx != 0) { - input_tensor_name = StrCat(node_name, ":", output_idx); - } - - input_names.push_back(input_tensor_name); - nvinfer1::ITensor* input_tensor = converter.network()->addInput( - input_tensor_name.c_str(), dtype, input_dim_psuedo_chw); - - if (!input_tensor) - return tensorflow::errors::InvalidArgument( - "Failed to create Input layer"); - VLOG(2) << "Input tensor name :" << input_tensor_name; - - if (!converter.insert_input_tensor(input_tensor_name, input_tensor)) - return tensorflow::errors::AlreadyExists( - "Output tensor already exists for op: " + input_tensor_name); - } - - VLOG(2) << "Finished sorting"; - - for (const tensorflow::Node* node : order) { - const tensorflow::NodeDef& node_def = node->def(); - VLOG(2) << "Converting node: " << node_def.name() << " , " << node_def.op(); - TF_RETURN_IF_ERROR(converter.convert_node(node_def)); - } - - VLOG(2) << "Finished conversion"; - - // Gather output metadata std::vector output_names; std::vector 
output_dtypes; - int trt_engine_op_output_idx = 0; - for (const std::pair& output : s.output_inds) { - int node_id = output.first; - int output_idx = output.second; - tensorflow::Node* node = s.graph.FindNodeId(node_id); - string op_name = node->name(); - string tensor_name = op_name; - - s.output_edge_map->insert( - {trt_engine_op_output_idx == 0 - ? engine_name - : StrCat(engine_name, ":", trt_engine_op_output_idx), - {output_idx, tensor_name}}); - trt_engine_op_output_idx++; - if (output_idx != 0) - tensorflow::strings::StrAppend(&tensor_name, ":", output_idx); - VLOG(2) << "Output tensor name: " << tensor_name; - output_names.push_back(tensor_name); - auto tensor_or_weights = converter.get_tensor(tensor_name); - if (!tensor_or_weights.is_tensor()) { - return tensorflow::errors::InvalidArgument("Output node '" + tensor_name + - "' is weights not tensor"); - } - nvinfer1::ITensor* tensor = tensor_or_weights.tensor(); - if (!tensor) { - return tensorflow::errors::NotFound("Output tensor not found: " + - tensor_name); - } - converter.network()->markOutput(*tensor); - tensorflow::DataType tf_dtype = node->output_type(output_idx); - output_dtypes.push_back(tf_dtype); - nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT; - TF_RETURN_IF_ERROR(ConvertDType(tf_dtype, &trt_dtype)); - tensor->setType(trt_dtype); - } + TF_RETURN_IF_ERROR(ConvertSubgraph(converter, s, &order, &input_names, + &input_dtypes, &output_names, + &output_dtypes, engine_name)); VLOG(2) << "Finished output"; @@ -2686,20 +2632,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( // Build the TRT op tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); - std::vector income_edges; - VLOG(2) << "input edge size: " << input_names.size(); - for (size_t i = 0; i < input_names.size(); ++i) { - VLOG(2) << "input edges: " << i << " " << input_names.at(i); - int output_idx = s.input_inds.at(i).second; - // we wired up the input here already, it is redundant to do it again in - // 
ConvertSubGraphToTensorRT(convert_graph.cc) - auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut( - input_names.at(i), output_idx, input_dtypes.at(i)); - income_edges.push_back(incoming_edge); - } - tensorflow::gtl::ArraySlice input_list( - income_edges); - op_builder.Input(input_list); + SetInputList(s, &op_builder, &input_names, &input_dtypes); VLOG(0) << "Finished op preparation"; @@ -2707,9 +2640,11 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( .Attr("input_nodes", input_names) .Attr("output_nodes", output_names) .Attr("OutT", output_dtypes) + .Device(s.device_name_) .Finalize(s.trt_node); - VLOG(0) << status.ToString() << " finished op building"; + VLOG(0) << status.ToString() << " finished op building for " << engine_name + << " on device " << s.device_name_; return tensorflow::Status::OK(); } diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index 954a1e72f86043..3f6592cd25ff01 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -22,11 +22,11 @@ limitations under the License. 
#include #include +#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h" #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/lib/core/status.h" - #if GOOGLE_CUDA #if GOOGLE_TENSORRT @@ -48,7 +48,9 @@ struct SubGraphParams { const tensorflow::grappler::GraphProperties& current_graph_properties, std::unordered_map>* output_edges, tensorflow::NodeDef* constructed_trt_node, - int engine_precision_mode = FP32MODE) + int engine_precision_mode = FP32MODE, const string& device_name = "", + std::shared_ptr allocator = nullptr, + int cuda_gpu_id = 0) : graph(inp_graph), subgraph_node_ids(subgraph_node_id_numbers), input_inds(input_indices), @@ -58,7 +60,10 @@ struct SubGraphParams { graph_properties(current_graph_properties), output_edge_map(output_edges), trt_node(constructed_trt_node), - precision_mode(engine_precision_mode) {} + precision_mode(engine_precision_mode), + device_name_(device_name), + allocator_(allocator), + cuda_gpu_id_(cuda_gpu_id) {} tensorflow::Graph& graph; const std::set& subgraph_node_ids; @@ -70,6 +75,9 @@ struct SubGraphParams { std::unordered_map>* output_edge_map; tensorflow::NodeDef* trt_node; const int precision_mode; + const string device_name_; + std::shared_ptr allocator_; + const int cuda_gpu_id_; }; // TODO(sami): Replace references with const reference or pointers diff --git a/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc b/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc new file mode 100644 index 00000000000000..8f634b1f747173 --- /dev/null +++ b/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc @@ -0,0 +1,246 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/convert/trt_optimization_pass.h" +#include "tensorflow/contrib/tensorrt/convert/convert_graph.h" +#include "tensorflow/core/grappler/clusters/cluster.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/logging.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +namespace tensorflow { +namespace tensorrt { +namespace convert { +// TODO(sami): Remove VLOG messages once the code matures +using tensorflow::str_util::Uppercase; +using tensorflow::strings::StrAppend; +using tensorflow::strings::StrCat; + +tensorflow::Status TRTOptimizationPass::Init( + const tensorflow::RewriterConfig_CustomGraphOptimizer* config) { + VLOG(1) << "Called INIT for " << name_ << " with config = " << config; + if (config == nullptr) { + maximum_workspace_size_ = 2 << 30; + return tensorflow::Status::OK(); + } + const auto params = config->parameter_map(); + if (params.count("minimum_segment_size")) { + minimum_segment_size_ = params.at("minimum_segment_size").i(); + } + if (params.count("max_batch_size")) { + maximum_batch_size_ = params.at("max_batch_size").i(); + } + if (params.count("max_workspace_size_bytes")) + maximum_workspace_size_ = params.at("max_workspace_size_bytes").i(); + if (params.count("precision_mode")) { + string pm = 
Uppercase(params.at("precision_mode").s()); + if (pm == "FP32") { + precision_mode_ = 0; + } else if (pm == "FP16") { + precision_mode_ = 1; + } else if (pm == "INT8") { + precision_mode_ = 2; + } else { + LOG(ERROR) << "Unknown precision mode '" << pm << "'"; + return tensorflow::errors::InvalidArgument( + "Unknown precision mode argument" + pm + + " Valid values are FP32, FP16, INT8"); + } + } + return tensorflow::Status::OK(); +} + +void TRTOptimizationPass::PrintDebugInfo( + tensorflow::grappler::Cluster* cluster, + const tensorflow::grappler::GrapplerItem& item) { + VLOG(1) << "Cluster = " << cluster; + string offset(" "); + string offset2 = StrCat(offset, offset); + string offset3 = StrCat(offset2, offset); + string offset4 = StrCat(offset2, offset2); + if (cluster) { + VLOG(1) << offset << "type = " << cluster->type(); + VLOG(1) << offset << "num warmup steps = " << cluster->NumWarmupSteps(); + const auto dev_names = cluster->GetDeviceNames(); + if (dev_names.size()) { + VLOG(1) << offset << " Device names:"; + for (const auto s : dev_names) { + VLOG(1) << offset2 << s; + } + } + std::unordered_map peak_mem; + auto status = cluster->GetPeakMemoryUsage(&peak_mem); + if (status == tensorflow::Status::OK()) { + VLOG(1) << offset << "Peak Memory Usage :"; + for (auto s : peak_mem) { + VLOG(1) << offset2 << s.first << " = " << s.second; + } + } + + const auto dev_props = cluster->GetDevices(); + if (dev_props.size()) { + VLOG(1) << offset << "Device properties:"; + for (auto k : dev_props) { + VLOG(1) << offset2 << k.first; + const auto& dt = k.second; + VLOG(1) << offset3 << "type = " << dt.type(); + VLOG(1) << offset3 << "vendor = " << dt.vendor(); + VLOG(1) << offset3 << "model = " << dt.model(); + VLOG(1) << offset3 << "frequency = " << dt.frequency(); + VLOG(1) << offset3 << "num cores = " << dt.num_cores(); + VLOG(1) << offset3 << "num registers = " << dt.num_registers(); + VLOG(1) << offset3 << "L1 cache size = " << dt.l1_cache_size(); + VLOG(1) << offset3 
<< "L2 cache size = " << dt.l2_cache_size(); + VLOG(1) << offset3 << "L3 cache size = " << dt.l3_cache_size(); + VLOG(1) << offset3 << "SHMem per SMP = " + << dt.shared_memory_size_per_multiprocessor(); + VLOG(1) << offset3 << "memory size = " << dt.memory_size(); + VLOG(1) << offset3 << "bandwidth = " << dt.bandwidth(); + if (dt.environment_size()) { + VLOG(1) << offset3 << "environment :"; + for (const auto e : dt.environment()) { + VLOG(1) << offset4 << e.first << " = " << e.second; + } + } + } + } + } + VLOG(1) << "item: " << item.id; + if (item.feed.size()) { + VLOG(1) << offset << "Feeds :"; + for (const auto& f : item.feed) { + const auto& shape = f.second.shape(); + VLOG(1) << offset2 << f.first << " = shaped " << shape.DebugString(); + } + } else { + VLOG(1) << offset << "No Feeds"; + } + if (item.fetch.size()) { + VLOG(1) << offset << "Fetches :"; + for (const auto& f : item.fetch) { + VLOG(1) << offset2 << f; + } + } else { + VLOG(1) << offset << "No Fetches"; + } + + if (item.init_ops.size()) { + VLOG(1) << offset << "init ops :"; + for (const auto& f : item.init_ops) { + VLOG(1) << offset2 << f; + } + } else { + VLOG(1) << offset << "No init ops"; + } + VLOG(1) << "Save Op = " << item.save_op; + VLOG(1) << "Restore Op = " << item.restore_op; + VLOG(1) << "save_restore_loc_tensor = " << item.save_restore_loc_tensor; + if (item.keep_ops.size()) { + VLOG(1) << offset << "keep ops :"; + for (const auto& f : item.keep_ops) { + VLOG(1) << offset2 << f; + } + } else { + VLOG(1) << offset << "No keep ops"; + } + VLOG(3) << item.graph.DebugString(); + for (const auto dev : cluster->GetDeviceSet()->devices()) { + const auto& pname = dev->parsed_name(); + VLOG(1) << "Device name= " << dev->name() + << " parsedname job= " << pname.job << " id= " << pname.id + << " has_id: " << pname.has_id << " has_job: " << pname.has_job + << "has_type: " << pname.has_type << " type =" << pname.type; + } +} + +tensorflow::Status TRTOptimizationPass::Optimize( + 
tensorflow::grappler::Cluster* cluster, + const tensorflow::grappler::GrapplerItem& item, GraphDef* optimized_graph) { + VLOG(1) << "Called TRTOptimization Pass " << name_; + if (VLOG_IS_ON(1)) { + PrintDebugInfo(cluster, item); + } + int max_dim = -1; + if (item.feed.size()) { + for (const auto& f : item.feed) { + const auto& shape = f.second.shape(); + if (shape.dims() > 0) { + if (shape.dim_size(0) > max_dim) max_dim = shape.dim_size(0); + } + } + } + if (maximum_batch_size_ < 0) { // automatic batch size from input + if (max_dim > 0) { + maximum_batch_size_ = max_dim; + VLOG(1) << "Setting maximum batch size to " << max_dim; + } else { + maximum_batch_size_ = 128; + LOG(WARNING) << "Maximum batch size is not set" + " and can't be deduced from inputs setting it to" + << maximum_batch_size_ + << ". Suggest configuring it from configuration parameters"; + } + } else { + if (max_dim > maximum_batch_size_) { + LOG(WARNING) << "Configured batch size " << maximum_batch_size_ + << " is less than input batch size " << max_dim + << " adjusting maximum batch size to match input batch size"; + } + } + tensorflow::grappler::GraphProperties static_graph_properties(item); + TF_RETURN_IF_ERROR(static_graph_properties.InferStatically(true)); + auto status = tensorflow::tensorrt::convert::ConvertAfterShapes( + item.graph, item.fetch, maximum_batch_size_, maximum_workspace_size_, + optimized_graph, precision_mode_, minimum_segment_size_, + static_graph_properties, cluster); + VLOG(2) << optimized_graph->DebugString(); + return status; +} + +void TRTOptimizationPass::Feedback( + tensorflow::grappler::Cluster* cluster, + const tensorflow::grappler::GrapplerItem& item, + const GraphDef& optimized_graph, double result) {} + +} // namespace convert +} // namespace tensorrt +} // namespace tensorflow + +class VerboseCustomGraphOptimizerRegistrar + : public tensorflow::grappler::CustomGraphOptimizerRegistrar { + public: + VerboseCustomGraphOptimizerRegistrar( + const 
tensorflow::grappler::CustomGraphOptimizerRegistry::Creator& cr, + const tensorflow::string& name) + : tensorflow::grappler::CustomGraphOptimizerRegistrar(cr, name) { + VLOG(1) << "Constructing a CustomOptimizationPass registration object for " + << name; + } +}; + +static VerboseCustomGraphOptimizerRegistrar TRTOptimizationPass_Registrar( + []() { + VLOG(1) + << "Instantiating CustomOptimizationPass object TensorRTOptimizer"; + return new tensorflow::tensorrt::convert::TRTOptimizationPass( + "TensorRTOptimizer"); + }, + ("TensorRTOptimizer")); + +#endif +#endif diff --git a/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.h b/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.h new file mode 100644 index 00000000000000..d8ecead23efaa5 --- /dev/null +++ b/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.h @@ -0,0 +1,73 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_CONVERT_TRT_OPTIMIZATION_PASS_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_CONVERT_TRT_OPTIMIZATION_PASS_H_ + +#include + +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h" +#include "tensorflow/core/platform/logging.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT + +namespace tensorflow { +namespace tensorrt { +namespace convert { + +class TRTOptimizationPass : public tensorflow::grappler::CustomGraphOptimizer { + public: + TRTOptimizationPass(const string& name = "TRTOptimizationPass") + : name_(name), + minimum_segment_size_(3), + precision_mode_(0), + maximum_batch_size_(-1), + maximum_workspace_size_(-1) { + VLOG(1) << "Constructing " << name_; + } + + string name() const override { return name_; }; + + tensorflow::Status Init(const tensorflow::RewriterConfig_CustomGraphOptimizer* + config = nullptr) override; + + tensorflow::Status Optimize(tensorflow::grappler::Cluster* cluster, + const tensorflow::grappler::GrapplerItem& item, + GraphDef* optimized_graph) override; + + void Feedback(tensorflow::grappler::Cluster* cluster, + const tensorflow::grappler::GrapplerItem& item, + const GraphDef& optimized_graph, double result) override; + + void PrintDebugInfo(tensorflow::grappler::Cluster* cluster, + const tensorflow::grappler::GrapplerItem& item); + + private: + string name_; + int minimum_segment_size_; + int precision_mode_; + int maximum_batch_size_; + int64_t maximum_workspace_size_; +}; + +} // namespace convert +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_CUDA +#endif // GOOGLE_TENSORRT +#endif // TENSORFLOW_CONTRIB_TENSORRT_CONVERT_TRT_OPTIMIZATION_PASS_H_ diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/BUILD b/tensorflow/contrib/tensorrt/custom_plugin_examples/BUILD new file mode 100644 index 00000000000000..a89cf3ab8bfaec --- 
/dev/null +++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/BUILD @@ -0,0 +1,118 @@ +# Description: +# Example for plugin support in TensorRT(http://developer.nvidia.com/tensorrt) +# through TensorFlow integration. Targeting TensorRT 3.0.4 +# APIs are meant to change while upgrading TRT. +# add init_py into pip package BUILD dependency to install it. + +package(default_visibility = ["//tensorflow:__subpackages__"]) + +licenses(["notice"]) # Apache 2.0 + +load( + "//tensorflow:tensorflow.bzl", + "tf_custom_op_library", + "tf_custom_op_library_additional_deps", + "tf_gen_op_libs", + "tf_gen_op_wrapper_py", + "tf_kernel_library", +) +load("//tensorflow:tensorflow.bzl", "cuda_py_test") +load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library") +load( + "@local_config_tensorrt//:build_defs.bzl", + "if_tensorrt", +) + +tf_gen_op_libs( + op_lib_names = ["inc_op"], +) + +tf_gen_op_wrapper_py( + name = "inc_op", + deps = [":inc_op_op_lib"], +) + +tf_custom_op_library( + name = "_inc_op.so", + srcs = [ + "inc_op_kernel.h", + "inc_op_plugin.cc", + "inc_op_plugin.h", + "ops/inc_op.cc", + ], + gpu_srcs = [ + "inc_op_kernel.h", + "inc_op_kernel.cu.cc", + ], + deps = [ + "//tensorflow/contrib/tensorrt:trt_plugins", + "//tensorflow/core:framework_lite", + ] + if_tensorrt([ + "@local_config_tensorrt//:nv_infer", + ]), +) + +tf_kernel_library( + name = "inc_op_plugin_kernel", + srcs = ["inc_op_plugin.cc"], + hdrs = [ + "inc_op_kernel.h", + "inc_op_plugin.h", + ], + gpu_srcs = [ + "inc_op_kernel.h", + "inc_op_kernel.cu.cc", + ], + deps = [ + "//tensorflow/contrib/tensorrt:trt_plugins", + "//tensorflow/core:stream_executor_headers_lib", + ] + if_tensorrt([ + "@local_config_tensorrt//:nv_infer", + ]) + tf_custom_op_library_additional_deps(), +) + +tf_custom_op_py_library( + name = "inc_op_loader", + srcs = ["inc_op.py"], + dso = [ + ":_inc_op.so", + ], + kernels = [ + ":inc_op_op_lib", + ":inc_op_plugin_kernel", + ], + srcs_version = "PY2AND3", + deps = [ + 
"//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:resources", + ], +) + +py_library( + name = "init_py", + srcs = ["__init__.py"], + srcs_version = "PY2AND3", + deps = [ + ":inc_op", + ":inc_op_loader", + ], +) + +cuda_py_test( + name = "plugin_test", + size = "small", + srcs = ["plugin_test.py"], + additional_deps = [ + ":init_py", + "//tensorflow/contrib/util:util_py", + "//tensorflow/contrib/tensorrt:init_py", + "//tensorflow/python:platform", + "//tensorflow/python:client_testlib", + "//tensorflow/python:tf_optimizer", + ], + tags = [ + "manual", + "noguitar", + "notap", + ], +) diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/__init__.py b/tensorflow/contrib/tensorrt/custom_plugin_examples/__init__.py new file mode 100644 index 00000000000000..363edab2e80ada --- /dev/null +++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/__init__.py @@ -0,0 +1,24 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= +"""Import custom op for plugin and register it in plugin factory registry.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.tensorrt.custom_plugin_examples import inc_op as import_inc_op_so +from tensorflow.contrib.tensorrt.custom_plugin_examples.ops import gen_inc_op + +inc_op = gen_inc_op.inc_plugin_trt diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op.py b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op.py new file mode 100644 index 00000000000000..a007c3f54e208b --- /dev/null +++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op.py @@ -0,0 +1,32 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= +"""Loader for the custom inc_op.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import platform + +if platform.system() != "Windows": + # pylint: disable=g-import-not-at-top + from tensorflow.contrib.util import loader + from tensorflow.python.platform import resource_loader + # pylint: enable=g-import-not-at-top + + _inc_op = loader.load_op_library( + resource_loader.get_path_to_datafile("_inc_op.so")) +else: + raise RuntimeError("Windows not supported") diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.cu.cc b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.cu.cc new file mode 100644 index 00000000000000..988b35f74f3989 --- /dev/null +++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.cu.cc @@ -0,0 +1,84 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.h" + +#include + +#include "tensorflow/core/framework/op_kernel.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "cuda/include/cuda_runtime_api.h" +#include "tensorflow/core/platform/stream_executor.h" + +namespace tensorflow { +namespace tensorrt { + +__global__ void VecInc(const float* vec, float inc, float* dest, int n) { + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < n) dest[i] = vec[i] + inc; +} + +void IncrementKernel(const float* d_input, float inc, float* d_output, + int count, cudaStream_t stream) { + int threads_per_block = 256; + int blocks_per_grid = (count + threads_per_block - 1) / threads_per_block; + + VecInc<<>>(d_input, inc, + d_output, count); +} + +// Note: this kernel definition is not needed in the plugin_test rule, but it is +// required for correctness of the TF program, i.e. if not using plugin or when +// run with trt optimization pass, the test should work. 
+class IncPluginTRT : public OpKernel { + public: + explicit IncPluginTRT(OpKernelConstruction* context) : OpKernel(context) { + std::vector inc_list; + OP_REQUIRES_OK(context, context->GetAttr("inc", &inc_list)); + OP_REQUIRES(context, inc_list.size() == 1, + errors::InvalidArgument( + "The increment list should contain single element.")); + inc_ = inc_list[0]; + } + + void Compute(OpKernelContext* context) override { + const Tensor& input_tensor = context->input(0); + const TensorShape& input_shape = input_tensor.shape(); + Tensor* output_tensor = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(0, input_shape, &output_tensor)); + const cudaStream_t* stream = CHECK_NOTNULL( + reinterpret_cast(context->op_device_context() + ->stream() + ->implementation() + ->CudaStreamMemberHack())); + IncrementKernel(input_tensor.flat().data(), inc_, + output_tensor->flat().data(), + input_shape.num_elements(), *stream); + } + + private: + float inc_; +}; + +REGISTER_KERNEL_BUILDER(Name("IncPluginTRT").Device(DEVICE_GPU), IncPluginTRT); + +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_CUDA +#endif // GOOGLE_TENSORRT diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.h b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.h new file mode 100644 index 00000000000000..c35955e105798b --- /dev/null +++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.h @@ -0,0 +1,35 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_CUSTOM_PLUGIN_EXAMPLES_INC_OP_KERNEL_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_CUSTOM_PLUGIN_EXAMPLES_INC_OP_KERNEL_H_ + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "cuda/include/cuda_runtime_api.h" + +namespace tensorflow { +namespace tensorrt { + +void IncrementKernel(const float* d_input, float inc, float* d_output, + int count, cudaStream_t stream); + +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA + +#endif // TENSORFLOW_CONTRIB_TENSORRT_CUSTOM_PLUGIN_EXAMPLES_INC_OP_KERNEL_H_ diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.cc b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.cc new file mode 100644 index 00000000000000..8d4c893af56689 --- /dev/null +++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.cc @@ -0,0 +1,86 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.h" + +#include "tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_kernel.h" +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT + +namespace tensorflow { +namespace tensorrt { + +const char* kPluginName = "IncPluginTRT"; + +IncOpPlugin* CreateIncPlugin() { return new IncOpPlugin(); } + +IncOpPlugin* CreateIncPluginDeserialize(const void* buffer, size_t length) { + return new IncOpPlugin(buffer, length); +} + +REGISTER_TRT_PLUGIN(kPluginName, CreateIncPluginDeserialize, CreateIncPlugin); + +IncOpPlugin::IncOpPlugin() : plugin_name_(kPluginName) {} + +IncOpPlugin::IncOpPlugin(const void* serialized_data, size_t length) + : PluginTensorRT(serialized_data, length), plugin_name_(kPluginName) { + // account for the consumed pointer. + size_t consumed_data = PluginTensorRT::getSerializationSize(); + assert(length - consumed_data >= sizeof(float)); + const char* buffer = reinterpret_cast(serialized_data); + SetAttribute("inc", buffer + consumed_data, sizeof(float)); +} + +bool IncOpPlugin::SetAttribute(const string& key, const void* ptr, + const size_t size) { + if (strcmp(key.c_str(), "inc") == 0 && size == sizeof(float)) { + StoreAttribute(key, ptr, size); // save the attribute to own the data; + inc_ = *static_cast(ptr); + return true; + } + return false; +} + +bool IncOpPlugin::GetAttribute(const string& key, const void** ptr, + size_t* size) const { + const auto& iter = attr_map_.find(key); + if (iter != attr_map_.end()) { + *ptr = iter->second.data(); + *size = iter->second.size(); + return true; + } + return false; +} + +int IncOpPlugin::enqueue(int batch_size, const void* const* inputs, + void** outputs, void*, cudaStream_t stream) { + int count = 1; + for (int i = 0; i < input_dim_list_[0].nbDims; i++) { + count *= input_dim_list_[0].d[i]; + } 
+ count *= batch_size; + const float* input = reinterpret_cast(inputs[0]); + float* output = reinterpret_cast(outputs[0]); + IncrementKernel(input, inc_, output, count, stream); + return 0; +} + +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_CUDA +#endif // GOOGLE_TENSORRT diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.h b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.h new file mode 100644 index 00000000000000..189e9c939b9ffd --- /dev/null +++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/inc_op_plugin.h @@ -0,0 +1,102 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_CUSTOM_PLUGIN_EXAMPLES_INC_OP_PLUGIN_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_CUSTOM_PLUGIN_EXAMPLES_INC_OP_PLUGIN_H_ + +#include +#include + +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "tensorrt/include/NvInfer.h" + +namespace tensorflow { +namespace tensorrt { + +class IncOpPlugin : public PluginTensorRT { + public: + IncOpPlugin(); + + IncOpPlugin(const void* serialized_data, size_t length); + + const string& GetPluginName() const override { return plugin_name_; }; + + bool Finalize() override { return true; }; + + bool SetAttribute(const string& key, const void* ptr, + const size_t size) override; + + bool GetAttribute(const string& key, const void** ptr, + size_t* size) const override; + + int getNbOutputs() const override { return 1; } + + nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, + int num_input_dims) override { + assert(index == 0); + assert(num_input_dims == 1); + return inputs[0]; + } + + // use configure to setup input dimensions + void configure(const nvinfer1::Dims* inputs, int num_inputs, + const nvinfer1::Dims* outputs, int num_outputs, + int max_batch_size) override { + assert(num_inputs == 1); + PluginTensorRT::configure(inputs, num_inputs, outputs, num_outputs, + max_batch_size); + } + + int initialize() override { return 0; } + + void terminate() override {} + + size_t getWorkspaceSize(int max_batch_size) const override { return 0; } + + int enqueue(int batch_size, const void* const* inputs, void** outputs, + void* workspace, cudaStream_t stream) override; + + size_t getSerializationSize() override { + return PluginTensorRT::getSerializationSize() + sizeof(float); + } + + void serialize(void* buffer) override { + // Serialize parent data. + PluginTensorRT::serialize(buffer); + // Incremented buffer after parent serialization. 
+ buffer = + static_cast(buffer) + PluginTensorRT::getSerializationSize(); + std::memcpy(buffer, &inc_, sizeof(float)); + buffer = static_cast(buffer) + sizeof(float); + } + + protected: + float inc_; + nvinfer1::Dims dim_; + + private: + const string plugin_name_; +}; + +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA + +#endif // TENSORFLOW_CONTRIB_TENSORRT_CUSTOM_PLUGIN_EXAMPLES_INC_OP_PLUGIN_H_ diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/ops/inc_op.cc b/tensorflow/contrib/tensorrt/custom_plugin_examples/ops/inc_op.cc new file mode 100644 index 00000000000000..d0eb0d299dd61d --- /dev/null +++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/ops/inc_op.cc @@ -0,0 +1,36 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT + +namespace tensorflow { + +REGISTER_OP("IncPluginTRT") + .Attr("inc: list(float)") + .Input("input: float32") + .Output("output: float32") + .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { + c->set_output(0, c->input(0)); + return Status::OK(); + }); + +} // namespace tensorflow + +#endif // GOOGLE_CUDA +#endif // GOOGLE_TENSORRT diff --git a/tensorflow/contrib/tensorrt/custom_plugin_examples/plugin_test.py b/tensorflow/contrib/tensorrt/custom_plugin_examples/plugin_test.py new file mode 100644 index 00000000000000..bc4d270bec4fb8 --- /dev/null +++ b/tensorflow/contrib/tensorrt/custom_plugin_examples/plugin_test.py @@ -0,0 +1,95 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Script to show usage of TensorRT custom op & plugin.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy + +from tensorflow.contrib import tensorrt +from tensorflow.contrib.tensorrt import custom_plugin_examples +from tensorflow.core.protobuf import config_pb2 +from tensorflow.python.client import session +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import importer +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import nn +from tensorflow.python.ops import nn_ops +from tensorflow.python.platform import test + + +class TrtPluginTest(test_util.TensorFlowTestCase): + + def _get_plugin_graph_def(self): + """Create a simple graph and return its graph_def.""" + g = ops.Graph() + with g.as_default(): + a = array_ops.placeholder( + dtype=dtypes.float32, shape=(None, 24, 24, 2), name="input") + relu = nn.relu(a, "relu") + v = nn_ops.max_pool( + relu, [1, 2, 2, 1], [1, 2, 2, 1], "VALID", name="max_pool") + + # insert custom_op in the graph + v = custom_plugin_examples.inc_op(v, inc=[16.5], name="plugin_test") + + v *= 2.0 + v = nn.relu(v) + v = nn.relu(v) + array_ops.squeeze(v, name="output") + return g.as_graph_def() + + def _run_graph(self, gdef, dumm_inp): + """Run given graphdef once.""" + gpu_options = config_pb2.GPUOptions(per_process_gpu_memory_fraction=0.50) + ops.reset_default_graph() + g = ops.Graph() + with g.as_default(): + inp, out = importer.import_graph_def( + graph_def=gdef, return_elements=["input", "output"]) + inp = inp.outputs[0] + out = out.outputs[0] + + with session.Session( + config=config_pb2.ConfigProto(gpu_options=gpu_options), + graph=g) as sess: + val = sess.run(out, {inp: dumm_inp}) + return val + + def testIncOpPlugin(self): 
+ inp_dims = (5, 24, 24, 2) + dummy_input = numpy.ones(inp_dims).astype(numpy.float32) + orig_graph = self._get_plugin_graph_def() # graph with plugin node + + # trigger conversion. + # plugin nodes have been registered during import, converter will be able to + # create corresponding plugin layer during conversion. + trt_graph = tensorrt.create_inference_graph( + input_graph_def=orig_graph, + outputs=["output"], + max_batch_size=inp_dims[0], + max_workspace_size_bytes=1 << 25, + precision_mode="FP32", + minimum_segment_size=2) + o2 = self._run_graph(trt_graph, dummy_input) + self.assertEqual(35, o2.reshape([-1])[0]) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index b8f881ceb16a48..9ac80479448741 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/contrib/tensorrt/kernels/trt_engine_op.h" #include "tensorflow/contrib/tensorrt/log/trt_logger.h" +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" @@ -32,38 +33,40 @@ namespace tensorrt { TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { // read serialized_engine - string serialized_engine; OP_REQUIRES_OK(context, - context->GetAttr("serialized_engine", &serialized_engine)); + context->GetAttr("serialized_engine", &serialized_engine_)); // register input output node name in trt_sub_graph OP_REQUIRES_OK(context, context->GetAttr("input_nodes", &input_nodes_)); OP_REQUIRES_OK(context, context->GetAttr("output_nodes", &output_nodes_)); +} - // TODO(samikama) runtime should be taken from a resourcemanager as well. 
- // Only engine should be in the op and context and runtime should be taken - // from resourcemanager - // TODO(jie): cudaSetDevice make sure trt engine is allocated on the same - // gpu where the input/output is also located. - int gpu_id = context->device()->tensorflow_gpu_device_info()->gpu_id; - cudaSetDevice(gpu_id); - int device; - cudaGetDevice(&device); - if (gpu_id != device) LOG(FATAL) << "set device failed!"; - +void TRTEngineOp::Compute(OpKernelContext* context) { // TODO(samikama) runtime should be taken from a resourcemanager as well. // Only engine should be in the op and context and runtime should be taken // from resourcemanager - IRuntime* infer = nvinfer1::createInferRuntime(logger); - trt_engine_ptr_.reset(infer->deserializeCudaEngine( - serialized_engine.c_str(), serialized_engine.size(), nullptr)); - trt_execution_context_ptr_.reset(trt_engine_ptr_->createExecutionContext()); - // Runtime is safe to delete after engine creation - infer->destroy(); -} - -void TRTEngineOp::Compute(OpKernelContext* context) { + if (!trt_execution_context_ptr_) { + IRuntime* infer = nvinfer1::createInferRuntime(logger); +#if NV_TENSORRT_MAJOR > 3 + auto device = context->device(); + auto dev_allocator = + device->GetAllocator(tensorflow::AllocatorAttributes()); + if (!dev_allocator) { + LOG(FATAL) << "Can't find device allocator for gpu device " + << device->name(); + } + allocator_ = std::make_shared(dev_allocator); + infer->setGpuAllocator(allocator_.get()); +#endif + trt_engine_ptr_.reset(infer->deserializeCudaEngine( + serialized_engine_.c_str(), serialized_engine_.size(), + PluginFactoryTensorRT::GetInstance())); + trt_execution_context_ptr_.reset(trt_engine_ptr_->createExecutionContext()); + // Runtime is safe to delete after engine creation + infer->destroy(); + serialized_engine_.clear(); + } int num_binding = context->num_inputs() + context->num_outputs(); std::vector buffers(num_binding); @@ -154,7 +157,12 @@ void TRTEngineOp::Compute(OpKernelContext* 
context) { VLOG(2) << "enqueue returns: " << ret; // sync should be done by TF. } - +TRTEngineOp::~TRTEngineOp() { + // Order matters! + trt_execution_context_ptr_.reset(); + trt_engine_ptr_.reset(); + allocator_.reset(); +} REGISTER_KERNEL_BUILDER(Name("TRTEngineOp").Device(DEVICE_GPU), TRTEngineOp); } // namespace tensorrt diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h index 0964b4b18a7811..e613a71422852e 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h @@ -17,25 +17,28 @@ limitations under the License. #define TENSORFLOW_CONTRIB_TENSORRT_KERNELS_TRT_ENGINE_OP_H_ #include -#include #include +#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" + #if GOOGLE_CUDA #if GOOGLE_TENSORRT #include "cuda/include/cuda_runtime_api.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/op_kernel.h" #include "tensorrt/include/NvInfer.h" namespace tensorflow { namespace tensorrt { class Logger; +// TODO(Sami): Remove this file? 
class TRTEngineOp : public OpKernel { public: explicit TRTEngineOp(OpKernelConstruction* context); void Compute(OpKernelContext* context) override; + ~TRTEngineOp(); private: template @@ -51,6 +54,8 @@ class TRTEngineOp : public OpKernel { std::vector input_nodes_; std::vector output_nodes_; + std::shared_ptr allocator_; + string serialized_engine_; }; } // namespace tensorrt diff --git a/tensorflow/contrib/tensorrt/log/trt_logger.h b/tensorflow/contrib/tensorrt/log/trt_logger.h index 7f3544f8cfda8d..96ccacb791e401 100644 --- a/tensorflow/contrib/tensorrt/log/trt_logger.h +++ b/tensorflow/contrib/tensorrt/log/trt_logger.h @@ -28,7 +28,7 @@ namespace tensorrt { // Logger for GIE info/warning/errors class Logger : public nvinfer1::ILogger { public: - Logger(string name = "DefaultLogger") : name_(name){}; + Logger(string name = "DefaultLogger") : name_(name) {} void log(nvinfer1::ILogger::Severity severity, const char* msg) override; private: diff --git a/tensorflow/contrib/tensorrt/plugin/trt_plugin.cc b/tensorflow/contrib/tensorrt/plugin/trt_plugin.cc new file mode 100644 index 00000000000000..062f86e8bb4dc7 --- /dev/null +++ b/tensorflow/contrib/tensorrt/plugin/trt_plugin.cc @@ -0,0 +1,106 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin.h" +#include +#include +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT + +namespace tensorflow { +namespace tensorrt { + +PluginTensorRT::PluginTensorRT(const void* serialized_data, size_t length) { + const char* buffer = static_cast(serialized_data); + size_t op_name_char_count = *reinterpret_cast(buffer); + buffer += sizeof(size_t); + buffer += op_name_char_count; + + size_t count = *reinterpret_cast(buffer); + buffer += sizeof(size_t); + + for (int i = 0; i < count; i++) { + nvinfer1::Dims dim; + std::memcpy(&(dim.nbDims), buffer, sizeof(dim.nbDims)); + buffer += sizeof(dim.nbDims); + std::memcpy(dim.d, buffer, sizeof(dim.d)); + buffer += sizeof(dim.d); + std::memcpy(dim.type, buffer, sizeof(dim.type)); + buffer += sizeof(dim.type); + input_dim_list_.emplace_back(dim); + } +} + +void PluginTensorRT::configure(const nvinfer1::Dims* inputs, int num_inputs, + const nvinfer1::Dims* outputs, int num_outputs, + int max_batch_size) { + for (int index = 0; index < num_inputs; index++) { + nvinfer1::Dims dim; + dim.nbDims = inputs[index].nbDims; + for (int i = 0; i < dim.nbDims; i++) { + dim.d[i] = inputs[index].d[i]; + dim.type[i] = inputs[index].type[i]; + } + input_dim_list_.emplace_back(dim); + } +} + +size_t PluginTensorRT::getSerializationSize() { + nvinfer1::Dims dim; + return sizeof(size_t) + GetPluginName().size() + + sizeof(input_dim_list_.size()) + sizeof(dim.nbDims) + sizeof(dim.d) + + sizeof(dim.type); +} + +void PluginTensorRT::serialize(void* serialized_data) { + size_t op_name_size = GetPluginName().size(); + char* buffer = static_cast(serialized_data); + std::memcpy(buffer, &op_name_size, sizeof(size_t)); + buffer += sizeof(size_t); + + std::memcpy(buffer, GetPluginName().data(), op_name_size); + buffer += op_name_size; + + auto list_size = 
input_dim_list_.size(); + std::memcpy(buffer, &list_size, sizeof(input_dim_list_.size())); + buffer += sizeof(input_dim_list_.size()); + + for (int i = 0; i < input_dim_list_.size(); i++) { + auto dim = input_dim_list_[i]; + std::memcpy(buffer, &(dim.nbDims), sizeof(dim.nbDims)); + buffer += sizeof(dim.nbDims); + std::memcpy(buffer, dim.d, sizeof(dim.d)); + buffer += sizeof(dim.d); + std::memcpy(buffer, dim.type, sizeof(dim.type)); + buffer += sizeof(dim.type); + } +} + +bool PluginTensorRT::StoreAttribute(const string& key, const void* ptr, + const size_t size) { + if (attr_map_.count(key) != 0) return false; + + attr_map_.emplace(key, std::vector(size)); + std::memcpy(attr_map_[key].data(), ptr, size); + return true; +} + +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_CUDA +#endif // GOOGLE_TENSORRT diff --git a/tensorflow/contrib/tensorrt/plugin/trt_plugin.h b/tensorflow/contrib/tensorrt/plugin/trt_plugin.h new file mode 100644 index 00000000000000..754920b60ca743 --- /dev/null +++ b/tensorflow/contrib/tensorrt/plugin/trt_plugin.h @@ -0,0 +1,74 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_H_ + +#include +#include +#include + +#include "tensorflow/core/platform/types.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "tensorrt/include/NvInfer.h" + +namespace tensorflow { +namespace tensorrt { + +// A wrapper class for TensorRT plugin +// User application should inherit from this class to write custom kernels. +// Allows user to insert custom op in TensorRT engine +// To register plugin in converter, user should also register custom +// PluginDeserializeFunc & PluginConstructFunc through PluginFactoryTensorRT +class PluginTensorRT : public nvinfer1::IPlugin { + public: + PluginTensorRT() {} + PluginTensorRT(const void* serialized_data, size_t length); + + virtual const string& GetPluginName() const = 0; + + virtual bool Finalize() = 0; + + virtual bool SetAttribute(const string& key, const void* ptr, + const size_t size) = 0; + virtual bool GetAttribute(const string& key, const void** ptr, + size_t* size) const = 0; + + void configure(const nvinfer1::Dims* inputs, int num_inputs, + const nvinfer1::Dims* outputs, int num_outputs, + int max_batch_size) override; + + virtual bool StoreAttribute(const string& key, const void* ptr, + const size_t size); + + size_t getSerializationSize() override; + + void serialize(void* buffer) override; + + protected: + std::unordered_map > attr_map_; + + std::vector input_dim_list_; +}; + +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA + +#endif // TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_H_ diff --git a/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.cc b/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.cc new file mode 100644 index 00000000000000..2bc591484dcaf5 --- /dev/null +++ b/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.cc @@ -0,0 +1,78 
@@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT + +namespace tensorflow { +namespace tensorrt { + +PluginTensorRT* PluginFactoryTensorRT::createPlugin(const char* layer_name, + const void* serial_data, + size_t serial_length) { + size_t parsed_byte = 0; + // extract op_name from serial_data + string encoded_op_name = + ExtractOpName(serial_data, serial_length, &parsed_byte); + + if (!IsPlugin(encoded_op_name)) { + return nullptr; + } + + tensorflow::mutex_lock lock(instance_m_); + auto plugin_ptr = + plugin_registry_[encoded_op_name].first(serial_data, serial_length); + owned_plugins_.emplace_back(plugin_ptr); + + return plugin_ptr; +} + +PluginTensorRT* PluginFactoryTensorRT::CreatePlugin(const string& op_name) { + if (!IsPlugin(op_name)) return nullptr; + + tensorflow::mutex_lock lock(instance_m_); + auto plugin_ptr = plugin_registry_[op_name].second(); + owned_plugins_.emplace_back(plugin_ptr); + + return plugin_ptr; +} + +bool PluginFactoryTensorRT::RegisterPlugin( + const string& op_name, PluginDeserializeFunc deserialize_func, + PluginConstructFunc construct_func) { + if (IsPlugin(op_name)) return false; + + tensorflow::mutex_lock lock(instance_m_); + auto ret = plugin_registry_.emplace( + op_name, 
std::make_pair(deserialize_func, construct_func)); + + return ret.second; +} + +void PluginFactoryTensorRT::DestroyPlugins() { + tensorflow::mutex_lock lock(instance_m_); + for (auto& owned_plugin_ptr : owned_plugins_) { + owned_plugin_ptr.release(); + } + owned_plugins_.clear(); +} + +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_CUDA +#endif // GOOGLE_TENSORRT diff --git a/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h b/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h new file mode 100644 index 00000000000000..bbae9fb65c22cf --- /dev/null +++ b/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h @@ -0,0 +1,102 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_FACTORY_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_FACTORY_H_ + +#include +#include + +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin.h" +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/mutex.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "tensorrt/include/NvInfer.h" + +namespace tensorflow { +namespace tensorrt { + +class PluginFactoryTensorRT : public nvinfer1::IPluginFactory { + public: + // TODO(aaroey): this static method has to be inlined to make the singleton a + // unique global symbol. Find a way to fix it. + static PluginFactoryTensorRT* GetInstance() { + static PluginFactoryTensorRT* factory_instance = + new PluginFactoryTensorRT(); + return factory_instance; + } + + // Deserialization method + PluginTensorRT* createPlugin(const char* layer_name, const void* serial_data, + size_t serial_length) override; + + // Plugin construction, PluginFactoryTensorRT owns the plugin. 
+ PluginTensorRT* CreatePlugin(const string& op_name); + + bool RegisterPlugin(const string& op_name, + PluginDeserializeFunc deserialize_func, + PluginConstructFunc construct_func); + + bool IsPlugin(const string& op_name) { + return plugin_registry_.find(op_name) != plugin_registry_.end(); + } + + size_t CountOwnedPlugins() { return owned_plugins_.size(); } + + void DestroyPlugins(); + + protected: + std::unordered_map> + plugin_registry_; + + // TODO(jie): Owned plugin should be associated with different sessions; + // should really hand ownership of plugins to resource management; + std::vector> owned_plugins_; + tensorflow::mutex instance_m_; +}; + +class TrtPluginRegistrar { + public: + TrtPluginRegistrar(const string& name, PluginDeserializeFunc deserialize_func, + PluginConstructFunc construct_func) { + auto factory = PluginFactoryTensorRT::GetInstance(); + QCHECK(factory->RegisterPlugin(name, deserialize_func, construct_func)) + << "Failed to register plugin: " << name; + } +}; + +#define REGISTER_TRT_PLUGIN(name, deserialize_func, construct_func) \ + REGISTER_TRT_PLUGIN_UNIQ_HELPER(__COUNTER__, name, deserialize_func, \ + construct_func) +#define REGISTER_TRT_PLUGIN_UNIQ_HELPER(ctr, name, deserialize_func, \ + construct_func) \ + REGISTER_TRT_PLUGIN_UNIQ(ctr, name, deserialize_func, construct_func) +#define REGISTER_TRT_PLUGIN_UNIQ(ctr, name, deserialize_func, construct_func) \ + static ::tensorflow::tensorrt::TrtPluginRegistrar trt_plugin_registrar##ctr \ + TF_ATTRIBUTE_UNUSED = ::tensorflow::tensorrt::TrtPluginRegistrar( \ + name, deserialize_func, construct_func) + +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA + +#endif // TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_FACTORY_H_ diff --git a/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory_test.cc b/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory_test.cc new file mode 100644 index 00000000000000..129bdcdbc2f8d9 --- /dev/null +++ 
b/tensorflow/contrib/tensorrt/plugin/trt_plugin_factory_test.cc @@ -0,0 +1,125 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h" + +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "tensorrt/include/NvInfer.h" + +namespace tensorflow { +namespace tensorrt { +namespace test { + +class StubPlugin : public PluginTensorRT { + public: + static const char* kPluginName; + + StubPlugin() : plugin_name_(kPluginName) {} + + StubPlugin(const void* serialized_data, size_t length) + : PluginTensorRT(serialized_data, length) {} + + const string& GetPluginName() const override { return plugin_name_; } + + bool Finalize() override { return true; } + + bool SetAttribute(const string& key, const void* ptr, + const size_t size) override { + return true; + } + + bool GetAttribute(const string& key, const void** ptr, + size_t* size) const override { + return true; + } + + int getNbOutputs() const override { return 1; } + + nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, + int nbInputDims) override { + return inputs[0]; + } + + int 
initialize() override { return 0; } + + void terminate() override {} + + size_t getWorkspaceSize(int maxBatchSize) const override { return 0; } + + int enqueue(int batch_size, const void* const* inputs, void** outputs, + void* workspace, cudaStream_t stream) override { + return 0; + } + + private: + const string plugin_name_; +}; + +const char* StubPlugin::kPluginName = "StubPlugin"; + +StubPlugin* CreateStubPlugin() { return new StubPlugin(); } + +StubPlugin* CreateStubPluginDeserialize(const void* serialized_data, + size_t length) { + return new StubPlugin(serialized_data, length); +} + +class TrtPluginFactoryTest : public ::testing::Test { + public: + bool RegisterStubPlugin() { + if (PluginFactoryTensorRT::GetInstance()->IsPlugin( + StubPlugin::kPluginName)) { + return true; + } + return PluginFactoryTensorRT::GetInstance()->RegisterPlugin( + StubPlugin::kPluginName, CreateStubPluginDeserialize, CreateStubPlugin); + } +}; + +TEST_F(TrtPluginFactoryTest, Registration) { + EXPECT_FALSE( + PluginFactoryTensorRT::GetInstance()->IsPlugin(StubPlugin::kPluginName)); + EXPECT_TRUE(RegisterStubPlugin()); + + ASSERT_TRUE( + PluginFactoryTensorRT::GetInstance()->IsPlugin(StubPlugin::kPluginName)); +} + +TEST_F(TrtPluginFactoryTest, CreationDeletion) { + EXPECT_TRUE(RegisterStubPlugin()); + ASSERT_TRUE( + PluginFactoryTensorRT::GetInstance()->IsPlugin(StubPlugin::kPluginName)); + + PluginFactoryTensorRT::GetInstance()->DestroyPlugins(); + ASSERT_TRUE(PluginFactoryTensorRT::GetInstance()->CreatePlugin( + StubPlugin::kPluginName)); + ASSERT_EQ(1, PluginFactoryTensorRT::GetInstance()->CountOwnedPlugins()); + PluginFactoryTensorRT::GetInstance()->DestroyPlugins(); + ASSERT_EQ(0, PluginFactoryTensorRT::GetInstance()->CountOwnedPlugins()); +} + +} // namespace test +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA diff --git a/tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.cc 
b/tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.cc new file mode 100644 index 00000000000000..a8f60886c03c17 --- /dev/null +++ b/tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.cc @@ -0,0 +1,42 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.h" +#include + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT + +namespace tensorflow { +namespace tensorrt { + +string ExtractOpName(const void* serial_data, size_t serial_length, + size_t* incremental) { + size_t op_name_char_count = *static_cast(serial_data); + *incremental = sizeof(size_t) + op_name_char_count; + + assert(serial_length >= *incremental); + + const char* buffer = static_cast(serial_data) + sizeof(size_t); + string op_name(buffer, op_name_char_count); + + return op_name; +} + +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_CUDA +#endif // GOOGLE_TENSORRT diff --git a/tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.h b/tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.h new file mode 100644 index 00000000000000..274ce42fec9283 --- /dev/null +++ b/tensorflow/contrib/tensorrt/plugin/trt_plugin_utils.h @@ -0,0 +1,46 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_UTILS_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_UTILS_H_ + +#include + +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin.h" +#include "tensorflow/core/platform/types.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "tensorrt/include/NvInfer.h" + +namespace tensorflow { +namespace tensorrt { + +typedef std::function + PluginDeserializeFunc; + +typedef std::function PluginConstructFunc; + +// TODO(jie): work on error handling here +string ExtractOpName(const void* serial_data, size_t serial_length, + size_t* incremental); + +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA + +#endif // TENSORFLOW_CONTRIB_TENSORRT_PLUGIN_TRT_PLUGIN_UTILS_H_ diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc new file mode 100644 index 00000000000000..0f0508331c1305 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.cc @@ -0,0 +1,62 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h" + +#include "tensorflow/core/platform/logging.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT + +#if NV_TENSORRT_MAJOR > 2 +#include "cuda/include/cuda_runtime_api.h" + +namespace tensorflow { +namespace tensorrt { +void* TRTCudaAllocator::allocate(uint64_t size, uint64_t alignment, + uint32_t flags) { + assert((alignment & (alignment - 1)) == 0); // zero or a power of 2. + void* memory = nullptr; // never hand back an uninitialized pointer + if (cudaMalloc(&memory, size) != cudaSuccess) return nullptr; + return memory; +} + +void TRTCudaAllocator::free(void* memory) { cudaFree(memory); } + +void* TRTDeviceAllocator::allocate(uint64_t size, uint64_t alignment, + uint32_t flags) { + assert((alignment & (alignment - 1)) == 0); // zero or a power of 2.
+ void* mem = allocator_->AllocateRaw(alignment, size); + VLOG(2) << "Allocated " << size << " bytes with alignment " << alignment + << " @ " << mem; + return mem; +} + +TRTDeviceAllocator::TRTDeviceAllocator(tensorflow::Allocator* allocator) + : allocator_(allocator) { + VLOG(1) << "Using " << allocator->Name() << " allocator from TensorFlow"; +} + +void TRTDeviceAllocator::free(void* memory) { + VLOG(2) << "Deallocating " << memory; + allocator_->DeallocateRaw(memory); +} + +} // namespace tensorrt +} // namespace tensorflow + +#endif +#endif +#endif diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator.h b/tensorflow/contrib/tensorrt/resources/trt_allocator.h new file mode 100644 index 00000000000000..a0c2540a7698bc --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/trt_allocator.h @@ -0,0 +1,68 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_ALLOCATOR_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_ALLOCATOR_H_ + + +#include "tensorflow/contrib/tensorrt/log/trt_logger.h" +#include "tensorflow/core/framework/allocator.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "tensorrt/include/NvInfer.h" + +#if NV_TENSORRT_MAJOR == 3 +// Define interface here temporarily until TRT 4.0 is released +namespace nvinfer1 { +class IGpuAllocator { + public: + virtual void* allocate(uint64_t size, uint64_t alignment, uint32_t flags) = 0; + virtual void free(void* memory) = 0; +}; +} // namespace nvinfer1 +#endif + +namespace tensorflow { +namespace tensorrt { + +class TRTCudaAllocator : public nvinfer1::IGpuAllocator { + // Allocator implementation that is using cuda allocator instead of device + // allocator in case we can't get device allocator from TF. + public: + TRTCudaAllocator() {} + virtual ~TRTCudaAllocator() {} + void* allocate(uint64_t size, uint64_t alignment, uint32_t flags) override; + void free(void* memory) override; +}; + +class TRTDeviceAllocator : public nvinfer1::IGpuAllocator { + // Allocator implementation wrapping TF device allocators. 
+ public: + TRTDeviceAllocator(tensorflow::Allocator* allocator); + virtual ~TRTDeviceAllocator() {} + void* allocate(uint64_t size, uint64_t alignment, uint32_t flags) override; + void free(void* memory) override; + + private: + tensorflow::Allocator* allocator_; +}; + +} // namespace tensorrt +} // namespace tensorflow + +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_ALLOCATOR_H_ diff --git a/tensorflow/contrib/tensorrt/resources/trt_resources.h b/tensorflow/contrib/tensorrt/resources/trt_resources.h index 3c85968ae7acf5..e3469124acd4b9 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_resources.h +++ b/tensorflow/contrib/tensorrt/resources/trt_resources.h @@ -13,20 +13,23 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_ -#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_ +#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_RESOURCES_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_RESOURCES_H_ #include #include #include #include #include + #include "tensorflow/contrib/tensorrt/log/trt_logger.h" +#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h" +#include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h" #include "tensorflow/core/framework/resource_mgr.h" #if GOOGLE_CUDA #if GOOGLE_TENSORRT -#include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h" + #include "tensorrt/include/NvInfer.h" namespace tensorflow { @@ -40,6 +43,11 @@ class TRTCalibrationResource : public tensorflow::ResourceBase { engine_(nullptr), logger_(nullptr), thr_(nullptr) {} + + ~TRTCalibrationResource() { + VLOG(0) << "Destroying Calibration Resource " << std::endl << DebugString(); + } + string DebugString() override { std::stringstream oss; oss << " Calibrator = " << std::hex << 
calibrator_ << std::dec << std::endl @@ -47,16 +55,17 @@ class TRTCalibrationResource : public tensorflow::ResourceBase { << " Network = " << std::hex << network_ << std::dec << std::endl << " Engine = " << std::hex << engine_ << std::dec << std::endl << " Logger = " << std::hex << logger_ << std::dec << std::endl + << " Allocator = " << std::hex << allocator_.get() << std::dec + << std::endl << " Thread = " << std::hex << thr_ << std::dec << std::endl; return oss.str(); } - ~TRTCalibrationResource() { - VLOG(0) << "Destroying Calibration Resource " << std::endl << DebugString(); - } + TRTInt8Calibrator* calibrator_; nvinfer1::IBuilder* builder_; nvinfer1::INetworkDefinition* network_; nvinfer1::ICudaEngine* engine_; + std::shared_ptr allocator_; tensorflow::tensorrt::Logger* logger_; // TODO(sami): Use threadpool threads! std::thread* thr_; @@ -65,31 +74,28 @@ class TRTCalibrationResource : public tensorflow::ResourceBase { class TRTWeightStore : public tensorflow::ResourceBase { public: TRTWeightStore() {} - std::list> store_; + + virtual ~TRTWeightStore() { VLOG(1) << "Destroying store" << DebugString(); } + string DebugString() override { std::stringstream oss; - size_t lenBytes = 0; + size_t len_bytes = 0; for (const auto& v : store_) { - lenBytes += v.size() * sizeof(uint8_t); + len_bytes += v.size() * sizeof(uint8_t); } oss << " Number of entries = " << store_.size() << std::endl << " Total number of bytes = " - << store_.size() * sizeof(std::vector) + lenBytes << std::endl; + << store_.size() * sizeof(std::vector) + len_bytes + << std::endl; return oss.str(); } - virtual ~TRTWeightStore() { VLOG(1) << "Destroying store" << DebugString(); } -}; -class TRTEngineResource : public tensorflow::ResourceBase { - public: - TRTEngineResource() : runtime_(nullptr), ctx_(nullptr){}; - string DebugString() override { return string(""); } - nvinfer1::IRuntime* runtime_; - nvinfer1::IExecutionContext* ctx_; + std::list> store_; }; } // namespace tensorrt } // namespace 
tensorflow -#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCEMGR_TRTRESOURCES_H_ + #endif #endif +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_RESOURCES_H_ diff --git a/tensorflow/contrib/tensorrt/segment/segment.cc b/tensorflow/contrib/tensorrt/segment/segment.cc index 8fc4697c513057..cc42913ecadc3e 100644 --- a/tensorflow/contrib/tensorrt/segment/segment.cc +++ b/tensorflow/contrib/tensorrt/segment/segment.cc @@ -25,18 +25,239 @@ limitations under the License. #include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { namespace tensorrt { namespace segment { +using ::tensorflow::strings::StrAppend; +// A simple graph representation to mirror tensorflow::Graph. This structure +// helps saving memory since segmenter modifies the graph in place, preventing +// the need to create a copy of the graph. It is composed of edges and nodes. +// Nodes keep pointers to original TF nodes. 
+class SimpleNode; +class SimpleGraph; +class SimpleEdge { + public: + SimpleEdge(int id, SimpleNode* src, int src_port, SimpleNode* dst, + int dst_port, bool is_control = false) + : id_(id), + src_(src), + src_port_(src_port), + dst_(dst), + dst_port_(dst_port), + control_(is_control) {} + ~SimpleEdge() {} + + SimpleNode* src() const { return src_; } + SimpleNode* dst() const { return dst_; } + int src_output() const { return src_port_; } + int dst_input() const { return dst_port_; } + int id() const { return id_; } + bool IsControlEdge() const { return control_; } + + private: + int id_; + SimpleNode* src_; + int src_port_; + SimpleNode* dst_; + int dst_port_; + bool control_; +}; + +class SimpleNode { + public: + SimpleNode(const tensorflow::Node* node, const int id); + + const std::vector& in_edges() const { return in_edges_; } + const std::vector& out_edges() const { return out_edges_; } + std::vector in_nodes() const { + std::vector res; + res.reserve(in_edges_.size()); + for (const auto e : in_edges_) { + if (e) res.push_back(e->src()); + } + return res; + } + const string& name() const { return node_->name(); } + const tensorflow::Node* tf_node() const { return node_; } + int id() const { return id_; } + + private: + const tensorflow::Node* node_; + std::vector in_edges_; + std::vector out_edges_; + int id_; + + friend class SimpleGraph; +}; + +class SimpleGraph { + public: + explicit SimpleGraph(const tensorflow::Graph* g); + ~SimpleGraph(); + + void AddControlEdge(SimpleNode* src, SimpleNode* dst); + void AddEdge(SimpleNode* src, int out_port, SimpleNode* dst, int in_port); + void RemoveEdge(const SimpleEdge*); + SimpleNode* FindNodeId(int node_id) { + if (node_id < 0 || node_id >= static_cast(nodes_.size())) { + return nullptr; // valid ids are [0, nodes_.size()) + } + return nodes_[node_id]; + } + int num_node_ids() const { return nodes_.size(); } + const SimpleNode* source_node() const { + return nodes_[tensorflow::Graph::kSourceId]; + } + const SimpleNode* sink_node() const { + return
nodes_[tensorflow::Graph::kSinkId]; + } + + private: + const tensorflow::Graph* g_; + std::vector nodes_; + std::vector edges_; + // free_edge_ids_ and free_node_ids_ contain freed indices. + std::set free_edge_ids_; + std::set free_node_ids_; +}; + +SimpleNode::SimpleNode(const tensorflow::Node* node, const int id) + : node_(node), id_(id) { + if (node_) { + in_edges_.reserve(node_->in_edges().size()); + out_edges_.reserve(node_->out_edges().size()); + } +} + +SimpleGraph::SimpleGraph(const tensorflow::Graph* g) : g_(g) { + int n_nodes = g_->num_node_ids(); + nodes_.resize(n_nodes, nullptr); + nodes_[g->kSourceId] = new SimpleNode(g->source_node(), g->kSourceId); + nodes_[g->kSinkId] = new SimpleNode(g->sink_node(), g->kSinkId); + int n_edges = g->num_edge_ids(); + edges_.resize(n_edges, nullptr); + for (int i = 2; i < n_nodes; i++) { + const auto n = g->FindNodeId(i); + if (n) { + nodes_[i] = new SimpleNode(n, i); + } else { + free_node_ids_.insert(i); + } + } + for (int i = 0; i < n_edges; i++) { + const auto e = g->FindEdgeId(i); + if (e) { + const auto tfsrc = e->src(); + const auto tfdst = e->dst(); + bool is_control = e->IsControlEdge(); + auto src = nodes_[tfsrc->id()]; + auto dst = nodes_[tfdst->id()]; + auto edge = new SimpleEdge(i, src, e->src_output(), dst, e->dst_input(), + is_control); + edges_[i] = edge; + src->out_edges_.push_back(edge); + dst->in_edges_.push_back(edge); + } else { + free_edge_ids_.insert(i); + } + } +} + +void SimpleGraph::AddEdge(SimpleNode* src, int out_port, SimpleNode* dst, + int in_port) { + int i = edges_.size(); + if (!free_edge_ids_.empty()) { + auto it = free_edge_ids_.begin(); + i = *it; + free_edge_ids_.erase(it); + } else { + edges_.push_back(nullptr); + } + bool is_control = (out_port == tensorflow::Graph::kControlSlot); + is_control |= (in_port == tensorflow::Graph::kControlSlot); + auto edge = new SimpleEdge(i, src, out_port, dst, in_port, is_control); + edges_[i] = edge; + src->out_edges_.push_back(edge); + 
dst->in_edges_.push_back(edge); +} + +void SimpleGraph::AddControlEdge(SimpleNode* src, SimpleNode* dst) { + AddEdge(src, tensorflow::Graph::kControlSlot, dst, + tensorflow::Graph::kControlSlot); +} + +void SimpleGraph::RemoveEdge(const SimpleEdge* edge) { + auto src = edge->src(); + auto dst = edge->dst(); + for (auto it = src->out_edges_.begin(); it != src->out_edges_.end(); ++it) { + if (*it == edge) { + src->out_edges_.erase(it); + break; + } + } + for (auto it = dst->in_edges_.begin(); it != dst->in_edges_.end(); ++it) { + if (*it == edge) { + dst->in_edges_.erase(it); + break; + } + } +} + +SimpleGraph::~SimpleGraph() { + for (auto x : nodes_) delete x; + for (auto x : edges_) delete x; +} namespace { -bool CanContractEdge(const tensorflow::Edge* edge, - const tensorflow::Graph& graph) { - const tensorflow::Node* src = edge->src(); - const tensorflow::Node* dst = edge->dst(); +bool CheckCycles(const std::unique_ptr& g, const SimpleNode* src, + const std::vector& start) { + // copied from TF ReverseDFS. + struct Work { + SimpleNode* node; + bool leave; // Are we entering or leaving n? + }; + + std::vector stack(start.size()); + for (int i = 0; i < start.size(); ++i) { + stack[i] = Work{start[i], false}; + } + + std::vector visited(g->num_node_ids(), false); + while (!stack.empty()) { + Work w = stack.back(); + stack.pop_back(); + + auto n = w.node; + if (w.leave) { + if (n == src) { + return true; + } + continue; + } + + if (visited[n->id()]) continue; + visited[n->id()] = true; + // Arrange to call leave(n) when all done with descendants. + stack.push_back(Work{n, true}); + + auto nodes = n->in_nodes(); + for (const auto node : nodes) { + if (!visited[node->id()]) { + stack.push_back(Work{node, false}); + } + } + } + return false; +} + +bool CanContractEdge(const SimpleEdge* edge, + const std::unique_ptr& graph) { + const auto src = edge->src(); + const auto dst = edge->dst(); // Can't contract edge if doing so would cause a cycle in the // graph. 
So, if there is a directed path from 'src' to 'dst', other @@ -48,46 +269,38 @@ bool CanContractEdge(const tensorflow::Edge* edge, // 1. Get all nodes incoming to 'dst', excluding 'src' // 2. Reverse DFS from those nodes // 3. If reverse DFS reaches 'src' then we have a cycle - std::vector dfs_start_nodes; - for (tensorflow::Node* node : dst->in_nodes()) { + std::vector dfs_start_nodes; + for (SimpleNode* node : dst->in_nodes()) { if (node != src) { dfs_start_nodes.push_back(node); } } - bool is_cycle = false; - if (!dfs_start_nodes.empty()) { - tensorflow::ReverseDFSFrom(graph, dfs_start_nodes, {}, - [&is_cycle, src](tensorflow::Node* node) { - if (node == src) { - is_cycle = true; - } - }); - } - + bool is_cycle = CheckCycles(graph, src, dfs_start_nodes); return !is_cycle; } +} // namespace -void ContractEdge(tensorflow::Edge* edge, tensorflow::Graph* graph, - std::vector* remove_edges) { +void ContractEdge(SimpleEdge* edge, SimpleGraph* graph, + std::vector* remove_edges) { // Transfer all inputs and outputs of 'dst' to 'src' except edges // connecting the two. - tensorflow::Node* src = edge->src(); - tensorflow::Node* dst = edge->dst(); + auto src = edge->src(); + auto dst = edge->dst(); // We can use '0' for input/output index because we don't need them // to be accurate for the way we are using the graph. 
- std::vector in_edges(dst->in_edges().begin(), - dst->in_edges().end()); - for (const tensorflow::Edge* in_edge : in_edges) { + std::vector in_edges(dst->in_edges().begin(), + dst->in_edges().end()); + for (const SimpleEdge* in_edge : in_edges) { if (in_edge->IsControlEdge()) { if (in_edge->src() != src) { - tensorflow::Edge* e = const_cast(in_edge); + SimpleEdge* e = const_cast(in_edge); graph->AddControlEdge(e->src(), src); } } else { if (in_edge->src() != src) { - tensorflow::Edge* e = const_cast(in_edge); + SimpleEdge* e = const_cast(in_edge); if (e->src() == graph->source_node()) { graph->AddEdge(e->src(), e->src_output(), src, tensorflow::Graph::kControlSlot); @@ -98,14 +311,14 @@ void ContractEdge(tensorflow::Edge* edge, tensorflow::Graph* graph, } } - std::vector out_edges(dst->out_edges().begin(), - dst->out_edges().end()); - for (const tensorflow::Edge* out_edge : out_edges) { + std::vector out_edges(dst->out_edges().begin(), + dst->out_edges().end()); + for (const SimpleEdge* out_edge : out_edges) { if (out_edge->IsControlEdge()) { - tensorflow::Edge* e = const_cast(out_edge); + SimpleEdge* e = const_cast(out_edge); graph->AddControlEdge(src, e->dst()); } else { - tensorflow::Edge* e = const_cast(out_edge); + SimpleEdge* e = const_cast(out_edge); if (e->dst() == graph->sink_node()) { VLOG(1) << " edge to sink node " << src->name() << " -> " << e->dst()->name(); @@ -128,8 +341,6 @@ void ContractEdge(tensorflow::Edge* edge, tensorflow::Graph* graph, } } -} // namespace - tensorflow::Status SegmentGraph( const tensorflow::GraphDef& gdef, const std::function& candidate_fn, @@ -140,17 +351,22 @@ tensorflow::Status SegmentGraph( tensorflow::Graph graph(flib); TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph( tensorflow::GraphConstructorOptions(), gdef, &graph)); + return SegmentGraph(&graph, candidate_fn, options, segments); +} - // tensorflow::DumpGraph("Pre-Segment", &graph); - +tensorflow::Status SegmentGraph( + tensorflow::Graph* tf_graph, + const 
std::function& candidate_fn, + const SegmentOptions& options, SegmentNodesVector* segments) { + auto graph = std::unique_ptr(new SimpleGraph(tf_graph)); // Use a union-find to collect the nodes that belong to the same - // segment. A node value of nullptr indicates that the node is not a - // candidate for TRT. - std::vector> node_segments; - for (int i = 0; i < graph.num_node_ids(); ++i) { - tensorflow::Node* node = graph.FindNodeId(i); + // segment. A node value of nullptr indicates that the node is not a candidate + // for TRT. + std::vector> node_segments; + for (int i = 0; i < graph->num_node_ids(); ++i) { + SimpleNode* node = graph->FindNodeId(i); if (options.exclude_node_list.count(node->name()) != 0 || - !candidate_fn(node)) { + !candidate_fn(node->tf_node())) { node = nullptr; } node_segments.emplace_back(node); @@ -164,10 +380,16 @@ tensorflow::Status SegmentGraph( // a measure of how beneficial it is to include a given node in a // TRT subgraph then we can revisit this algorithm to take advantage // of that information. - std::vector order; - tensorflow::GetPostOrder(graph, &order); - - for (const tensorflow::Node* node : order) { + std::vector tforder; + tensorflow::GetPostOrder(*tf_graph, &tforder); + // use postorder implementation from tensorflow and construct mirror in + // internal format + std::vector order; + order.reserve(tforder.size()); + for (const auto tfnode : tforder) { + order.push_back(graph->FindNodeId(tfnode->id())); + } + for (const SimpleNode* node : order) { // All output nodes of 'node' have been visited... VLOG(2) << "Trying node " << node->name() << " id=" << node->id(); @@ -181,8 +403,8 @@ tensorflow::Status SegmentGraph( // nodes. Iterate since combining two nodes may unblock other // combining. while (true) { - std::set contract_edges; - for (const tensorflow::Edge* out_edge : node->out_edges()) { + std::set contract_edges; + for (const SimpleEdge* out_edge : node->out_edges()) { VLOG(2) << "... 
out node " << out_edge->dst()->name() << " ( " << out_edge->dst()->id() << " <- " << node->id() << " )"; if (out_edge->IsControlEdge()) { @@ -210,9 +432,9 @@ tensorflow::Status SegmentGraph( // Contract edges and collect the adjacent nodes into the same // segment/subgraph. while (!contract_edges.empty()) { - const tensorflow::Edge* contract_edge = *contract_edges.begin(); - const tensorflow::Node* src = contract_edge->src(); - const tensorflow::Node* dst = contract_edge->dst(); + const SimpleEdge* contract_edge = *contract_edges.begin(); + const SimpleNode* src = contract_edge->src(); + const SimpleNode* dst = contract_edge->dst(); VLOG(2) << "Merge " << src->name() << " <- " << dst->name() << " (" << src->id() << " <- " << dst->id(); @@ -221,13 +443,13 @@ tensorflow::Status SegmentGraph( // Contracting the edge leaves disconnected graph edges. // Remove these from the graph and from 'contract_edges' so we // don't visit them again. - tensorflow::Edge* e = const_cast(contract_edge); - std::vector remove_edges; - ContractEdge(e, &graph, &remove_edges); + SimpleEdge* e = const_cast(contract_edge); + std::vector remove_edges; + ContractEdge(e, graph.get(), &remove_edges); - for (const tensorflow::Edge* r : remove_edges) { + for (const SimpleEdge* r : remove_edges) { contract_edges.erase(r); - graph.RemoveEdge(r); + graph->RemoveEdge(r); } } } @@ -236,9 +458,27 @@ tensorflow::Status SegmentGraph( // Collect the segments/subgraphs. Each subgraph is represented by a // set of the names of the nodes in that subgraph. std::unordered_map> sg_map; + std::unordered_map> device_maps; for (auto& u : node_segments) { if ((u.Value() != nullptr) && (u.ParentValue() != nullptr)) { sg_map[u.ParentValue()->name()].insert(u.Value()->name()); + auto tf_node = u.Value()->tf_node(); + // has_assigned_device_name() is expected to return true + // when called from optimization pass. 
However, since graph + // is converted back and forth between graph and graphdef, + // assigned devices demoted to requested devices. If the graph + // is passed directly to this module, assigned devices will be set. + if (tf_node->has_assigned_device_name()) { + device_maps[u.ParentValue()->name()].insert( + tf_node->assigned_device_name()); + } else if (!tf_node->requested_device().empty()) { + device_maps[u.ParentValue()->name()].insert( + tf_node->requested_device()); + } else { + VLOG(1) << "Node " << tf_node->name() + << " has no device assigned requested device is: " + << tf_node->requested_device(); + } } } @@ -260,10 +500,35 @@ tensorflow::Status SegmentGraph( << segment_node_names.size() << " nodes, dropping"; continue; } - - segments->emplace_back(segment_node_names); + // TODO(sami): Make segmenter placement aware once trtscopes are in place + const auto& dev_itr = device_maps.find(itr.first); + if (dev_itr == device_maps.end() || dev_itr->second.empty()) { + VLOG(1) << "No device assigned to segment " << segments->size(); + segments->emplace_back(std::make_pair(segment_node_names, string())); + } else if (dev_itr->second.size() > 1) { + string s("Segment "); + StrAppend(&s, segments->size(), " has multiple devices attached: "); + for (const auto& dev : dev_itr->second) { + StrAppend(&s, dev, ", "); + } + LOG(WARNING) << s << " choosing " << *(dev_itr->second.begin()); + segments->emplace_back( + std::make_pair(segment_node_names, *(dev_itr->second.begin()))); + } else { + segments->emplace_back( + std::make_pair(segment_node_names, *(dev_itr->second.begin()))); + } + } + if (VLOG_IS_ON(1)) { + for (const auto& d : device_maps) { + string s("Segment "); + StrAppend(&s, ": '", d.first, "' "); + for (const auto& dd : d.second) { + StrAppend(&s, dd, ", "); + } + VLOG(1) << "Devices " << s; + } } - return tensorflow::Status::OK(); } diff --git a/tensorflow/contrib/tensorrt/segment/segment.h b/tensorflow/contrib/tensorrt/segment/segment.h index 
7e8685f44a8c8a..1568dd915344e6 100644 --- a/tensorflow/contrib/tensorrt/segment/segment.h +++ b/tensorflow/contrib/tensorrt/segment/segment.h @@ -29,7 +29,9 @@ namespace tensorflow { namespace tensorrt { namespace segment { -using SegmentNodesVector = std::vector>; +// Vector of segments; each entry pairs the set of node names in the segment +// with the device name chosen for it (empty if no device was found). +using SegmentNodesVector = std::vector, string>>; struct SegmentOptions { // Segment must contain at least this many nodes. @@ -51,6 +53,20 @@ tensorflow::Status SegmentGraph( const std::function& candidate_fn, const SegmentOptions& options, SegmentNodesVector* segments); +// Get the subgraphs of a graph that can be handled by TensorRT. +// +// @param tf_graph tensorflow::Graph of the network +// @param candidate_fn A function that returns true for a Node* if +// that node can be handled by TensorRT. +// @param segments Returns the TensorRT segments/subgraphs. Each entry +// in the vector pairs the set of names of all the nodes in that +// subgraph with the device name selected for it. +// @return the status.
+tensorflow::Status SegmentGraph( + tensorflow::Graph* tf_graph, + const std::function& candidate_fn, + const SegmentOptions& options, SegmentNodesVector* segments); + } // namespace segment } // namespace tensorrt } // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/segment/segment_test.cc b/tensorflow/contrib/tensorrt/segment/segment_test.cc index 6f7655fcabeec0..2de3923b06a8dd 100644 --- a/tensorflow/contrib/tensorrt/segment/segment_test.cc +++ b/tensorflow/contrib/tensorrt/segment/segment_test.cc @@ -34,7 +34,7 @@ class SegmentTest : public ::testing::Test { TF_Operation* Add(TF_Operation* l, TF_Operation* r, TF_Graph* graph, TF_Status* s, const char* name); - std::function MakeCandidateFn( + std::function MakeCandidateFn( const std::set& node_names); protected: @@ -59,9 +59,9 @@ bool SegmentTest::GetGraphDef(TF_Graph* graph, return ret; } -std::function SegmentTest::MakeCandidateFn( +std::function SegmentTest::MakeCandidateFn( const std::set& node_names) { - return [node_names](const Node* node) -> bool { + return [node_names](const tensorflow::Node* node) -> bool { return node_names.find(node->name()) != node_names.end(); }; } @@ -164,7 +164,7 @@ TEST_F(SegmentTest, Simple) { ASSERT_EQ(segments.size(), 1); std::vector expected{"add0", "add1", "add2", "add3", "add4"}; for (const auto& ex : expected) { - EXPECT_TRUE(segments[0].find(ex) != segments[0].end()) + EXPECT_TRUE(segments[0].first.find(ex) != segments[0].first.end()) << "Missing expected node " << ex; } TF_DeleteGraph(graph); @@ -277,13 +277,13 @@ TEST_F(SegmentTest, Multiple) { std::vector expected0{"add0", "add1", "add2", "add3"}; for (const auto& ex : expected0) { - EXPECT_TRUE(segments[0].find(ex) != segments[0].end()) + EXPECT_TRUE(segments[0].first.find(ex) != segments[0].first.end()) << "Missing expected node " << ex; } std::vector expected1{"add6", "add8"}; for (const auto& ex : expected1) { - EXPECT_TRUE(segments[1].find(ex) != segments[1].end()) + 
EXPECT_TRUE(segments[1].first.find(ex) != segments[1].first.end()) << "Missing expected node " << ex; } TF_DeleteGraph(graph); @@ -347,13 +347,13 @@ TEST_F(SegmentTest, BigIfElse) { std::vector expected0{"add3", "add4", "add5", "add6", "add7"}; for (const auto& ex : expected0) { - EXPECT_TRUE(segments[0].find(ex) != segments[0].end()) + EXPECT_TRUE(segments[0].first.find(ex) != segments[0].first.end()) << "Missing expected node " << ex; } std::vector expected1{"add0", "add1"}; for (const auto& ex : expected1) { - EXPECT_TRUE(segments[1].find(ex) != segments[1].end()) + EXPECT_TRUE(segments[1].first.find(ex) != segments[1].first.end()) << "Missing expected node " << ex; } TF_DeleteGraph(graph); diff --git a/tensorflow/contrib/tensorrt/shape_fn/trt_shfn.cc b/tensorflow/contrib/tensorrt/shape_fn/trt_shfn.cc index 8b475177bc670d..f36495f6b69ecb 100644 --- a/tensorflow/contrib/tensorrt/shape_fn/trt_shfn.cc +++ b/tensorflow/contrib/tensorrt/shape_fn/trt_shfn.cc @@ -14,6 +14,7 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/contrib/tensorrt/shape_fn/trt_shfn.h" +#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h" #include #include @@ -33,7 +34,8 @@ tensorflow::Status TRTEngineOpShapeInference(InferenceContext* context) { TF_RETURN_IF_ERROR(context->GetAttr("serialized_engine", &serialized_engine)); nvinfer1::IRuntime* infer = nvinfer1::createInferRuntime(logger); nvinfer1::ICudaEngine* trt_engine = infer->deserializeCudaEngine( - serialized_engine.c_str(), serialized_engine.size(), nullptr); + serialized_engine.c_str(), serialized_engine.size(), + tensorrt::PluginFactoryTensorRT::GetInstance()); int num_batch = -1; std::vector<::tensorflow::DataType> input_type; diff --git a/tensorflow/contrib/tensorrt/test/test_tftrt.py b/tensorflow/contrib/tensorrt/test/test_tftrt.py index ad01bedd8fa066..175ccd80068625 100644 --- a/tensorflow/contrib/tensorrt/test/test_tftrt.py +++ b/tensorflow/contrib/tensorrt/test/test_tftrt.py @@ -18,7 +18,9 @@ from __future__ import division from __future__ import print_function +import argparse import numpy as np + # normally we should do import tensorflow as tf and then # tf.placeholder, tf.constant, tf.nn.conv2d etc but # it looks like internal builds don't like it so @@ -26,6 +28,7 @@ from tensorflow.contrib import tensorrt as trt from tensorflow.core.protobuf import config_pb2 as cpb2 +from tensorflow.core.protobuf import rewriter_config_pb2 as rwpb2 from tensorflow.python.client import session as csess from tensorflow.python.framework import constant_op as cop from tensorflow.python.framework import dtypes as dtypes @@ -59,9 +62,11 @@ def get_simple_graph_def(): return g.as_graph_def() -def run_graph(gdef, dumm_inp): +def execute_graph(gdef, dumm_inp): """Run given graphdef once.""" + print("executing") gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) + sessconfig = cpb2.ConfigProto(gpu_options=gpu_options) 
ops.reset_default_graph() g = ops.Graph() with g.as_default(): @@ -69,15 +74,14 @@ def run_graph(gdef, dumm_inp): graph_def=gdef, return_elements=["input", "output"]) inp = inp.outputs[0] out = out.outputs[0] - with csess.Session( - config=cpb2.ConfigProto(gpu_options=gpu_options), graph=g) as sess: + with csess.Session(config=sessconfig, graph=g) as sess: val = sess.run(out, {inp: dumm_inp}) return val # Use real data that is representative of the inference dataset # for calibration. For this test script it is random data. -def run_calibration(gdef, dumm_inp): +def execute_calibration(gdef, dumm_inp): """Run given calibration graph multiple times.""" gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) ops.reset_default_graph() @@ -96,7 +100,9 @@ def run_calibration(gdef, dumm_inp): return val -if "__main__" in __name__: +def user(run_graph=execute_graph, run_calibration=execute_calibration): + """Example function that converts a graph to TFTRT graph.""" + inp_dims = (100, 24, 24, 2) dummy_input = np.random.random_sample(inp_dims) orig_graph = get_simple_graph_def() # use a frozen graph for inference @@ -137,3 +143,51 @@ def run_calibration(gdef, dumm_inp): assert np.allclose(o1, o4) assert np.allclose(o1, o5) print("Pass") + + +def auto(): + """Run the conversion as an optimization pass.""" + inp_dims = (100, 24, 24, 2) + dummy_input = np.random.random_sample(inp_dims) + orig_graph = get_simple_graph_def() + opt_config = rwpb2.RewriterConfig() + opt_config.optimizers.extend(["constfold", "layout"]) + custom_op = opt_config.custom_optimizers.add() + custom_op.name = "TensorRTOptimizer" + custom_op.parameter_map["minimum_segment_size"].i = 3 + custom_op.parameter_map["precision_mode"].s = "FP32" + custom_op.parameter_map["max_batch_size"].i = inp_dims[0] + custom_op.parameter_map["max_workspace_size_bytes"].i = 1 << 25 + print(custom_op) + gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) + graph_options = 
cpb2.GraphOptions(rewrite_options=opt_config) + sessconfig = cpb2.ConfigProto( + gpu_options=gpu_options, graph_options=graph_options) + print(sessconfig) + g = ops.Graph() + ops.reset_default_graph() + with g.as_default(): + inp, out = importer.import_graph_def( + graph_def=orig_graph, return_elements=["input", "output"]) + inp = inp.outputs[0] + out = out.outputs[0] + with csess.Session(config=sessconfig, graph=g) as sess: + val = sess.run(out, {inp: dummy_input}) + print(val.shape) + + +if "__main__" in __name__: + P = argparse.ArgumentParser( + prog="tftrt_test", + description="Example utilization of TensorFlow-TensorRT integration") + P.add_argument( + "--automatic", + "-a", + action="store_true", + help="Do TRT conversion automatically", + default=False) + flags, unparsed = P.parse_known_args() + if flags.automatic: + auto() + else: + user() diff --git a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py index d426e9f12c574c..0403b652d72877 100644 --- a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py +++ b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py @@ -44,8 +44,7 @@ def setUp(self): inp_dims = (100, 24, 24, 2) self._input = np.random.random_sample(inp_dims) self._original_graph = self.get_simple_graph_def() - self._gpu_options = cpb2.GPUOptions( - per_process_gpu_memory_fraction=0.50) + self._gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) self._config = cpb2.ConfigProto(gpu_options=self._gpu_options) self._reference = self.run_graph(self._original_graph, self._input) @@ -60,11 +59,7 @@ def get_simple_graph_def(self): name="weights", dtype=dtypes.float32) conv = nn.conv2d( - input=a, - filter=e, - strides=[1, 2, 2, 1], - padding="SAME", - name="conv") + input=a, filter=e, strides=[1, 2, 2, 1], padding="SAME", name="conv") b = cop.constant( [4., 1.5, 2., 3., 5., 7.], name="bias", dtype=dtypes.float32) t = nn.bias_add(conv, b, name="biasAdd") 
@@ -85,8 +80,7 @@ def run_graph(self, gdef, dumm_inp): inp = inp.outputs[0] out = out.outputs[0] with self.test_session( - graph=g, config=self._config, use_gpu=True, - force_gpu=True) as sess: + graph=g, config=self._config, use_gpu=True, force_gpu=True) as sess: val = sess.run(out, {inp: dumm_inp}) return val @@ -104,15 +98,14 @@ def run_calibration(self, gdef, dumm_inp): # run over real calibration data here, we are mimicking a calibration # set of 30 different batches. Use as much calibration data as you want with self.test_session( - graph=g, config=self._config, use_gpu=True, - force_gpu=True) as sess: + graph=g, config=self._config, use_gpu=True, force_gpu=True) as sess: for _ in range(30): val = sess.run(out, {inp: dumm_inp}) return val def get_trt_graph(self, mode): """Return trt converted graph.""" - if mode in ["FP32", "FP16", "INT8"]: + if mode in ["FP32", "FP16", "INT8"]: return trt.create_inference_graph( input_graph_def=self._original_graph, outputs=["output"], @@ -120,7 +113,7 @@ def get_trt_graph(self, mode): max_workspace_size_bytes=1 << 25, precision_mode=mode, # TRT Engine precision "FP32","FP16" or "INT8" minimum_segment_size=2 # minimum number of nodes in an engine - ) + ) return None def testFP32(self): diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_context.py b/tensorflow/contrib/tpu/python/tpu/tpu_context.py index 5dd7bde2058fdf..5b9aeaa8797b92 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_context.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_context.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# =================================================================== -"""TPU system metdata and associated tooling.""" +"""TPU system metadata and associated tooling.""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/verbs/README.md b/tensorflow/contrib/verbs/README.md index 4b6104a8b4d542..3137bfd03e3faa 100644 --- a/tensorflow/contrib/verbs/README.md +++ b/tensorflow/contrib/verbs/README.md @@ -159,7 +159,7 @@ When the receiver receives the RDMA write, it will locate the relevant **RdmaTen * step_id - Step ID. * request_index - Request index. * remote_addr/rkey - Address/rkey of the reallocated result/proxy tensor. -* **RDMA_MESSAGE_ERROR_STATUS** - (sender ==> receiver) Notify the receiver that an error had occured on the sender side, so it can propagate it to the upper levels. +* **RDMA_MESSAGE_ERROR_STATUS** - (sender ==> receiver) Notify the receiver that an error had occurred on the sender side, so it can propagate it to the upper levels. * type - The message type. * name (name_size) - Name of the requested tensor. * step_id - Step ID. 
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 2dd8e6fb3155ad..3286f856db7345 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2762,6 +2762,7 @@ cc_library( ], visibility = [ "//tensorflow/compiler:__subpackages__", + "//tensorflow/core/kernels:__subpackages__", "//tensorflow/core/profiler:__subpackages__", ], deps = [":lib_internal"], @@ -3683,7 +3684,11 @@ tf_cuda_only_cc_test( ":test", ":test_main", "//third_party/eigen3", - ], + ] + if_mkl( + [ + "//third_party/mkl:intel_binary_blob", + ], + ), ) tf_cc_test_gpu( diff --git a/tensorflow/core/api_def/base_api/api_def_RegexFullMatch.pbtxt b/tensorflow/core/api_def/base_api/api_def_RegexFullMatch.pbtxt new file mode 100644 index 00000000000000..8cef243aee3a9b --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_RegexFullMatch.pbtxt @@ -0,0 +1,30 @@ +op { + graph_op_name: "RegexFullMatch" + in_arg { + name: "input" + description: <

-template <typename T>
+```c++
+template <typename T>
 class ZeroOutOp : public OpKernel {
  public:
-  explicit ZeroOutOp(OpKernelConstruction\* context) : OpKernel(context) {}
- void Compute(OpKernelContext\* context) override { + explicit ZeroOutOp(OpKernelConstruction* context) : OpKernel(context) {} + + void Compute(OpKernelContext* context) override { // Grab the input tensor - const Tensor& input\_tensor = context->input(0); - auto input = input\_tensor.flat<T>();
+ const Tensor& input_tensor = context->input(0); + auto input = input_tensor.flat<T>(); + // Create an output tensor Tensor* output = NULL; - OP\_REQUIRES\_OK(context, - context->allocate\_output(0, input_tensor.shape(), &output)); - auto output\_flat = output->template flat<T>();
+ OP_REQUIRES_OK(context, + context->allocate_output(0, input_tensor.shape(), &output)); + auto output_flat = output->template flat<T>(); + // Set all the elements of the output tensor to 0 const int N = input.size(); - for (int i = 0; i < N; i++) { - output\_flat(i) = 0; - }
+ for (int i = 0; i < N; i++) { + output_flat(i) = 0; + } + // Preserve the first input value - if (N > 0) output\_flat(0) = input(0); + if (N > 0) output_flat(0) = input(0); } -};
-// Note that TypeConstraint<int32>("T") means that attr "T" (defined +}; + +// Note that TypeConstraint<int32>("T") means that attr "T" (defined // in the op registration above) must be "int32" to use this template -// instantiation.
-REGISTER\_KERNEL\_BUILDER( +// instantiation. +REGISTER_KERNEL_BUILDER( Name("ZeroOut") - .Device(DEVICE\_CPU) - .TypeConstraint<int32>("T"), - ZeroOutOp<int32>); -REGISTER\_KERNEL\_BUILDER( + .Device(DEVICE_CPU) + .TypeConstraint<int32>("T"), + ZeroOutOp<int32>); +REGISTER_KERNEL_BUILDER( Name("ZeroOut") - .Device(DEVICE\_CPU) - .TypeConstraint<float>("T"), - ZeroOutOp<float>); -REGISTER\_KERNEL\_BUILDER( + .Device(DEVICE_CPU) + .TypeConstraint<float>("T"), + ZeroOutOp<float>); +REGISTER_KERNEL_BUILDER( Name("ZeroOut") - .Device(DEVICE\_CPU) - .TypeConstraint<double>("T"), - ZeroOutOp<double>); -
+ .Device(DEVICE_CPU) + .TypeConstraint("T"), + ZeroOutOp); +``` If you have more than a couple overloads, you can put the registration in a macro. diff --git a/tensorflow/docs_src/extend/architecture.md b/tensorflow/docs_src/extend/architecture.md index c0fc714a4405d6..c8f522a03ab0c1 100644 --- a/tensorflow/docs_src/extend/architecture.md +++ b/tensorflow/docs_src/extend/architecture.md @@ -4,8 +4,8 @@ We designed TensorFlow for large-scale distributed training and inference, but it is also flexible enough to support experimentation with new machine learning models and system-level optimizations. -This document describes the system architecture that makes possible this -combination of scale and flexibility. It assumes that you have basic familiarity +This document describes the system architecture that makes this +combination of scale and flexibility possible. It assumes that you have basic familiarity with TensorFlow programming concepts such as the computation graph, operations, and sessions. See @{$programmers_guide/low_level_intro$this document} for an introduction to these topics. Some familiarity @@ -15,8 +15,8 @@ will also be helpful. This document is for developers who want to extend TensorFlow in some way not supported by current APIs, hardware engineers who want to optimize for TensorFlow, implementers of machine learning systems working on scaling and -distribution, or anyone who wants to look under Tensorflow's hood. After -reading it you should understand TensorFlow architecture well enough to read +distribution, or anyone who wants to look under Tensorflow's hood. By the end of this document +you should understand the TensorFlow architecture well enough to read and modify the core TensorFlow code. ## Overview @@ -35,7 +35,7 @@ This document focuses on the following layers: * **Client**: * Defines the computation as a dataflow graph. 
* Initiates graph execution using a [**session**]( - https://www.tensorflow.org/code/tensorflow/python/client/session.py) + https://www.tensorflow.org/code/tensorflow/python/client/session.py). * **Distributed Master** * Prunes a specific subgraph from the graph, as defined by the arguments to Session.run(). @@ -55,7 +55,7 @@ Figure 2 illustrates the interaction of these components. "/job:worker/task:0" a server": a task responsible for storing and updating the model's parameters. Other tasks send updates to these parameters as they work on optimizing the parameters. This particular division of labor between tasks is not required, but -it is common for distributed training. + is common for distributed training. ![TensorFlow Architecture Diagram](https://www.tensorflow.org/images/diag1.svg){: width="500"} @@ -193,7 +193,7 @@ https://www.tensorflow.org/code/tensorflow/contrib/nccl/python/ops/nccl_ops.py)) ## Kernel Implementations -The runtime contains over 200 standard operations, including mathematical, array +The runtime contains over 200 standard operations including mathematical, array manipulation, control flow, and state management operations. Each of these operations can have kernel implementations optimized for a variety of devices. 
Many of the operation kernels are implemented using Eigen::Tensor, which uses diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index 8c165aad52499a..1abd840ab3ca3f 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.8.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.8.0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index 26cbcc9a9b0a99..52a2a3f8a68dd5 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.8.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.8.0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index 05b28787017487..1256fb99c4307c 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.8.0-rc1 + 1.8.0 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses 
TensorFlow: org.tensorflow tensorflow - 1.8.0-rc1 + 1.8.0 @@ -124,12 +124,12 @@ instead: org.tensorflow libtensorflow - 1.8.0-rc1 + 1.8.0 org.tensorflow libtensorflow_jni_gpu - 1.8.0-rc1 + 1.8.0 ``` @@ -148,7 +148,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0-rc1.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0.jar), which is the TensorFlow Java Archive (JAR). 2. Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -167,7 +167,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.8.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.8.0.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -175,10 +175,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0-rc1.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.8.0-rc1.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.8.0.zip). 3. Extract this .zip file. @@ -227,7 +227,7 @@ must be part of your `classpath`. 
For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -
javac -cp libtensorflow-1.8.0-rc1.jar HelloTF.java
+
javac -cp libtensorflow-1.8.0.jar HelloTF.java
### Running @@ -241,11 +241,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
java -cp libtensorflow-1.8.0-rc1.jar:. -Djava.library.path=./jni HelloTF
+
java -cp libtensorflow-1.8.0.jar:. -Djava.library.path=./jni HelloTF
And the following command line executes the `HelloTF` program on Windows: -
java -cp libtensorflow-1.8.0-rc1.jar;. -Djava.library.path=jni HelloTF
+
java -cp libtensorflow-1.8.0.jar;. -Djava.library.path=jni HelloTF
If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 9d9322dbb59e2b..0ed81600279b17 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -438,7 +438,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp34-cp34m-linux_x86_64.whl ## Validate your installation @@ -684,14 +684,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -703,14 +703,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -722,14 +722,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp35-cp35m-linux_x86_64.whl
 
@@ -741,14 +741,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 0906b550086c8e..29a867a9e300b7 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
 $ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -242,7 +242,7 @@ take the following steps: issue the following command:
 $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl 
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -350,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py2-none-any.whl @@ -522,7 +522,7 @@ The value you specify depends on your Python version.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py2-none-any.whl
 
@@ -530,5 +530,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py2-none-a
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 8bbdf013ca4a7d..5ba522b436137b 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -328,10 +328,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.8.0rc1 on Linux: +for TensorFlow 1.8.0 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.8.0rc1-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.8.0-py2-none-any.whl
 
## Validate your installation diff --git a/tensorflow/docs_src/mobile/mobile_intro.md b/tensorflow/docs_src/mobile/mobile_intro.md index 1b0b9b44b469af..241f01d460ae35 100644 --- a/tensorflow/docs_src/mobile/mobile_intro.md +++ b/tensorflow/docs_src/mobile/mobile_intro.md @@ -212,7 +212,7 @@ handle the task then it will be difficult to train a computer to do better. After you’ve solved any fundamental issues with your use case, you need to create a labeled dataset to define what problem you’re trying to solve. This -step is extremely important, moreso than picking which model to use. You want it +step is extremely important, more than picking which model to use. You want it to be as representative as possible of your actual use case, since the model will only be effective at the task you teach it. It’s also worth investing in tools to make labeling the data as efficient and accurate as possible. For diff --git a/tensorflow/docs_src/mobile/tflite/index.md b/tensorflow/docs_src/mobile/tflite/index.md index 01881ccf3bb15b..56220348276399 100644 --- a/tensorflow/docs_src/mobile/tflite/index.md +++ b/tensorflow/docs_src/mobile/tflite/index.md @@ -155,7 +155,7 @@ retraining for both floating point and quantized inference. The following diagram shows the architectural design of TensorFlow Lite: -TensorFlow Lite architecture diagram diff --git a/tensorflow/docs_src/programmers_guide/faq.md b/tensorflow/docs_src/programmers_guide/faq.md index 51c1a1e032baae..b6291a9fface40 100644 --- a/tensorflow/docs_src/programmers_guide/faq.md +++ b/tensorflow/docs_src/programmers_guide/faq.md @@ -72,7 +72,7 @@ tensors in the execution of a step. If `t` is a @{tf.Tensor} object, @{tf.Tensor.eval} is shorthand for -@{tf.Session.run} (where `sess` is the +@{tf.Session.run}, where `sess` is the current @{tf.get_default_session}. 
The two following snippets of code are equivalent: @@ -101,9 +101,8 @@ sessions, it may be more straightforward to make explicit calls to Sessions can own resources, such as @{tf.Variable}, @{tf.QueueBase}, and -@{tf.ReaderBase}; and these resources can use -a significant amount of memory. These resources (and the associated memory) are -released when the session is closed, by calling +@{tf.ReaderBase}. These resources can sometimes use +a significant amount of memory, and can be released when the session is closed by calling @{tf.Session.close}. The intermediate tensors that are created as part of a call to @@ -137,7 +136,7 @@ TensorFlow also has a to help build support for more client languages. We invite contributions of new language bindings. -Bindings for various other languages (such as [C#](https://github.com/migueldeicaza/TensorFlowSharp), [Julia](https://github.com/malmaud/TensorFlow.jl), [Ruby](https://github.com/somaticio/tensorflow.rb) and [Scala](https://github.com/eaplatanios/tensorflow_scala)) created and supported by the opensource community build on top of the C API supported by the TensorFlow maintainers. +Bindings for various other languages (such as [C#](https://github.com/migueldeicaza/TensorFlowSharp), [Julia](https://github.com/malmaud/TensorFlow.jl), [Ruby](https://github.com/somaticio/tensorflow.rb) and [Scala](https://github.com/eaplatanios/tensorflow_scala)) created and supported by the open source community build on top of the C API supported by the TensorFlow maintainers. #### Does TensorFlow make use of all the devices (GPUs and CPUs) available on my machine? @@ -210,8 +209,8 @@ a new tensor with a different dynamic shape. #### How do I build a graph that works with variable batch sizes? 
-It is often useful to build a graph that works with variable batch sizes, for -example so that the same code can be used for (mini-)batch training, and +It is often useful to build a graph that works with variable batch sizes +so that the same code can be used for (mini-)batch training, and single-instance inference. The resulting graph can be @{tf.Graph.as_graph_def$saved as a protocol buffer} and @@ -260,7 +259,7 @@ See the how-to documentation for There are three main options for dealing with data in a custom format. The easiest option is to write parsing code in Python that transforms the data -into a numpy array. Then use @{tf.data.Dataset.from_tensor_slices} to +into a numpy array. Then, use @{tf.data.Dataset.from_tensor_slices} to create an input pipeline from the in-memory data. If your data doesn't fit in memory, try doing the parsing in the Dataset @@ -274,7 +273,7 @@ If your data is not easily parsable with the built-in TensorFlow operations, consider converting it, offline, to a format that is easily parsable, such as @{tf.python_io.TFRecordWriter$`TFRecord`} format. -The more efficient method to customize the parsing behavior is to +The most efficient method to customize the parsing behavior is to @{$adding_an_op$add a new op written in C++} that parses your data format. The @{$new_data_formats$guide to handling new data formats} has more information about the steps for doing this. 
diff --git a/tensorflow/docs_src/programmers_guide/tensors.md b/tensorflow/docs_src/programmers_guide/tensors.md index 58a80d533927e4..1248c3cabe23c8 100644 --- a/tensorflow/docs_src/programmers_guide/tensors.md +++ b/tensorflow/docs_src/programmers_guide/tensors.md @@ -265,7 +265,7 @@ example: ```python constant = tf.constant([1, 2, 3]) tensor = constant * constant -print tensor.eval() +print(tensor.eval()) ``` The `eval` method only works when a default `tf.Session` is active (see @@ -306,8 +306,8 @@ Note that you rarely want to use the following pattern when printing a ``` python t = <> -print t # This will print the symbolic tensor when the graph is being built. - # This tensor does not have a value in this context. +print(t) # This will print the symbolic tensor when the graph is being built. + # This tensor does not have a value in this context. ``` This code prints the `tf.Tensor` object (which represents deferred computation) diff --git a/tensorflow/docs_src/programmers_guide/variables.md b/tensorflow/docs_src/programmers_guide/variables.md index e8cf7711552f4c..cd8c4b5b9a026f 100644 --- a/tensorflow/docs_src/programmers_guide/variables.md +++ b/tensorflow/docs_src/programmers_guide/variables.md @@ -237,7 +237,7 @@ TensorFlow supports two ways of sharing variables: While code which explicitly passes variables around is very clear, it is sometimes convenient to write TensorFlow functions that implicitly use variables in their implementations. Most of the functional layers from -`tf.layer` use this approach, as well as all `tf.metrics`, and a few other +`tf.layers` use this approach, as well as all `tf.metrics`, and a few other library utilities. 
Variable scopes allow you to control variable reuse when calling functions which diff --git a/tensorflow/docs_src/tutorials/layers.md b/tensorflow/docs_src/tutorials/layers.md index ead5a636b99a02..0f17899dae7ccd 100644 --- a/tensorflow/docs_src/tutorials/layers.md +++ b/tensorflow/docs_src/tutorials/layers.md @@ -209,7 +209,6 @@ for two-dimensional image data expect input tensors to have a shape of * _`channels`_. Number of color channels in the example images. For color images, the number of channels is 3 (red, green, blue). For monochrome images, there is just 1 channel (black). -* _`image_height`_. Height of the example images. * _`data_format`_. A string, one of `channels_last` (default) or `channels_first`. `channels_last` corresponds to inputs with shape `(batch, ..., channels)` while `channels_first` corresponds to diff --git a/tensorflow/examples/learn/text_classification_cnn.py b/tensorflow/examples/learn/text_classification_cnn.py index 9e21aee87f6298..a40a9eaecbd9bb 100644 --- a/tensorflow/examples/learn/text_classification_cnn.py +++ b/tensorflow/examples/learn/text_classification_cnn.py @@ -73,7 +73,7 @@ def cnn_model(features, labels, mode): kernel_size=FILTER_SHAPE2, padding='VALID') # Max across each filter to get useful features for classification. - pool2 = tf.squeeze(tf.reduce_max(conv2, 1), squeeze_dims=[1]) + pool2 = tf.squeeze(tf.reduce_max(conv2, 1), axis=[1]) # Apply regular WX + B and classification. logits = tf.layers.dense(pool2, MAX_LABEL, activation=None) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index a503b3b00af71f..36db3dda6bcf07 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -21321,7 +21321,7 @@ func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr { // generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. // // The `bad_color` argument is the color to use in the generated images for -// non-finite input values. 
It is a `unit8` 1-D tensor of length `channels`. +// non-finite input values. It is a `uint8` 1-D tensor of length `channels`. // Each element must be in the range `[0, 255]` (It represents the value of a // pixel in the output image). Non-finite values in the input tensor are // replaced by this tensor in the output image. The default value is the color diff --git a/tensorflow/python/data/util/nest.py b/tensorflow/python/data/util/nest.py index 9af2e9b8b62abe..32e08021dc80d1 100644 --- a/tensorflow/python/data/util/nest.py +++ b/tensorflow/python/data/util/nest.py @@ -103,7 +103,7 @@ def is_sequence(seq): NOTE(mrry): This differs from `tensorflow.python.util.nest.is_sequence()`, which *does* treat a Python list as a sequence. For ergonomic reasons, `tf.data` users would prefer to treat lists as - implict `tf.Tensor` objects, and dicts as (nested) sequences. + implicit `tf.Tensor` objects, and dicts as (nested) sequences. Args: seq: an input sequence. diff --git a/tensorflow/python/debug/cli/curses_ui.py b/tensorflow/python/debug/cli/curses_ui.py index f66cefb427c9cc..7b87972d694981 100644 --- a/tensorflow/python/debug/cli/curses_ui.py +++ b/tensorflow/python/debug/cli/curses_ui.py @@ -190,8 +190,6 @@ def layout(self): return layout def get_click_command(self, mouse_y): - # TODO(cais): Support continuous scrolling when the mouse button is held - # down. if self._output_num_rows <= 1: return None elif mouse_y == self._min_y: @@ -271,6 +269,10 @@ class CursesUI(base_ui.BaseUI): _UI_WAIT_MESSAGE = "Processing..." + # The delay (in ms) between each update of the scroll bar when the mouse + # button is held down on the scroll bar. Controls how fast the screen scrolls. 
+ _MOUSE_SCROLL_DELAY_MS = 100 + _single_instance_lock = threading.Lock() def __init__(self, on_ui_exit=None, config=None): @@ -855,7 +857,30 @@ def _on_textbox_keypress(self, x): except curses.error: mouse_event_type = None - if mouse_event_type == curses.BUTTON1_RELEASED: + if mouse_event_type == curses.BUTTON1_PRESSED: + # Logic for held mouse-triggered scrolling. + if mouse_x >= self._max_x - 2: + # Disable blocking on checking for user input. + self._command_window.nodelay(True) + + # Loop while mouse button is pressed. + while mouse_event_type == curses.BUTTON1_PRESSED: + # Sleep for a bit. + curses.napms(self._MOUSE_SCROLL_DELAY_MS) + scroll_command = self._scroll_bar.get_click_command(mouse_y) + if scroll_command in (_SCROLL_UP_A_LINE, _SCROLL_DOWN_A_LINE): + self._scroll_output(scroll_command) + + # Check to see if different mouse event is in queue. + self._command_window.getch() + try: + _, _, _, _, mouse_event_type = self._screen_getmouse() + except curses.error: + pass + + self._command_window.nodelay(False) + return x + elif mouse_event_type == curses.BUTTON1_RELEASED: # Logic for mouse-triggered scrolling. 
if mouse_x >= self._max_x - 2: scroll_command = self._scroll_bar.get_click_command(mouse_y) @@ -1677,4 +1702,7 @@ def _set_mouse_enabled(self, enabled): self._redraw_output() def _screen_set_mousemask(self): - curses.mousemask(self._mouse_enabled) + if self._mouse_enabled: + curses.mousemask(curses.BUTTON1_RELEASED | curses.BUTTON1_PRESSED) + else: + curses.mousemask(0) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 9b4b8666974aff..347a760333a1bd 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -1163,7 +1163,7 @@ def _train_model_distributed(self, input_fn, hooks, saving_listeners): model_fn_lib.ModeKeys.TRAIN, self.config) - # TODO(anjalisridhar): Figure out how to resolve the folowing scaffold + # TODO(anjalisridhar): Figure out how to resolve the following scaffold # parameters: init_feed_dict, init_fn. scaffold_list = self._distribution.unwrap( grouped_estimator_spec.scaffold) diff --git a/tensorflow/python/estimator/inputs/queues/feeding_functions.py b/tensorflow/python/estimator/inputs/queues/feeding_functions.py index 8e5d8141a1a15d..8e2ec83020abc5 100644 --- a/tensorflow/python/estimator/inputs/queues/feeding_functions.py +++ b/tensorflow/python/estimator/inputs/queues/feeding_functions.py @@ -52,7 +52,7 @@ def _fill_array(arr, seq, fillvalue=0): If length of seq is less than arr padded length, fillvalue used. Args: arr: Padded tensor of shape [batch_size, ..., max_padded_dim_len]. - seq: Non-padded list of data sampels of shape + seq: Non-padded list of data samples of shape [batch_size, ..., padded_dim(None)] fillvalue: Default fillvalue to use. 
""" diff --git a/tensorflow/python/estimator/keras.py b/tensorflow/python/estimator/keras.py index 9961fa74c2184d..7bcf3d84bb4218 100644 --- a/tensorflow/python/estimator/keras.py +++ b/tensorflow/python/estimator/keras.py @@ -74,7 +74,7 @@ def _any_variable_initalized(): """Check if any variable has been initialized in the Keras model. Returns: - boolean, True if at least one variable has been initalized, else False. + boolean, True if at least one variable has been initialized, else False. """ variables = variables_module.global_variables() for v in variables: diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py index 08fff3ba647bb5..522662cd328d7b 100644 --- a/tensorflow/python/estimator/training.py +++ b/tensorflow/python/estimator/training.py @@ -597,7 +597,7 @@ def after_save(self, session, global_step_value): # max_steps, the evaluator will send the final export signal. There is a # small chance that the Estimator.train stopping logic sees a different # global_step value (due to global step race condition and the fact the - # saver sees a larger value for checkpoing saving), which does not end + # saver sees a larger value for checkpoint saving), which does not end # the training. When the training ends, a new checkpoint is generated, which # triggers the listener again. So, it could be the case the final export is # triggered twice. diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index ede6e0d15957da..ffcb9990d52c2e 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -48,7 +48,7 @@ embedded_dept_column = embedding_column( categorical_column_with_vocabulary_list( - "department", ["math", "philosphy", ...]), dimension=10) + "department", ["math", "philosophy", ...]), dimension=10) * Wide (aka linear) models (`LinearClassifier`, `LinearRegressor`). 
@@ -280,7 +280,7 @@ def input_layer(features, # TODO(akshayka): InputLayer should be a subclass of Layer, and it # should implement the logic in input_layer using Layer's build-and-call # paradigm; input_layer should create an instance of InputLayer and -# return the result of inovking its apply method, just as functional layers do. +# return the result of invoking its apply method, just as functional layers do. class InputLayer(object): """An object-oriented version of `input_layer` that reuses variables.""" @@ -834,7 +834,7 @@ def shared_embedding_columns( tensor_name_in_ckpt=None, max_norm=None, trainable=True): """List of dense columns that convert from sparse, categorical input. - This is similar to `embedding_column`, except that that it produces a list of + This is similar to `embedding_column`, except that it produces a list of embedding columns that share the same embedding weights. Use this when your inputs are sparse and of the same type (e.g. watched and diff --git a/tensorflow/python/framework/fast_tensor_util.pyx b/tensorflow/python/framework/fast_tensor_util.pyx index 19928314efe143..17d112a1ece9ae 100644 --- a/tensorflow/python/framework/fast_tensor_util.pyx +++ b/tensorflow/python/framework/fast_tensor_util.pyx @@ -7,6 +7,18 @@ cimport numpy as np from tensorflow.python.util import compat +def AppendFloat16ArrayToTensorProto( + # For numpy, npy_half is a typedef for npy_uint16, + # see: https://github.com/numpy/numpy/blob/master/doc/source/reference/c-api.coremath.rst#half-precision-functions + # Because np.float16_t doesn't exist in cython, we use uint16_t here. + # TODO: Use np.float16_t when cython supports it.
+ tensor_proto, np.ndarray[np.uint16_t, ndim=1] nparray): + cdef long i, n + n = nparray.size + for i in range(n): + tensor_proto.half_val.append(nparray[i]) + + def AppendFloat32ArrayToTensorProto( tensor_proto, np.ndarray[np.float32_t, ndim=1] nparray): cdef long i, n diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 80140e4063d9a4..9fc813634838b6 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -2582,7 +2582,7 @@ def set_shape_and_handle_data_for_outputs(op): When _USE_C_API = True, this is lazily called when a tensor's shape is first requested. Usually this should work automatically, but some edge cases may - require manaully calling this first to make sure Tensor._shape_val and + require manually calling this first to make sure Tensor._shape_val and Tensor._handle_data are set (e.g. manually overriding _handle_data, copying a Tensor). """ @@ -5426,36 +5426,30 @@ def enable_eager_execution(config=None, device_policy=None, in which operations are executed. Note that @{tf.ConfigProto} is also used to configure graph execution (via @{tf.Session}) and many options within `tf.ConfigProto` are not implemented (or are irrelevant) when - eager execution is enabled. + eager execution is enabled. device_policy: (Optional.) Policy controlling how operations requiring - inputs on a specific device (e.g., a GPU 0) handle inputs on a different - device (e.g. GPU 1 or CPU). When set to None, an appropriate value will be - picked automatically. The value picked may change between TensorFlow - releases. - Valid values: - + inputs on a specific device (e.g., a GPU 0) handle inputs on a different + device (e.g. GPU 1 or CPU). When set to None, an appropriate value will be + picked automatically. The value picked may change between TensorFlow + releases. + Valid values: - tf.contrib.eager.DEVICE_PLACEMENT_EXPLICIT: raises an error if the placement is not correct. 
- - tf.contrib.eager.DEVICE_PLACEMENT_WARN: copies the tensors which are not on the right device but logs a warning. - - tf.contrib.eager.DEVICE_PLACEMENT_SILENT: silently copies the tensors. Note that this may hide performance problems as there is no notification provided when operations are blocked on the tensor being copied between devices. - - tf.contrib.eager.DEVICE_PLACEMENT_SILENT_FOR_INT32: silently copies int32 tensors, raising errors on the other ones. execution_mode: (Optional.) Policy controlling how operations dispatched are actually executed. When set to None, an appropriate value will be picked automatically. The value picked may change between TensorFlow releases. Valid values: - - - tf.contrib.eager.SYNC: executes each operation synchronously. - - - tf.contrib.eager.ASYNC: executes each operation asynchronously. These - operations may return "non-ready" handles. + - tf.contrib.eager.SYNC: executes each operation synchronously. + - tf.contrib.eager.ASYNC: executes each operation asynchronously. These + operations may return "non-ready" handles. 
Raises: ValueError: If eager execution is enabled after creating/executing a diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index 8cf24206edab8b..ca63efbc84dab2 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -50,6 +50,13 @@ def SlowAppendFloat16ArrayToTensorProto(tensor_proto, proto_values): [ExtractBitsFromFloat16(x) for x in proto_values]) +def _MediumAppendFloat16ArrayToTensorProto(tensor_proto, proto_values): + # TODO: Remove the conversion if cython supports np.float16_t + fast_tensor_util.AppendFloat16ArrayToTensorProto( + tensor_proto, + np.asarray(proto_values, dtype=np.float16).view(np.uint16)) + + def ExtractBitsFromBFloat16(x): return np.asscalar( np.asarray(x, dtype=dtypes.bfloat16.as_numpy_dtype).view(np.uint16)) @@ -64,11 +71,8 @@ def SlowAppendBFloat16ArrayToTensorProto(tensor_proto, proto_values): _NP_TO_APPEND_FN = { dtypes.bfloat16.as_numpy_dtype: SlowAppendBFloat16ArrayToTensorProto, - # TODO(sesse): We should have a - # fast_tensor_util.AppendFloat16ArrayToTensorProto, - # but it seems np.float16_t doesn't exist? np.float16: - SlowAppendFloat16ArrayToTensorProto, + _MediumAppendFloat16ArrayToTensorProto, np.float32: fast_tensor_util.AppendFloat32ArrayToTensorProto, np.float64: diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 97cd22e47a0959..5b01df48fea19f 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -682,7 +682,7 @@ def test_foo(self): Args: - __unused__: Prevents sliently skipping tests. + __unused__: Prevents silently skipping tests. config: An optional config_pb2.ConfigProto to use to configure the session when executing graphs. use_gpu: If True, attempt to run as many operations as possible on GPU. 
diff --git a/tensorflow/python/keras/utils/__init__.py b/tensorflow/python/keras/utils/__init__.py index 7b5eecc153fb19..69337b6a8d52ab 100644 --- a/tensorflow/python/keras/utils/__init__.py +++ b/tensorflow/python/keras/utils/__init__.py @@ -20,6 +20,7 @@ from tensorflow.python.keras.utils.data_utils import GeneratorEnqueuer from tensorflow.python.keras.utils.data_utils import get_file +from tensorflow.python.keras.utils.data_utils import OrderedEnqueuer from tensorflow.python.keras.utils.data_utils import Sequence from tensorflow.python.keras.utils.data_utils import SequenceEnqueuer from tensorflow.python.keras.utils.generic_utils import custom_object_scope diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 72cc357c71dbdb..3dfad9c130ca12 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -741,6 +741,18 @@ tf_py_test( ], ) +tf_py_test( + name = "regex_full_match_op_test", + size = "small", + srcs = ["regex_full_match_op_test.py"], + additional_deps = [ + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:string_ops", + ], +) + tf_py_test( name = "save_restore_ops_test", size = "small", diff --git a/tensorflow/python/kernel_tests/conv1d_test.py b/tensorflow/python/kernel_tests/conv1d_test.py index e2e6205911caa0..fcba456004407b 100644 --- a/tensorflow/python/kernel_tests/conv1d_test.py +++ b/tensorflow/python/kernel_tests/conv1d_test.py @@ -31,9 +31,7 @@ class Conv1DTest(test.TestCase): def testBasic(self): """Test that argument passing to conv1d is handled properly.""" - # TODO(yongtang): dtypes.float64 can only be enabled once conv2d support - # dtypes.float64, as conv1d implicitly calls conv2d after expand_dims. 
- for dtype in [dtypes.float16, dtypes.float32]: + for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]: x = constant_op.constant([1, 2, 3, 4], dtype=dtype) x = array_ops.expand_dims(x, 0) # Add batch dimension x = array_ops.expand_dims(x, 2) # And depth dimension diff --git a/tensorflow/python/kernel_tests/conv3d_transpose_test.py b/tensorflow/python/kernel_tests/conv3d_transpose_test.py index 8973a450fa246e..289ae29fcec724 100644 --- a/tensorflow/python/kernel_tests/conv3d_transpose_test.py +++ b/tensorflow/python/kernel_tests/conv3d_transpose_test.py @@ -131,6 +131,23 @@ def testConv3DTransposeShapeMismatch(self): nn_ops.conv3d_transpose( x_value, f_value, y_shape, strides, data_format='NCDHW') + def testConv3DTransposeOutputShapeType(self): + # Test case for GitHub issue 18887 + for dtype in [dtypes.int32, dtypes.int64]: + with self.test_session(): + x_shape = [2, 5, 6, 4, 3] + y_shape = [2, 5, 6, 4, 2] + f_shape = [3, 3, 3, 2, 3] + strides = [1, 1, 1, 1, 1] + x_value = constant_op.constant( + 1.0, shape=x_shape, name="x", dtype=dtypes.float32) + f_value = constant_op.constant( + 1.0, shape=f_shape, name="filter", dtype=dtypes.float32) + output = nn_ops.conv3d_transpose( + x_value, f_value, constant_op.constant(y_shape, dtype=dtype), + strides=strides, padding="SAME") + output.eval() + def testConv3DTransposeValid(self): with self.test_session(): strides = [1, 2, 2, 2, 1] diff --git a/tensorflow/python/kernel_tests/distributions/util_test.py b/tensorflow/python/kernel_tests/distributions/util_test.py index 8e5556d0a0421d..63d19c15cfdc8c 100644 --- a/tensorflow/python/kernel_tests/distributions/util_test.py +++ b/tensorflow/python/kernel_tests/distributions/util_test.py @@ -735,7 +735,7 @@ def _fill_triangular(self, x, upper=False): raise ValueError("Invalid shape.") n = np.int32(n) # We can't do: `x[..., -(n**2-m):]` because this doesn't correctly handle - # `m == n == 1`. Hence, we do absoulte indexing. + # `m == n == 1`. 
Hence, we do absolute indexing. x_tail = x[..., (m - (n * n - m)):] y = np.concatenate( [x, x_tail[..., ::-1]] if upper else [x_tail, x[..., ::-1]], diff --git a/tensorflow/python/kernel_tests/manip_ops_test.py b/tensorflow/python/kernel_tests/manip_ops_test.py index f31426713c49ba..dc3ea386714c98 100644 --- a/tensorflow/python/kernel_tests/manip_ops_test.py +++ b/tensorflow/python/kernel_tests/manip_ops_test.py @@ -93,7 +93,7 @@ def testComplexTypes(self): def testNegativeAxis(self): self._testAll(np.random.randint(-100, 100, (5)).astype(np.int32), 3, -1) self._testAll(np.random.randint(-100, 100, (4, 4)).astype(np.int32), 3, -2) - # Make sure negative axis shoudl be 0 <= axis + dims < dims + # Make sure negative axis should be 0 <= axis + dims < dims with self.test_session(): with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "is out of range"): diff --git a/tensorflow/python/kernel_tests/regex_full_match_op_test.py b/tensorflow/python/kernel_tests/regex_full_match_op_test.py new file mode 100644 index 00000000000000..5daae1b79bf493 --- /dev/null +++ b/tensorflow/python/kernel_tests/regex_full_match_op_test.py @@ -0,0 +1,54 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for RegexFullMatch op from string_ops.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import string_ops +from tensorflow.python.platform import test + + +class RegexFullMatchOpTest(test.TestCase): + + def testRegexFullMatch(self): + values = ["abaaba", "abcdabcde"] + with self.test_session(): + input_vector = constant_op.constant(values, dtypes.string) + matched = string_ops.regex_full_match(input_vector, "a.*a").eval() + self.assertAllEqual([True, False], matched) + + def testEmptyMatch(self): + values = ["abc", "1"] + with self.test_session(): + input_vector = constant_op.constant(values, dtypes.string) + matched = string_ops.regex_full_match(input_vector, "").eval() + self.assertAllEqual([False, False], matched) + + def testInvalidPattern(self): + values = ["abc", "1"] + with self.test_session(): + input_vector = constant_op.constant(values, dtypes.string) + invalid_pattern = "A[" + matched = string_ops.regex_full_match(input_vector, invalid_pattern) + with self.assertRaisesOpError("Invalid pattern"): + matched.eval() + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index 3bca5fadc42693..794be096b7309a 100644 --- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -91,16 +91,18 @@ def testValues(self): ] # Each item is np_op1, np_op2, tf_op - ops_list = [(np.add, None, math_ops.segment_sum), (self._mean_cum_op, - self._mean_reduce_op, - math_ops.segment_mean), + ops_list = [(np.add, None, math_ops.segment_sum), + (self._mean_cum_op, self._mean_reduce_op, + 
math_ops.segment_mean), (np.ndarray.__mul__, None, math_ops.segment_prod), (np.minimum, None, math_ops.segment_min), (np.maximum, None, math_ops.segment_max)] # A subset of ops has been enabled for complex numbers complex_ops_list = [(np.add, None, math_ops.segment_sum), - (np.ndarray.__mul__, None, math_ops.segment_prod)] + (np.ndarray.__mul__, None, math_ops.segment_prod), + (self._mean_cum_op, self._mean_reduce_op, + math_ops.segment_mean)] n = 10 shape = [n, 2] diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 340c34fc5e6bf0..eda036ece4a7d7 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -191,6 +191,16 @@ def add_weight(self, name, shape, dtype=None, RuntimeError: If called with partioned variable regularization and eager execution is enabled. """ + + def _should_add_regularizer(variable, existing_variable_set): + if isinstance(variable, tf_variables.PartitionedVariable): + for var in variable: + if var in existing_variable_set: + return False + return True + else: + return variable not in existing_variable_set + init_graph = None if not context.executing_eagerly(): default_graph = ops.get_default_graph() @@ -233,7 +243,8 @@ def add_weight(self, name, shape, dtype=None, getter=vs.get_variable) if regularizer: - if context.executing_eagerly() or variable not in existing_variables: + if context.executing_eagerly() or _should_add_regularizer( + variable, existing_variables): self._handle_weight_regularization(name, variable, regularizer) if init_graph is not None: @@ -353,4 +364,3 @@ def _add_elements_to_collection(elements, collection_list): for element in elements: if element not in collection_set: collection.append(element) - diff --git a/tensorflow/python/layers/base_test.py b/tensorflow/python/layers/base_test.py index f08b552840f5ff..ab49e37b90e183 100644 --- a/tensorflow/python/layers/base_test.py +++ b/tensorflow/python/layers/base_test.py @@ -30,6 +30,7 @@ from 
tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import partitioned_variables from tensorflow.python.ops import random_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope @@ -95,6 +96,21 @@ def testAddWeight(self): regularizer=regularizer) self.assertEqual(len(layer.losses), 1) + def testReusePartitionedVaraiblesAndRegularizers(self): + regularizer = lambda x: math_ops.reduce_sum(x) * 1e-3 + partitioner = partitioned_variables.fixed_size_partitioner(3) + for reuse in [False, True]: + with variable_scope.variable_scope(variable_scope.get_variable_scope(), + partitioner=partitioner, + reuse=reuse): + layer = base_layers.Layer(name='my_layer') + variable = layer.add_variable( + 'reg_part_var', [4, 4], + initializer=init_ops.zeros_initializer(), + regularizer=regularizer) + self.assertEqual( + len(ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)), 3) + def testNoEagerActivityRegularizer(self): with context.eager_mode(): with self.assertRaisesRegexp(ValueError, 'activity_regularizer'): diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index ab5997e85c6030..3a31ef7f881490 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -1285,7 +1285,7 @@ def reduce_sum(input_tensor, The reduced tensor, of the same dtype as the input_tensor. @compatibility(numpy) - Equivalent to np.sum appart the fact that numpy upcast uint8 and int32 to + Equivalent to np.sum apart the fact that numpy upcast uint8 and int32 to int64 while tensorflow returns the same dtype as the input. 
@end_compatibility """ diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py index 1271ee5108fc47..ae79c0194954a0 100644 --- a/tensorflow/python/ops/string_ops.py +++ b/tensorflow/python/ops/string_ops.py @@ -39,6 +39,8 @@ from tensorflow.python.util.tf_export import tf_export # pylint: enable=wildcard-import +# Expose regex_full_match in strings namespace +tf_export("strings.regex_full_match")(regex_full_match) @tf_export("string_split") def string_split(source, delimiter=" ", skip_empty=True): # pylint: disable=invalid-name diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py index 75580fc6308345..9e49188c1ef353 100644 --- a/tensorflow/python/profiler/model_analyzer_test.py +++ b/tensorflow/python/profiler/model_analyzer_test.py @@ -232,7 +232,12 @@ def testComplexCodeView(self): self.assertLess(0, tfprof_node.total_exec_micros) self.assertEqual(2844, tfprof_node.total_parameters) - self.assertLess(145660, tfprof_node.total_float_ops) + #The graph is modified when MKL is enabled,total_float_ops will + #be different + if test_util.IsMklEnabled(): + self.assertLess(101600, tfprof_node.total_float_ops) + else: + self.assertLess(145660, tfprof_node.total_float_ops) self.assertEqual(8, len(tfprof_node.children)) self.assertEqual('_TFProfRoot', tfprof_node.name) self.assertEqual( diff --git a/tensorflow/python/saved_model/builder_impl.py b/tensorflow/python/saved_model/builder_impl.py index 4b3982677fbd85..24a13c0f336aa9 100644 --- a/tensorflow/python/saved_model/builder_impl.py +++ b/tensorflow/python/saved_model/builder_impl.py @@ -130,7 +130,8 @@ def _save_and_write_assets(self, assets_collection_to_add=None): if not file_io.file_exists(asset_destination_filepath): file_io.copy(asset_source_filepath, asset_destination_filepath) - tf_logging.info("Assets written to: %s", assets_destination_dir) + tf_logging.info("Assets written to: %s", +
compat.as_text(assets_destination_dir)) def _maybe_add_legacy_init_op(self, legacy_init_op=None): """Add legacy init op to the SavedModel. @@ -461,7 +462,7 @@ def save(self, as_text=False): compat.as_bytes(self._export_dir), compat.as_bytes(constants.SAVED_MODEL_FILENAME_PB)) file_io.write_string_to_file(path, self._saved_model.SerializeToString()) - tf_logging.info("SavedModel written to: %s", path) + tf_logging.info("SavedModel written to: %s", compat.as_text(path)) return path diff --git a/tensorflow/python/training/distribute.py b/tensorflow/python/training/distribute.py index 6d05a2ee29ada7..ab8b37bb655bfc 100644 --- a/tensorflow/python/training/distribute.py +++ b/tensorflow/python/training/distribute.py @@ -750,7 +750,7 @@ def call_for_each_tower(self, fn, *args, **kwargs): `fn` may call `tf.get_tower_context()` to access methods such as `tower_id()` and `merge_call()`. - `merge_call()` is used to communicate betwen the towers and + `merge_call()` is used to communicate between the towers and re-enter the cross-tower context. All towers pause their execution having encountered a `merge_call()` call. After that the `merge_fn`-function is executed. 
Its results are then unwrapped and diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py index fc89f88063e4b9..4d464135fd0333 100644 --- a/tensorflow/python/training/saver.py +++ b/tensorflow/python/training/saver.py @@ -1743,7 +1743,7 @@ def restore(self, sess, save_path): return if save_path is None: raise ValueError("Can't load save_path when it is None.") - logging.info("Restoring parameters from %s", save_path) + logging.info("Restoring parameters from %s", compat.as_text(save_path)) try: if context.executing_eagerly(): self._build_eager(save_path, build_save=False, build_restore=True) diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py index 5faf644c91e91b..fbd65617670b15 100644 --- a/tensorflow/python/util/tf_inspect.py +++ b/tensorflow/python/util/tf_inspect.py @@ -232,7 +232,7 @@ def getcallargs(func, *positional, **named): it. If no attached decorators modify argspec, the final unwrapped target's argspec will be used. """ - argspec = getargspec(func) + argspec = getfullargspec(func) call_args = named.copy() this = getattr(func, 'im_self', None) or getattr(func, '__self__', None) if ismethod(func) and this: diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc index 2b33d106bc8abe..0f465eda4fc44f 100644 --- a/tensorflow/python/util/util.cc +++ b/tensorflow/python/util/util.cc @@ -320,7 +320,7 @@ void SetDifferentKeysError(PyObject* dict1, PyObject* dict2, string* error_msg, // Returns true iff there were no "internal" errors. In other words, // errors that has nothing to do with structure checking. -// If an "internal" error occured, the appropriate Python error will be +// If an "internal" error occurred, the appropriate Python error will be // set and the caller can propage it directly to the user. // // Both `error_msg` and `is_type_error` must be non-null. 
`error_msg` must diff --git a/tensorflow/python/util/util.h b/tensorflow/python/util/util.h index 9851c11c2efda4..70efc10c9abe7c 100644 --- a/tensorflow/python/util/util.h +++ b/tensorflow/python/util/util.h @@ -97,7 +97,7 @@ PyObject* AssertSameStructure(PyObject* o1, PyObject* o2, bool check_types); // used instead. The same convention is followed in `pack_sequence_as`. This // correctly repacks dicts and `OrderedDict`s after they have been flattened, // and also allows flattening an `OrderedDict` and then repacking it back using -// a correponding plain dict, or vice-versa. +// a corresponding plain dict, or vice-versa. // Dictionaries with non-sortable keys cannot be flattened. // // Args: diff --git a/tensorflow/stream_executor/blas.h b/tensorflow/stream_executor/blas.h index be0b0bf5fb20b2..ea87744b225215 100644 --- a/tensorflow/stream_executor/blas.h +++ b/tensorflow/stream_executor/blas.h @@ -1083,6 +1083,13 @@ class BlasSupport { // This is a batched version of DoBlasGemm. // The batched GEMM computes matrix product for each input/output in a, b, // and c, which contain batch_count DeviceMemory objects. 
+ virtual bool DoBlasGemmBatched( + Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, + uint64 n, uint64 k, float alpha, + const port::ArraySlice *> &a, int lda, + const port::ArraySlice *> &b, int ldb, + float beta, const port::ArraySlice *> &c, + int ldc, int batch_count, ScratchAllocator *scratch_allocator) = 0; virtual bool DoBlasGemmBatched( Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, uint64 k, float alpha, @@ -1945,6 +1952,13 @@ class BlasSupport { DeviceMemory> *c, int ldc, \ blas::ComputationType computation_type, blas::AlgorithmType algorithm, \ blas::ProfileResult *output_profile_result) override; \ + bool DoBlasGemmBatched( \ + Stream *stream, blas::Transpose transa, blas::Transpose transb, \ + uint64 m, uint64 n, uint64 k, float alpha, \ + const port::ArraySlice *> &a, int lda, \ + const port::ArraySlice *> &b, int ldb, \ + float beta, const port::ArraySlice *> &c, \ + int ldc, int batch_count, ScratchAllocator *scratch_allocator) override; \ bool DoBlasGemmBatched( \ Stream *stream, blas::Transpose transa, blas::Transpose transb, \ uint64 m, uint64 n, uint64 k, float alpha, \ diff --git a/tensorflow/stream_executor/cuda/cuda_blas.cc b/tensorflow/stream_executor/cuda/cuda_blas.cc index 3e9a23c6589b3d..08fe153b5909d3 100644 --- a/tensorflow/stream_executor/cuda/cuda_blas.cc +++ b/tensorflow/stream_executor/cuda/cuda_blas.cc @@ -286,6 +286,10 @@ STREAM_EXECUTOR_CUBLAS_WRAP(cublasGetMathMode) STREAM_EXECUTOR_CUBLAS_WRAP(cublasSetMathMode) #endif +#if CUDA_VERSION >= 9010 +STREAM_EXECUTOR_CUBLAS_WRAP(cublasGemmBatchedEx) +#endif + } // namespace wrap static string ToString(cublasStatus_t status) { @@ -2330,13 +2334,23 @@ bool CUDABlas::DoBlasGemmWithAlgorithm( computation_type, algorithm, output_profile_result); } -template +template +struct HalfAsFloat { + typedef T type; +}; + +template <> +struct HalfAsFloat { + typedef float type; +}; + +template port::Status 
CUDABlas::DoBlasGemmBatchedInternal( FuncT cublas_func, Stream *stream, blas::Transpose transa, - blas::Transpose transb, uint64 m, uint64 n, uint64 k, T alpha, + blas::Transpose transb, uint64 m, uint64 n, uint64 k, Scalar alpha, const port::ArraySlice *> &a_ptrs_to_wrappers, int lda, const port::ArraySlice *> &b_ptrs_to_wrappers, int ldb, - T beta, const port::ArraySlice *> &c_ptrs_to_wrappers, + Scalar beta, const port::ArraySlice *> &c_ptrs_to_wrappers, int ldc, int batch_count, ScratchAllocator *scratch_allocator) { std::vector a_raw_ptrs, b_raw_ptrs, c_raw_ptrs; for (int i = 0; i < batch_count; ++i) { @@ -2345,7 +2359,7 @@ port::Status CUDABlas::DoBlasGemmBatchedInternal( c_raw_ptrs.push_back(static_cast(c_ptrs_to_wrappers[i]->opaque())); } - typedef typename CUDAComplexT::type CUDA_T; + typedef typename HalfAsFloat::type>::type CUDA_T; const size_t size = batch_count * sizeof(CUDA_T *); @@ -2397,18 +2411,84 @@ port::Status CUDABlas::DoBlasGemmBatchedInternal( "CUDABlas::DoBlasGemmBatched"); } - bool ok = DoBlasInternal( - cublas_func, stream, true /* = pointer_mode_host */, - CUDABlasTranspose(transa), CUDABlasTranspose(transb), m, n, k, - CUDAComplex(&alpha), const_cast(CUDAMemory(a)), lda, - const_cast(CUDAMemory(b)), ldb, CUDAComplex(&beta), - const_cast(CUDAMemory(c)), ldc, batch_count); + cudaDataType_t data_type = CUDADataType::type; - if (ok) { +#if CUDA_VERSION >= 9010 + int cc_major, cc_minor; + if (stream->parent()->GetDeviceDescription().cuda_compute_capability( + &cc_major, &cc_minor) && + cc_major >= 5) { + bool use_tensor_ops = TensorOpMathEnabled() && data_type == CUDA_R_16F; + cublasGemmAlgo_t algo = + (use_tensor_ops ? CUBLAS_GEMM_DFALT_TENSOR_OP : CUBLAS_GEMM_DFALT); + cudaDataType_t compute_type = + (data_type == CUDA_R_16F ? 
CUDA_R_32F : data_type); + const void **a_void_ptrs = reinterpret_cast( + const_cast(CUDAMemory(a))); + const void **b_void_ptrs = reinterpret_cast( + const_cast(CUDAMemory(b))); + void **c_void_ptrs = + reinterpret_cast(const_cast(CUDAMemory(c))); + bool ok; + ok = DoBlasInternalImpl( + wrap::cublasGemmBatchedEx, stream, true /* = pointer_mode_host */, + true /* = err_on_failure */, use_tensor_ops, CUDABlasTranspose(transa), + CUDABlasTranspose(transb), m, n, k, &alpha, a_void_ptrs, data_type, lda, + b_void_ptrs, data_type, ldb, &beta, c_void_ptrs, data_type, ldc, + batch_count, compute_type, algo); + if (ok) { + return port::Status::OK(); + } + return port::Status(port::error::INTERNAL, + "failed BLAS call, see log for details"); + } +#endif + // either CUDA_VERSION < 9.1 or SM < 5.0 + if (data_type != CUDA_R_16F) { + bool ok = DoBlasInternal( + cublas_func, stream, true /* = pointer_mode_host */, + CUDABlasTranspose(transa), CUDABlasTranspose(transb), m, n, k, + CUDAComplex(&alpha), const_cast(CUDAMemory(a)), lda, + const_cast(CUDAMemory(b)), ldb, CUDAComplex(&beta), + const_cast(CUDAMemory(c)), ldc, batch_count); + if (ok) { + return port::Status::OK(); + } + return port::Status(port::error::INTERNAL, + "failed BLAS call, see log for details"); + } else { + // Fall back to a loop for fp16 + for (int b = 0; b < batch_count; ++b) { + const DeviceMemory &a_matrix = *a_ptrs_to_wrappers[b]; + const DeviceMemory &b_matrix = *b_ptrs_to_wrappers[b]; + DeviceMemory *c_matrix = c_ptrs_to_wrappers[b]; + bool ok = DoBlasGemm(stream, transa, transb, m, n, k, alpha, a_matrix, + lda, b_matrix, ldb, beta, c_matrix, ldc); + if (!ok) { + return port::Status(port::error::INTERNAL, + "failed BLAS call, see log for details"); + } + } return port::Status::OK(); } - return port::Status(port::error::INTERNAL, - "failed BLAS call, see log for details"); +} + +bool CUDABlas::DoBlasGemmBatched( + Stream *stream, blas::Transpose transa, blas::Transpose transb, uint64 m, + uint64 n, uint64 
k, float alpha, + const port::ArraySlice *> &a_array, int lda, + const port::ArraySlice *> &b_array, int ldb, + float beta, const port::ArraySlice *> &c_array, + int ldc, int batch_count, ScratchAllocator *scratch_allocator) { + // Note: The func passed here (cublasSgemmBatched) is not actually called, + // due to special handling of fp16 inside DoBlasGemmBatchedInternal. + port::Status status = DoBlasGemmBatchedInternal( + wrap::cublasSgemmBatched, stream, transa, transb, m, n, k, alpha, a_array, + lda, b_array, ldb, beta, c_array, ldc, batch_count, scratch_allocator); + if (!status.ok()) { + LOG(ERROR) << status; + } + return status.ok(); } bool CUDABlas::DoBlasGemmBatched( diff --git a/tensorflow/stream_executor/cuda/cuda_blas.h b/tensorflow/stream_executor/cuda/cuda_blas.h index 12dc5e47fd1b9d..42b3fde5b0816f 100644 --- a/tensorflow/stream_executor/cuda/cuda_blas.h +++ b/tensorflow/stream_executor/cuda/cuda_blas.h @@ -107,12 +107,12 @@ class CUDABlas : public blas::BlasSupport { // A helper function to implement DoBlasGemmBatched interfaces for generic // types. 
- template + template port::Status DoBlasGemmBatchedInternal( FuncT cublas_func, Stream *stream, blas::Transpose transa, - blas::Transpose transb, uint64 m, uint64 n, uint64 k, T alpha, + blas::Transpose transb, uint64 m, uint64 n, uint64 k, Scalar alpha, const port::ArraySlice *> &a_array, int lda, - const port::ArraySlice *> &b_array, int ldb, T beta, + const port::ArraySlice *> &b_array, int ldb, Scalar beta, const port::ArraySlice *> &c_array, int ldc, int batch_count, ScratchAllocator *scratch_allocator); diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc index 2bc9b6b798865c..4a98cfe16460ff 100644 --- a/tensorflow/stream_executor/stream.cc +++ b/tensorflow/stream_executor/stream.cc @@ -4480,6 +4480,40 @@ Stream &Stream::ThenBlasTrsm(blas::Side side, blas::UpperLower uplo, n, alpha, a, lda, b, ldb); } +Stream &Stream::ThenBlasGemmBatched( + blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, + uint64 k, float alpha, + const port::ArraySlice *> &a, int lda, + const port::ArraySlice *> &b, int ldb, float beta, + const port::ArraySlice *> &c, int ldc, + int batch_count) { + return ThenBlasGemmBatchedWithScratch(transa, transb, m, n, k, alpha, a, lda, + b, ldb, beta, c, ldc, batch_count, + /*scratch_allocator=*/nullptr); +} + +Stream &Stream::ThenBlasGemmBatchedWithScratch( + blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, + uint64 k, float alpha, + const port::ArraySlice *> &a, int lda, + const port::ArraySlice *> &b, int ldb, float beta, + const port::ArraySlice *> &c, int ldc, + int batch_count, ScratchAllocator *scratch_allocator) { + VLOG_CALL(PARAM(transa), PARAM(transb), PARAM(m), PARAM(n), PARAM(k), + PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), + PARAM(beta), PARAM(c), PARAM(ldc), PARAM(batch_count)); + + ThenBlasImpl *> &, int, + const port::ArraySlice *> &, int, + float, const port::ArraySlice *> &, + int, int, ScratchAllocator *> + impl; + return impl(this, 
&blas::BlasSupport::DoBlasGemmBatched, transa, transb, m, n, + k, alpha, a, lda, b, ldb, beta, c, ldc, batch_count, + scratch_allocator); +} + Stream &Stream::ThenBlasGemmBatched( blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, uint64 k, float alpha, const port::ArraySlice *> &a, diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h index c6e37da6d14f63..3da1b856d6a41f 100644 --- a/tensorflow/stream_executor/stream.h +++ b/tensorflow/stream_executor/stream.h @@ -1471,6 +1471,13 @@ class Stream { blas::ProfileResult *output_profile_result); // See BlasSupport::DoBlasGemmBatched. + Stream &ThenBlasGemmBatched( + blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, + uint64 k, float alpha, + const port::ArraySlice *> &a, int lda, + const port::ArraySlice *> &b, int ldb, + float beta, const port::ArraySlice *> &c, + int ldc, int batch_count); Stream &ThenBlasGemmBatched(blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, uint64 k, float alpha, const port::ArraySlice *> &a, @@ -1503,6 +1510,13 @@ class Stream { std::complex beta, const port::ArraySlice> *> &c, int ldc, int batch_count); + Stream &ThenBlasGemmBatchedWithScratch( + blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, + uint64 k, float alpha, + const port::ArraySlice *> &a, int lda, + const port::ArraySlice *> &b, int ldb, + float beta, const port::ArraySlice *> &c, + int ldc, int batch_count, ScratchAllocator *scratch_allocator); Stream &ThenBlasGemmBatchedWithScratch( blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, uint64 k, float alpha, const port::ArraySlice *> &a, diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 880ec0523d397a..d71fd71bbd83ad 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -1300,7 +1300,7 @@ def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[], linkopts=[]): native.cc_library( name=basename + "_gpu", 
srcs=gpu_srcs, - copts=_cuda_copts(), + copts=_cuda_copts() + if_tensorrt(["-DGOOGLE_TENSORRT=1"]), deps=deps + if_cuda(cuda_deps)) cuda_deps.extend([":" + basename + "_gpu"]) @@ -1483,7 +1483,7 @@ def tf_py_wrap_cc(name, # This macro is for running python tests against system installed pip package # on Windows. # -# py_test is built as an exectuable python zip file on Windows, which contains all +# py_test is built as an executable python zip file on Windows, which contains all # dependencies of the target. Because of the C++ extensions, it would be very # inefficient if the py_test zips all runfiles, plus we don't need them when running # tests against system installed pip package. So we'd like to get rid of the deps diff --git a/tensorflow/tools/api/generator/BUILD b/tensorflow/tools/api/generator/BUILD index e58de5b63ea72a..f46bb4b5fcc5d6 100644 --- a/tensorflow/tools/api/generator/BUILD +++ b/tensorflow/tools/api/generator/BUILD @@ -101,6 +101,7 @@ genrule( "api/profiler/__init__.py", "api/python_io/__init__.py", "api/resource_loader/__init__.py", + "api/strings/__init__.py", "api/saved_model/__init__.py", "api/saved_model/builder/__init__.py", "api/saved_model/constants/__init__.py", diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index 74b1b39d9faea1..dc2bd40096a695 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -500,6 +500,10 @@ tf_module { name: "string" mtype: "" } + member { + name: "strings" + mtype: "" + } member { name: "summary" mtype: "" diff --git a/tensorflow/tools/api/golden/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/tensorflow.strings.pbtxt new file mode 100644 index 00000000000000..a3fbe95bbad4b8 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.strings.pbtxt @@ -0,0 +1,7 @@ +path: "tensorflow.strings" +tf_module { + member_method { + name: "regex_full_match" + argspec: "args=[\'input\', \'pattern\', 
\'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } +} diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh index 5aaf544afdcb88..982161cefeefdd 100755 --- a/tensorflow/tools/ci_build/install/install_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh @@ -17,14 +17,9 @@ set -e # We don't apt-get install so that we can install a newer version of pip. -# Only needed for Ubuntu 14.04 ,and not needed for Ubuntu 16.04 / Debian 8,9 -if $(cat /etc/*-release | grep -q 14.04); then - easy_install -U pip==9.0.3 - easy_install3 -U pip==9.0.3 -else - pip2 install --upgrade pip==9.0.3 - pip3 install --upgrade pip==9.0.3 -fi +# Only needed for Ubuntu 14.04 and 16.04; not needed for 18.04 and Debian 8,9? +easy_install -U pip==9.0.3 +easy_install3 -U pip==9.0.3 # Install pip packages from whl files to avoid the time-consuming process of # building from source. diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel index b9996395d02bfb..406d134699ff18 100644 --- a/tensorflow/tools/docker/Dockerfile.devel +++ b/tensorflow/tools/docker/Dockerfile.devel @@ -85,7 +85,7 @@ RUN git clone --branch=r1.8 --depth=1 https://github.com/tensorflow/tensorflow.g ENV CI_BUILD_PYTHON python RUN tensorflow/tools/ci_build/builds/configured CPU \ - bazel build -c opt --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \ + bazel build -c opt --copt=-mavx --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \ # For optimized builds appropriate for the hardware platform of your choosing, uncomment below... 
# For ivy-bridge or sandy-bridge # --copt=-march="ivybridge" \ diff --git a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl index c65e0b72bc582d..a6cd44ced1d546 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl +++ b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl @@ -35,10 +35,10 @@ ENV CI_BUILD_PYTHON=python \ PYTHON_LIB_PATH=/usr/local/lib/python2.7/dist-packages \ CC_OPT_FLAGS='-march=native' \ TF_NEED_JEMALLOC=0 \ - TF_NEED_GCP=0 \ + TF_NEED_GCP=1 \ TF_NEED_CUDA=0 \ TF_NEED_HDFS=0 \ - TF_NEED_S3=0 \ + TF_NEED_S3=1 \ TF_NEED_OPENCL=0 \ TF_NEED_GDR=0 \ TF_ENABLE_XLA=0 \ diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu index 7e5e6ef2d5b024..2fe47f3356ce26 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu @@ -98,7 +98,7 @@ ENV TF_CUDNN_VERSION=7 RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \ LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:${LD_LIBRARY_PATH} \ tensorflow/tools/ci_build/builds/configured GPU \ - bazel build -c opt --config=cuda \ + bazel build -c opt --copt=-mavx --config=cuda \ --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \ tensorflow/tools/pip_package:build_pip_package && \ rm /usr/local/cuda/lib64/stubs/libcuda.so.1 && \ diff --git a/tensorflow/tools/graph_transforms/README.md b/tensorflow/tools/graph_transforms/README.md index 67badb4869029b..9f6f553ba1e4c6 100644 --- a/tensorflow/tools/graph_transforms/README.md +++ b/tensorflow/tools/graph_transforms/README.md @@ -388,7 +388,7 @@ input is collapsed down into a simple constant. Args: * clear_output_shapes: Clears tensor shape information saved as attributes. - Some older graphs containes out-of-date information and may cause import + Some older graphs contains out-of-date information and may cause import errors. Defaults to true. 
Prerequisites: None diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh index b66d5bdd37c03e..1a83c6e7578fed 100755 --- a/tensorflow/tools/pip_package/build_pip_package.sh +++ b/tensorflow/tools/pip_package/build_pip_package.sh @@ -24,7 +24,7 @@ function real_path() { function cp_external() { local src_dir=$1 local dest_dir=$2 - for f in `find "$src_dir" -maxdepth 1 -mindepth 1 ! -name '*local_config_cuda*' ! -name '*org_tensorflow*'`; do + for f in `find "$src_dir" -maxdepth 1 -mindepth 1 ! -name '*local_config_cuda*' ! -name '*local_config_tensorrt*' ! -name '*org_tensorflow*'`; do cp -R "$f" "$dest_dir" done mkdir -p "${dest_dir}/local_config_cuda/cuda/cuda/" diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index f7385e59912fa0..319878e1b5ae9f 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -31,7 +31,7 @@ # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.8.0-rc1' +_VERSION = '1.8.0' _SHORT_DESCRIPTION = ('TensorFlow is an open source machine learning framework ' 'for everyone.') @@ -55,7 +55,7 @@ 'numpy >= 1.13.3', 'six >= 1.10.0', 'protobuf >= 3.4.0', - 'tensorboard >= 1.7.0, < 1.8.0', + 'tensorboard >= 1.8.0, < 1.9.0', 'termcolor >= 1.1.0', ] diff --git a/third_party/examples/eager/spinn/README.md b/third_party/examples/eager/spinn/README.md index 7f477d19208257..fbb1fde837b92b 100644 --- a/third_party/examples/eager/spinn/README.md +++ b/third_party/examples/eager/spinn/README.md @@ -70,7 +70,7 @@ Other eager execution examples can be found under [tensorflow/contrib/eager/pyth - After training, you may use the model to perform inference on input data in the SNLI data format. 
The premise and hypotheses sentences are specified with the command-line flags `--inference_premise` and `--inference_hypothesis`, - respecitvely. Each sentence should include the words, as well as parentheses + respectively. Each sentence should include the words, as well as parentheses representing a binary parsing of the sentence. The words and parentheses should all be separated by spaces. For instance, diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index 50d1b778d9e14b..c90c66912d959a 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -604,7 +604,7 @@ def _find_cupti_header_dir(repository_ctx, cuda_config): for relative_path in CUPTI_HEADER_PATHS: if repository_ctx.path("%s/%scupti.h" % (cuda_toolkit_path, relative_path)).exists: return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1] - auto_configure_fail("Cannot find cupti.h under %s" % cuda_toolkit_path) + auto_configure_fail("Cannot find cupti.h under %s" % ", ".join([cuda_toolkit_path + "/" + s for s in CUPTI_HEADER_PATHS])) def _find_cupti_lib(repository_ctx, cuda_config): diff --git a/third_party/mkl/BUILD b/third_party/mkl/BUILD index c2adf578c703f5..a058c46cc42439 100644 --- a/third_party/mkl/BUILD +++ b/third_party/mkl/BUILD @@ -34,6 +34,7 @@ filegroup( "@org_tensorflow//tensorflow:windows": [ "@mkl_windows//:LICENSE", ], + "//conditions:default": [], }), visibility = ["//visibility:public"], ) @@ -54,5 +55,6 @@ cc_library( "@mkl_windows//:mkl_headers", "@mkl_windows//:mkl_libs_windows", ], + "//conditions:default": [], }), )