From 63471ea8cab8fbea715e0e8094071a06d0547b5d Mon Sep 17 00:00:00 2001
From: "yuanman.ym"
Date: Tue, 22 Jun 2021 17:48:35 +0800
Subject: [PATCH] [Fix] Fix various issues.

- Support NCCL separated compilation.
- Support graceful exit of gRPC servers.
- Fix double output of tf.logging.
- Fix build break caused by multiple definition of zlib.
- Remove deprecation messages.
---
 WORKSPACE                                            | 9 +++++++++
 tensorflow/core/BUILD                                | 3 ++-
 .../core/distributed_runtime/rpc/grpc_server_lib.cc  | 2 +-
 tensorflow/core/util/gpu_device_functions.h          | 4 ++--
 tensorflow/python/platform/tf_logging.py             | 1 +
 tensorflow/python/training/server_lib.py             | 4 +---
 tensorflow/python/util/deprecation.py                | 2 +-
 tensorflow/python/util/module_wrapper.py             | 2 +-
 tensorflow/tensorflow.bzl                            | 8 +++++---
 third_party/llvm/llvm.autogenerated.BUILD            | 1 -
 10 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/WORKSPACE b/WORKSPACE
index 74ea14d0fd7..de086629af4 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -2,6 +2,15 @@ workspace(name = "org_tensorflow")
 
 load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive", "http_file")
 
+http_archive(
+    name = "io_bazel_rules_docker",
+    sha256 = "e2674bb36d5c39e3dfd28c18fb6f0568083c98209f0c5a0ee8eaf35ab4766f1d",
+    strip_prefix = "rules_docker-251f6a68b439744094faff800cd029798edf9faa",
+    urls = [
+        "https://github.com/bazelbuild/rules_docker/archive/251f6a68b439744094faff800cd029798edf9faa.tar.gz"
+    ],
+)
+
 http_archive(
     name = "io_bazel_rules_closure",
     sha256 = "5b00383d08dd71f28503736db0500b6fb4dda47489ff5fc6bed42557c07c6ba9",
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index c23c1f9b396..4219991e70a 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -386,7 +386,7 @@ filegroup(
 cc_library(
     name = "platform_port",
     srcs = [
-        "//tensorflow/core/platform:cpu_info.cc",
+        "//tensorflow/core/platform:cpu_info",
         "//tensorflow/core/platform:legacy_platform_port_srcs",
     ],
     hdrs = [
@@ -889,6 +889,7 @@ tf_cuda_library(
         "framework/ops_util.h",
         "framework/partial_tensor_shape.h",
         "framework/queue_interface.h",
+        "framework/reader_base.h",
         "framework/reader_interface.h",
         "framework/reader_op_kernel.h",
         "framework/register_types.h",
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc
index c8eeaa9ddef..731953887c0 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc
@@ -61,7 +61,7 @@ namespace {
 class NoReusePortOption : public ::grpc::ServerBuilderOption {
  public:
   void UpdateArguments(::grpc::ChannelArguments* args) override {
-    args->SetInt(GRPC_ARG_ALLOW_REUSEPORT, 0);
+    args->SetInt(GRPC_ARG_ALLOW_REUSEPORT, 1);
   }
 
   void UpdatePlugins(std::vector<std::unique_ptr<::grpc::ServerBuilderPlugin>>*
diff --git a/tensorflow/core/util/gpu_device_functions.h b/tensorflow/core/util/gpu_device_functions.h
index 9040e78d6fd..45584cf8ba9 100644
--- a/tensorflow/core/util/gpu_device_functions.h
+++ b/tensorflow/core/util/gpu_device_functions.h
@@ -31,8 +31,8 @@ limitations under the License.
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #if GOOGLE_CUDA -#include "third_party/gpus/cuda/include/cuComplex.h" -#include "third_party/gpus/cuda/include/cuda.h" +#include +#include #endif #include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/gpu_cuda_alias.h" diff --git a/tensorflow/python/platform/tf_logging.py b/tensorflow/python/platform/tf_logging.py index 86a4957c9da..78bb3d39092 100644 --- a/tensorflow/python/platform/tf_logging.py +++ b/tensorflow/python/platform/tf_logging.py @@ -124,6 +124,7 @@ def get_logger(): _handler.setFormatter(_logging.Formatter(_logging.BASIC_FORMAT, None)) logger.addHandler(_handler) + logger.propagate = False _logger = logger return _logger diff --git a/tensorflow/python/training/server_lib.py b/tensorflow/python/training/server_lib.py index bd9c2382e3b..452910b5d3c 100644 --- a/tensorflow/python/training/server_lib.py +++ b/tensorflow/python/training/server_lib.py @@ -155,9 +155,7 @@ def __del__(self): # we leak instead of calling c_api.TF_DeleteServer here. # See: # https://github.com/tensorflow/tensorflow/blob/0495317a6e9dd4cac577b9d5cf9525e62b571018/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h#L73 - except errors.UnimplementedError: - pass - except AttributeError: + except: # pylint: disable=bare-except # At shutdown, `c_api` may have been garbage collected. pass self._server = None diff --git a/tensorflow/python/util/deprecation.py b/tensorflow/python/util/deprecation.py index 5e822f87e8c..d10312151ba 100644 --- a/tensorflow/python/util/deprecation.py +++ b/tensorflow/python/util/deprecation.py @@ -32,7 +32,7 @@ # Allow deprecation warnings to be silenced temporarily with a context manager. -_PRINT_DEPRECATION_WARNINGS = True +_PRINT_DEPRECATION_WARNINGS = False # Remember which deprecation warnings have been printed already. _PRINTED_WARNING = {} diff --git a/tensorflow/python/util/module_wrapper.py b/tensorflow/python/util/module_wrapper.py index 5ee356258a2..16a1d392f0e 100644 --- a/tensorflow/python/util/module_wrapper.py +++ b/tensorflow/python/util/module_wrapper.py @@ -29,7 +29,7 @@ from tensorflow.tools.compatibility import all_renames_v2 -_PER_MODULE_WARNING_LIMIT = 1 +_PER_MODULE_WARNING_LIMIT = 0 def get_rename_v2(name): diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 989984afd8d..ddb95312753 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -293,6 +293,7 @@ def tf_copts( (if_not_windows(["-fno-exceptions"]) if not allow_exceptions else []) + if_cuda(["-DGOOGLE_CUDA=1"]) + if_tensorrt(["-DGOOGLE_TENSORRT=1"]) + + if_nccl(["-DGOOGLE_NCCL=1"]) + if_mkl(["-DINTEL_MKL=1", "-DEIGEN_USE_VML"]) + if_mkl_open_source_only(["-DINTEL_MKL_DNN_ONLY"]) + if_mkl_v1_open_source_only(["-DENABLE_MKLDNN_V1"]) + @@ -1351,7 +1352,8 @@ def tf_gpu_library(deps = None, cuda_deps = None, copts = tf_copts(), **kwargs): - The cuda runtime is added as a dependency (if necessary). - The library additionally passes -DGOOGLE_CUDA=1 to the list of copts. - In addition, when the library is also built with TensorRT enabled, it - additionally passes -DGOOGLE_TENSORRT=1 to the list of copts. + additionally passes -DGOOGLE_TENSORRT=1 to the list of copts. Likewise + for NCCL and -DGOOGLE_NCCL=1. 
 
     Args:
     - cuda_deps: BUILD dependencies which will be linked if and only if:
@@ -1373,7 +1375,7 @@ def tf_gpu_library(deps = None, cuda_deps = None, copts = tf_copts(), **kwargs):
         ]) + if_rocm_is_configured(cuda_deps + [
             "@local_config_rocm//rocm:rocm_headers",
         ]),
-        copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_rocm(["-DTENSORFLOW_USE_ROCM=1"]) + if_mkl(["-DINTEL_MKL=1"]) + if_mkl_open_source_only(["-DINTEL_MKL_DNN_ONLY"]) + if_enable_mkl(["-DENABLE_MKL"]) + if_tensorrt(["-DGOOGLE_TENSORRT=1"])),
+        copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_rocm(["-DTENSORFLOW_USE_ROCM=1"]) + if_mkl(["-DINTEL_MKL=1"]) + if_mkl_open_source_only(["-DINTEL_MKL_DNN_ONLY"]) + if_enable_mkl(["-DENABLE_MKL"]) + if_tensorrt(["-DGOOGLE_TENSORRT=1"]) + if_nccl(["-DGOOGLE_NCCL=1"])),
         **kwargs
     )
 
@@ -1794,7 +1796,7 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
         native.cc_library(
             name = basename + "_gpu",
             srcs = gpu_srcs,
-            copts = copts + _cuda_copts() + if_tensorrt(["-DGOOGLE_TENSORRT=1"]),
+            copts = copts + _cuda_copts() + if_tensorrt(["-DGOOGLE_TENSORRT=1"]) + if_nccl(["-DGOOGLE_NCCL=1"]),
             features = if_cuda(["-use_header_modules"]),
             deps = deps + if_cuda_is_configured_compat(cuda_deps) + if_rocm_is_configured(rocm_deps),
             **kwargs
diff --git a/third_party/llvm/llvm.autogenerated.BUILD b/third_party/llvm/llvm.autogenerated.BUILD
index 32705321ea1..968eb6ac694 100644
--- a/third_party/llvm/llvm.autogenerated.BUILD
+++ b/third_party/llvm/llvm.autogenerated.BUILD
@@ -3404,7 +3404,6 @@ cc_library(
     deps = [
         ":config",
         ":demangle",
-        "@zlib_archive//:zlib",
     ],
 )
 
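
Note (editorial, not part of the patch): the tf_logging.py hunk fixes the double output of tf.logging by disabling propagation from the library logger to the root logger. Below is a minimal, self-contained sketch of that behavior using Python's standard logging module; the "mylib" logger name is illustrative only (TensorFlow's logger is named "tensorflow").

    import logging
    import sys

    # Root logger with its own handler, as many applications configure it.
    logging.basicConfig(stream=sys.stderr, format="ROOT: %(message)s", level=logging.INFO)

    # Library logger with its own handler, mirroring what get_logger() sets up.
    lib_logger = logging.getLogger("mylib")  # illustrative name
    handler = logging.StreamHandler(sys.stderr)
    handler.setFormatter(logging.Formatter("LIB: %(message)s"))
    lib_logger.addHandler(handler)
    lib_logger.setLevel(logging.INFO)

    lib_logger.info("hello")  # printed twice: by the LIB handler and, via propagation, the ROOT handler

    lib_logger.propagate = False  # the change applied in tf_logging.py
    lib_logger.info("world")  # printed once: records no longer propagate to the root handler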