Skip to content

Latest optimizations #6

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 29 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
daa76e1
enhancement with relu primitive reuse
gzmkl May 18, 2018
28727ba
enhancement with BatchNorm primitive reuse
gzmkl May 18, 2018
f5e2edb
enhancement with pooling ops primitive reuse
gzmkl May 18, 2018
2bcd873
revert mkl_conv_ops.cc to avoid PR review confusion
gzmkl May 21, 2018
d89e88b
revert mkl_conv_ops.cc to avoid PR review confusion
gzmkl May 21, 2018
ede9ff1
revert mkl_conv_ops.cc to avoid PR review confusion
gzmkl May 21, 2018
52485e7
minor code style fix
gzmkl May 23, 2018
834f0fa
minor code style fix
gzmkl May 23, 2018
d03f0ca
remove unused methods: PrepareAndExecuteNet, ConfigureOriginalInput a…
gzmkl Jun 4, 2018
f369de2
code refactoring per Rasmus's suggestions on PR 19754
gzmkl Jun 12, 2018
9aca063
code refactoring per Rasmus's suggestions on PR 19754
gzmkl Jun 12, 2018
ff9bf67
code refactoring per Rasmus's suggestions on PR 19754
gzmkl Jun 12, 2018
e948266
[Intel-MKL] Support for N-D Transpose using MKL-DNN
nhasabni Jun 15, 2018
c299b9c
[Intel MKL] Optimized implementation of GatherND using OpenMP
nhasabni Jun 22, 2018
5b19c88
Merge branch 'master' into primreuse_batch_norm
agramesh1 Jul 4, 2018
7718499
Merge branch 'master' into primreuse_pooling
agramesh1 Jul 4, 2018
3ee53e6
Merge branch 'master' into primreuse_relu
agramesh1 Jul 5, 2018
d064d03
Fix a typo during code refactoring
yiqianglee Jul 11, 2018
f814e24
Replace to use fast reorder path in MklRelu op.
yiqianglee Jul 15, 2018
238eb51
Replace to call fast reorder path in MklPooling op.
yiqianglee Jul 15, 2018
85aa931
Replace to call fast reorder path in MklBN op.
yiqianglee Jul 15, 2018
d490493
Code refactoring per comments.
yiqianglee Jul 18, 2018
51877bd
Upgrading to MKL-DNN v0.15
mahmoud-abuzaina Jul 20, 2018
0298660
Merge branch 'primreuse_relu' into latest_optimizations
aramesh1 Jul 22, 2018
f4906fb
Merge branch 'primreuse_batch_norm' into latest_optimizations
aramesh1 Jul 22, 2018
9f2b069
Merge branch 'primreuse_pooling' into latest_optimizations
aramesh1 Jul 22, 2018
2277de4
Merge branch 'nhasabni/mkldnn-transposend' into latest_optimizations
aramesh1 Jul 22, 2018
871c97f
Merge branch 'nhasabni/gathernd' into latest_optimizations
aramesh1 Jul 22, 2018
08c68df
Merge branch 'pr-upgrading-mkl-dnn-to-v0.15' into latest_optimizations
aramesh1 Jul 22, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions tensorflow/compiler/tf2xla/kernels/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ package(

load("//tensorflow:tensorflow.bzl", "tf_copts")
load("//tensorflow:tensorflow.bzl", "tf_kernel_library")
load(
"//third_party/mkl:build_defs.bzl",
"if_mkl",
)

tf_kernel_library(
name = "xla_ops",
Expand Down Expand Up @@ -151,8 +155,14 @@ tf_kernel_library(
"//tensorflow/core/kernels:sparse_to_dense_op",
"//tensorflow/core/kernels:stack_ops",
"//tensorflow/core/kernels:training_ops",
"//tensorflow/core/kernels:transpose_op",
],
] + if_mkl(
[
"//tensorflow/core/kernels:mkl_transpose_op",
],
[
"//tensorflow/core/kernels:transpose_op",
],
),
)

tf_kernel_library(
Expand Down
42 changes: 29 additions & 13 deletions tensorflow/core/kernels/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,14 @@ cc_library(
":split_v_op",
":strided_slice_op",
":tile_ops",
":transpose_op",
] + if_mkl(
[
":mkl_transpose_op",
],
[
":transpose_op",
],
) + [
":unique_op",
":unpack_op",
":unravel_index_op",
Expand Down Expand Up @@ -891,18 +898,27 @@ tf_kernel_library(
deps = ARRAY_DEPS,
)

tf_kernel_library(
name = "transpose_op",
srcs = [
"transpose_op.cc",
] + if_mkl([
"mkl_transpose_op.cc",
]),
hdrs = ["transpose_op.h"],
deps = ARRAY_DEPS + if_mkl([
"//third_party/mkl:intel_binary_blob",
"@mkl_dnn",
]),
if_mkl(
[tf_mkl_kernel_library(
name = "mkl_transpose_op",
srcs = [
"transpose_op.cc",
"mkl_transpose_op.cc",
],
hdrs = ["transpose_op.h"],
deps = ARRAY_DEPS + if_mkl([
"//third_party/mkl:intel_binary_blob",
"@mkl_dnn",
]),
)],
[tf_kernel_library(
name = "transpose_op",
srcs = [
"transpose_op.cc",
],
hdrs = ["transpose_op.h"],
deps = ARRAY_DEPS,
)],
)

tf_kernel_library(
Expand Down
15 changes: 15 additions & 0 deletions tensorflow/core/kernels/gather_nd_op_cpu_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,25 @@ struct GatherNdSlice<CPUDevice, T, Index, IXDIM> {
#endif
generator::GatherNdSliceGenerator<T, Index, IXDIM> gather_nd_generator(
slice_size, Tindices, Tparams, Tout, &error_loc);

#ifdef INTEL_MKL
// Eigen implementation below is not highly performant. gather_nd_generator
// does not seem to be called in parallel, leading to very poor performance.
// Additionally, since it uses scalar (Tscratch) to invoke 'generate', it
// needs to go through redundant operations like 'reshape', 'broadcast' and
'sum'. The OpenMP loop below essentially does the same thing as the Eigen code,
but is considerably more efficient.
#pragma omp parallel for
for (Eigen::DenseIndex i = 0; i < batch_size; i++) {
const Eigen::array<Eigen::DenseIndex, 1> loc = i;
gather_nd_generator(loc);
}
#else
Tscratch.device(d) = Tscratch.reshape(reshape_dims)
.broadcast(broadcast_dims)
.generate(gather_nd_generator)
.sum();
#endif

// error_loc() returns -1 if there's no out-of-bounds index,
// otherwise it returns the location of an OOB index in Tindices.
Expand Down
Loading