@@ -25,12 +25,14 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/tensor_types.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/bounds_check.h"
 #include "tensorflow/core/kernels/depthwise_conv_op.h"
 #include "tensorflow/core/kernels/ops_util.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/padding.h"
+#include "tensorflow/core/util/tensor_format.h"
 #include "tensorflow/core/util/work_sharder.h"
 
 #if GOOGLE_CUDA
@@ -62,23 +64,51 @@ typedef Eigen::GpuDevice GPUDevice;
       context, batch == out_backprop.dim_size(0),                            \
       errors::InvalidArgument(                                               \
           label, ": input and out_backprop must have the same batch size")); \
-  const int64 input_rows = input_shape.dim_size(1);                          \
-  const int64 input_cols = input_shape.dim_size(2);                          \
+  const int64 input_rows_raw = GetTensorDim(input_shape, data_format_, 'H'); \
+  OP_REQUIRES(                                                               \
+      context,                                                               \
+      FastBoundsCheck(input_rows_raw, std::numeric_limits<int32>::max()),    \
+      errors::InvalidArgument("Input rows too large"));                      \
+  const int32 input_rows = static_cast<int32>(input_rows_raw);               \
+  const int64 input_cols_raw = GetTensorDim(input_shape, data_format_, 'W'); \
+  OP_REQUIRES(                                                               \
+      context,                                                               \
+      FastBoundsCheck(input_cols_raw, std::numeric_limits<int32>::max()),    \
+      errors::InvalidArgument("Input cols too large"));                      \
+  const int32 input_cols = static_cast<int32>(input_cols_raw);               \
   const int64 filter_rows = filter_shape.dim_size(0);                        \
   const int64 filter_cols = filter_shape.dim_size(1);                        \
-  const int64 output_rows = out_backprop.dim_size(1);                        \
-  const int64 output_cols = out_backprop.dim_size(2);                        \
-  const int64 in_depth = input_shape.dim_size(3);                            \
+  const int64 output_rows_raw =                                              \
+      GetTensorDim(out_backprop.shape(), data_format_, 'H');                 \
+  OP_REQUIRES(                                                               \
+      context,                                                               \
+      FastBoundsCheck(output_rows_raw, std::numeric_limits<int32>::max()),   \
+      errors::InvalidArgument("Output rows too large"));                     \
+  const int32 output_rows = static_cast<int32>(output_rows_raw);             \
+  const int64 output_cols_raw =                                              \
+      GetTensorDim(out_backprop.shape(), data_format_, 'W');                 \
+  OP_REQUIRES(                                                               \
+      context,                                                               \
+      FastBoundsCheck(output_cols_raw, std::numeric_limits<int32>::max()),   \
+      errors::InvalidArgument("Output cols too large"));                     \
+  const int32 output_cols = static_cast<int32>(output_cols_raw);             \
+  const int64 in_depth = GetTensorDim(input_shape, data_format_, 'C');       \
   OP_REQUIRES(context, in_depth == filter_shape.dim_size(2),                 \
               errors::InvalidArgument(                                       \
                   label, ": input and filter must have the same in_depth")); \
   const int64 depth_multiplier = filter_shape.dim_size(3);                   \
-  const int64 out_depth = out_backprop.dim_size(3);                          \
+  const int64 out_depth_raw =                                                \
+      GetTensorDim(out_backprop.shape(), data_format_, 'C');                 \
+  OP_REQUIRES(                                                               \
+      context,                                                               \
+      FastBoundsCheck(out_depth_raw, std::numeric_limits<int32>::max()),     \
+      errors::InvalidArgument("Output depth too large"));                    \
+  const int32 out_depth = static_cast<int32>(out_depth_raw);                 \
   OP_REQUIRES(                                                               \
       context, (depth_multiplier * in_depth) == out_depth,                   \
       errors::InvalidArgument(                                               \
           label, ": depth_multiplier * in_depth not equal to out_depth"));   \
-  const auto stride = strides_[1];                                           \
+  const auto stride = stride_;                                               \
   int64 out_rows = 0, out_cols = 0, pad_rows = 0, pad_cols = 0;              \
   OP_REQUIRES_OK(context,                                                    \
                  GetWindowedOutputSize(input_rows, filter_rows, stride,      \
@@ -343,7 +373,12 @@ struct LaunchDepthwiseConvBackpropInputOp<CPUDevice, T> {
 
   static void launch(OpKernelContext* ctx, const DepthwiseArgs& args,
                      const T* out_backprop, const T* depthwise_filter,
-                     T* in_backprop) {
+                     T* in_backprop, TensorFormat data_format) {
+    OP_REQUIRES(
+        ctx, data_format == FORMAT_NHWC,
+        errors::Unimplemented(
+            "Depthwise convolution on CPU is only supported for NHWC format"));
+
     static const int64 kPacketSize = (sizeof(Packet) / sizeof(T));
 
     // Pad 'depthwise_filter' to vector register width (if needed).
@@ -482,16 +517,18 @@ static void DepthwiseConvBackpropInputReference(const DepthwiseArgs& args,
 template <typename T>
 struct DepthwiseConv2dBackpropInputGPULaunch {
   static void Run(const GPUDevice& d, const DepthwiseArgs args,
-                  const T* out_backprop, const T* filter, T* in_backprop);
+                  const T* out_backprop, const T* filter, T* in_backprop,
+                  TensorFormat data_format);
 };
 
 template <typename T>
 struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, T> {
   static void launch(OpKernelContext* ctx, const DepthwiseArgs args,
-                     const T* out_backprop, const T* filter, T* in_backprop) {
+                     const T* out_backprop, const T* filter, T* in_backprop,
+                     TensorFormat data_format) {
     const GPUDevice& d = ctx->eigen_device<GPUDevice>();
-    DepthwiseConv2dBackpropInputGPULaunch<T>().Run(d, args, out_backprop,
-                                                   filter, in_backprop);
+    DepthwiseConv2dBackpropInputGPULaunch<T>().Run(
+        d, args, out_backprop, filter, in_backprop, data_format);
     auto stream = ctx->op_device_context()->stream();
     OP_REQUIRES(ctx, stream->ok(), errors::Internal("Launch of gpu kernel for "
                                                     "DepthwiseConv2dBackpropInp"
@@ -511,12 +548,23 @@ class DepthwiseConv2dNativeBackpropInputOp : public OpKernel {
     OP_REQUIRES(context, strides_.size() == 4,
                 errors::InvalidArgument("Sliding window strides field must "
                                         "specify 4 dimensions"));
-    OP_REQUIRES(context, strides_[1] == strides_[2],
+
+    string data_format;
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
+    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
+                errors::InvalidArgument("Invalid data format"));
+
+    stride_ = GetTensorDim(strides_, data_format_, 'H');
+    const int64 stride_w = GetTensorDim(strides_, data_format_, 'W');
+    const int64 stride_n = GetTensorDim(strides_, data_format_, 'N');
+    const int64 stride_c = GetTensorDim(strides_, data_format_, 'C');
+
+    OP_REQUIRES(context, stride_ == stride_w,
                 errors::InvalidArgument(
                     "Current implementation only supports equal length "
                     "strides in the row and column dimensions."));
     OP_REQUIRES(
-        context, (strides_[0] == 1 && strides_[3] == 1),
+        context, (stride_n == 1 && stride_c == 1),
         errors::InvalidArgument("Current implementation does not yet support "
                                 "strides in the batch and depth dimensions."));
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
@@ -539,7 +587,6 @@ class DepthwiseConv2dNativeBackpropInputOp : public OpKernel {
       input_shape.AddDim(in_sizes_data[i]);
     }
     const TensorShape& filter_shape = filter.shape();
-
     EXTRACT_AND_VERIFY_DIMENSIONS("DepthwiseConv2DBackpropInput");
     Tensor* in_backprop = nullptr;
     OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
@@ -552,12 +599,15 @@ class DepthwiseConv2dNativeBackpropInputOp : public OpKernel {
       return;
     }
     LaunchDepthwiseConvBackpropInputOp<Device, T>::launch(
-        context, args, out_backprop_ptr, filter_ptr, in_backprop_ptr);
+        context, args, out_backprop_ptr, filter_ptr, in_backprop_ptr,
+        data_format_);
   }
 
  private:
   std::vector<int32> strides_;
   Padding padding_;
+  TensorFormat data_format_;
+  int64 stride_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(DepthwiseConv2dNativeBackpropInputOp);
 };
@@ -695,8 +745,13 @@ struct LaunchDepthwiseConvBackpropFilterOp<CPUDevice, T> {
   typedef typename Eigen::internal::packet_traits<T>::type Packet;
 
   static void launch(OpKernelContext* ctx, const DepthwiseArgs& args,
-                     const T* out_backprop, const T* input,
-                     T* filter_backprop) {
+                     const T* out_backprop, const T* input, T* filter_backprop,
+                     TensorFormat data_format) {
+    OP_REQUIRES(
+        ctx, data_format == FORMAT_NHWC,
+        errors::Unimplemented(
+            "Depthwise convolution on CPU is only supported for NHWC format"));
+
     static const int64 kPacketSize = (sizeof(Packet) / sizeof(T));
 
     const int64 filter_spatial_size = args.filter_rows * args.filter_cols;
@@ -855,14 +910,15 @@ static void DepthwiseConvBackpropFilterReference(const DepthwiseArgs& args,
 template <typename T>
 struct DepthwiseConv2dBackpropFilterGPULaunch {
   static void Run(const GPUDevice& d, const DepthwiseArgs args,
-                  const T* out_backprop, const T* input, T* filter_backprop);
+                  const T* out_backprop, const T* input, T* filter_backprop,
+                  TensorFormat data_format);
 };
 
 template <typename T>
 struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, T> {
   static void launch(OpKernelContext* ctx, const DepthwiseArgs args,
-                     const T* out_backprop, const T* input,
-                     T* filter_backprop) {
+                     const T* out_backprop, const T* input, T* filter_backprop,
+                     TensorFormat data_format) {
     const GPUDevice& d = ctx->eigen_device<GPUDevice>();
     auto stream = ctx->op_device_context()->stream();
@@ -873,8 +929,8 @@ struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, T> {
                 num_filter_backprop);
     stream->ThenMemset32(&filter_bp_ptr, 0, num_filter_backprop * sizeof(T));
 
-    DepthwiseConv2dBackpropFilterGPULaunch<T>().Run(d, args, out_backprop,
-                                                    input, filter_backprop);
+    DepthwiseConv2dBackpropFilterGPULaunch<T>().Run(
+        d, args, out_backprop, input, filter_backprop, data_format);
     OP_REQUIRES(ctx, stream->ok(), errors::Internal("Launch of gpu kernel for "
                                                     "DepthwiseConv2dBackpropFil"
                                                     "terGPULaunch failed"));
@@ -893,12 +949,23 @@ class DepthwiseConv2dNativeBackpropFilterOp : public OpKernel {
    OP_REQUIRES(context, strides_.size() == 4,
                errors::InvalidArgument("Sliding window strides field must "
                                        "specify 4 dimensions"));
-    OP_REQUIRES(context, strides_[1] == strides_[2],
+
+    string data_format;
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
+    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
+                errors::InvalidArgument("Invalid data format"));
+
+    stride_ = GetTensorDim(strides_, data_format_, 'H');
+    const int64 stride_w = GetTensorDim(strides_, data_format_, 'W');
+    const int64 stride_n = GetTensorDim(strides_, data_format_, 'N');
+    const int64 stride_c = GetTensorDim(strides_, data_format_, 'C');
+
+    OP_REQUIRES(context, stride_ == stride_w,
                errors::InvalidArgument(
                    "Current implementation only supports equal length "
                    "strides in the row and column dimensions."));
    OP_REQUIRES(
-        context, (strides_[0] == 1 && strides_[3] == 1),
+        context, (stride_n == 1 && stride_c == 1),
        errors::InvalidArgument("Current implementation does not yet support "
                                "strides in the batch and depth dimensions."));
    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
@@ -935,12 +1002,15 @@ class DepthwiseConv2dNativeBackpropFilterOp : public OpKernel {
       return;
     }
     LaunchDepthwiseConvBackpropFilterOp<Device, T>::launch(
-        context, args, out_backprop_ptr, input_ptr, filter_backprop_ptr);
+        context, args, out_backprop_ptr, input_ptr, filter_backprop_ptr,
+        data_format_);
   }
 
  private:
   std::vector<int32> strides_;
   Padding padding_;
+  TensorFormat data_format_;
+  int64 stride_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(DepthwiseConv2dNativeBackpropFilterOp);
 };
0 commit comments