-
Notifications
You must be signed in to change notification settings - Fork 751
/
Copy pathopencv-cudnn8.patch
79 lines (71 loc) · 3.29 KB
/
opencv-cudnn8.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
--- a/modules/dnn/src/cuda4dnn/csl/cudnn/convolution.hpp.orig 2020-06-08 17:01:47.788349629 +0300
+++ b/modules/dnn/src/cuda4dnn/csl/cudnn/convolution.hpp 2020-06-08 16:50:51.579297388 +0300
@@ -260,10 +260,10 @@
const TensorDescriptor<T>& output)
{
CUDA4DNN_CHECK_CUDNN(
- cudnnGetConvolutionForwardAlgorithm(
+ cudnnGetConvolutionForwardAlgorithm_v7(
handle.get(),
input.get(), filter.get(), conv.get(), output.get(),
- CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
+ 1,
0, /* no memory limit */
&algo
)
@@ -273,7 +273,7 @@
cudnnGetConvolutionForwardWorkspaceSize(
handle.get(),
input.get(), filter.get(), conv.get(), output.get(),
- algo, &workspace_size
+ algo.algo, &workspace_size
)
);
}
@@ -281,13 +281,13 @@
ConvolutionAlgorithm& operator=(const ConvolutionAlgorithm&) = default;
ConvolutionAlgorithm& operator=(ConvolutionAlgorithm&& other) = default;
- cudnnConvolutionFwdAlgo_t get() const noexcept { return algo; }
+ cudnnConvolutionFwdAlgo_t get() const noexcept { return algo.algo; }
/** number of bytes of workspace memory required by the algorithm */
std::size_t get_workspace_size() const noexcept { return workspace_size; }
private:
- cudnnConvolutionFwdAlgo_t algo;
+ cudnnConvolutionFwdAlgoPerf_t algo;
std::size_t workspace_size;
};
--- a/modules/dnn/src/cuda4dnn/csl/cudnn/transpose_convolution.hpp.orig 2020-06-08 17:01:35.797383567 +0300
+++ b/modules/dnn/src/cuda4dnn/csl/cudnn/transpose_convolution.hpp 2020-06-08 16:52:44.001957698 +0300
@@ -36,10 +36,10 @@
const TensorDescriptor<T>& output)
{
CUDA4DNN_CHECK_CUDNN(
- cudnnGetConvolutionBackwardDataAlgorithm(
+ cudnnGetConvolutionBackwardDataAlgorithm_v7(
handle.get(),
filter.get(), input.get(), conv.get(), output.get(),
- CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST,
+ 1,
0, /* no memory limit */
&dalgo
)
@@ -49,7 +49,7 @@
cudnnGetConvolutionBackwardDataWorkspaceSize(
handle.get(),
filter.get(), input.get(), conv.get(), output.get(),
- dalgo, &workspace_size
+ dalgo.algo, &workspace_size
)
);
}
@@ -57,12 +57,12 @@
TransposeConvolutionAlgorithm& operator=(const TransposeConvolutionAlgorithm&) = default;
TransposeConvolutionAlgorithm& operator=(TransposeConvolutionAlgorithm&& other) = default;
- cudnnConvolutionBwdDataAlgo_t get() const noexcept { return dalgo; }
+ cudnnConvolutionBwdDataAlgo_t get() const noexcept { return dalgo.algo; }
std::size_t get_workspace_size() const noexcept { return workspace_size; }
private:
- cudnnConvolutionBwdDataAlgo_t dalgo;
+ cudnnConvolutionBwdDataAlgoPerf_t dalgo;
std::size_t workspace_size;
};