Merge pull request #110 from FluxML/sf/gc_preserve

staticfloat · web-flow · commit 6b987ee87ddf · 2019-04-10T23:39:36.000-07:00
Preserve the arrays behind the raw pointers before passing those pointers into `gemm!()`
diff --git a/src/impl/conv_im2col.jl b/src/impl/conv_im2col.jl
@@ -50,10 +50,12 @@ which should eliminate any need for large allocations within this method.
         # We invoke `@timeit_debug` on the outside of `im2col!()` because inference
         # doesn't like us putting it on the inside.
         @timeit_debug to "im2col!" im2col!(col, view(x, :, :, :, :, batch_idx), cdims)
-        col_ptr = pointer(col)
-        w_ptr = pointer(w)
-        y_ptr = pointer(y, (batch_idx - 1)*M*N + 1)
-        @timeit_debug to "gemm!" gemm!(Val(false), Val(false), M, N, K, alpha, col_ptr, w_ptr, beta, y_ptr)
+        GC.@preserve col w y begin  # space-separated: roots the arrays while gemm! uses raw pointers
+            col_ptr = pointer(col)
+            w_ptr = pointer(w)
+            y_ptr = pointer(y, (batch_idx - 1)*M*N + 1)
+            @timeit_debug to "gemm!" gemm!(Val(false), Val(false), M, N, K, alpha, col_ptr, w_ptr, beta, y_ptr)
+        end
     end
     return y
 end
@@ -96,10 +98,12 @@ See the documentation for `conv_im2col!()` for explanation of optional parameter
         # We invoke `@timeit_debug` on the outside of `im2col!()` because inference
         # doesn't like us putting it on the inside.
         @timeit_debug to "im2col!" im2col!(col, view(x, :, :, :, :, batch_idx), cdims)
-        col_ptr = pointer(col)
-        dy_ptr = pointer(dy,(batch_idx - 1)*K*N + 1)
-        dw_ptr = pointer(dw)
-        @timeit_debug to "gemm!" gemm!(Val(true), Val(false), M, N, K, alpha, col_ptr, dy_ptr, beta, dw_ptr)
+        GC.@preserve col dw dy begin  # space-separated: roots the arrays while gemm! uses raw pointers
+            col_ptr = pointer(col)
+            dy_ptr = pointer(dy,(batch_idx - 1)*K*N + 1)
+            dw_ptr = pointer(dw)
+            @timeit_debug to "gemm!" gemm!(Val(true), Val(false), M, N, K, alpha, col_ptr, dy_ptr, beta, dw_ptr)
+        end
 
         # Because we accumulate over batches in this loop, we must set `beta` equal
         # to `1.0` from this point on.
@@ -141,10 +145,12 @@ See the documentation for `conv_im2col!()` for explanation of other parameters.
     K = channels_out(cdims)
 
     @inbounds for batch_idx in 1:size(dx, 5)
-        dy_ptr = pointer(dy, (batch_idx - 1)*M*K + 1)
-        w_ptr = pointer(w)
-        col_ptr = pointer(col)
-        @timeit_debug to "gemm!" gemm!(Val(false), Val(true), M, N, K, alpha, dy_ptr, w_ptr, T(0), col_ptr)
+        GC.@preserve col w dy begin  # space-separated: roots the arrays while gemm! uses raw pointers
+            dy_ptr = pointer(dy, (batch_idx - 1)*M*K + 1)
+            w_ptr = pointer(w)
+            col_ptr = pointer(col)
+            @timeit_debug to "gemm!" gemm!(Val(false), Val(true), M, N, K, alpha, dy_ptr, w_ptr, T(0), col_ptr)
+        end
         @timeit_debug to "col2im!" col2im!(view(dx, :, :, :, :, batch_idx), col, cdims)
     end
     return dx
diff --git a/src/impl/depthwiseconv_im2col.jl b/src/impl/depthwiseconv_im2col.jl
@@ -35,10 +35,12 @@ depthwiseconv_im2col!
         # We do a separate convolution for each channel in x, as we must
         for c_in in 1:channels_in(cdims)
             # Walk each pointer forward as we process each input channel
-            col_ptr = pointer(col, (c_in-1)*M*K+1)
-            w_ptr = pointer(w, (c_in-1)*K*N+1)
-            y_ptr = pointer(y, ((batch_idx - 1)*channels_in(cdims) + c_in - 1)*M*N + 1)
-            gemm!(Val(false), Val(false), M, N, K, alpha, col_ptr, w_ptr, beta, y_ptr)
+            GC.@preserve col w y begin  # space-separated: roots the arrays while gemm! uses raw pointers
+                col_ptr = pointer(col, (c_in-1)*M*K+1)
+                w_ptr = pointer(w, (c_in-1)*K*N+1)
+                y_ptr = pointer(y, ((batch_idx - 1)*channels_in(cdims) + c_in - 1)*M*N + 1)
+                gemm!(Val(false), Val(false), M, N, K, alpha, col_ptr, w_ptr, beta, y_ptr)
+            end
         end
     end
     return y
@@ -71,11 +73,12 @@ See the documentation for `conv_im2col!()` for explanation of optional parameter
         # We do a separate convolution for each channel in x, as we must
         for c_in in 1:channels_in(cdims)
             # Walk each pointer forward as we process each input channel
-            col_ptr = pointer(col, (c_in - 1)*M*K + 1)
-            dy_ptr = pointer(dy, (batch_idx - 1)*N*K*channels_in(cdims) + (c_in - 1)*K*N + 1)
-            dw_ptr = pointer(dw, (c_in - 1)*M*N + 1)
-
-            gemm!(Val(true), Val(false), M, N, K, alpha, col_ptr, dy_ptr, beta, dw_ptr)
+            GC.@preserve col dw dy begin  # space-separated: roots the arrays while gemm! uses raw pointers
+                col_ptr = pointer(col, (c_in - 1)*M*K + 1)
+                dy_ptr = pointer(dy, (batch_idx - 1)*N*K*channels_in(cdims) + (c_in - 1)*K*N + 1)
+                dw_ptr = pointer(dw, (c_in - 1)*M*N + 1)
+                gemm!(Val(true), Val(false), M, N, K, alpha, col_ptr, dy_ptr, beta, dw_ptr)
+            end
         end
 
         # Because we accumulate over batches in this loop, we must set `beta` equal
@@ -107,13 +110,15 @@ See the documentation for `conv_im2col!()` for explanation of optional parameter
     @inbounds for batch_idx in 1:size(dx)[end]
         # We do a separate convolution for each channel in x, as we must
         for cidx in 1:channels_in(cdims)
-            # Walk each pointer forward as we process each input channel
-            dy_ptr = pointer(dy, (batch_idx - 1)*M*K*channels_in(cdims)+(cidx - 1)*K*M + 1)
-            w_ptr = pointer(w, (cidx - 1)*K*N + 1)
-            col_ptr = pointer(col, (cidx - 1)*M*N + 1)
-            gemm!(Val(false), Val(true), M, N, K, alpha, dy_ptr, w_ptr, T(0), col_ptr)
+            GC.@preserve col w dy begin  # space-separated: roots the arrays while gemm! uses raw pointers
+                # Walk each pointer forward as we process each input channel
+                dy_ptr = pointer(dy, (batch_idx - 1)*M*K*channels_in(cdims)+(cidx - 1)*K*M + 1)
+                w_ptr = pointer(w, (cidx - 1)*K*N + 1)
+                col_ptr = pointer(col, (cidx - 1)*M*N + 1)
+                gemm!(Val(false), Val(true), M, N, K, alpha, dy_ptr, w_ptr, T(0), col_ptr)
+            end
         end
         @timeit_debug to "col2im!" col2im!(view(dx, :, :, :, :, batch_idx), col, cdims)
     end
     return dx
-end
+end