Fix some CUDA memory access errors.

wzzju · wzzju · commit 63acc7bf9c32 · 2020-12-08T08:34:13.000Z
diff --git a/paddle/fluid/operators/amp/update_loss_scaling_op.cc b/paddle/fluid/operators/amp/update_loss_scaling_op.cc
@@ -141,12 +141,12 @@ class LazyZeros<platform::CPUDeviceContext, T> {
                   const bool* found_inf_data,
                   const std::vector<const framework::Tensor*>& xs,
                   const std::vector<framework::Tensor*>& outs) const {
-    if (*found_inf_data) {
-      VLOG(1) << "-- UpdateLossScaling: Infinite values are found in grads. --";
-      for (size_t i = 0; i < xs.size(); ++i) {
-        auto* out = outs[i];
-        T* out_data = out->mutable_data<T>(dev_ctx.GetPlace());
-        int num = out->numel();
+    for (size_t i = 0; i < xs.size(); ++i) {
+      auto* out = outs[i];
+      T* out_data = out->mutable_data<T>(dev_ctx.GetPlace());
+      int num = out->numel();
+      if (*found_inf_data) {
+        VLOG(1) << "-- UpdateLossScaling: Find infinite grads. --";
         std::memset(out_data, 0, num * sizeof(T));
       }
     }
diff --git a/paddle/fluid/operators/amp/update_loss_scaling_op.cu b/paddle/fluid/operators/amp/update_loss_scaling_op.cu
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <thrust/fill.h>
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/amp/update_loss_scaling_op.h"
@@ -34,7 +33,7 @@ __global__ void GpuUpdateLossScaling(
 }
 
 template <typename T>
-__global__ void FillIf(T* data, const int num, const T& value,
+__global__ void FillIf(T* data, const int64_t num, const T value,
                        const bool* has_inf) {
   if (*has_inf) {
     int tid = threadIdx.x + blockIdx.x * blockDim.x;
@@ -71,7 +70,7 @@ class LazyZeros<platform::CUDADeviceContext, T> {
     for (size_t i = 0; i < xs.size(); ++i) {
       auto* out = outs[i];
       T* out_data = out->mutable_data<T>(dev_ctx.GetPlace());
-      int num = out->numel();
+      int64_t num = out->numel();
       int block = 1024;
       int grid = (block - 1 + num) / block;
       FillIf<<<grid, block, 0, dev_ctx.stream()>>>(
diff --git a/python/paddle/fluid/tests/unittests/test_update_loss_scaling_op.py b/python/paddle/fluid/tests/unittests/test_update_loss_scaling_op.py
@@ -35,7 +35,7 @@ def setUp(self):
         }
 
         self.outputs = {
-            'Out': [('out0', np.zeros_like(x))],
+            'Out': [('out0', x)],
             'LossScaling': self.prev_loss_scaling * self.incr_ratio,
             'OutGoodSteps': self.zero_steps,
             'OutBadSteps': self.zero_steps

Original file line number	Diff line number	Diff line change
`@@ -35,7 +35,7 @@ def setUp(self):`
`35`	`35`	`}`
`36`	`36`
`37`	`37`	`self.outputs = {`
`38`		`- 'Out': [('out0', np.zeros_like(x))],`
	`38`	`+ 'Out': [('out0', x)],`
`39`	`39`	`'LossScaling': self.prev_loss_scaling * self.incr_ratio,`
`40`	`40`	`'OutGoodSteps': self.zero_steps,`
`41`	`41`	`'OutBadSteps': self.zero_steps`