
Commit f6866ea

[Prim] Add index_select_double_grad (PaddlePaddle#71352)
* add UT: test_selected_high_order_derivative
* remove default axis = 0
* update error msg
* update op_compat.yaml
* update UT
* update code
* only run UT on GPU
1 parent 678afc3 commit f6866ea

File tree: 5 files changed (+84, -8 lines)


paddle/fluid/eager/auto_code_generator/generator/eager_gen.py

Lines changed: 11 additions & 3 deletions
@@ -1029,9 +1029,17 @@ def BackwardValidationCheck(self):
 
         max_grad_tensor_position = -1
         for _, (_, _, pos) in backward_grad_inputs_map.items():
-            assert pos > max_fwd_input_position, AssertMessage(
-                pos, max_fwd_input_position
-            )
+            if pos <= max_fwd_input_position:
+                err_msg = AssertMessage(pos, max_fwd_input_position)
+                if IsInvokeForwardApi(
+                    self.grad_api_contents, self.forward_apis_dict
+                ):
+                    err_msg += (
+                        f"\n\nNOTE: '{self.backward_api_name}' is an invoke api, "
+                        "please ensure that the parameters from `forward` "
+                        "are placed at the front in the `args` section.\n"
+                    )
+                raise AssertionError(err_msg)
             max_grad_tensor_position = max(max_grad_tensor_position, pos)
 
         max_attr_position = -1
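The generator change replaces the bare assert with an explicit AssertionError and, for invoke-style backward ops, appends a hint that the parameters taken from `forward` must come first in the `args` list. A standalone sketch of the same rule for readers unfamiliar with the code generator (the function name and signature below are illustrative, not the generator's real API):

# Illustrative restatement of the check in BackwardValidationCheck:
# every grad-tensor input of a backward op must sit after all forward
# inputs in its `args` list, i.e. its position must exceed
# max_fwd_input_position.
def check_grad_input_positions(
    grad_input_positions, max_fwd_input_position, is_invoke_api, backward_api_name
):
    for pos in grad_input_positions:
        if pos <= max_fwd_input_position:
            err_msg = (
                f"grad input position {pos} is not greater than the max "
                f"forward input position {max_fwd_input_position}"
            )
            if is_invoke_api:
                err_msg += (
                    f"\n\nNOTE: '{backward_api_name}' is an invoke api, "
                    "please ensure that the parameters from `forward` "
                    "are placed at the front in the `args` section.\n"
                )
            raise AssertionError(err_msg)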

paddle/phi/ops/yaml/backward.yaml

Lines changed: 7 additions & 0 deletions
@@ -1672,6 +1672,12 @@
   data_transform :
     skip_transform : index
 
+- backward_op : index_select_double_grad
+  forward : index_select_grad (Tensor x, Tensor index, Tensor grad_out, int axis) -> Tensor(grad_x)
+  args : (Tensor index, Tensor grad_x_grad, int axis)
+  output : Tensor(grad_out_grad)
+  invoke : index_select(grad_x_grad, index, axis)
+
 - backward_op : index_select_grad
   forward : index_select(Tensor x, Tensor index, int axis) -> Tensor(out)
   args : (Tensor x, Tensor index, Tensor out_grad, int axis)
@@ -1685,6 +1691,7 @@
   no_need_buffer : x
   data_transform :
     skip_transform : index
+  backward: index_select_double_grad
 
 - backward_op : index_select_strided_grad
   forward : index_select_strided(Tensor x, int64_t index, int axis) -> Tensor(out)
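Why `invoke : index_select(grad_x_grad, index, axis)` is the right rule: `index_select_grad` scatters `grad_out` back into the positions of `x` selected by `index`, so differentiating it with respect to `grad_out` is simply the corresponding gather. A minimal dygraph sketch of this behaviour (shapes, values, and variable names below are mine for illustration; it assumes the new double-grad op is registered so that the second `paddle.grad` call can be generated):

import numpy as np
import paddle

axis = 2
x = paddle.to_tensor(np.random.rand(2, 2, 3, 2), dtype='float64', stop_gradient=False)
index = paddle.to_tensor([0, 2, 1], dtype='int64')
# v plays the role of grad_out; making it differentiable lets us
# differentiate through index_select_grad a second time.
v = paddle.to_tensor(np.random.rand(2, 2, 3, 2), dtype='float64', stop_gradient=False)

out = paddle.index_select(x, index, axis=axis)
# First-order VJP: grad_x = index_select_grad(x, index, v, axis).
(grad_x,) = paddle.grad(out, x, grad_outputs=v, create_graph=True)
# Differentiating grad_x w.r.t. v goes through index_select_double_grad,
# which, per the yaml entry above, is index_select(grad_x_grad, index, axis)
# with grad_x_grad = ones_like(grad_x) here (the implicit grad of .sum()).
(grad_v,) = paddle.grad(grad_x.sum(), v)
print(grad_v.shape)  # same shape as v / out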

paddle/phi/ops/yaml/op_compat.yaml

Lines changed: 1 addition & 0 deletions
@@ -2044,6 +2044,7 @@
   out : Out
 
 - op : index_select
+  backward : index_select_grad, index_select_double_grad
   inputs :
     {x : X, index : Index}
   outputs :

test/legacy_test/gradient_checker.py

Lines changed: 21 additions & 5 deletions
@@ -30,6 +30,17 @@ def _product(t):
     return int(np.prod(t))
 
 
+# Data types that support gradients; dtypes like int32, int64, and bool do not require grad.
+DTYPE_REQUIRES_GRAD = [
+    paddle.float16,
+    paddle.float32,
+    paddle.float64,
+    core.DataType.FLOAT16,
+    core.DataType.FLOAT32,
+    core.DataType.FLOAT64,
+]
+
+
 def dtype_to_np_dtype(dtype):
     if dtype == paddle.float32 or dtype == core.DataType.FLOAT32:
         return np.float32
@@ -84,7 +95,7 @@ def var_to_np_array_in_scope(scope, place, name):
 
 def make_jacobian(x, y_size, np_dtype):
     if isinstance(x, (base.framework.Variable, paddle.pir.Value)):
-        return np.zeros((_product(x.shape), y_size), dtype=np_dtype)
+        return np.zeros([_product(x.shape), y_size], dtype=np_dtype)
     elif isinstance(x, Sequence):
         jacobians = list(
             filter(
@@ -260,10 +271,15 @@ def run():
     x_name = x.get_defining_op().attrs()['name']
     x_shape = x.shape
     x_size = _product(x_shape)
-    np_type = dtype_to_np_dtype(x.dtype)
-    np_t = np.array(feeds[x_name]).astype(np_type)
-    np_t = np_t.flatten()
-    jacobian = [make_jacobian(x, _product(yi.shape), np_type) for yi in y]
+    if x.dtype in DTYPE_REQUIRES_GRAD:
+        np_type = dtype_to_np_dtype(x.dtype)
+        np_t = np.array(feeds[x_name]).astype(np_type)
+        np_t = np_t.flatten()
+        jacobian = [make_jacobian(x, _product(yi.shape), np_type) for yi in y]
+    else:
+        np_type = np.float32  # temporarily set to float32
+        jacobian = [make_jacobian(x, _product(yi.shape), np_type) for yi in y]
+        return jacobian
 
     for i in range(x_size):
         orig = np_t[i]
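This checker change matters for the PR because index_select takes an integer `index` input: when building the numeric Jacobian, the checker would otherwise treat the int64 tensor like a differentiable input. With the new branch, non-float inputs simply get an all-zero Jacobian block in a placeholder float32 dtype and the per-element perturbation loop is skipped. A small numpy-only sketch of what that block looks like for the new test in test/legacy_test/test_nn_grad.py below (shapes taken from that test, the rest is illustrative):

import numpy as np

# Shapes from TestIndexSelectDoubleGradCheck (x_shape=[2,2,2,2], axis=2, 3 indices).
index_shape = [3]          # the int64 index input; no gradient is defined for it
out_shape = [2, 2, 3, 2]   # shape of paddle.index_select(x, index, axis=2)

# Placeholder Jacobian block for the index input: all zeros, float32,
# shape [numel(index), numel(out)], mirroring the new `else` branch above.
jacobian_block = np.zeros(
    [int(np.prod(index_shape)), int(np.prod(out_shape))], dtype=np.float32
)
print(jacobian_block.shape)  # (3, 24)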

test/legacy_test/test_nn_grad.py

Lines changed: 44 additions & 0 deletions
@@ -551,6 +551,50 @@ def test_grad(self):
         self.func(p)
 
 
+class TestIndexSelectDoubleGradCheck(unittest.TestCase):
+    @prog_scope()
+    def func(self, place):
+        x_shape = [2, 2, 2, 2]
+        axis = 2
+        index_shape = [3]
+        dtype = np.float64
+
+        x = paddle.static.data('x', x_shape, dtype)
+        x.persistable = True
+        x.stop_gradient = False
+        index = paddle.static.data('index', index_shape, 'int64')
+        index.persistable = True
+        out = paddle.index_select(x, index, axis)
+
+        x_arr = np.random.uniform(-1, 1, x_shape).astype(dtype)
+        index_arr = np.random.uniform(
+            -x_shape[axis], x_shape[axis], index_shape
+        ).astype('int64')
+        gradient_checker.double_grad_check(
+            [x, index], out, x_init=[x_arr, index_arr], place=place
+        )
+
+        def index_select_wrapper(args):
+            return paddle.index_select(*args, axis=axis)
+
+        gradient_checker.double_grad_check_for_dygraph(
+            index_select_wrapper,
+            [x, index],
+            out,
+            x_init=[x_arr, index_arr],
+            place=place,
+        )
+
+    def test_grad(self):
+        places = []
+        # free(): invalid next size (fast) may occur when
+        # executed on CPU
+        if core.is_compiled_with_cuda():
+            places.append(base.CUDAPlace(0))
+        for p in places:
+            self.func(p)
+
+
 class TestAvgPool2DDoubleGradCheckCase1(unittest.TestCase):
 
     @prog_scope()
