
Commit 73bdf47

【CUDA Kernel No.120】 cal_aux_loss_grad operator kernel fix - part (#75637)
1 parent b4f9f2a commit 73bdf47

2 files changed: 35 additions (+), 2 deletions (−)


paddle/phi/kernels/legacy/gpu/cal_aux_loss_grad_kernel.cu

Lines changed: 2 additions & 2 deletions
@@ -12,9 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/phi/core/dense_tensor.h"
-
+#include "paddle/phi/kernels/legacy/gpu/cal_aux_loss_grad_kernel.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/kernel_registry.h"
 
 #include "paddle/phi/kernels/funcs/math_cuda_utils.h"
paddle/phi/kernels/legacy/gpu/cal_aux_loss_grad_kernel.h (new file)

Lines changed: 33 additions & 0 deletions

@@ -0,0 +1,33 @@
+// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/device_context.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void CalAuxLossGradKernel(const Context& dev_ctx,
+                          const DenseTensor& gate_prob,
+                          const DenseTensor& seqlen_float,
+                          const DenseTensor& ce,
+                          const DenseTensor& l_aux_loss_grad,
+                          const int64_t num_experts,
+                          const bool use_group,
+                          const int64_t moe_k,
+                          DenseTensor* gate_prob_grad);
+
+}  // namespace phi
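For reference, a hypothetical call-site sketch of how code holding a phi::GPUContext might invoke the declaration above; the wrapper function, the assumed [tokens, experts] layout of gate_prob, and the example values for use_group and moe_k are illustrative only and are not part of this commit.

#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/legacy/gpu/cal_aux_loss_grad_kernel.h"

// Hypothetical call site: all tensors are assumed to already live on the GPU
// and to have their meta (dims/dtype) set by earlier ops.
void RunCalAuxLossGrad(const phi::GPUContext& dev_ctx,
                       const phi::DenseTensor& gate_prob,
                       const phi::DenseTensor& seqlen_float,
                       const phi::DenseTensor& ce,
                       const phi::DenseTensor& l_aux_loss_grad,
                       phi::DenseTensor* gate_prob_grad) {
  const int64_t num_experts = gate_prob.dims()[1];  // assumed [tokens, experts]
  const bool use_group = false;                     // illustrative defaults
  const int64_t moe_k = 2;
  // Explicit template arguments pick the dtype and the device context type;
  // the instantiation is provided by the registration in the .cu.
  phi::CalAuxLossGradKernel<float, phi::GPUContext>(dev_ctx,
                                                    gate_prob,
                                                    seqlen_float,
                                                    ce,
                                                    l_aux_loss_grad,
                                                    num_experts,
                                                    use_group,
                                                    moe_k,
                                                    gate_prob_grad);
}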
