Skip to content

Commit 0cc416a

Browse files
authored
【Metax】fix patch (#178)
1 parent 3411e40 commit 0cc416a

File tree

6 files changed

+29
-15
lines changed

6 files changed

+29
-15
lines changed

backends/metax_gpu/build.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@
1616
# limitations under the License.
1717

1818
set -e
19+
20+
# install requirement.txt
21+
pip install -r requirement.txt -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
22+
1923
# uninstall paddle
2024
pip uninstall paddlepaddle -y
2125

@@ -24,7 +28,7 @@ pip uninstall paddlepaddle -y
2428
# git submodule sync --recursive && git submodule update --init --recursive
2529

2630

27-
pip install parameterized safetensors==0.6.2 -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple some-package
31+
# pip install parameterized safetensors==0.6.2 -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple some-package
2832
# install paddle
2933

3034

backends/metax_gpu/build_in_metax.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717

1818
set -e
1919

20+
# install requirement.txt
21+
pip install -r requirement.txt -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
22+
2023
# init paddle
2124
git submodule sync --recursive && git submodule update --init --recursive
2225

backends/metax_gpu/build_private_CI.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@
1616
# limitations under the License.
1717

1818
set -e
19+
20+
# install requirement.txt
21+
pip install -r requirement.txt -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
22+
1923
# uninstall paddle
2024
pip uninstall paddlepaddle -y
2125

@@ -49,7 +53,7 @@ echo "✅ 脚本执行完毕!"
4953
echo "📌 已撤销本地修改,并更新到 Paddle 最新的 develop (dev) 分支代码。"
5054

5155

52-
pip install parameterized safetensors==0.6.2 -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple some-package
56+
# pip install parameterized safetensors==0.6.2 -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple some-package
5357
# install paddle
5458

5559
python -m pip install --pre paddlepaddle -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/

backends/metax_gpu/patch/paddle.patch

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -229,15 +229,15 @@ index c5309e7e11..3328571380 100644
229229
} \
230230
}; \
231231
diff --git a/paddle/phi/backends/gpu/cuda/cuda_device_function.h b/paddle/phi/backends/gpu/cuda/cuda_device_function.h
232-
index 4ff2e528a9..23f7f4b583 100644
232+
index 092365a961..23d3b65dc6 100644
233233
--- a/paddle/phi/backends/gpu/cuda/cuda_device_function.h
234234
+++ b/paddle/phi/backends/gpu/cuda/cuda_device_function.h
235235
@@ -1,3 +1,4 @@
236236
+// 2024 - Modified by MetaX Integrated Circuits (Shanghai) Co., Ltd. All Rights Reserved.
237237
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
238238

239239
Licensed under the Apache License, Version 2.0 (the "License");
240-
@@ -25,7 +26,7 @@ namespace phi {
240+
@@ -23,7 +24,7 @@ namespace phi {
241241
namespace backends {
242242
namespace gpu {
243243

@@ -246,7 +246,7 @@ index 4ff2e528a9..23f7f4b583 100644
246246
#define CREATE_SHFL_MASK(mask, predicate) \
247247
mask = __ballot_sync(FULL_WARP_MASK, (predicate))
248248

249-
@@ -45,12 +46,12 @@ namespace gpu {
249+
@@ -43,12 +44,12 @@ namespace gpu {
250250

251251
template <typename T>
252252
__forceinline__ __device__ T
@@ -261,7 +261,7 @@ index 4ff2e528a9..23f7f4b583 100644
261261
T val,
262262
int width = warpSize) {
263263
return __shfl_xor_sync(mask, val, width);
264-
@@ -58,14 +59,14 @@ __forceinline__ __device__ T CudaShuffleXorSync(unsigned mask,
264+
@@ -56,14 +57,14 @@ __forceinline__ __device__ T CudaShuffleXorSync(unsigned mask,
265265

266266
template <>
267267
__forceinline__ __device__ phi::dtype::float16 CudaShuffleDownSync(
@@ -278,7 +278,7 @@ index 4ff2e528a9..23f7f4b583 100644
278278
#if defined(PADDLE_CUDA_BF16)
279279
return phi::dtype::bfloat16(__shfl_down_sync(
280280
mask, val.to_nv_bfloat16(), static_cast<unsigned>(delta), width));
281-
@@ -77,7 +78,7 @@ __forceinline__ __device__ phi::dtype::bfloat16 CudaShuffleDownSync(
281+
@@ -75,7 +76,7 @@ __forceinline__ __device__ phi::dtype::bfloat16 CudaShuffleDownSync(
282282

283283
template <>
284284
__forceinline__ __device__ phi::dtype::complex<float> CudaShuffleDownSync(
@@ -287,7 +287,7 @@ index 4ff2e528a9..23f7f4b583 100644
287287
float real = static_cast<float>(__shfl_down_sync(
288288
mask, static_cast<float>(val.real), static_cast<unsigned>(delta), width));
289289
float imag = static_cast<float>(__shfl_down_sync(
290-
@@ -87,7 +88,7 @@ __forceinline__ __device__ phi::dtype::complex<float> CudaShuffleDownSync(
290+
@@ -85,7 +86,7 @@ __forceinline__ __device__ phi::dtype::complex<float> CudaShuffleDownSync(
291291

292292
template <>
293293
__forceinline__ __device__ phi::dtype::complex<double> CudaShuffleDownSync(
@@ -296,7 +296,7 @@ index 4ff2e528a9..23f7f4b583 100644
296296
double real =
297297
static_cast<double>(__shfl_down_sync(mask,
298298
static_cast<double>(val.real),
299-
@@ -103,13 +104,13 @@ __forceinline__ __device__ phi::dtype::complex<double> CudaShuffleDownSync(
299+
@@ -101,20 +102,20 @@ __forceinline__ __device__ phi::dtype::complex<double> CudaShuffleDownSync(
300300

301301
template <>
302302
__forceinline__ __device__ phi::dtype::float16 CudaShuffleXorSync(
@@ -309,10 +309,9 @@ index 4ff2e528a9..23f7f4b583 100644
309309
__forceinline__ __device__ phi::dtype::bfloat16 CudaShuffleXorSync(
310310
- unsigned mask, phi::dtype::bfloat16 val, int width) {
311311
+ unsigned long long mask, phi::dtype::bfloat16 val, int width) {
312-
#if defined(PADDLE_CUDA_BF16)
313312
return phi::dtype::bfloat16(
314313
__shfl_xor_sync(mask, val.to_nv_bfloat16(), width));
315-
@@ -121,7 +122,7 @@ __forceinline__ __device__ phi::dtype::bfloat16 CudaShuffleXorSync(
314+
}
316315

317316
template <>
318317
__forceinline__ __device__ phi::dtype::complex<float> CudaShuffleXorSync(
@@ -321,7 +320,7 @@ index 4ff2e528a9..23f7f4b583 100644
321320
float real = static_cast<float>(
322321
__shfl_xor_sync(mask, static_cast<float>(val.real), width));
323322
float imag = static_cast<float>(
324-
@@ -131,7 +132,7 @@ __forceinline__ __device__ phi::dtype::complex<float> CudaShuffleXorSync(
323+
@@ -124,7 +125,7 @@ __forceinline__ __device__ phi::dtype::complex<float> CudaShuffleXorSync(
325324

326325
template <>
327326
__forceinline__ __device__ phi::dtype::complex<double> CudaShuffleXorSync(
@@ -330,7 +329,7 @@ index 4ff2e528a9..23f7f4b583 100644
330329
double real = static_cast<double>(
331330
__shfl_xor_sync(mask, static_cast<double>(val.real), width));
332331
double imag = static_cast<double>(
333-
@@ -141,7 +142,7 @@ __forceinline__ __device__ phi::dtype::complex<double> CudaShuffleXorSync(
332+
@@ -134,7 +135,7 @@ __forceinline__ __device__ phi::dtype::complex<double> CudaShuffleXorSync(
334333

335334
template <typename T>
336335
__forceinline__ __device__ T
@@ -339,7 +338,7 @@ index 4ff2e528a9..23f7f4b583 100644
339338
return __shfl_sync(mask, val, src_line, width);
340339
}
341340

342-
@@ -160,7 +161,7 @@ __device__ T reduceSum(T val, int tid, int len) {
341+
@@ -153,7 +154,7 @@ __device__ T reduceSum(T val, int tid, int len) {
343342
// but most card's warp size is 32.
344343
const int warpSize = 32;
345344
__shared__ T shm[warpSize];
@@ -348,6 +347,7 @@ index 4ff2e528a9..23f7f4b583 100644
348347
CREATE_SHFL_MASK(mask, tid < len);
349348

350349
for (int offset = warpSize / 2; offset > 0; offset /= 2)
350+
351351
diff --git a/paddle/phi/core/enforce.h b/paddle/phi/core/enforce.h
352352
index 024a7de73e..66b373d698 100644
353353
--- a/paddle/phi/core/enforce.h

backends/metax_gpu/requirement.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
parameterized
2+
safetensors==0.6.2
3+
scipy

backends/metax_gpu/tests/run_test.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# See the License for the specific language governing permissions and
1616
# limitations under the License.
1717

18-
pip install scipy -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple some-package
18+
# pip install scipy -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple some-package
1919
SCRIPT_DIR=$(dirname "$0")
2020
LEGACY_TEST_PATH="${SCRIPT_DIR}/../../../Paddle/test/legacy_test"
2121
TEST_PATH1="${SCRIPT_DIR}/../../../python"

0 commit comments

Comments
 (0)