forked from microsoft/DeepSpeed
-
Notifications
You must be signed in to change notification settings - Fork 0
77 lines (63 loc) · 2.32 KB
/
huawei-ascend-npu.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# Workflow identity, triggers and run-level settings.
name: huawei-ascend-npu

# Runs on manual dispatch, on pull requests that touch this workflow file or
# anything under requirements/, and on a cron schedule (minute 0, hour 0,
# every day).
on:
  workflow_dispatch:
  pull_request:
    paths:
      - '.github/workflows/huawei-ascend-npu.yml'
      - 'requirements/**'
  schedule:
    - cron: '0 0 * * *'

# Runs are grouped per workflow + ref; an in-progress run in the same group is
# cancelled when a newer one starts.
concurrency:
  group: '${{ github.workflow }}-${{ github.ref }}'
  cancel-in-progress: true

# Token scopes granted to the jobs in this workflow.
permissions:
  contents: read
  issues: write
jobs:
  # Installs torch / torch_npu and DeepSpeed inside the ascendai/cann
  # container, prints the environment, then runs the CIFAR-10 training script
  # from DeepSpeedExamples.
  unit-tests:
    runs-on: [self-hosted, ascend, npu]
    container:
      image: ascendai/cann
      ports:
        - 80
      # Host driver/toolkit files, mounted at identical paths in the container.
      volumes:
        - /usr/local/dcmi:/usr/local/dcmi
        - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
        - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/
        - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info
        - /etc/ascend_install.info:/etc/ascend_install.info
        - /usr/local/Ascend/ascend-toolkit/:/usr/local/Ascend/ascend-toolkit/
      # Folded scalar: the lines below are joined with single spaces into one
      # option string handed to the container runtime.
      options: >-
        --network host
        --device /dev/davinci4
        --device /dev/davinci_manager
        --device /dev/devmm_svm
        --device /dev/hisi_hdc
        --shm-size "20g"

    steps:
      - uses: actions/checkout@v4

      # Prints NPU status, refreshes the apt index, installs pinned torch /
      # torch_npu wheels from the Tsinghua PyPI mirror, then prints the
      # installed versions and NPU availability.
      - name: Install pytorch
        run: |
          npu-smi info
          apt-get update
          # echo "export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/common:/usr/local/Ascend/driver/lib64/driver:${LD_LIBRARY_PATH}" >>~/.bashrc
          # echo "source /usr/local/Ascend/ascend-toolkit/set_env.sh" >>~/.bashrc
          cat ~/.bashrc
          # source ~/.bashrc
          pip install torch==2.2.0 torchvision==0.17.0 torch_npu==2.2.0 numpy==1.26.4 cloudpickle tornado -i https://pypi.tuna.tsinghua.edu.cn/simple
          python -c "import torch; print('torch:', torch.__version__)"
          python -c "import torch,torch_npu; print('torch_npu:', torch.npu.is_available(),',version:', torch_npu.__version__)"

      # Installs DeepSpeed from the checked-out tree and prints ds_report.
      - name: Install deepspeed
        run: |
          pip install . -i https://pypi.tuna.tsinghua.edu.cn/simple
          ds_report

      - name: Python environment
        run: |
          pip list

      # Clones DeepSpeedExamples and launches the CIFAR-10 training script.
      # NOTE(review): no command below references GITHUB_TOKEN explicitly;
      # confirm it is still required before exposing the secret to this step.
      - name: Multi-card parallel training
        env:
          GITHUB_TOKEN: ${{ secrets.DEEPSPEED_TOKEN }}
        run: |
          apt-get install -y git
          git clone --depth 1 https://github.com/microsoft/DeepSpeedExamples.git
          cd DeepSpeedExamples
          deepspeed ./training/cifar/cifar10_deepspeed.py