-
Notifications
You must be signed in to change notification settings - Fork 4.2k
/
Copy pathazure-pipelines.yml
114 lines (99 loc) · 3.65 KB
/
azure-pipelines.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
jobs:
# DeepSpeed_Tests: for every enabled matrix entry, build a dedicated conda
# environment, install DeepSpeed into it, then run the unit tests and (when
# runmodeltests is true) the model tests.
- job: DeepSpeed_Tests
  timeoutInMinutes: 360
  pool:
    name: 'DS_testing'

  strategy:
    matrix:
      # Each entry pins the Python / CUDA / PyTorch / torchvision versions for
      # one run of this job. Only PyTorch12-CUDA100 is currently enabled; the
      # other configurations are kept below, commented out.
      PyTorch12-CUDA100:
        python.version: '3.6'
        cuda.version: '10.0'
        pytorch.version: '1.2'
        torchvision.version: '0.4.0'
        runmodeltests: true
      # PyTorch15-CUDA101:
      #   python.version: '3.7'
      #   cuda.version: '10.1'
      #   pytorch.version: '1.5.0+cu101'
      #   torchvision.version: '0.6.0+cu101'
      #   runmodeltests: true
      # PyTorch15-CUDA102:
      #   python.version: '3.7'
      #   cuda.version: '10.2'
      #   pytorch.version: '1.5'
      #   torchvision.version: '0.6.1'
      #   runmodeltests: true

  variables:
    # The environment name embeds the matrix versions, so each configuration
    # gets its own conda environment.
    conda_env: 'ds_test_py$(python.version)_cuda$(cuda.version)_pytorch$(pytorch.version)'

  steps:
  # Unfortunately NVIDIA's nvcc_linux-64=<version> package seems to install
  # 10.1 regardless of the requested version. Most of this complexity is a
  # workaround to get the compiler toolchain to match the cudatoolkit runtime.
  # cudatoolkit-dev is installed from conda-forge for every CUDA version
  # except 10.2.
  - script: |
      conda create --force --yes -n $(conda_env) python=$(python.version) cudatoolkit=$(cuda.version)
      source activate $(conda_env)
      conda install -q --yes conda
      conda install -q --yes pip
      conda install -q --yes gxx_linux-64
      if [[ $(cuda.version) != "10.2" ]]; then conda install --yes -c conda-forge cudatoolkit-dev=$(cuda.version) ; fi
    displayName: 'Setup environment python=$(python.version) pytorch=$(pytorch.version) cuda=$(cuda.version)'

  # Manually install torch/torchvision first to enforce versioning, then
  # install DeepSpeed itself through the repository's install.sh.
  - script: |
      source activate $(conda_env)
      pip install --progress-bar=off torch==$(pytorch.version) torchvision==$(torchvision.version)
      #-f https://download.pytorch.org/whl/torch_stable.html
      ./install.sh --local_only
      #python -I basic_install_test.py
    displayName: 'Install DeepSpeed'

  # Log which python, nvcc and deepspeed binaries are on PATH, plus the
  # torch / deepspeed versions and whether torch can see CUDA.
  - script: |
      source activate $(conda_env)
      which python
      python --version
      which nvcc
      nvcc --version
      which deepspeed
      python -c "import torch; print('torch:', torch.__version__, torch)"
      python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
      python -c "import deepspeed; print('deepspeed:', deepspeed.__version__)"
    displayName: 'Show environment'

  # Run the unit test suite: -x stops at the first failure, --forked runs
  # each test in its own subprocess, --durations=0 reports every test's
  # duration.
  - script: |
      source activate $(conda_env)
      pytest --durations=0 --forked --verbose -x tests/unit/
    displayName: 'Unit tests'

  # Model tests: link the Megatron-LM data from /data on the agent into the
  # DeepSpeedExamples checkout, install the Megatron-LM requirements, delete
  # the BingBertSquad and Megatron_GPT2 baseline directories, then run the
  # sanity check. Only runs when all earlier steps succeeded and the matrix
  # entry sets runmodeltests to true.
  - script: |
      source activate $(conda_env)
      ln -s /data/Megatron-LM/data DeepSpeedExamples/Megatron-LM/
      pip install --progress-bar=off -r DeepSpeedExamples/Megatron-LM/requirements.txt
      cd tests/model/
      rm -rf BingBertSquad/baseline
      rm -rf Megatron_GPT2/baseline
      pytest --durations=0 -s run_sanity_check.py
    condition: and(succeeded(), eq(variables['runmodeltests'], true))
    displayName: 'Model tests'

  # BingBertSquad logs: publish tests/model/BingBertSquad/test/ as the
  # BingBertSquad_logs pipeline artifact.
  # NOTE(review): this condition has no succeeded() check, so the upload is
  # not gated on the earlier steps passing - presumably so logs are still
  # collected after a failed test run; confirm this is intended.
  - task: PublishPipelineArtifact@1
    inputs:
      targetPath: '$(Build.SourcesDirectory)/tests/model/BingBertSquad/test/'
      artifactName: BingBertSquad_logs
    displayName: 'BingBertSquad log uploads'
    condition: eq(variables['runmodeltests'], true)

# Code_Quality_Checks: run the pre-commit hooks and pylint over the
# repository inside a dedicated Python 3.7 conda environment.
- job: Code_Quality_Checks
  pool:
    name: 'DS_testing'

  variables:
    conda_env: 'ds_codetest'

  steps:
  # Create the Python 3.7 conda environment used by the steps below and
  # activate it once in this step's shell.
  - script: |
      conda create --force --yes -n $(conda_env) python=3.7
      source activate $(conda_env)
    displayName: 'Create code test environment'

  # Install pre-commit and run every configured hook against all files in
  # the repository.
  - script: |
      source activate $(conda_env)
      pip install pre-commit
      pre-commit run --all-files
    displayName: 'Formatting checks'

  # Lint the deepspeed/ package. --exit-zero makes pylint always exit with
  # status 0, so findings are reported in the log but never fail this step.
  - script: |
      source activate $(conda_env)
      pip install pylint
      pylint --exit-zero deepspeed/
    displayName: 'Code linter'