Skip to content

Commit ed191ea

Browse files
committed
Working flexible test
1 parent 79267ce commit ed191ea

File tree

2 files changed

+61
-14
lines changed

2 files changed

+61
-14
lines changed

config_vsc.py

Lines changed: 18 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -199,20 +199,16 @@
199199
{
200200
'name': 'nvidia',
201201
'scheduler': 'slurm',
202-
'access': [],
202+
'access': ['-p ampere_gpu'],
203203
'environs': ['CUDA', 'builtin'],
204-
'descr': 'Nvidia node',
204+
'descr': 'Nvidia ampere node',
205205
'max_jobs': 1,
206-
'launcher': 'local',
206+
'launcher': 'srun',
207207
'resources': [
208208
{
209209
'name': 'gpu',
210210
'options': ['--gres=gpu:{num_gpus}'],
211211
},
212-
{
213-
'name': 'partition',
214-
'options': ['-p ampere_gpu']
215-
}
216212
]
217213
}
218214
]
@@ -256,6 +252,21 @@
256252
# vsc-mympirun launcher
257253
'launcher': 'srun',
258254
},
255+
{
256+
'name': 'nvidia',
257+
'scheduler': 'slurm',
258+
'access': ['-p pascal_gpu'],
259+
'environs': ['CUDA', 'builtin'],
260+
'descr': 'Nvidia pascal nodes',
261+
'max_jobs': 2,
262+
'launcher': 'srun',
263+
'resources': [
264+
{
265+
'name': 'gpu',
266+
'options': ['--gres=gpu:{num_gpus}'],
267+
},
268+
]
269+
}
259270
]
260271
},
261272
],

tests/gpu/gpu_burn.py

Lines changed: 43 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -5,18 +5,54 @@
55
@rfm.simple_test
66
class GPU_Burn_nvidia(rfm.RunOnlyRegressionTest):
77
descr = "GPU burn test on nvidia node"
8-
valid_systems = ["vaughan:nvidia"]
8+
valid_systems = ["*:nvidia"]
99
valid_prog_environs = ["CUDA"]
10-
variables = {'CUDAPATH': '/apps/antwerpen/rome/centos8/CUDA/11.6.2'}
10+
modules = ['git']
11+
variables = {'CUDAPATH': '$EBROOTCUDA'}
1112
time_limit = '10m'
12-
prebuild_cmds = ['git clone https://github.com/wilicc/gpu-burn.git']
13-
prerun_cmds = ['cd gpu-burn', 'make']
14-
executable = './gpu_burn 5'
15-
tags = {"antwerp", "gpu", "gpuburn"}
13+
prerun_cmds = ['git clone https://github.com/wilicc/gpu-burn.git', 'cd gpu-burn', 'make']
14+
executable = './gpu_burn 20'
15+
tags = {"antwerp", "gpu", "burn"}
16+
num_devices = 0
17+
num_tasks = -1 # flexible test
18+
num_tasks_per_node = 1
19+
num_gpus_per_node = 0
20+
reference = {
21+
'vaughan:nvidia': {
22+
'device0': (17339.0, -0.05, 0.05, 'Gflop/s'),
23+
'device1': (17336.0, -0.05, 0.05, 'Gflop/s'),
24+
'device2': (17340.0, -0.05, 0.05, 'Gflop/s'),
25+
'device3': (17335.0, -0.05, 0.05, 'Gflop/s'),
26+
},
27+
'leibniz:nvidia': {
28+
'device0': (7412.0, -0.05, 0.05, 'Gflop/s'),
29+
'device1': (7412.0, -0.05, 0.05, 'Gflop/s'),
30+
}
31+
}
1632

1733
def __init__(self):
18-
self.extra_resources = {'gpu': {'num_gpus': '4'}}
34+
if self.current_system.name == 'vaughan':
35+
self.num_devices = 4
36+
if self.current_system.name == 'leibniz':
37+
self.num_devices = 2
38+
39+
self.extra_resources = {'gpu': {'num_gpus': str(self.num_devices)}}
40+
self.descr = f'Nvidia gpu burn test on {self.current_system.name} with {self.num_devices} gpus'
1941

2042
@sanity_function
2143
def assert_job(self):
2244
return sn.and_(sn.assert_found(r'OK', self.stdout), sn.assert_not_found(r'FAULTY', self.stdout))
45+
46+
@performance_function('Gflop/s')
47+
def get_gflops(self, device=0):
48+
return sn.extractsingle(r'\((?P<gflops>\S+) Gflop/s\)', self.stdout, 'gflops', float, item=(-device-1))
49+
50+
@run_before('performance')
51+
def set_perf_variables(self):
52+
'''Build the dictionary with all the performance variables.'''
53+
self.perf_variables = {}
54+
55+
counter = 0
56+
for x in range(self.num_devices):
57+
self.perf_variables[f'device{counter}'] = self.get_gflops(device=self.num_devices-counter)
58+
counter += 1

0 commit comments

Comments
 (0)