|
@rfm.simple_test
class GPU_Burn_nvidia(rfm.RunOnlyRegressionTest):
    """Clone, build and run gpu-burn on an nvidia partition.

    The sanity check requires ``OK`` and forbids ``FAULTY`` in stdout.
    One performance variable per GPU (``device0`` .. ``deviceN-1``) is
    extracted from the ``(<value> Gflop/s)`` readings in stdout and
    compared against ``reference``.
    """

    descr = "GPU burn test on nvidia node"
    valid_systems = ["*:nvidia"]
    valid_prog_environs = ["CUDA"]
    # git is needed because the sources are cloned in prerun_cmds.
    modules = ['git']
    variables = {'CUDAPATH': '$EBROOTCUDA'}
    time_limit = '10m'
    prerun_cmds = ['git clone https://github.com/wilicc/gpu-burn.git', 'cd gpu-burn', 'make']
    executable = './gpu_burn 20'
    tags = {"antwerp", "gpu", "burn"}
    # Number of GPUs to request; overridden per system in __init__.
    num_devices = 0
    num_tasks = -1  # flexible test
    num_tasks_per_node = 1
    num_gpus_per_node = 0
    reference = {
        'vaughan:nvidia': {
            'device0': (17339.0, -0.05, 0.05, 'Gflop/s'),
            'device1': (17336.0, -0.05, 0.05, 'Gflop/s'),
            'device2': (17340.0, -0.05, 0.05, 'Gflop/s'),
            'device3': (17335.0, -0.05, 0.05, 'Gflop/s'),
        },
        'leibniz:nvidia': {
            'device0': (7412.0, -0.05, 0.05, 'Gflop/s'),
            'device1': (7412.0, -0.05, 0.05, 'Gflop/s'),
        },
    }

    def __init__(self):
        """Set the GPU count for the current system and derive the request."""
        # Systems not listed here keep the class default of 0 devices.
        if self.current_system.name == 'vaughan':
            self.num_devices = 4
        elif self.current_system.name == 'leibniz':
            self.num_devices = 2

        self.extra_resources = {'gpu': {'num_gpus': str(self.num_devices)}}
        self.descr = f'Nvidia gpu burn test on {self.current_system.name} with {self.num_devices} gpus'

    @sanity_function
    def assert_job(self):
        """Pass when stdout contains ``OK`` and does not contain ``FAULTY``."""
        return sn.and_(sn.assert_found(r'OK', self.stdout), sn.assert_not_found(r'FAULTY', self.stdout))

    @performance_function('Gflop/s')
    def get_gflops(self, device=0):
        """Extract one ``(<value> Gflop/s)`` reading from stdout as a float.

        ``device`` is an offset counted from the END of the list of
        matches: 0 selects the last match, 1 the one before it, and so on.
        """
        return sn.extractsingle(r'\((?P<gflops>\S+) Gflop/s\)', self.stdout, 'gflops', float, item=(-device-1))

    @run_before('performance')
    def set_perf_variables(self):
        '''Build the dictionary with all the performance variables.'''
        # NOTE(review): assumes the last `num_devices` matches in stdout are
        # the final per-device readings in device order -- confirm against
        # gpu-burn's output. Under that assumption device `idx` sits
        # `num_devices - 1 - idx` matches from the end; the previous
        # `num_devices - idx` pointed one match too early for every device.
        self.perf_variables = {
            f'device{idx}': self.get_gflops(device=self.num_devices - 1 - idx)
            for idx in range(self.num_devices)
        }
0 commit comments