Skip to content

Commit fb0209d

Browse files
authored
Merge pull request #21 from CentML/dec22-new-devices
Add new devices to Habitat
2 parents 36e10df + a49d7c0 commit fb0209d

File tree

14 files changed

+159
-20
lines changed

14 files changed

+159
-20
lines changed

analyzer/habitat/analysis/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
SPECIAL_OPERATIONS = {
66
# Convolution
77
'conv2d',
8+
'conv_transpose2d',
89

910
# Matrix multiply operations
1011
'linear',

analyzer/habitat/analysis/mlp/dataset_process.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -35,29 +35,23 @@ def get_dataset(path, features, device_features=None):
3535
print("Loaded file %s (%d entries)" % (f, len(df.index)))
3636

3737
if device_name not in devices:
38-
devices[device_name] = df
39-
else:
40-
devices[device_name] = devices[device_name].append(df)
38+
devices[device_name] = []
39+
devices[device_name].append(df)
4140

4241
for device in devices.keys():
42+
devices[device] = pd.concat(devices[device])
4343
print("Device %s contains %d entries" % (device, len(devices[device].index)))
4444

4545
print()
4646

47-
print("Merging")
48-
df_merged = functools.reduce(
49-
lambda df1, df2: pd.merge(df1, df2, on=features),
50-
devices.values()
51-
)
52-
5347
print("Generating dataset")
5448
# generate vectorized dataset (one entry for each device with device params)
5549
device_params = get_all_devices(device_features)
5650

5751
x, y = [], []
5852
for device in devices.keys():
59-
df_merged_device = df_merged[features + [device, ]]
60-
for row in tqdm(df_merged_device.iterrows(), leave=False, desc=device, total=len(df_merged_device.index)):
53+
df_device = devices[device]
54+
for row in tqdm(df_device.iterrows(), leave=False, desc=device, total=len(df_device.index)):
6155
row = row[1]
6256

6357
x.append(list(row[:-1]) + device_params[device])

analyzer/habitat/analysis/mlp/devices.csv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,6 @@ T4,16,GDDR6,320,40,,8.1,
77
V100,16,HBM2,900,80,,14.028,
88
A100,40,HBM2,1555,108,9.5,19.0,300.0
99
RTX3090,24,GDDR6X,936.2,82,556.0,35.58,35.58
10+
A40,48,GDDR6,614.9,84,1.168,37.4,299.4
11+
A4000,16,GDDR6,378.1,48,0.599,19.17,19.17
12+
RTX4000,8,GDDR6,364.1,36,0.2225,7.119,7.119

analyzer/habitat/analysis/mlp/mlp.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,25 @@ def forward(self, x):
8989

9090
return x
9191

92+
class ConvTranspose2DMLP(nn.Module):
    """Regression network used for the 'conv_transpose2d' operation.

    The input vector holds one value per entry of ``self.features`` followed
    by 4 additional values; the network produces a single output value.
    """

    def __init__(self, layers, layer_size):
        super().__init__()

        # Operation features, one input value each.
        self.features = ['bias', 'batch', 'image_size', 'in_channels', 'out_channels', 'kernel_size', 'stride',
                         'padding']

        # The 4 extra inputs are reserved for the device parameters.
        input_width = len(self.features) + 4
        self.fc1 = nn.Linear(input_width, layer_size)
        self.mlp = MLPBase(layers, layer_size)
        self.fc2 = nn.Linear(layer_size, 1)

    def forward(self, x):
        """Apply fc1, ReLU, the MLPBase trunk and fc2, in that order."""
        hidden = F.relu(self.fc1(x))
        return self.fc2(self.mlp(hidden))
92111

93112
class BMMMLP(nn.Module):
94113
def __init__(self, layers, layer_size):
@@ -119,6 +138,7 @@ def __init__(self, model_name, layers, layer_size, model_path=None):
119138
"linear": LinearMLP,
120139
"lstm": LSTMMLP,
121140
"conv2d": Conv2DMLP,
141+
"conv_transpose2d": ConvTranspose2DMLP,
122142
"bmm": BMMMLP,
123143
}[self.model_name](layers, layer_size)
124144

analyzer/habitat/analysis/predictor.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@
1919
'input', 'weight', 'bias', 'stride', 'padding', 'dilation', 'groups',
2020
]
2121

22+
# Positional parameter order of torch.nn.functional.conv_transpose2d.
# Unlike conv2d, `output_padding` sits between `padding` and `groups`,
# and `dilation` comes last.
CONVTRANSPOSE2D_PARAMS = [
    'input', 'weight', 'bias', 'stride', 'padding', 'output_padding',
    'groups', 'dilation',
]
25+
2226
LINEAR_PARAMS = ['input', 'weight', 'bias']
2327

2428
BMM_PARAMS = ['input', 'mat2', 'out']
@@ -79,6 +83,10 @@ def __init__(
7983
"bmm", 8, 1024,
8084
path_to_data("bmm/model.pth"),
8185
)
86+
self.conv_transpose2d_pred = RuntimePredictor(
87+
"conv_transpose2d", 8, 1024,
88+
path_to_data("conv_transpose2d/model.pth"),
89+
)
8290

8391

8492
def predict_operation(self, operation, dest_device):
@@ -99,6 +107,8 @@ def predict_operation(self, operation, dest_device):
99107
return self._special_scale(operation, dest_device, self._linear_scale)
100108
elif operation.name == 'bmm':
101109
return self._special_scale(operation, dest_device, self._bmm_scale)
110+
elif operation.name == 'conv_transpose2d':
111+
return self._special_scale(operation, dest_device, self._conv_transpose2d_scale)
102112

103113
logger.warn('Unhandled special operation: %s', operation.name)
104114
return PredictedOperation(
@@ -181,6 +191,40 @@ def _conv2d_scale(self, operation, dest_device):
181191

182192
return operation.run_time_ms * pred_dest / pred_orig
183193

194+
def _conv_transpose2d_scale(self, operation, dest_device):
    """Scale a measured conv_transpose2d run time to ``dest_device``.

    The measured ``operation.run_time_ms`` is multiplied by the ratio of the
    predictor's output for the destination device to its output for the
    device the operation was measured on.
    """
    # 1. Merge arguments (give them all names)
    merged = name_all_arguments(
        CONVTRANSPOSE2D_PARAMS,
        operation.arguments.args,
        operation.arguments.kwargs,
    )

    def _first_if_tuple(value):
        # stride/padding may be given as an int or as a tuple; use the first
        # entry when it is a tuple.
        return value[0] if isinstance(value, tuple) else value

    # 2. Construct arguments that the predictor expects
    # NOTE(review): indexing assumes tensor arguments are recorded as their
    # shapes (input: N, C, H, W) -- confirm against the tracker.
    arguments = dict(
        batch=merged['input'][0],
        image_size=merged['input'][2],
        in_channels=merged['input'][1],
        # A transposed-convolution weight is laid out as
        # (in_channels, out_channels // groups, kH, kW), so the output channel
        # count is at index 1 (index 0 is in_channels, unlike conv2d).
        # NOTE(review): for groups > 1 the true count is weight[1] * groups.
        out_channels=merged['weight'][1],
        kernel_size=merged['weight'][2],
        stride=_first_if_tuple(merged['stride']),
        padding=_first_if_tuple(merged['padding']),
        bias=(1 if merged['bias'] is not None else 0),
    )

    # 3. Call model to make prediction
    features = [arguments[x] for x in self.conv_transpose2d_pred.model.features]

    pred_dest = self.conv_transpose2d_pred.predict(features, dest_device.name)
    pred_orig = self.conv_transpose2d_pred.predict(features, operation.device.name)

    return operation.run_time_ms * pred_dest / pred_orig
227+
184228
def _linear_scale(self, operation, dest_device):
185229
merged = name_all_arguments(
186230
LINEAR_PARAMS,

analyzer/habitat/data/bmm/model.pth

-32.1 MB
Binary file not shown.

analyzer/habitat/data/checksums

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
b2c102539c05fd9169774c9fc32f0732a82b679e bmm/model.pth
2-
376bd8d9233c3384be01e638191e842ebc212f5f conv2d/model.pth
1+
7e09507e4a97b9b5b2d274b6a5f3bfb7ea1dbd97 bmm/model.pth
2+
065178ab02df36e7401bd4674a14ee940a1cd8a1 conv2d/model.pth
3+
d8a583c0b9068cb92276468272935ce5b0b78ca9 conv_transpose2d/model.pth
34
659a00c6cff529613b40d5166fe7d93f42e8327d kernels.sqlite
4-
9f17fb74321fe1a82d9d647cd8c976b724b69a94 linear/model.pth
5-
ef93bb74178d38660b31ac0bef262c39b1e6ba16 lstm/model.pth
5+
5f0be970e001ba9b66b7fd9b100f01a2f91ecf8d linear/model.pth
6+
a4bd128777ef5c90c3aaa1dd2abfed42e6024f3d lstm/model.pth
-32.1 MB
Binary file not shown.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:06c9bb03de9d81ed9d22febcd90a338d5a4e851be4b686e4aa6037fa195354aa
3+
size 33650493

analyzer/habitat/data/devices.yml

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,4 +161,51 @@ RTX3090:
161161
base_clock_mhz: 1395
162162
peak_gflops_per_second: 19346
163163

164+
A40:
165+
compute_major: 8
166+
compute_minor: 6
167+
max_threads_per_block: 1024
168+
max_threads_per_multiprocessor: 1536
169+
regs_per_block: 65536
170+
regs_per_multiprocessor: 65536
171+
warp_size: 32
172+
shared_mem_per_block: 49152
173+
shared_mem_per_multiprocessor: 102400
174+
num_sms: 84
175+
shared_mem_per_block_optin: 101376
176+
mem_bandwidth_gb: 614
177+
base_clock_mhz: 1305
178+
peak_gflops_per_second: 18680
179+
180+
A4000:
181+
compute_major: 8
182+
compute_minor: 6
183+
max_threads_per_block: 1024
184+
max_threads_per_multiprocessor: 1536
185+
regs_per_block: 65536
186+
regs_per_multiprocessor: 65536
187+
warp_size: 32
188+
shared_mem_per_block: 49152
189+
shared_mem_per_multiprocessor: 102400
190+
num_sms: 48
191+
shared_mem_per_block_optin: 101376
192+
mem_bandwidth_gb: 378
193+
base_clock_mhz: 735
194+
peak_gflops_per_second: 10499
195+
196+
RTX4000:
197+
compute_major: 7
198+
compute_minor: 5
199+
max_threads_per_block: 1024
200+
max_threads_per_multiprocessor: 1024
201+
regs_per_block: 65536
202+
regs_per_multiprocessor: 65536
203+
warp_size: 32
204+
shared_mem_per_block: 49152
205+
shared_mem_per_multiprocessor: 65536
206+
num_sms: 36
207+
shared_mem_per_block_optin: 65536
208+
mem_bandwidth_gb: 364
209+
base_clock_mhz: 1005
210+
peak_gflops_per_second: 4245
164211

-32.1 MB
Binary file not shown.

analyzer/habitat/data/lstm/model.pth

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
version https://git-lfs.github.com/spec/v1
2-
oid sha256:cf50e632cb66f3f8585f65a14d44f36b373ea320f60fb27ab1477d5e6983c249
3-
size 33647145
2+
oid sha256:feca09482a1c7f48f9b07fa58abd85b34467107382e33fece92d2fd8af863675
3+
size 33646397

experiments/run_experiment.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,13 @@
2222
habitat.Device.T4,
2323
habitat.Device.RTX2070,
2424
habitat.Device.RTX2080Ti,
25+
26+
habitat.Device.RTX2080Ti,
27+
habitat.Device.RTX3090,
28+
habitat.Device.A100,
29+
habitat.Device.A40,
30+
habitat.Device.A4000,
31+
habitat.Device.RTX4000,
2532
]
2633

2734
RESNET50_BATCHES = [16, 32, 64]
@@ -237,9 +244,9 @@ def main():
237244
)
238245

239246
run_dcgan_experiments(context)
240-
# run_inception_experiments(context)
241-
# run_resnet50_experiments(context)
242-
# run_gnmt_experiments(context)
247+
run_inception_experiments(context)
248+
run_resnet50_experiments(context)
249+
run_gnmt_experiments(context)
243250
# run_transformer_experiments(context)
244251

245252

tools/recording/record_common.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
import logging
2+
import gc
23
import random
34
import signal
45

6+
import torch
7+
58
import habitat
69
from habitat.profiling.operation import OperationProfiler
710
from database import Recorder
811

12+
13+
914
logger = logging.getLogger(__name__)
1015

1116

@@ -111,6 +116,9 @@ def measure_configurations(self, args, num_configs):
111116
args.rank + 1,
112117
args.world_size,
113118
)
119+
120+
last_count = self._recorder.get_num_recordings()
121+
114122
try:
115123
for idx, config_id in enumerate(to_record):
116124
if idx < num_configs_measured:
@@ -126,6 +134,9 @@ def measure_configurations(self, args, num_configs):
126134

127135
if idx % 100 == 0:
128136
self._recorder.commit()
137+
cur_count = self._recorder.get_num_recordings()
138+
logger.info(f"commit. num_recordings: {cur_count}, new: {cur_count-last_count}")
139+
last_count = cur_count
129140

130141
if self._shutdown_early:
131142
break
@@ -144,10 +155,18 @@ def _measure(self, config):
144155

145156
except RuntimeError as e:
146157
msg = str(e)
158+
159+
if "out of memory" in msg:
160+
allocated = torch.cuda.memory_allocated()
161+
torch.cuda.empty_cache()
162+
gc.collect()
163+
logger.info(f"Cleared memory: {allocated} -> {torch.cuda.memory_allocated()}")
164+
147165
if ("out of memory" not in msg and
148166
"cuDNN error" not in msg and
149167
"Calculated padded" not in msg):
150168
logger.exception('Unexpected error during measurement.')
169+
# logger.info("error: " + msg)
151170
return None, None
152171

153172
def _record(self, config, forward_result, backward_result):

0 commit comments

Comments
 (0)