Skip to content

Add new devices to Habitat #21

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Feb 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions analyzer/habitat/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
SPECIAL_OPERATIONS = {
# Convolution
'conv2d',
'conv_transpose2d',

# Matrix multiply operations
'linear',
Expand Down
16 changes: 5 additions & 11 deletions analyzer/habitat/analysis/mlp/dataset_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,29 +35,23 @@ def get_dataset(path, features, device_features=None):
print("Loaded file %s (%d entries)" % (f, len(df.index)))

if device_name not in devices:
devices[device_name] = df
else:
devices[device_name] = devices[device_name].append(df)
devices[device_name] = []
devices[device_name].append(df)

for device in devices.keys():
devices[device] = pd.concat(devices[device])
print("Device %s contains %d entries" % (device, len(devices[device].index)))

print()

print("Merging")
df_merged = functools.reduce(
lambda df1, df2: pd.merge(df1, df2, on=features),
devices.values()
)

print("Generating dataset")
# generate vectorized dataset (one entry for each device with device params)
device_params = get_all_devices(device_features)

x, y = [], []
for device in devices.keys():
df_merged_device = df_merged[features + [device, ]]
for row in tqdm(df_merged_device.iterrows(), leave=False, desc=device, total=len(df_merged_device.index)):
df_device = devices[device]
for row in tqdm(df_device.iterrows(), leave=False, desc=device, total=len(df_device.index)):
row = row[1]

x.append(list(row[:-1]) + device_params[device])
Expand Down
3 changes: 3 additions & 0 deletions analyzer/habitat/analysis/mlp/devices.csv
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@ T4,16,GDDR6,320,40,,8.1,
V100,16,HBM2,900,80,,14.028,
A100,40,HBM2,1555,108,9.5,19.0,300.0
RTX3090,24,GDDR6X,936.2,82,556.0,35.58,35.58
A40,48,GDDR6,614.9,84,1.168,37.4,299.4
A4000,16,GDDR6,378.1,48,0.599,19.17,19.17
RTX4000,8,GDDR6,364.1,36,0.2225,7.119,7.119
20 changes: 20 additions & 0 deletions analyzer/habitat/analysis/mlp/mlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,25 @@ def forward(self, x):

return x

class ConvTranspose2DMLP(nn.Module):
    """MLP that predicts conv_transpose2d run times.

    The input vector is the concatenation of the per-layer features listed
    in ``self.features`` and a per-device feature vector.
    """

    def __init__(self, layers, layer_size, num_device_features=4):
        """
        Args:
            layers: Number of hidden layers in the shared MLP base.
            layer_size: Width of each hidden layer.
            num_device_features: Length of the device-parameter vector that
                is appended to the layer features. Defaults to 4, which is
                the size previously hard-coded here; pass a different value
                if the device feature set changes.
        """
        super().__init__()

        # Per-layer features; order must match the column order used when
        # the training dataset was generated.
        self.features = ['bias', 'batch', 'image_size', 'in_channels',
                         'out_channels', 'kernel_size', 'stride', 'padding']

        # Input layer consumes the layer features plus the device features.
        self.fc1 = nn.Linear(len(self.features) + num_device_features, layer_size)
        self.mlp = MLPBase(layers, layer_size)
        # Single scalar output: the predicted run time.
        self.fc2 = nn.Linear(layer_size, 1)

    def forward(self, x):
        """Run the prediction MLP on a batch of feature vectors ``x``."""
        x = self.fc1(x)
        x = F.relu(x)
        x = self.mlp(x)
        x = self.fc2(x)

        return x

class BMMMLP(nn.Module):
def __init__(self, layers, layer_size):
Expand Down Expand Up @@ -119,6 +138,7 @@ def __init__(self, model_name, layers, layer_size, model_path=None):
"linear": LinearMLP,
"lstm": LSTMMLP,
"conv2d": Conv2DMLP,
"conv_transpose2d": ConvTranspose2DMLP,
"bmm": BMMMLP,
}[self.model_name](layers, layer_size)

Expand Down
44 changes: 44 additions & 0 deletions analyzer/habitat/analysis/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
'input', 'weight', 'bias', 'stride', 'padding', 'dilation', 'groups',
]

# Positional-parameter names of torch.nn.functional.conv_transpose2d, in
# signature order. Note this differs from conv2d: output_padding comes
# before groups, and dilation is last. The previous list copied conv2d's
# order, which would mis-name positional arguments past `padding`.
CONVTRANSPOSE2D_PARAMS = [
    'input', 'weight', 'bias', 'stride', 'padding', 'output_padding',
    'groups', 'dilation',
]

LINEAR_PARAMS = ['input', 'weight', 'bias']

BMM_PARAMS = ['input', 'mat2', 'out']
Expand Down Expand Up @@ -79,6 +83,10 @@ def __init__(
"bmm", 8, 1024,
path_to_data("bmm/model.pth"),
)
self.conv_transpose2d_pred = RuntimePredictor(
"conv_transpose2d", 8, 1024,
path_to_data("conv_transpose2d/model.pth"),
)


def predict_operation(self, operation, dest_device):
Expand All @@ -99,6 +107,8 @@ def predict_operation(self, operation, dest_device):
return self._special_scale(operation, dest_device, self._linear_scale)
elif operation.name == 'bmm':
return self._special_scale(operation, dest_device, self._bmm_scale)
elif operation.name == 'conv_transpose2d':
return self._special_scale(operation, dest_device, self._conv_transpose2d_scale)

logger.warn('Unhandled special operation: %s', operation.name)
return PredictedOperation(
Expand Down Expand Up @@ -181,6 +191,40 @@ def _conv2d_scale(self, operation, dest_device):

return operation.run_time_ms * pred_dest / pred_orig

def _conv_transpose2d_scale(self, operation, dest_device):
    """Scale a measured conv_transpose2d run time to ``dest_device``.

    Predicts the operation's run time on both the original device and the
    destination device with the conv_transpose2d MLP, then multiplies the
    measured run time by the predicted dest/orig ratio.
    """
    # 1. Merge arguments (give them all names)
    merged = name_all_arguments(
        CONVTRANSPOSE2D_PARAMS,
        operation.arguments.args,
        operation.arguments.kwargs,
    )

    # 2. Construct arguments that the predictor expects
    # NOTE(review): assumes square inputs/kernels -- only input[2] (height)
    # and weight[2] (kernel height) are used; confirm recorded data is square.
    # NOTE(review): for ConvTranspose2d the weight layout is
    # (in_channels, out_channels/groups, kH, kW), so merged['weight'][0] is
    # the *input* channel count, not out_channels -- confirm this matches
    # the feature extraction used when the MLP was trained.
    arguments = dict(
        batch=merged['input'][0],
        image_size=merged['input'][2],
        in_channels=merged['input'][1],
        out_channels=merged['weight'][0],
        kernel_size=merged['weight'][2],
        # stride/padding may be an int or a tuple; keep only the first
        # (height) component when a tuple is given.
        stride=(
            merged['stride'][0]
            if isinstance(merged['stride'], tuple) else merged['stride']
        ),
        padding=(
            merged['padding'][0]
            if isinstance(merged['padding'], tuple) else merged['padding']
        ),
        # Encode presence of a bias tensor as a 0/1 feature.
        bias=(1 if merged['bias'] is not None else 0),
    )

    # 3. Call model to make prediction
    # Order the feature values exactly as the trained MLP expects them.
    arguments = [arguments[x] for x in self.conv_transpose2d_pred.model.features]

    pred_dest = self.conv_transpose2d_pred.predict(arguments, dest_device.name)
    pred_orig = self.conv_transpose2d_pred.predict(arguments, operation.device.name)

    return operation.run_time_ms * pred_dest / pred_orig

def _linear_scale(self, operation, dest_device):
merged = name_all_arguments(
LINEAR_PARAMS,
Expand Down
Binary file modified analyzer/habitat/data/bmm/model.pth
Binary file not shown.
9 changes: 5 additions & 4 deletions analyzer/habitat/data/checksums
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
b2c102539c05fd9169774c9fc32f0732a82b679e bmm/model.pth
376bd8d9233c3384be01e638191e842ebc212f5f conv2d/model.pth
7e09507e4a97b9b5b2d274b6a5f3bfb7ea1dbd97 bmm/model.pth
065178ab02df36e7401bd4674a14ee940a1cd8a1 conv2d/model.pth
d8a583c0b9068cb92276468272935ce5b0b78ca9 conv_transpose2d/model.pth
659a00c6cff529613b40d5166fe7d93f42e8327d kernels.sqlite
9f17fb74321fe1a82d9d647cd8c976b724b69a94 linear/model.pth
ef93bb74178d38660b31ac0bef262c39b1e6ba16 lstm/model.pth
5f0be970e001ba9b66b7fd9b100f01a2f91ecf8d linear/model.pth
a4bd128777ef5c90c3aaa1dd2abfed42e6024f3d lstm/model.pth
Binary file modified analyzer/habitat/data/conv2d/model.pth
Binary file not shown.
3 changes: 3 additions & 0 deletions analyzer/habitat/data/conv_transpose2d/model.pth
Git LFS file not shown
47 changes: 47 additions & 0 deletions analyzer/habitat/data/devices.yml
Original file line number Diff line number Diff line change
Expand Up @@ -161,4 +161,51 @@ RTX3090:
base_clock_mhz: 1395
peak_gflops_per_second: 19346

A40:
compute_major: 8
compute_minor: 6
max_threads_per_block: 1024
max_threads_per_multiprocessor: 1536
regs_per_block: 65536
regs_per_multiprocessor: 65536
warp_size: 32
shared_mem_per_block: 49152
shared_mem_per_multiprocessor: 102400
num_sms: 84
shared_mem_per_block_optin: 101376
mem_bandwidth_gb: 614
base_clock_mhz: 1305
peak_gflops_per_second: 18680

A4000:
compute_major: 8
compute_minor: 6
max_threads_per_block: 1024
max_threads_per_multiprocessor: 1536
regs_per_block: 65536
regs_per_multiprocessor: 65536
warp_size: 32
shared_mem_per_block: 49152
shared_mem_per_multiprocessor: 102400
num_sms: 48
shared_mem_per_block_optin: 101376
mem_bandwidth_gb: 378
base_clock_mhz: 735
peak_gflops_per_second: 10499

RTX4000:
compute_major: 7
compute_minor: 5
max_threads_per_block: 1024
max_threads_per_multiprocessor: 1024
regs_per_block: 65536
regs_per_multiprocessor: 65536
warp_size: 32
shared_mem_per_block: 49152
shared_mem_per_multiprocessor: 65536
num_sms: 36
shared_mem_per_block_optin: 65536
mem_bandwidth_gb: 364
base_clock_mhz: 1005
peak_gflops_per_second: 4245

Binary file modified analyzer/habitat/data/linear/model.pth
Binary file not shown.
4 changes: 2 additions & 2 deletions analyzer/habitat/data/lstm/model.pth
Git LFS file not shown
13 changes: 10 additions & 3 deletions experiments/run_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@
habitat.Device.T4,
habitat.Device.RTX2070,
habitat.Device.RTX2080Ti,
habitat.Device.RTX3090,
habitat.Device.A100,
habitat.Device.A40,
habitat.Device.A4000,
habitat.Device.RTX4000,
]

RESNET50_BATCHES = [16, 32, 64]
Expand Down Expand Up @@ -237,9 +244,9 @@ def main():
)

run_dcgan_experiments(context)
# run_inception_experiments(context)
# run_resnet50_experiments(context)
# run_gnmt_experiments(context)
run_inception_experiments(context)
run_resnet50_experiments(context)
run_gnmt_experiments(context)
# run_transformer_experiments(context)


Expand Down
19 changes: 19 additions & 0 deletions tools/recording/record_common.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
import logging
import gc
import random
import signal

import torch

import habitat
from habitat.profiling.operation import OperationProfiler
from database import Recorder



logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -111,6 +116,9 @@ def measure_configurations(self, args, num_configs):
args.rank + 1,
args.world_size,
)

last_count = self._recorder.get_num_recordings()

try:
for idx, config_id in enumerate(to_record):
if idx < num_configs_measured:
Expand All @@ -126,6 +134,9 @@ def measure_configurations(self, args, num_configs):

if idx % 100 == 0:
self._recorder.commit()
cur_count = self._recorder.get_num_recordings()
logger.info(f"commit. num_recordings: {cur_count}, new: {cur_count-last_count}")
last_count = cur_count

if self._shutdown_early:
break
Expand All @@ -144,10 +155,18 @@ def _measure(self, config):

except RuntimeError as e:
msg = str(e)

if "out of memory" in msg:
allocated = torch.cuda.memory_allocated()
torch.cuda.empty_cache()
gc.collect()
logger.info(f"Cleared memory: {allocated} -> {torch.cuda.memory_allocated()}")

if ("out of memory" not in msg and
"cuDNN error" not in msg and
"Calculated padded" not in msg):
logger.exception('Unexpected error during measurement.')
# logger.info("error: " + msg)
return None, None

def _record(self, config, forward_result, backward_result):
Expand Down