Skip to content

Commit

Permalink
Adds mesh rule for a3-megagpu-8g. (#936)
Browse files (browse the repository at this point in the history)
  • Loading branch information
markblee authored Jan 23, 2025
1 parent ac7a3ed commit 6a9f980
Show file tree
Hide file tree
Showing 17 changed files with 18 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ mesh_rules[5][1][2]: 1
mesh_rules[5][1][3]: 8
mesh_rules[5][1][4]: 1
mesh_rules[5][1][5]: 1
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g)-(256|512|1024)'
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g|a3-megagpu-8g)-(256|512|1024)'
mesh_rules[6][1][0]: 1
mesh_rules[6][1][1]: -1
mesh_rules[6][1][2]: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ mesh_rules[5][1][2]: 1
mesh_rules[5][1][3]: 8
mesh_rules[5][1][4]: 1
mesh_rules[5][1][5]: 1
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g)-(256|512|1024)'
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g|a3-megagpu-8g)-(256|512|1024)'
mesh_rules[6][1][0]: 1
mesh_rules[6][1][1]: -1
mesh_rules[6][1][2]: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ mesh_rules[5][1][2]: 1
mesh_rules[5][1][3]: 8
mesh_rules[5][1][4]: 1
mesh_rules[5][1][5]: 1
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g)-(256|512|1024)'
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g|a3-megagpu-8g)-(256|512|1024)'
mesh_rules[6][1][0]: 1
mesh_rules[6][1][1]: -1
mesh_rules[6][1][2]: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ mesh_rules[5][1][2]: 1
mesh_rules[5][1][3]: 8
mesh_rules[5][1][4]: 1
mesh_rules[5][1][5]: 1
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g)-(256|512|1024)'
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g|a3-megagpu-8g)-(256|512|1024)'
mesh_rules[6][1][0]: 1
mesh_rules[6][1][1]: -1
mesh_rules[6][1][2]: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ mesh_rules[5][1][2]: 1
mesh_rules[5][1][3]: 8
mesh_rules[5][1][4]: 1
mesh_rules[5][1][5]: 1
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g)-(256|512|1024)'
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g|a3-megagpu-8g)-(256|512|1024)'
mesh_rules[6][1][0]: 1
mesh_rules[6][1][1]: -1
mesh_rules[6][1][2]: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ mesh_rules[5][1][2]: 1
mesh_rules[5][1][3]: 8
mesh_rules[5][1][4]: 1
mesh_rules[5][1][5]: 1
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g)-(256|512|1024)'
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g|a3-megagpu-8g)-(256|512|1024)'
mesh_rules[6][1][0]: 1
mesh_rules[6][1][1]: -1
mesh_rules[6][1][2]: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ mesh_rules[5][1][2]: 1
mesh_rules[5][1][3]: 8
mesh_rules[5][1][4]: 1
mesh_rules[5][1][5]: 1
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g)-(256|512|1024)'
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g|a3-megagpu-8g)-(256|512|1024)'
mesh_rules[6][1][0]: 1
mesh_rules[6][1][1]: -1
mesh_rules[6][1][2]: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ mesh_rules[5][1][2]: 1
mesh_rules[5][1][3]: 8
mesh_rules[5][1][4]: 1
mesh_rules[5][1][5]: 1
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g)-(256|512|1024)'
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g|a3-megagpu-8g)-(256|512|1024)'
mesh_rules[6][1][0]: 1
mesh_rules[6][1][1]: -1
mesh_rules[6][1][2]: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ mesh_rules[5][1][2]: 1
mesh_rules[5][1][3]: 8
mesh_rules[5][1][4]: 1
mesh_rules[5][1][5]: 1
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g)-(256|512|1024)'
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g|a3-megagpu-8g)-(256|512|1024)'
mesh_rules[6][1][0]: 1
mesh_rules[6][1][1]: -1
mesh_rules[6][1][2]: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ mesh_rules[5][1][2]: 1
mesh_rules[5][1][3]: 8
mesh_rules[5][1][4]: 1
mesh_rules[5][1][5]: 1
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g)-(256|512|1024)'
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g|a3-megagpu-8g)-(256|512|1024)'
mesh_rules[6][1][0]: 1
mesh_rules[6][1][1]: -1
mesh_rules[6][1][2]: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ mesh_rules[5][1][2]: 1
mesh_rules[5][1][3]: 8
mesh_rules[5][1][4]: 1
mesh_rules[5][1][5]: 1
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g)-(256|512|1024)'
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g|a3-megagpu-8g)-(256|512|1024)'
mesh_rules[6][1][0]: 1
mesh_rules[6][1][1]: -1
mesh_rules[6][1][2]: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ mesh_rules[5][1][2]: 1
mesh_rules[5][1][3]: 8
mesh_rules[5][1][4]: 1
mesh_rules[5][1][5]: 1
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g)-(256|512|1024)'
mesh_rules[6][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g|a3-megagpu-8g)-(256|512|1024)'
mesh_rules[6][1][0]: 1
mesh_rules[6][1][1]: -1
mesh_rules[6][1][2]: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ mesh_rules[4][1][2]: 1
mesh_rules[4][1][3]: 8
mesh_rules[4][1][4]: 1
mesh_rules[4][1][5]: 1
mesh_rules[5][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g)-(256|512|1024)'
mesh_rules[5][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g|a3-megagpu-8g)-(256|512|1024)'
mesh_rules[5][1][0]: 1
mesh_rules[5][1][1]: -1
mesh_rules[5][1][2]: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ mesh_rules[4][1][2]: 1
mesh_rules[4][1][3]: 8
mesh_rules[4][1][4]: 1
mesh_rules[4][1][5]: 1
mesh_rules[5][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g)-(256|512|1024)'
mesh_rules[5][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g|a3-megagpu-8g)-(256|512|1024)'
mesh_rules[5][1][0]: 1
mesh_rules[5][1][1]: -1
mesh_rules[5][1][2]: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ mesh_rules[4][1][2]: 1
mesh_rules[4][1][3]: 8
mesh_rules[4][1][4]: 1
mesh_rules[4][1][5]: 1
mesh_rules[5][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g)-(256|512|1024)'
mesh_rules[5][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g|a3-megagpu-8g)-(256|512|1024)'
mesh_rules[5][1][0]: 1
mesh_rules[5][1][1]: -1
mesh_rules[5][1][2]: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ mesh_rules[4][1][2]: 1
mesh_rules[4][1][3]: 8
mesh_rules[4][1][4]: 1
mesh_rules[4][1][5]: 1
mesh_rules[5][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g)-(256|512|1024)'
mesh_rules[5][0]: 'gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g|a3-megagpu-8g)-(256|512|1024)'
mesh_rules[5][1][0]: 1
mesh_rules[5][1][1]: -1
mesh_rules[5][1][2]: 1
Expand Down
4 changes: 2 additions & 2 deletions axlearn/experiments/text/gpt/fuji.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ def get_trainer_kwargs(
# v2 on gpu-p5.48xlarge-256, step time: 1.78s/step, MFU 39%.
# TODO(kelvin-zou): need to match 1.5s/step perf on TransformerEngine.
(
"gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g)-(256|512|1024)",
"gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g|a3-megagpu-8g)-(256|512|1024)",
mesh_shape_from_axes(data=-1, fsdp=8),
),
),
Expand Down Expand Up @@ -412,7 +412,7 @@ def get_trainer_kwargs(
),
("tpu-v5p-.*", mesh_shape_from_axes(data=-1, fsdp=8)),
(
"gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g)-(256|512|1024)",
"gpu-(p5.48xlarge|p4de.24xlarge|a3-highgpu-8g|a3-megagpu-8g)-(256|512|1024)",
mesh_shape_from_axes(data=-1, fsdp=8),
),
),
Expand Down

0 comments on commit 6a9f980

Please sign in to comment.