Skip to content

Commit bc20e48

Browse files
authored
Merge pull request vllm-project#13 from ROCm/fused_moe_configs
Initial mi300 fused_moe tuning using docker: pytorch-private:vllm0.3.3_ROCm6.2_pytorch2.3_hipblaslt0.7_v1
2 parents 74383a8 + 71f3e85 commit bc20e48

File tree

4 files changed

+494
-0
lines changed

4 files changed

+494
-0
lines changed
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
{
2+
"1": {
3+
"BLOCK_SIZE_M": 16,
4+
"BLOCK_SIZE_N": 64,
5+
"BLOCK_SIZE_K": 128,
6+
"GROUP_SIZE_M": 1,
7+
"num_stages": 0
8+
},
9+
"2": {
10+
"BLOCK_SIZE_M": 16,
11+
"BLOCK_SIZE_N": 64,
12+
"BLOCK_SIZE_K": 128,
13+
"GROUP_SIZE_M": 1,
14+
"num_stages": 0
15+
},
16+
"4": {
17+
"BLOCK_SIZE_M": 16,
18+
"BLOCK_SIZE_N": 64,
19+
"BLOCK_SIZE_K": 256,
20+
"GROUP_SIZE_M": 64,
21+
"num_stages": 1
22+
},
23+
"8": {
24+
"BLOCK_SIZE_M": 16,
25+
"BLOCK_SIZE_N": 64,
26+
"BLOCK_SIZE_K": 256,
27+
"GROUP_SIZE_M": 32,
28+
"num_stages": 1
29+
},
30+
"16": {
31+
"BLOCK_SIZE_M": 16,
32+
"BLOCK_SIZE_N": 64,
33+
"BLOCK_SIZE_K": 256,
34+
"GROUP_SIZE_M": 8,
35+
"num_stages": 1
36+
},
37+
"24": {
38+
"BLOCK_SIZE_M": 16,
39+
"BLOCK_SIZE_N": 64,
40+
"BLOCK_SIZE_K": 256,
41+
"GROUP_SIZE_M": 64,
42+
"num_stages": 1
43+
},
44+
"32": {
45+
"BLOCK_SIZE_M": 16,
46+
"BLOCK_SIZE_N": 128,
47+
"BLOCK_SIZE_K": 256,
48+
"GROUP_SIZE_M": 8,
49+
"num_stages": 1
50+
},
51+
"48": {
52+
"BLOCK_SIZE_M": 16,
53+
"BLOCK_SIZE_N": 64,
54+
"BLOCK_SIZE_K": 128,
55+
"GROUP_SIZE_M": 8,
56+
"num_stages": 0
57+
},
58+
"64": {
59+
"BLOCK_SIZE_M": 64,
60+
"BLOCK_SIZE_N": 64,
61+
"BLOCK_SIZE_K": 128,
62+
"GROUP_SIZE_M": 8,
63+
"num_stages": 0
64+
},
65+
"96": {
66+
"BLOCK_SIZE_M": 32,
67+
"BLOCK_SIZE_N": 128,
68+
"BLOCK_SIZE_K": 128,
69+
"GROUP_SIZE_M": 16,
70+
"num_stages": 0
71+
},
72+
"128": {
73+
"BLOCK_SIZE_M": 64,
74+
"BLOCK_SIZE_N": 64,
75+
"BLOCK_SIZE_K": 128,
76+
"GROUP_SIZE_M": 8,
77+
"num_stages": 0
78+
},
79+
"256": {
80+
"BLOCK_SIZE_M": 128,
81+
"BLOCK_SIZE_N": 128,
82+
"BLOCK_SIZE_K": 64,
83+
"GROUP_SIZE_M": 8,
84+
"num_stages": 0
85+
},
86+
"512": {
87+
"BLOCK_SIZE_M": 256,
88+
"BLOCK_SIZE_N": 128,
89+
"BLOCK_SIZE_K": 64,
90+
"GROUP_SIZE_M": 8,
91+
"num_stages": 0
92+
},
93+
"1024": {
94+
"BLOCK_SIZE_M": 128,
95+
"BLOCK_SIZE_N": 128,
96+
"BLOCK_SIZE_K": 64,
97+
"GROUP_SIZE_M": 1,
98+
"num_stages": 0
99+
},
100+
"1536": {
101+
"BLOCK_SIZE_M": 128,
102+
"BLOCK_SIZE_N": 128,
103+
"BLOCK_SIZE_K": 64,
104+
"GROUP_SIZE_M": 1,
105+
"num_stages": 0
106+
},
107+
"2048": {
108+
"BLOCK_SIZE_M": 128,
109+
"BLOCK_SIZE_N": 256,
110+
"BLOCK_SIZE_K": 64,
111+
"GROUP_SIZE_M": 1,
112+
"num_stages": 0
113+
},
114+
"3072": {
115+
"BLOCK_SIZE_M": 128,
116+
"BLOCK_SIZE_N": 256,
117+
"BLOCK_SIZE_K": 64,
118+
"GROUP_SIZE_M": 1,
119+
"num_stages": 0
120+
},
121+
"4096": {
122+
"BLOCK_SIZE_M": 128,
123+
"BLOCK_SIZE_N": 128,
124+
"BLOCK_SIZE_K": 64,
125+
"GROUP_SIZE_M": 1,
126+
"num_stages": 0
127+
}
128+
}
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
{
2+
"1": {
3+
"BLOCK_SIZE_M": 16,
4+
"BLOCK_SIZE_N": 64,
5+
"BLOCK_SIZE_K": 128,
6+
"GROUP_SIZE_M": 64
7+
},
8+
"2": {
9+
"BLOCK_SIZE_M": 16,
10+
"BLOCK_SIZE_N": 128,
11+
"BLOCK_SIZE_K": 32,
12+
"GROUP_SIZE_M": 32
13+
},
14+
"4": {
15+
"BLOCK_SIZE_M": 16,
16+
"BLOCK_SIZE_N": 32,
17+
"BLOCK_SIZE_K": 64,
18+
"GROUP_SIZE_M": 8
19+
},
20+
"8": {
21+
"BLOCK_SIZE_M": 16,
22+
"BLOCK_SIZE_N": 64,
23+
"BLOCK_SIZE_K": 256,
24+
"GROUP_SIZE_M": 1
25+
},
26+
"16": {
27+
"BLOCK_SIZE_M": 16,
28+
"BLOCK_SIZE_N": 64,
29+
"BLOCK_SIZE_K": 256,
30+
"GROUP_SIZE_M": 1
31+
},
32+
"24": {
33+
"BLOCK_SIZE_M": 32,
34+
"BLOCK_SIZE_N": 128,
35+
"BLOCK_SIZE_K": 128,
36+
"GROUP_SIZE_M": 1
37+
},
38+
"32": {
39+
"BLOCK_SIZE_M": 64,
40+
"BLOCK_SIZE_N": 64,
41+
"BLOCK_SIZE_K": 64,
42+
"GROUP_SIZE_M": 8
43+
},
44+
"48": {
45+
"BLOCK_SIZE_M": 128,
46+
"BLOCK_SIZE_N": 64,
47+
"BLOCK_SIZE_K": 64,
48+
"GROUP_SIZE_M": 8
49+
},
50+
"64": {
51+
"BLOCK_SIZE_M": 64,
52+
"BLOCK_SIZE_N": 32,
53+
"BLOCK_SIZE_K": 128,
54+
"GROUP_SIZE_M": 1
55+
},
56+
"96": {
57+
"BLOCK_SIZE_M": 32,
58+
"BLOCK_SIZE_N": 64,
59+
"BLOCK_SIZE_K": 128,
60+
"GROUP_SIZE_M": 8
61+
},
62+
"128": {
63+
"BLOCK_SIZE_M": 64,
64+
"BLOCK_SIZE_N": 64,
65+
"BLOCK_SIZE_K": 128,
66+
"GROUP_SIZE_M": 32
67+
},
68+
"256": {
69+
"BLOCK_SIZE_M": 32,
70+
"BLOCK_SIZE_N": 128,
71+
"BLOCK_SIZE_K": 64,
72+
"GROUP_SIZE_M": 1
73+
},
74+
"512": {
75+
"BLOCK_SIZE_M": 64,
76+
"BLOCK_SIZE_N": 64,
77+
"BLOCK_SIZE_K": 128,
78+
"GROUP_SIZE_M": 1
79+
},
80+
"1024": {
81+
"BLOCK_SIZE_M": 64,
82+
"BLOCK_SIZE_N": 128,
83+
"BLOCK_SIZE_K": 64,
84+
"GROUP_SIZE_M": 1
85+
},
86+
"1536": {
87+
"BLOCK_SIZE_M": 64,
88+
"BLOCK_SIZE_N": 64,
89+
"BLOCK_SIZE_K": 128,
90+
"GROUP_SIZE_M": 1
91+
},
92+
"2048": {
93+
"BLOCK_SIZE_M": 128,
94+
"BLOCK_SIZE_N": 128,
95+
"BLOCK_SIZE_K": 64,
96+
"GROUP_SIZE_M": 1
97+
},
98+
"3072": {
99+
"BLOCK_SIZE_M": 128,
100+
"BLOCK_SIZE_N": 128,
101+
"BLOCK_SIZE_K": 64,
102+
"GROUP_SIZE_M": 1
103+
},
104+
"4096": {
105+
"BLOCK_SIZE_M": 128,
106+
"BLOCK_SIZE_N": 128,
107+
"BLOCK_SIZE_K": 64,
108+
"GROUP_SIZE_M": 1
109+
}
110+
}
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
{
2+
"1": {
3+
"BLOCK_SIZE_M": 16,
4+
"BLOCK_SIZE_N": 64,
5+
"BLOCK_SIZE_K": 64,
6+
"GROUP_SIZE_M": 8,
7+
"num_stages": 0
8+
},
9+
"2": {
10+
"BLOCK_SIZE_M": 16,
11+
"BLOCK_SIZE_N": 128,
12+
"BLOCK_SIZE_K": 64,
13+
"GROUP_SIZE_M": 1,
14+
"num_stages": 0
15+
},
16+
"4": {
17+
"BLOCK_SIZE_M": 16,
18+
"BLOCK_SIZE_N": 64,
19+
"BLOCK_SIZE_K": 32,
20+
"GROUP_SIZE_M": 32,
21+
"num_stages": 1
22+
},
23+
"8": {
24+
"BLOCK_SIZE_M": 16,
25+
"BLOCK_SIZE_N": 32,
26+
"BLOCK_SIZE_K": 256,
27+
"GROUP_SIZE_M": 8,
28+
"num_stages": 1
29+
},
30+
"16": {
31+
"BLOCK_SIZE_M": 32,
32+
"BLOCK_SIZE_N": 128,
33+
"BLOCK_SIZE_K": 128,
34+
"GROUP_SIZE_M": 16,
35+
"num_stages": 1
36+
},
37+
"24": {
38+
"BLOCK_SIZE_M": 16,
39+
"BLOCK_SIZE_N": 64,
40+
"BLOCK_SIZE_K": 256,
41+
"GROUP_SIZE_M": 8,
42+
"num_stages": 1
43+
},
44+
"32": {
45+
"BLOCK_SIZE_M": 16,
46+
"BLOCK_SIZE_N": 256,
47+
"BLOCK_SIZE_K": 64,
48+
"GROUP_SIZE_M": 16,
49+
"num_stages": 0
50+
},
51+
"48": {
52+
"BLOCK_SIZE_M": 16,
53+
"BLOCK_SIZE_N": 128,
54+
"BLOCK_SIZE_K": 256,
55+
"GROUP_SIZE_M": 16,
56+
"num_stages": 1
57+
},
58+
"64": {
59+
"BLOCK_SIZE_M": 64,
60+
"BLOCK_SIZE_N": 64,
61+
"BLOCK_SIZE_K": 64,
62+
"GROUP_SIZE_M": 32,
63+
"num_stages": 0
64+
},
65+
"96": {
66+
"BLOCK_SIZE_M": 32,
67+
"BLOCK_SIZE_N": 32,
68+
"BLOCK_SIZE_K": 64,
69+
"GROUP_SIZE_M": 16,
70+
"num_stages": 0
71+
},
72+
"128": {
73+
"BLOCK_SIZE_M": 64,
74+
"BLOCK_SIZE_N": 256,
75+
"BLOCK_SIZE_K": 64,
76+
"GROUP_SIZE_M": 8,
77+
"num_stages": 0
78+
},
79+
"256": {
80+
"BLOCK_SIZE_M": 128,
81+
"BLOCK_SIZE_N": 128,
82+
"BLOCK_SIZE_K": 64,
83+
"GROUP_SIZE_M": 8,
84+
"num_stages": 0
85+
},
86+
"512": {
87+
"BLOCK_SIZE_M": 64,
88+
"BLOCK_SIZE_N": 64,
89+
"BLOCK_SIZE_K": 128,
90+
"GROUP_SIZE_M": 1,
91+
"num_stages": 0
92+
},
93+
"1024": {
94+
"BLOCK_SIZE_M": 64,
95+
"BLOCK_SIZE_N": 128,
96+
"BLOCK_SIZE_K": 64,
97+
"GROUP_SIZE_M": 1,
98+
"num_stages": 0
99+
},
100+
"1536": {
101+
"BLOCK_SIZE_M": 128,
102+
"BLOCK_SIZE_N": 128,
103+
"BLOCK_SIZE_K": 64,
104+
"GROUP_SIZE_M": 1,
105+
"num_stages": 0
106+
},
107+
"2048": {
108+
"BLOCK_SIZE_M": 128,
109+
"BLOCK_SIZE_N": 128,
110+
"BLOCK_SIZE_K": 64,
111+
"GROUP_SIZE_M": 1,
112+
"num_stages": 0
113+
},
114+
"3072": {
115+
"BLOCK_SIZE_M": 128,
116+
"BLOCK_SIZE_N": 128,
117+
"BLOCK_SIZE_K": 64,
118+
"GROUP_SIZE_M": 1,
119+
"num_stages": 0
120+
},
121+
"4096": {
122+
"BLOCK_SIZE_M": 128,
123+
"BLOCK_SIZE_N": 128,
124+
"BLOCK_SIZE_K": 64,
125+
"GROUP_SIZE_M": 1,
126+
"num_stages": 0
127+
}
128+
}

0 commit comments

Comments
 (0)