File tree 4 files changed +500
-194
lines changed
vllm/model_executor/layers/fused_moe/configs
4 files changed +500
-194
lines changed Original file line number Diff line number Diff line change 1
1
{
2
2
"1" : {
3
3
"BLOCK_SIZE_M" : 16 ,
4
- "BLOCK_SIZE_N" : 64 ,
5
- "BLOCK_SIZE_K" : 128 ,
4
+ "BLOCK_SIZE_N" : 32 ,
5
+ "BLOCK_SIZE_K" : 256 ,
6
6
"GROUP_SIZE_M" : 1 ,
7
- "num_stages" : 0
7
+ "num_warps" : 2 ,
8
+ "num_stages" : 0 ,
9
+ "waves_per_eu" : 0 ,
10
+ "matrix_instr_nonkdim" : 16 ,
11
+ "kpack" : 1
8
12
},
9
13
"2" : {
10
14
"BLOCK_SIZE_M" : 16 ,
11
- "BLOCK_SIZE_N" : 64 ,
15
+ "BLOCK_SIZE_N" : 16 ,
12
16
"BLOCK_SIZE_K" : 128 ,
13
17
"GROUP_SIZE_M" : 1 ,
14
- "num_stages" : 0
18
+ "num_warps" : 2 ,
19
+ "num_stages" : 0 ,
20
+ "waves_per_eu" : 0 ,
21
+ "matrix_instr_nonkdim" : 16 ,
22
+ "kpack" : 2
15
23
},
16
24
"4" : {
17
25
"BLOCK_SIZE_M" : 16 ,
18
- "BLOCK_SIZE_N" : 64 ,
26
+ "BLOCK_SIZE_N" : 32 ,
19
27
"BLOCK_SIZE_K" : 256 ,
20
- "GROUP_SIZE_M" : 64 ,
21
- "num_stages" : 1
28
+ "GROUP_SIZE_M" : 1 ,
29
+ "num_warps" : 2 ,
30
+ "num_stages" : 0 ,
31
+ "waves_per_eu" : 0 ,
32
+ "matrix_instr_nonkdim" : 16 ,
33
+ "kpack" : 2
22
34
},
23
35
"8" : {
24
36
"BLOCK_SIZE_M" : 16 ,
25
- "BLOCK_SIZE_N" : 64 ,
37
+ "BLOCK_SIZE_N" : 16 ,
26
38
"BLOCK_SIZE_K" : 256 ,
27
- "GROUP_SIZE_M" : 32 ,
28
- "num_stages" : 1
39
+ "GROUP_SIZE_M" : 1 ,
40
+ "num_warps" : 1 ,
41
+ "num_stages" : 0 ,
42
+ "waves_per_eu" : 0 ,
43
+ "matrix_instr_nonkdim" : 16 ,
44
+ "kpack" : 2
29
45
},
30
46
"16" : {
31
47
"BLOCK_SIZE_M" : 16 ,
32
- "BLOCK_SIZE_N" : 64 ,
48
+ "BLOCK_SIZE_N" : 16 ,
33
49
"BLOCK_SIZE_K" : 256 ,
34
- "GROUP_SIZE_M" : 8 ,
35
- "num_stages" : 1
50
+ "GROUP_SIZE_M" : 1 ,
51
+ "num_warps" : 4 ,
52
+ "num_stages" : 0 ,
53
+ "waves_per_eu" : 0 ,
54
+ "matrix_instr_nonkdim" : 16 ,
55
+ "kpack" : 2
36
56
},
37
57
"24" : {
38
58
"BLOCK_SIZE_M" : 16 ,
39
- "BLOCK_SIZE_N" : 64 ,
40
- "BLOCK_SIZE_K" : 256 ,
41
- "GROUP_SIZE_M" : 64 ,
42
- "num_stages" : 1
59
+ "BLOCK_SIZE_N" : 32 ,
60
+ "BLOCK_SIZE_K" : 64 ,
61
+ "GROUP_SIZE_M" : 1 ,
62
+ "num_warps" : 1 ,
63
+ "num_stages" : 0 ,
64
+ "waves_per_eu" : 0 ,
65
+ "matrix_instr_nonkdim" : 16 ,
66
+ "kpack" : 2
43
67
},
44
68
"32" : {
45
69
"BLOCK_SIZE_M" : 16 ,
46
- "BLOCK_SIZE_N" : 128 ,
47
- "BLOCK_SIZE_K" : 256 ,
48
- "GROUP_SIZE_M" : 8 ,
49
- "num_stages" : 1
70
+ "BLOCK_SIZE_N" : 16 ,
71
+ "BLOCK_SIZE_K" : 128 ,
72
+ "GROUP_SIZE_M" : 4 ,
73
+ "num_warps" : 2 ,
74
+ "num_stages" : 0 ,
75
+ "waves_per_eu" : 0 ,
76
+ "matrix_instr_nonkdim" : 16 ,
77
+ "kpack" : 1
50
78
},
51
79
"48" : {
52
80
"BLOCK_SIZE_M" : 16 ,
53
- "BLOCK_SIZE_N" : 64 ,
81
+ "BLOCK_SIZE_N" : 16 ,
54
82
"BLOCK_SIZE_K" : 128 ,
55
- "GROUP_SIZE_M" : 8 ,
56
- "num_stages" : 0
83
+ "GROUP_SIZE_M" : 4 ,
84
+ "num_warps" : 2 ,
85
+ "num_stages" : 0 ,
86
+ "waves_per_eu" : 0 ,
87
+ "matrix_instr_nonkdim" : 16 ,
88
+ "kpack" : 2
57
89
},
58
90
"64" : {
59
- "BLOCK_SIZE_M" : 64 ,
91
+ "BLOCK_SIZE_M" : 32 ,
60
92
"BLOCK_SIZE_N" : 64 ,
61
93
"BLOCK_SIZE_K" : 128 ,
62
- "GROUP_SIZE_M" : 8 ,
63
- "num_stages" : 0
94
+ "GROUP_SIZE_M" : 4 ,
95
+ "num_warps" : 8 ,
96
+ "num_stages" : 0 ,
97
+ "waves_per_eu" : 0 ,
98
+ "matrix_instr_nonkdim" : 16 ,
99
+ "kpack" : 2
64
100
},
65
101
"96" : {
66
102
"BLOCK_SIZE_M" : 32 ,
67
- "BLOCK_SIZE_N" : 128 ,
103
+ "BLOCK_SIZE_N" : 32 ,
68
104
"BLOCK_SIZE_K" : 128 ,
69
- "GROUP_SIZE_M" : 16 ,
70
- "num_stages" : 0
105
+ "GROUP_SIZE_M" : 4 ,
106
+ "num_warps" : 4 ,
107
+ "num_stages" : 0 ,
108
+ "waves_per_eu" : 0 ,
109
+ "matrix_instr_nonkdim" : 16 ,
110
+ "kpack" : 2
71
111
},
72
112
"128" : {
73
113
"BLOCK_SIZE_M" : 64 ,
74
114
"BLOCK_SIZE_N" : 64 ,
75
- "BLOCK_SIZE_K" : 128 ,
76
- "GROUP_SIZE_M" : 8 ,
77
- "num_stages" : 0
115
+ "BLOCK_SIZE_K" : 64 ,
116
+ "GROUP_SIZE_M" : 4 ,
117
+ "num_warps" : 8 ,
118
+ "num_stages" : 0 ,
119
+ "waves_per_eu" : 0 ,
120
+ "matrix_instr_nonkdim" : 16 ,
121
+ "kpack" : 2
78
122
},
79
123
"256" : {
80
124
"BLOCK_SIZE_M" : 128 ,
81
125
"BLOCK_SIZE_N" : 128 ,
82
126
"BLOCK_SIZE_K" : 64 ,
83
- "GROUP_SIZE_M" : 8 ,
84
- "num_stages" : 0
127
+ "GROUP_SIZE_M" : 4 ,
128
+ "num_warps" : 8 ,
129
+ "num_stages" : 0 ,
130
+ "waves_per_eu" : 0 ,
131
+ "matrix_instr_nonkdim" : 16 ,
132
+ "kpack" : 1
85
133
},
86
134
"512" : {
87
- "BLOCK_SIZE_M" : 256 ,
135
+ "BLOCK_SIZE_M" : 128 ,
88
136
"BLOCK_SIZE_N" : 128 ,
89
137
"BLOCK_SIZE_K" : 64 ,
90
- "GROUP_SIZE_M" : 8 ,
91
- "num_stages" : 0
138
+ "GROUP_SIZE_M" : 4 ,
139
+ "num_warps" : 8 ,
140
+ "num_stages" : 0 ,
141
+ "waves_per_eu" : 0 ,
142
+ "matrix_instr_nonkdim" : 16 ,
143
+ "kpack" : 2
92
144
},
93
145
"1024" : {
94
146
"BLOCK_SIZE_M" : 128 ,
95
147
"BLOCK_SIZE_N" : 128 ,
96
148
"BLOCK_SIZE_K" : 64 ,
97
149
"GROUP_SIZE_M" : 1 ,
98
- "num_stages" : 0
150
+ "num_warps" : 8 ,
151
+ "num_stages" : 0 ,
152
+ "waves_per_eu" : 0 ,
153
+ "matrix_instr_nonkdim" : 32 ,
154
+ "kpack" : 2
99
155
},
100
156
"1536" : {
101
157
"BLOCK_SIZE_M" : 128 ,
102
158
"BLOCK_SIZE_N" : 128 ,
103
159
"BLOCK_SIZE_K" : 64 ,
104
160
"GROUP_SIZE_M" : 1 ,
105
- "num_stages" : 0
161
+ "num_warps" : 8 ,
162
+ "num_stages" : 0 ,
163
+ "waves_per_eu" : 0 ,
164
+ "matrix_instr_nonkdim" : 16 ,
165
+ "kpack" : 2
106
166
},
107
167
"2048" : {
108
168
"BLOCK_SIZE_M" : 128 ,
109
- "BLOCK_SIZE_N" : 256 ,
169
+ "BLOCK_SIZE_N" : 128 ,
110
170
"BLOCK_SIZE_K" : 64 ,
111
171
"GROUP_SIZE_M" : 1 ,
112
- "num_stages" : 0
172
+ "num_warps" : 8 ,
173
+ "num_stages" : 0 ,
174
+ "waves_per_eu" : 0 ,
175
+ "matrix_instr_nonkdim" : 16 ,
176
+ "kpack" : 2
113
177
},
114
178
"3072" : {
115
179
"BLOCK_SIZE_M" : 128 ,
116
- "BLOCK_SIZE_N" : 256 ,
180
+ "BLOCK_SIZE_N" : 128 ,
117
181
"BLOCK_SIZE_K" : 64 ,
118
182
"GROUP_SIZE_M" : 1 ,
119
- "num_stages" : 0
183
+ "num_warps" : 8 ,
184
+ "num_stages" : 0 ,
185
+ "waves_per_eu" : 0 ,
186
+ "matrix_instr_nonkdim" : 16 ,
187
+ "kpack" : 1
120
188
},
121
189
"4096" : {
122
190
"BLOCK_SIZE_M" : 128 ,
123
191
"BLOCK_SIZE_N" : 128 ,
124
192
"BLOCK_SIZE_K" : 64 ,
125
193
"GROUP_SIZE_M" : 1 ,
126
- "num_stages" : 0
194
+ "num_warps" : 8 ,
195
+ "num_stages" : 0 ,
196
+ "waves_per_eu" : 0 ,
197
+ "matrix_instr_nonkdim" : 16 ,
198
+ "kpack" : 1
127
199
}
128
200
}
You can’t perform that action at this time.
0 commit comments