File tree 2 files changed +5
-5
lines changed
src/llmcompressor/modifiers/awq
2 files changed +5
-5
lines changed Original file line number Diff line number Diff line change 25
25
__all__ = ["AWQModifier" ]
26
26
27
27
28
+ # TODO (Brian INFERENG-531) Add support for offloaded models
28
29
class AWQModifier (Modifier ):
29
30
"""
30
31
Implements the AWQ (Activation-aware Weight Quantization) algorithm,
Original file line number Diff line number Diff line change @@ -28,8 +28,8 @@ class AWQMapping:
28
28
"re:.*input_layernorm" ,
29
29
["re:.*q_proj" , "re:.*k_proj" , "re:.*v_proj" ],
30
30
),
31
- # TODO this should only be added if v_proj/o_proj shapes match up
32
- # should we check during validation and skip if this is not the case?
31
+ # TODO (Brian INFERENG-530) when resolving, only add
32
+ # if v_proj/o_proj shapes match up
33
33
AWQMapping ("re:.*v_proj" , ["re:.*o_proj" ]),
34
34
AWQMapping (
35
35
"re:.*post_attention_layernorm" ,
@@ -40,9 +40,8 @@ class AWQMapping:
40
40
["re:.*down_proj" ],
41
41
),
42
42
],
43
- "Qwen" : [
44
- # TODO add Qwen mappings
45
- ],
43
+ # TODO (Brian INFERENG-529) Add Qwen mappings
44
+ # "Qwen": [ ],
46
45
}
47
46
48
47
You can’t perform that action at this time.
0 commit comments