Skip to content

Commit f0364e8

Browse files
mcremon-metafacebook-github-bot
authored and committed
Fix quantized_matmul with 4D inputs (#4335)
Summary: Pull Request resolved: #4335. MobileBERT has a matmul with 4D inputs (`[1, 4, 8, 32]` by `[1, 4, 32, 8]`), which is erroring out AoT in the meta kernel. This diff fixes the meta kernel to handle cases where the leading dimensions are more than one (the kernel itself can already handle it!). Also move the exported graph dump to `export_program`, where it belongs. This prevents double printing in some cases. Note: this diff needs a GH approval! Reviewed By: dulinriley, zonglinpengmeta. Differential Revision: D60050087. fbshipit-source-id: de09ed2fb9c5cdf729cc020119bf090d0f0c70c4
1 parent 844a69f commit f0364e8

File tree

1 file changed

+9
-6
lines changed

1 file changed

+9
-6
lines changed

backends/cadence/aot/compiler.py

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,7 @@ def quantize_pt2(
7676
def export_program(
7777
model: torch.nn.Module,
7878
inputs: tuple[object, ...],
79+
dump_graphs: bool = False,
7980
) -> ExportedProgram:
8081
assert isinstance(model, torch.nn.Module), "model should be an nn.Module"
8182

@@ -99,7 +100,13 @@ def export_program(
99100
torch._C._set_mkldnn_enabled(False)
100101

101102
# else: capture the model and return it.
102-
return export(model, inputs)
103+
expo_program = export(model, inputs)
104+
105+
if dump_graphs:
106+
logging.info("Exported graph:")
107+
expo_program.graph_module.graph.print_tabular()
108+
109+
return expo_program
103110

104111

105112
# Export the model and lower it to an EdgeProgramManager (in edge IR).
@@ -111,11 +118,7 @@ def export_to_edge(
111118
assert isinstance(model, torch.nn.Module), "model should be an nn.Module"
112119

113120
# Export the model into an ExportedProgram.
114-
expo_program = export_program(model, inputs)
115-
116-
if dump_graphs:
117-
logging.info("Exported graph:")
118-
expo_program.graph_module.graph.print_tabular()
121+
expo_program = export_program(model, inputs, dump_graphs=dump_graphs)
119122

120123
# Call to_edge to convert the graph to edge IR.
121124
# Note: dim_order is skipped (https://github.com/pytorch/executorch/issues/3704)

0 commit comments

Comments
 (0)