restructure entire gemm benchmarking and remove all vmfbs from git repo
saienduri committed Oct 1, 2024
1 parent 7c14c18 commit e7231b3
Showing 809 changed files with 1,354 additions and 1,259 deletions.
5 changes: 3 additions & 2 deletions .gitignore
@@ -23,7 +23,8 @@ wheelhouse
*.egg-info
*.whl

# Gemm Bench
# Bench Artifacts
gemm/vmfb/
attention/vmfb/
conv/vmfb/
results/
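Because the compiled .vmfb artifacts are now ignored rather than tracked, they presumably have to be regenerated locally before a benchmark run. Below is a minimal, hypothetical sketch of compiling one of the gemm MLIR files into the (now ignored) gemm/vmfb/ directory; the backend flag and paths are assumptions for illustration, not taken from this commit.

# Hypothetical sketch: rebuild a benchmark .vmfb locally instead of reading
# it from the repo. The backend flag is an assumption, not from this commit.
import subprocess
from pathlib import Path

mlir_file = Path("gemm/mlir/gemm_10240_16_8192_bf16_tA.mlir")
vmfb_dir = Path("gemm/vmfb")  # ignored by git after this commit
vmfb_dir.mkdir(parents=True, exist_ok=True)
vmfb_file = vmfb_dir / (mlir_file.stem + ".vmfb")

subprocess.run(
    [
        "iree-compile",
        str(mlir_file),
        "--iree-hal-target-backends=rocm",  # assumed GPU backend; adjust as needed
        "-o",
        str(vmfb_file),
    ],
    check=True,
)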

Binary file removed attention/vmfb/attention_12x384x64x64x384xf16.vmfb
Binary file removed attention/vmfb/attention_16x1024x64x64x1024xf16.vmfb
Binary file removed attention/vmfb/attention_16x2048x64x64x2048xf16.vmfb
Binary file removed attention/vmfb/attention_16x4096x64x64x4096xf16.vmfb
Binary file removed attention/vmfb/attention_16x8192x64x64x8192xf16.vmfb
Binary file removed attention/vmfb/attention_1x1024x64x64x1024xf16.vmfb
Binary file removed attention/vmfb/attention_1x2048x64x64x2048xf16.vmfb
Binary file removed attention/vmfb/attention_1x4096x64x64x4096xf16.vmfb
Binary file removed attention/vmfb/attention_1x4096x64x64x64xf16.vmfb
Binary file removed attention/vmfb/attention_1x8192x64x64x8192xf16.vmfb
Binary file removed attention/vmfb/attention_20x4096x64x64x4096xf16.vmfb
Binary file removed attention/vmfb/attention_20x4096x64x64x64xf16.vmfb
Binary file removed attention/vmfb/attention_2x1024x64x64x1024xf16.vmfb
Binary file removed attention/vmfb/attention_2x1024x64x64x64xf16.vmfb
Binary file removed attention/vmfb/attention_2x2048x64x64x2048xf16.vmfb
Binary file removed attention/vmfb/attention_2x4096x64x64x4096xf16.vmfb
Binary file removed attention/vmfb/attention_2x8192x64x64x8192xf16.vmfb
Binary file removed attention/vmfb/attention_32x1024x64x64x1024xf16.vmfb
Binary file removed attention/vmfb/attention_32x2048x64x64x2048xf16.vmfb
Binary file removed attention/vmfb/attention_32x4096x64x64x4096xf16.vmfb
Binary file removed attention/vmfb/attention_32x8192x64x64x8192xf16.vmfb
Binary file removed attention/vmfb/attention_40x1024x64x64x1024xf16.vmfb
Binary file removed attention/vmfb/attention_40x1024x64x64x64xf16.vmfb
Binary file removed attention/vmfb/attention_48x1024x64x64x1024xf16.vmfb
Binary file removed attention/vmfb/attention_48x2048x64x64x2048xf16.vmfb
Binary file removed attention/vmfb/attention_48x4096x64x64x4096xf16.vmfb
Binary file removed attention/vmfb/attention_48x8192x64x64x8192xf16.vmfb
Binary file removed attention/vmfb/attention_4x1024x64x64x1024xf16.vmfb
Binary file removed attention/vmfb/attention_4x2048x64x64x2048xf16.vmfb
Binary file removed attention/vmfb/attention_4x4096x64x64x4096xf16.vmfb
Binary file removed attention/vmfb/attention_4x4096x64x64x64xf16.vmfb
Binary file removed attention/vmfb/attention_4x8192x64x64x8192xf16.vmfb
Binary file removed attention/vmfb/attention_64x1024x64x64x1024xf16.vmfb
Binary file removed attention/vmfb/attention_64x2048x64x64x2048xf16.vmfb
Binary file removed attention/vmfb/attention_64x4096x64x64x4096xf16.vmfb
Binary file removed attention/vmfb/attention_64x8192x64x64x8192xf16.vmfb
Binary file removed attention/vmfb/attention_768x4096x64x64x64xf16.vmfb
Binary file removed attention/vmfb/attention_8x1024x64x64x1024xf16.vmfb
Binary file removed attention/vmfb/attention_8x1024x64x64x64xf16.vmfb
Binary file removed attention/vmfb/attention_8x2048x64x64x2048xf16.vmfb
Binary file removed attention/vmfb/attention_8x4096x64x64x4096xf16.vmfb
Binary file removed attention/vmfb/attention_8x8192x64x64x8192xf16.vmfb
Binary file removed attention/vmfb/attention_96x1024x64x64x1024xf16.vmfb
Binary file removed attention/vmfb/attention_96x2048x64x64x2048xf16.vmfb
Binary file removed attention/vmfb/attention_96x4096x64x64x4096xf16.vmfb
Binary file removed attention/vmfb/attention_96x8192x64x64x8192xf16.vmfb
(Additional binary files changed in this commit are not shown.)
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_10240_16_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<10240x16xbf16> {
func.func @main(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<10240x16xbf16> {
%cst = arith.constant 0.000000e+00 : bf16
%0 = tensor.empty() : tensor<10240x16xbf16>
%1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<10240x16xbf16>) -> tensor<10240x16xbf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<10240x16xbf16>) -> tensor<10240x16xbf16>
return %2 : tensor<10240x16xbf16>
}
}
}
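The rename from @main_0 to @main is the change these MLIR diffs repeat across every gemm size: the benchmark harness presumably invokes the exported function by name, so a single uniform entry point is simpler to drive. A hedged sketch of running the compiled module through iree-benchmark-module follows; the device string and zero-filled inputs are assumptions for illustration, not taken from this commit.

# Hypothetical sketch: benchmark the compiled gemm via the renamed "main"
# entry point (previously "main_0"). Device and inputs are assumptions.
import subprocess

subprocess.run(
    [
        "iree-benchmark-module",
        "--module=gemm/vmfb/gemm_10240_16_8192_bf16_tA.vmfb",
        "--device=hip",               # assumed device; pick per machine
        "--function=main",            # entry point name after this commit
        "--input=8192x10240xbf16",    # shape/dtype only; contents default-filled
        "--input=8192x16xbf16",
    ],
    check=True,
)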
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_10240_16_8192_f16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x16xf16>) -> tensor<10240x16xf16> {
func.func @main(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x16xf16>) -> tensor<10240x16xf16> {
%cst = arith.constant 0.000000e+00 : f16
%0 = tensor.empty() : tensor<10240x16xf16>
%1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<10240x16xf16>) -> tensor<10240x16xf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xf16>, tensor<8192x16xf16>) outs(%1 : tensor<10240x16xf16>) -> tensor<10240x16xf16>
return %2 : tensor<10240x16xf16>
}
}
}
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_10240_1_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<10240x1xbf16> {
func.func @main(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<10240x1xbf16> {
%cst = arith.constant 0.000000e+00 : bf16
%0 = tensor.empty() : tensor<10240x1xbf16>
%1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<10240x1xbf16>) -> tensor<10240x1xbf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<10240x1xbf16>) -> tensor<10240x1xbf16>
return %2 : tensor<10240x1xbf16>
}
}
}
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_10240_1_8192_f16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x1xf16>) -> tensor<10240x1xf16> {
func.func @main(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x1xf16>) -> tensor<10240x1xf16> {
%cst = arith.constant 0.000000e+00 : f16
%0 = tensor.empty() : tensor<10240x1xf16>
%1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<10240x1xf16>) -> tensor<10240x1xf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xf16>, tensor<8192x1xf16>) outs(%1 : tensor<10240x1xf16>) -> tensor<10240x1xf16>
return %2 : tensor<10240x1xf16>
}
}
}
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_10240_2_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<10240x2xbf16> {
func.func @main(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<10240x2xbf16> {
%cst = arith.constant 0.000000e+00 : bf16
%0 = tensor.empty() : tensor<10240x2xbf16>
%1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<10240x2xbf16>) -> tensor<10240x2xbf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<10240x2xbf16>) -> tensor<10240x2xbf16>
return %2 : tensor<10240x2xbf16>
}
}
}
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_10240_2_8192_f16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x2xf16>) -> tensor<10240x2xf16> {
func.func @main(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x2xf16>) -> tensor<10240x2xf16> {
%cst = arith.constant 0.000000e+00 : f16
%0 = tensor.empty() : tensor<10240x2xf16>
%1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<10240x2xf16>) -> tensor<10240x2xf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xf16>, tensor<8192x2xf16>) outs(%1 : tensor<10240x2xf16>) -> tensor<10240x2xf16>
return %2 : tensor<10240x2xf16>
}
}
}
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_10240_32_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<10240x32xbf16> {
func.func @main(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<10240x32xbf16> {
%cst = arith.constant 0.000000e+00 : bf16
%0 = tensor.empty() : tensor<10240x32xbf16>
%1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<10240x32xbf16>) -> tensor<10240x32xbf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<10240x32xbf16>) -> tensor<10240x32xbf16>
return %2 : tensor<10240x32xbf16>
}
}
}
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_10240_32_8192_f16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x32xf16>) -> tensor<10240x32xf16> {
func.func @main(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x32xf16>) -> tensor<10240x32xf16> {
%cst = arith.constant 0.000000e+00 : f16
%0 = tensor.empty() : tensor<10240x32xf16>
%1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<10240x32xf16>) -> tensor<10240x32xf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xf16>, tensor<8192x32xf16>) outs(%1 : tensor<10240x32xf16>) -> tensor<10240x32xf16>
return %2 : tensor<10240x32xf16>
}
}
}
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_10240_4_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<10240x4xbf16> {
func.func @main(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<10240x4xbf16> {
%cst = arith.constant 0.000000e+00 : bf16
%0 = tensor.empty() : tensor<10240x4xbf16>
%1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<10240x4xbf16>) -> tensor<10240x4xbf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<10240x4xbf16>) -> tensor<10240x4xbf16>
return %2 : tensor<10240x4xbf16>
}
}
}
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_10240_4_8192_f16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x4xf16>) -> tensor<10240x4xf16> {
func.func @main(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x4xf16>) -> tensor<10240x4xf16> {
%cst = arith.constant 0.000000e+00 : f16
%0 = tensor.empty() : tensor<10240x4xf16>
%1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<10240x4xf16>) -> tensor<10240x4xf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xf16>, tensor<8192x4xf16>) outs(%1 : tensor<10240x4xf16>) -> tensor<10240x4xf16>
return %2 : tensor<10240x4xf16>
}
}
}
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_10240_8_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<10240x8xbf16> {
func.func @main(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<10240x8xbf16> {
%cst = arith.constant 0.000000e+00 : bf16
%0 = tensor.empty() : tensor<10240x8xbf16>
%1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<10240x8xbf16>) -> tensor<10240x8xbf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xbf16>, tensor<8192x8xbf16>) outs(%1 : tensor<10240x8xbf16>) -> tensor<10240x8xbf16>
return %2 : tensor<10240x8xbf16>
}
}
}
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_10240_8_8192_f16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x8xf16>) -> tensor<10240x8xf16> {
func.func @main(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x8xf16>) -> tensor<10240x8xf16> {
%cst = arith.constant 0.000000e+00 : f16
%0 = tensor.empty() : tensor<10240x8xf16>
%1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<10240x8xf16>) -> tensor<10240x8xf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xf16>, tensor<8192x8xf16>) outs(%1 : tensor<10240x8xf16>) -> tensor<10240x8xf16>
return %2 : tensor<10240x8xf16>
}
}
}
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_1280_16_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<1280x16xbf16> {
func.func @main(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<1280x16xbf16> {
%cst = arith.constant 0.000000e+00 : bf16
%0 = tensor.empty() : tensor<1280x16xbf16>
%1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1280x16xbf16>) -> tensor<1280x16xbf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<1280x16xbf16>) -> tensor<1280x16xbf16>
return %2 : tensor<1280x16xbf16>
}
}
}
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_1280_16_8192_f16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x16xf16>) -> tensor<1280x16xf16> {
func.func @main(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x16xf16>) -> tensor<1280x16xf16> {
%cst = arith.constant 0.000000e+00 : f16
%0 = tensor.empty() : tensor<1280x16xf16>
%1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1280x16xf16>) -> tensor<1280x16xf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xf16>, tensor<8192x16xf16>) outs(%1 : tensor<1280x16xf16>) -> tensor<1280x16xf16>
return %2 : tensor<1280x16xf16>
}
}
}
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_1280_1_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<1280x1xbf16> {
func.func @main(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<1280x1xbf16> {
%cst = arith.constant 0.000000e+00 : bf16
%0 = tensor.empty() : tensor<1280x1xbf16>
%1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1280x1xbf16>) -> tensor<1280x1xbf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<1280x1xbf16>) -> tensor<1280x1xbf16>
return %2 : tensor<1280x1xbf16>
}
}
}
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_1280_1_8192_f16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x1xf16>) -> tensor<1280x1xf16> {
func.func @main(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x1xf16>) -> tensor<1280x1xf16> {
%cst = arith.constant 0.000000e+00 : f16
%0 = tensor.empty() : tensor<1280x1xf16>
%1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1280x1xf16>) -> tensor<1280x1xf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xf16>, tensor<8192x1xf16>) outs(%1 : tensor<1280x1xf16>) -> tensor<1280x1xf16>
return %2 : tensor<1280x1xf16>
}
}
}
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_1280_2_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<1280x2xbf16> {
func.func @main(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<1280x2xbf16> {
%cst = arith.constant 0.000000e+00 : bf16
%0 = tensor.empty() : tensor<1280x2xbf16>
%1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1280x2xbf16>) -> tensor<1280x2xbf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<1280x2xbf16>) -> tensor<1280x2xbf16>
return %2 : tensor<1280x2xbf16>
}
}
}
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_1280_2_8192_f16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x2xf16>) -> tensor<1280x2xf16> {
func.func @main(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x2xf16>) -> tensor<1280x2xf16> {
%cst = arith.constant 0.000000e+00 : f16
%0 = tensor.empty() : tensor<1280x2xf16>
%1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1280x2xf16>) -> tensor<1280x2xf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xf16>, tensor<8192x2xf16>) outs(%1 : tensor<1280x2xf16>) -> tensor<1280x2xf16>
return %2 : tensor<1280x2xf16>
}
}
}
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_1280_32_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<1280x32xbf16> {
func.func @main(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<1280x32xbf16> {
%cst = arith.constant 0.000000e+00 : bf16
%0 = tensor.empty() : tensor<1280x32xbf16>
%1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1280x32xbf16>) -> tensor<1280x32xbf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<1280x32xbf16>) -> tensor<1280x32xbf16>
return %2 : tensor<1280x32xbf16>
}
}
}
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_1280_32_8192_f16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x32xf16>) -> tensor<1280x32xf16> {
func.func @main(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x32xf16>) -> tensor<1280x32xf16> {
%cst = arith.constant 0.000000e+00 : f16
%0 = tensor.empty() : tensor<1280x32xf16>
%1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1280x32xf16>) -> tensor<1280x32xf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xf16>, tensor<8192x32xf16>) outs(%1 : tensor<1280x32xf16>) -> tensor<1280x32xf16>
return %2 : tensor<1280x32xf16>
}
}
}
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_1280_4_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<1280x4xbf16> {
func.func @main(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<1280x4xbf16> {
%cst = arith.constant 0.000000e+00 : bf16
%0 = tensor.empty() : tensor<1280x4xbf16>
%1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1280x4xbf16>) -> tensor<1280x4xbf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<1280x4xbf16>) -> tensor<1280x4xbf16>
return %2 : tensor<1280x4xbf16>
}
}
}
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_1280_4_8192_f16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x4xf16>) -> tensor<1280x4xf16> {
func.func @main(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x4xf16>) -> tensor<1280x4xf16> {
%cst = arith.constant 0.000000e+00 : f16
%0 = tensor.empty() : tensor<1280x4xf16>
%1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1280x4xf16>) -> tensor<1280x4xf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xf16>, tensor<8192x4xf16>) outs(%1 : tensor<1280x4xf16>) -> tensor<1280x4xf16>
return %2 : tensor<1280x4xf16>
}
}
}
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_1280_8_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<1280x8xbf16> {
func.func @main(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<1280x8xbf16> {
%cst = arith.constant 0.000000e+00 : bf16
%0 = tensor.empty() : tensor<1280x8xbf16>
%1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1280x8xbf16>) -> tensor<1280x8xbf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xbf16>, tensor<8192x8xbf16>) outs(%1 : tensor<1280x8xbf16>) -> tensor<1280x8xbf16>
return %2 : tensor<1280x8xbf16>
}
}
}
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_1280_8_8192_f16_tA.mlir
@@ -1,10 +1,10 @@

module {
func.func @main_0(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x8xf16>) -> tensor<1280x8xf16> {
func.func @main(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x8xf16>) -> tensor<1280x8xf16> {
%cst = arith.constant 0.000000e+00 : f16
%0 = tensor.empty() : tensor<1280x8xf16>
%1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1280x8xf16>) -> tensor<1280x8xf16>
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xf16>, tensor<8192x8xf16>) outs(%1 : tensor<1280x8xf16>) -> tensor<1280x8xf16>
return %2 : tensor<1280x8xf16>
}
}
}
4 changes: 2 additions & 2 deletions gemm/mlir/gemm_128_1280_2048_bf16.mlir
@@ -1,9 +1,9 @@
module {
func.func @main_0(%arg0: tensor<128x2048xbf16>, %arg1: tensor<2048x1280xbf16>) -> tensor<128x1280xbf16> {
func.func @main(%arg0: tensor<128x2048xbf16>, %arg1: tensor<2048x1280xbf16>) -> tensor<128x1280xbf16> {
%cst = arith.constant 0.000000e+00 : bf16
%0 = tensor.empty() : tensor<128x1280xbf16>
%1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<128x1280xbf16>) -> tensor<128x1280xbf16>
%2 = linalg.matmul ins(%arg0, %arg1 : tensor<128x2048xbf16>, tensor<2048x1280xbf16>) outs(%1 : tensor<128x1280xbf16>) -> tensor<128x1280xbf16>
return %2 : tensor<128x1280xbf16>
}
}
}
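For reference, this last file uses plain linalg.matmul, while the *_tA files above use linalg.matmul_transpose_a, which consumes A in transposed (K x M) layout; that is why, e.g., gemm_10240_16_8192_bf16_tA takes a 8192x10240 operand together with a 8192x16 operand and produces a 10240x16 result. As these linalg named ops are conventionally defined:

matmul:               C_{m,n} = \sum_{k} A_{m,k} \, B_{k,n}
matmul_transpose_a:   C_{m,n} = \sum_{k} A_{k,m} \, B_{k,n}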