diff --git a/.gitignore b/.gitignore
index 1b4209a..5a75f31 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,7 +23,8 @@ wheelhouse
 *.egg-info
 *.whl
 
-# Gemm Bench
+# Bench Artifacts
 gemm/vmfb/
+attention/vmfb/
+conv/vmfb/
 results/
-
diff --git a/attention/vmfb/attention_128x1024x128x128x1024xf16.vmfb b/attention/vmfb/attention_128x1024x128x128x1024xf16.vmfb
deleted file mode 100644
index 7c3490f..0000000
Binary files a/attention/vmfb/attention_128x1024x128x128x1024xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_128x1024x64x64x1024xf16.vmfb b/attention/vmfb/attention_128x1024x64x64x1024xf16.vmfb
deleted file mode 100644
index 8ece34c..0000000
Binary files a/attention/vmfb/attention_128x1024x64x64x1024xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_128x16384x128x128x16384xf16.vmfb b/attention/vmfb/attention_128x16384x128x128x16384xf16.vmfb
deleted file mode 100644
index 89402c0..0000000
Binary files a/attention/vmfb/attention_128x16384x128x128x16384xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_128x16384x64x64x16384xf16.vmfb b/attention/vmfb/attention_128x16384x64x64x16384xf16.vmfb
deleted file mode 100644
index fea5b70..0000000
Binary files a/attention/vmfb/attention_128x16384x64x64x16384xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_128x2048x128x128x2048xf16.vmfb b/attention/vmfb/attention_128x2048x128x128x2048xf16.vmfb
deleted file mode 100644
index ec50bd8..0000000
Binary files a/attention/vmfb/attention_128x2048x128x128x2048xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_128x2048x64x64x2048xf16.vmfb b/attention/vmfb/attention_128x2048x64x64x2048xf16.vmfb
deleted file mode 100644
index c37aa19..0000000
Binary files a/attention/vmfb/attention_128x2048x64x64x2048xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_128x4096x128x128x4096xf16.vmfb b/attention/vmfb/attention_128x4096x128x128x4096xf16.vmfb
deleted file mode 100644
index 7d0136a..0000000
Binary files a/attention/vmfb/attention_128x4096x128x128x4096xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_128x4096x64x64x4096xf16.vmfb b/attention/vmfb/attention_128x4096x64x64x4096xf16.vmfb
deleted file mode 100644
index 05bc66f..0000000
Binary files a/attention/vmfb/attention_128x4096x64x64x4096xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_128x8192x128x128x8192xf16.vmfb b/attention/vmfb/attention_128x8192x128x128x8192xf16.vmfb
deleted file mode 100644
index d667542..0000000
Binary files a/attention/vmfb/attention_128x8192x128x128x8192xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_128x8192x64x64x8192xf16.vmfb b/attention/vmfb/attention_128x8192x64x64x8192xf16.vmfb
deleted file mode 100644
index 70e2681..0000000
Binary files a/attention/vmfb/attention_128x8192x64x64x8192xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_12x384x64x64x384xf16.vmfb b/attention/vmfb/attention_12x384x64x64x384xf16.vmfb
deleted file mode 100644
index b4c2b2c..0000000
Binary files a/attention/vmfb/attention_12x384x64x64x384xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_16x1024x128x128x1024xf16.vmfb b/attention/vmfb/attention_16x1024x128x128x1024xf16.vmfb
deleted file mode 100644
index bec08a4..0000000
Binary files a/attention/vmfb/attention_16x1024x128x128x1024xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_16x1024x64x64x1024xf16.vmfb b/attention/vmfb/attention_16x1024x64x64x1024xf16.vmfb
deleted file mode 100644
index dc0fe57..0000000
Binary files a/attention/vmfb/attention_16x1024x64x64x1024xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_16x16384x128x128x16384xf16.vmfb b/attention/vmfb/attention_16x16384x128x128x16384xf16.vmfb
deleted file mode 100644
index 2e4a4ab..0000000
Binary files a/attention/vmfb/attention_16x16384x128x128x16384xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_16x16384x64x64x16384xf16.vmfb b/attention/vmfb/attention_16x16384x64x64x16384xf16.vmfb
deleted file mode 100644
index b089f0c..0000000
Binary files a/attention/vmfb/attention_16x16384x64x64x16384xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_16x2048x128x128x2048xf16.vmfb b/attention/vmfb/attention_16x2048x128x128x2048xf16.vmfb
deleted file mode 100644
index 09d0412..0000000
Binary files a/attention/vmfb/attention_16x2048x128x128x2048xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_16x2048x64x64x2048xf16.vmfb b/attention/vmfb/attention_16x2048x64x64x2048xf16.vmfb
deleted file mode 100644
index 8b0bacb..0000000
Binary files a/attention/vmfb/attention_16x2048x64x64x2048xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_16x4096x128x128x4096xf16.vmfb b/attention/vmfb/attention_16x4096x128x128x4096xf16.vmfb
deleted file mode 100644
index 6a34acf..0000000
Binary files a/attention/vmfb/attention_16x4096x128x128x4096xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_16x4096x64x64x4096xf16.vmfb b/attention/vmfb/attention_16x4096x64x64x4096xf16.vmfb
deleted file mode 100644
index afdbfa7..0000000
Binary files a/attention/vmfb/attention_16x4096x64x64x4096xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_16x8192x128x128x8192xf16.vmfb b/attention/vmfb/attention_16x8192x128x128x8192xf16.vmfb
deleted file mode 100644
index cf5bd14..0000000
Binary files a/attention/vmfb/attention_16x8192x128x128x8192xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_16x8192x64x64x8192xf16.vmfb b/attention/vmfb/attention_16x8192x64x64x8192xf16.vmfb
deleted file mode 100644
index 38d303b..0000000
Binary files a/attention/vmfb/attention_16x8192x64x64x8192xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_192x1024x128x128x1024xf16.vmfb b/attention/vmfb/attention_192x1024x128x128x1024xf16.vmfb
deleted file mode 100644
index 4ae9d19..0000000
Binary files a/attention/vmfb/attention_192x1024x128x128x1024xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_192x1024x64x64x1024xf16.vmfb b/attention/vmfb/attention_192x1024x64x64x1024xf16.vmfb
deleted file mode 100644
index fc46dcf..0000000
Binary files a/attention/vmfb/attention_192x1024x64x64x1024xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_192x16384x128x128x16384xf16.vmfb b/attention/vmfb/attention_192x16384x128x128x16384xf16.vmfb
deleted file mode 100644
index 715b290..0000000
Binary files a/attention/vmfb/attention_192x16384x128x128x16384xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_192x16384x64x64x16384xf16.vmfb b/attention/vmfb/attention_192x16384x64x64x16384xf16.vmfb
deleted file mode 100644
index 5b01a5b..0000000
Binary files a/attention/vmfb/attention_192x16384x64x64x16384xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_192x2048x128x128x2048xf16.vmfb b/attention/vmfb/attention_192x2048x128x128x2048xf16.vmfb
deleted file mode 100644
index bce1552..0000000
Binary files a/attention/vmfb/attention_192x2048x128x128x2048xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_192x2048x64x64x2048xf16.vmfb b/attention/vmfb/attention_192x2048x64x64x2048xf16.vmfb
deleted file mode 100644
index cfb23f1..0000000
Binary files a/attention/vmfb/attention_192x2048x64x64x2048xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_192x4096x128x128x4096xf16.vmfb b/attention/vmfb/attention_192x4096x128x128x4096xf16.vmfb
deleted file mode 100644
index 6e98b97..0000000
Binary files a/attention/vmfb/attention_192x4096x128x128x4096xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_192x4096x64x64x4096xf16.vmfb b/attention/vmfb/attention_192x4096x64x64x4096xf16.vmfb
deleted file mode 100644
index a948ed4..0000000
Binary files a/attention/vmfb/attention_192x4096x64x64x4096xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_192x8192x128x128x8192xf16.vmfb b/attention/vmfb/attention_192x8192x128x128x8192xf16.vmfb
deleted file mode 100644
index 2d5b932..0000000
Binary files a/attention/vmfb/attention_192x8192x128x128x8192xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_192x8192x64x64x8192xf16.vmfb b/attention/vmfb/attention_192x8192x64x64x8192xf16.vmfb
deleted file mode 100644
index ce154a8..0000000
Binary files a/attention/vmfb/attention_192x8192x64x64x8192xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_1x1024x128x128x1024xf16.vmfb b/attention/vmfb/attention_1x1024x128x128x1024xf16.vmfb
deleted file mode 100644
index 3cf1e55..0000000
Binary files a/attention/vmfb/attention_1x1024x128x128x1024xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_1x1024x64x64x1024xf16.vmfb b/attention/vmfb/attention_1x1024x64x64x1024xf16.vmfb
deleted file mode 100644
index 3f39388..0000000
Binary files a/attention/vmfb/attention_1x1024x64x64x1024xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_1x16384x128x128x16384xf16.vmfb b/attention/vmfb/attention_1x16384x128x128x16384xf16.vmfb
deleted file mode 100644
index de13b1f..0000000
Binary files a/attention/vmfb/attention_1x16384x128x128x16384xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_1x16384x64x64x16384xf16.vmfb b/attention/vmfb/attention_1x16384x64x64x16384xf16.vmfb
deleted file mode 100644
index 938d2cd..0000000
Binary files a/attention/vmfb/attention_1x16384x64x64x16384xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_1x2048x128x128x2048xf16.vmfb b/attention/vmfb/attention_1x2048x128x128x2048xf16.vmfb
deleted file mode 100644
index c09f218..0000000
Binary files a/attention/vmfb/attention_1x2048x128x128x2048xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_1x2048x64x64x2048xf16.vmfb b/attention/vmfb/attention_1x2048x64x64x2048xf16.vmfb
deleted file mode 100644
index a2e36e1..0000000
Binary files a/attention/vmfb/attention_1x2048x64x64x2048xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_1x4096x128x128x4096xf16.vmfb b/attention/vmfb/attention_1x4096x128x128x4096xf16.vmfb
deleted file mode 100644
index 6a16230..0000000
Binary files a/attention/vmfb/attention_1x4096x128x128x4096xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_1x4096x64x64x4096xf16.vmfb b/attention/vmfb/attention_1x4096x64x64x4096xf16.vmfb
deleted file mode 100644
index 6057b7f..0000000
Binary files a/attention/vmfb/attention_1x4096x64x64x4096xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_1x4096x64x64x64xf16.vmfb b/attention/vmfb/attention_1x4096x64x64x64xf16.vmfb
deleted file mode 100644
index a9246fe..0000000
Binary files a/attention/vmfb/attention_1x4096x64x64x64xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_1x8192x128x128x8192xf16.vmfb b/attention/vmfb/attention_1x8192x128x128x8192xf16.vmfb
deleted file mode 100644
index 080fd73..0000000
Binary files a/attention/vmfb/attention_1x8192x128x128x8192xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_1x8192x64x64x8192xf16.vmfb b/attention/vmfb/attention_1x8192x64x64x8192xf16.vmfb
deleted file mode 100644
index f46aa51..0000000
Binary files a/attention/vmfb/attention_1x8192x64x64x8192xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_20x4096x64x64x4096xf16.vmfb b/attention/vmfb/attention_20x4096x64x64x4096xf16.vmfb
deleted file mode 100644
index 424702f..0000000
Binary files a/attention/vmfb/attention_20x4096x64x64x4096xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_20x4096x64x64x64xf16.vmfb b/attention/vmfb/attention_20x4096x64x64x64xf16.vmfb
deleted file mode 100644
index 5f3725e..0000000
Binary files a/attention/vmfb/attention_20x4096x64x64x64xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_2x1024x128x128x1024xf16.vmfb b/attention/vmfb/attention_2x1024x128x128x1024xf16.vmfb
deleted file mode 100644
index eb0be8b..0000000
Binary files a/attention/vmfb/attention_2x1024x128x128x1024xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_2x1024x64x64x1024xf16.vmfb b/attention/vmfb/attention_2x1024x64x64x1024xf16.vmfb
deleted file mode 100644
index 3e66ff3..0000000
Binary files a/attention/vmfb/attention_2x1024x64x64x1024xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_2x1024x64x64x64xf16.vmfb b/attention/vmfb/attention_2x1024x64x64x64xf16.vmfb
deleted file mode 100644
index 1f98e57..0000000
Binary files a/attention/vmfb/attention_2x1024x64x64x64xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_2x16384x128x128x16384xf16.vmfb b/attention/vmfb/attention_2x16384x128x128x16384xf16.vmfb
deleted file mode 100644
index 1465c10..0000000
Binary files a/attention/vmfb/attention_2x16384x128x128x16384xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_2x16384x64x64x16384xf16.vmfb b/attention/vmfb/attention_2x16384x64x64x16384xf16.vmfb
deleted file mode 100644
index 2e8b0fc..0000000
Binary files a/attention/vmfb/attention_2x16384x64x64x16384xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_2x2048x128x128x2048xf16.vmfb b/attention/vmfb/attention_2x2048x128x128x2048xf16.vmfb
deleted file mode 100644
index 6ad2d49..0000000
Binary files a/attention/vmfb/attention_2x2048x128x128x2048xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_2x2048x64x64x2048xf16.vmfb b/attention/vmfb/attention_2x2048x64x64x2048xf16.vmfb
deleted file mode 100644
index 8a5feae..0000000
Binary files a/attention/vmfb/attention_2x2048x64x64x2048xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_2x4096x128x128x4096xf16.vmfb b/attention/vmfb/attention_2x4096x128x128x4096xf16.vmfb
deleted file mode 100644
index 80e7b4d..0000000
Binary files a/attention/vmfb/attention_2x4096x128x128x4096xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_2x4096x64x64x4096xf16.vmfb b/attention/vmfb/attention_2x4096x64x64x4096xf16.vmfb
deleted file mode 100644
index 28b0733..0000000
Binary files a/attention/vmfb/attention_2x4096x64x64x4096xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_2x8192x128x128x8192xf16.vmfb b/attention/vmfb/attention_2x8192x128x128x8192xf16.vmfb
deleted file mode 100644
index eb49642..0000000
Binary files a/attention/vmfb/attention_2x8192x128x128x8192xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_2x8192x64x64x8192xf16.vmfb b/attention/vmfb/attention_2x8192x64x64x8192xf16.vmfb
deleted file mode 100644
index 10bd77f..0000000
Binary files a/attention/vmfb/attention_2x8192x64x64x8192xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_32x1024x128x128x1024xf16.vmfb b/attention/vmfb/attention_32x1024x128x128x1024xf16.vmfb
deleted file mode 100644
index a5a2226..0000000
Binary files a/attention/vmfb/attention_32x1024x128x128x1024xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_32x1024x64x64x1024xf16.vmfb b/attention/vmfb/attention_32x1024x64x64x1024xf16.vmfb
deleted file mode 100644
index 47febe6..0000000
Binary files a/attention/vmfb/attention_32x1024x64x64x1024xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_32x16384x128x128x16384xf16.vmfb b/attention/vmfb/attention_32x16384x128x128x16384xf16.vmfb
deleted file mode 100644
index 7197ee8..0000000
Binary files a/attention/vmfb/attention_32x16384x128x128x16384xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_32x16384x64x64x16384xf16.vmfb b/attention/vmfb/attention_32x16384x64x64x16384xf16.vmfb
deleted file mode 100644
index 9c66eab..0000000
Binary files a/attention/vmfb/attention_32x16384x64x64x16384xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_32x2048x128x128x2048xf16.vmfb b/attention/vmfb/attention_32x2048x128x128x2048xf16.vmfb
deleted file mode 100644
index a8a11f0..0000000
Binary files a/attention/vmfb/attention_32x2048x128x128x2048xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_32x2048x64x64x2048xf16.vmfb b/attention/vmfb/attention_32x2048x64x64x2048xf16.vmfb
deleted file mode 100644
index 23cad72..0000000
Binary files a/attention/vmfb/attention_32x2048x64x64x2048xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_32x4096x128x128x4096xf16.vmfb b/attention/vmfb/attention_32x4096x128x128x4096xf16.vmfb
deleted file mode 100644
index 7ed4feb..0000000
Binary files a/attention/vmfb/attention_32x4096x128x128x4096xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_32x4096x64x64x4096xf16.vmfb b/attention/vmfb/attention_32x4096x64x64x4096xf16.vmfb
deleted file mode 100644
index f67f0a9..0000000
Binary files a/attention/vmfb/attention_32x4096x64x64x4096xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_32x8192x128x128x8192xf16.vmfb b/attention/vmfb/attention_32x8192x128x128x8192xf16.vmfb
deleted file mode 100644
index 48d4c63..0000000
Binary files a/attention/vmfb/attention_32x8192x128x128x8192xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_32x8192x64x64x8192xf16.vmfb b/attention/vmfb/attention_32x8192x64x64x8192xf16.vmfb
deleted file mode 100644
index b4a9ba1..0000000
Binary files a/attention/vmfb/attention_32x8192x64x64x8192xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_40x1024x64x64x1024xf16.vmfb b/attention/vmfb/attention_40x1024x64x64x1024xf16.vmfb
deleted file mode 100644
index 405bdaf..0000000
Binary files a/attention/vmfb/attention_40x1024x64x64x1024xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_40x1024x64x64x64xf16.vmfb b/attention/vmfb/attention_40x1024x64x64x64xf16.vmfb
deleted file mode 100644
index aecdf23..0000000
Binary files a/attention/vmfb/attention_40x1024x64x64x64xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_48x1024x128x128x1024xf16.vmfb b/attention/vmfb/attention_48x1024x128x128x1024xf16.vmfb
deleted file mode 100644
index 84bc961..0000000
Binary files a/attention/vmfb/attention_48x1024x128x128x1024xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_48x1024x64x64x1024xf16.vmfb b/attention/vmfb/attention_48x1024x64x64x1024xf16.vmfb
deleted file mode 100644
index dcb8053..0000000
Binary files a/attention/vmfb/attention_48x1024x64x64x1024xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_48x16384x128x128x16384xf16.vmfb b/attention/vmfb/attention_48x16384x128x128x16384xf16.vmfb
deleted file mode 100644
index 6030730..0000000
Binary files a/attention/vmfb/attention_48x16384x128x128x16384xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_48x16384x64x64x16384xf16.vmfb b/attention/vmfb/attention_48x16384x64x64x16384xf16.vmfb
deleted file mode 100644
index 5121c8a..0000000
Binary files a/attention/vmfb/attention_48x16384x64x64x16384xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_48x2048x128x128x2048xf16.vmfb b/attention/vmfb/attention_48x2048x128x128x2048xf16.vmfb
deleted file mode 100644
index 5b4a736..0000000
Binary files a/attention/vmfb/attention_48x2048x128x128x2048xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_48x2048x64x64x2048xf16.vmfb b/attention/vmfb/attention_48x2048x64x64x2048xf16.vmfb
deleted file mode 100644
index 31ac981..0000000
Binary files a/attention/vmfb/attention_48x2048x64x64x2048xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_48x4096x128x128x4096xf16.vmfb b/attention/vmfb/attention_48x4096x128x128x4096xf16.vmfb
deleted file mode 100644
index 66ae333..0000000
Binary files a/attention/vmfb/attention_48x4096x128x128x4096xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_48x4096x64x64x4096xf16.vmfb b/attention/vmfb/attention_48x4096x64x64x4096xf16.vmfb
deleted file mode 100644
index 3222407..0000000
Binary files a/attention/vmfb/attention_48x4096x64x64x4096xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_48x8192x128x128x8192xf16.vmfb b/attention/vmfb/attention_48x8192x128x128x8192xf16.vmfb
deleted file mode 100644
index c8ac465..0000000
Binary files a/attention/vmfb/attention_48x8192x128x128x8192xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_48x8192x64x64x8192xf16.vmfb b/attention/vmfb/attention_48x8192x64x64x8192xf16.vmfb
deleted file mode 100644
index 12e17aa..0000000
Binary files a/attention/vmfb/attention_48x8192x64x64x8192xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_4x1024x128x128x1024xf16.vmfb b/attention/vmfb/attention_4x1024x128x128x1024xf16.vmfb
deleted file mode 100644
index 61759ec..0000000
Binary files a/attention/vmfb/attention_4x1024x128x128x1024xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_4x1024x64x64x1024xf16.vmfb b/attention/vmfb/attention_4x1024x64x64x1024xf16.vmfb
deleted file mode 100644
index e97b7ae..0000000
Binary files a/attention/vmfb/attention_4x1024x64x64x1024xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_4x16384x128x128x16384xf16.vmfb b/attention/vmfb/attention_4x16384x128x128x16384xf16.vmfb
deleted file mode 100644
index 5a36bf0..0000000
Binary files a/attention/vmfb/attention_4x16384x128x128x16384xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_4x16384x64x64x16384xf16.vmfb b/attention/vmfb/attention_4x16384x64x64x16384xf16.vmfb
deleted file mode 100644
index ffbbcb3..0000000
Binary files a/attention/vmfb/attention_4x16384x64x64x16384xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_4x2048x128x128x2048xf16.vmfb b/attention/vmfb/attention_4x2048x128x128x2048xf16.vmfb
deleted file mode 100644
index a4ea6c9..0000000
Binary files a/attention/vmfb/attention_4x2048x128x128x2048xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_4x2048x64x64x2048xf16.vmfb b/attention/vmfb/attention_4x2048x64x64x2048xf16.vmfb
deleted file mode 100644
index d76fb8f..0000000
Binary files a/attention/vmfb/attention_4x2048x64x64x2048xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_4x4096x128x128x4096xf16.vmfb b/attention/vmfb/attention_4x4096x128x128x4096xf16.vmfb
deleted file mode 100644
index f862895..0000000
Binary files a/attention/vmfb/attention_4x4096x128x128x4096xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_4x4096x64x64x4096xf16.vmfb b/attention/vmfb/attention_4x4096x64x64x4096xf16.vmfb
deleted file mode 100644
index 767d3eb..0000000
Binary files a/attention/vmfb/attention_4x4096x64x64x4096xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_4x4096x64x64x64xf16.vmfb b/attention/vmfb/attention_4x4096x64x64x64xf16.vmfb
deleted file mode 100644
index a4e8b21..0000000
Binary files a/attention/vmfb/attention_4x4096x64x64x64xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_4x8192x128x128x8192xf16.vmfb b/attention/vmfb/attention_4x8192x128x128x8192xf16.vmfb
deleted file mode 100644
index a087bf3..0000000
Binary files a/attention/vmfb/attention_4x8192x128x128x8192xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_4x8192x64x64x8192xf16.vmfb b/attention/vmfb/attention_4x8192x64x64x8192xf16.vmfb
deleted file mode 100644
index 059df1a..0000000
Binary files a/attention/vmfb/attention_4x8192x64x64x8192xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_64x1024x128x128x1024xf16.vmfb b/attention/vmfb/attention_64x1024x128x128x1024xf16.vmfb
deleted file mode 100644
index a0295a2..0000000
Binary files a/attention/vmfb/attention_64x1024x128x128x1024xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_64x1024x64x64x1024xf16.vmfb b/attention/vmfb/attention_64x1024x64x64x1024xf16.vmfb
deleted file mode 100644
index 8e68b50..0000000
Binary files a/attention/vmfb/attention_64x1024x64x64x1024xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_64x16384x128x128x16384xf16.vmfb b/attention/vmfb/attention_64x16384x128x128x16384xf16.vmfb
deleted file mode 100644
index 67d8911..0000000
Binary files a/attention/vmfb/attention_64x16384x128x128x16384xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_64x16384x64x64x16384xf16.vmfb b/attention/vmfb/attention_64x16384x64x64x16384xf16.vmfb
deleted file mode 100644
index d548b8a..0000000
Binary files a/attention/vmfb/attention_64x16384x64x64x16384xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_64x2048x128x128x2048xf16.vmfb b/attention/vmfb/attention_64x2048x128x128x2048xf16.vmfb
deleted file mode 100644
index 797fcff..0000000
Binary files a/attention/vmfb/attention_64x2048x128x128x2048xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_64x2048x64x64x2048xf16.vmfb b/attention/vmfb/attention_64x2048x64x64x2048xf16.vmfb
deleted file mode 100644
index 41ef8f2..0000000
Binary files a/attention/vmfb/attention_64x2048x64x64x2048xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_64x4096x128x128x4096xf16.vmfb b/attention/vmfb/attention_64x4096x128x128x4096xf16.vmfb
deleted file mode 100644
index 62b63e2..0000000
Binary files a/attention/vmfb/attention_64x4096x128x128x4096xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_64x4096x64x64x4096xf16.vmfb b/attention/vmfb/attention_64x4096x64x64x4096xf16.vmfb
deleted file mode 100644
index eda7758..0000000
Binary files a/attention/vmfb/attention_64x4096x64x64x4096xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_64x8192x128x128x8192xf16.vmfb b/attention/vmfb/attention_64x8192x128x128x8192xf16.vmfb
deleted file mode 100644
index 68aae1c..0000000
Binary files a/attention/vmfb/attention_64x8192x128x128x8192xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_64x8192x64x64x8192xf16.vmfb b/attention/vmfb/attention_64x8192x64x64x8192xf16.vmfb
deleted file mode 100644
index bdc231e..0000000
Binary files a/attention/vmfb/attention_64x8192x64x64x8192xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_768x4096x64x64x64xf16.vmfb b/attention/vmfb/attention_768x4096x64x64x64xf16.vmfb
deleted file mode 100644
index cd9d9ad..0000000
Binary files a/attention/vmfb/attention_768x4096x64x64x64xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_8x1024x128x128x1024xf16.vmfb b/attention/vmfb/attention_8x1024x128x128x1024xf16.vmfb
deleted file mode 100644
index 4c8fb53..0000000
Binary files a/attention/vmfb/attention_8x1024x128x128x1024xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_8x1024x64x64x1024xf16.vmfb b/attention/vmfb/attention_8x1024x64x64x1024xf16.vmfb
deleted file mode 100644
index 44033ea..0000000
Binary files a/attention/vmfb/attention_8x1024x64x64x1024xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_8x1024x64x64x64xf16.vmfb b/attention/vmfb/attention_8x1024x64x64x64xf16.vmfb
deleted file mode 100644
index 5b8212f..0000000
Binary files a/attention/vmfb/attention_8x1024x64x64x64xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_8x16384x128x128x16384xf16.vmfb b/attention/vmfb/attention_8x16384x128x128x16384xf16.vmfb
deleted file mode 100644
index f0ddb38..0000000
Binary files a/attention/vmfb/attention_8x16384x128x128x16384xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_8x16384x64x64x16384xf16.vmfb b/attention/vmfb/attention_8x16384x64x64x16384xf16.vmfb
deleted file mode 100644
index 900f552..0000000
Binary files a/attention/vmfb/attention_8x16384x64x64x16384xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_8x2048x128x128x2048xf16.vmfb b/attention/vmfb/attention_8x2048x128x128x2048xf16.vmfb
deleted file mode 100644
index 4813ea9..0000000
Binary files a/attention/vmfb/attention_8x2048x128x128x2048xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_8x2048x64x64x2048xf16.vmfb b/attention/vmfb/attention_8x2048x64x64x2048xf16.vmfb
deleted file mode 100644
index 0b40915..0000000
Binary files a/attention/vmfb/attention_8x2048x64x64x2048xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_8x4096x128x128x4096xf16.vmfb b/attention/vmfb/attention_8x4096x128x128x4096xf16.vmfb
deleted file mode 100644
index 2c9b6af..0000000
Binary files a/attention/vmfb/attention_8x4096x128x128x4096xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_8x4096x64x64x4096xf16.vmfb b/attention/vmfb/attention_8x4096x64x64x4096xf16.vmfb
deleted file mode 100644
index f208a68..0000000
Binary files a/attention/vmfb/attention_8x4096x64x64x4096xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_8x8192x128x128x8192xf16.vmfb b/attention/vmfb/attention_8x8192x128x128x8192xf16.vmfb
deleted file mode 100644
index e7cb257..0000000
Binary files a/attention/vmfb/attention_8x8192x128x128x8192xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_8x8192x64x64x8192xf16.vmfb b/attention/vmfb/attention_8x8192x64x64x8192xf16.vmfb
deleted file mode 100644
index 3e38735..0000000
Binary files a/attention/vmfb/attention_8x8192x64x64x8192xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_96x1024x128x128x1024xf16.vmfb b/attention/vmfb/attention_96x1024x128x128x1024xf16.vmfb
deleted file mode 100644
index e6f5a77..0000000
Binary files a/attention/vmfb/attention_96x1024x128x128x1024xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_96x1024x64x64x1024xf16.vmfb b/attention/vmfb/attention_96x1024x64x64x1024xf16.vmfb
deleted file mode 100644
index 3c98db9..0000000
Binary files a/attention/vmfb/attention_96x1024x64x64x1024xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_96x16384x128x128x16384xf16.vmfb b/attention/vmfb/attention_96x16384x128x128x16384xf16.vmfb
deleted file mode 100644
index 80347c4..0000000
Binary files a/attention/vmfb/attention_96x16384x128x128x16384xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_96x16384x64x64x16384xf16.vmfb b/attention/vmfb/attention_96x16384x64x64x16384xf16.vmfb
deleted file mode 100644
index f421314..0000000
Binary files a/attention/vmfb/attention_96x16384x64x64x16384xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_96x2048x128x128x2048xf16.vmfb b/attention/vmfb/attention_96x2048x128x128x2048xf16.vmfb
deleted file mode 100644
index f5c959b..0000000
Binary files a/attention/vmfb/attention_96x2048x128x128x2048xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_96x2048x64x64x2048xf16.vmfb b/attention/vmfb/attention_96x2048x64x64x2048xf16.vmfb
deleted file mode 100644
index 4c482f6..0000000
Binary files a/attention/vmfb/attention_96x2048x64x64x2048xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_96x4096x128x128x4096xf16.vmfb b/attention/vmfb/attention_96x4096x128x128x4096xf16.vmfb
deleted file mode 100644
index 3312178..0000000
Binary files a/attention/vmfb/attention_96x4096x128x128x4096xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_96x4096x64x64x4096xf16.vmfb b/attention/vmfb/attention_96x4096x64x64x4096xf16.vmfb
deleted file mode 100644
index ee83a16..0000000
Binary files a/attention/vmfb/attention_96x4096x64x64x4096xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_96x8192x128x128x8192xf16.vmfb b/attention/vmfb/attention_96x8192x128x128x8192xf16.vmfb
deleted file mode 100644
index cd119be..0000000
Binary files a/attention/vmfb/attention_96x8192x128x128x8192xf16.vmfb and /dev/null differ
diff --git a/attention/vmfb/attention_96x8192x64x64x8192xf16.vmfb b/attention/vmfb/attention_96x8192x64x64x8192xf16.vmfb
deleted file mode 100644
index 60fc0ca..0000000
Binary files a/attention/vmfb/attention_96x8192x64x64x8192xf16.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_16x112x112x64x7x7x3_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_16x112x112x64x7x7x3_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 90ba607..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_16x112x112x64x7x7x3_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_16x14x14x1024x1x1x512_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_16x14x14x1024x1x1x512_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index c5c4834..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_16x14x14x1024x1x1x512_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_16x14x14x256x3x3x256_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_16x14x14x256x3x3x256_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index 6a848a0..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_16x14x14x256x3x3x256_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_16x14x14x256x3x3x256_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_16x14x14x256x3x3x256_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 5f31e34..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_16x14x14x256x3x3x256_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_16x28x28x128x3x3x128_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_16x28x28x128x3x3x128_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index 31fbdc3..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_16x28x28x128x3x3x128_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_16x28x28x128x3x3x128_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_16x28x28x128x3x3x128_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index d713448..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_16x28x28x128x3x3x128_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_16x28x28x512x1x1x256_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_16x28x28x512x1x1x256_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index cb314fc..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_16x28x28x512x1x1x256_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_16x56x56x64x3x3x64_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_16x56x56x64x3x3x64_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index a873616..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_16x56x56x64x3x3x64_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_16x7x7x2048x1x1x1024_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_16x7x7x2048x1x1x1024_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 485ce60..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_16x7x7x2048x1x1x1024_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_16x7x7x512x3x3x512_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_16x7x7x512x3x3x512_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index a29d15f..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_16x7x7x512x3x3x512_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_16x7x7x512x3x3x512_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_16x7x7x512x3x3x512_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index c13030d..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_16x7x7x512x3x3x512_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_1x112x112x64x7x7x3_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_1x112x112x64x7x7x3_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 45bdada..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_1x112x112x64x7x7x3_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_1x14x14x1024x1x1x512_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_1x14x14x1024x1x1x512_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 851f16f..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_1x14x14x1024x1x1x512_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_1x14x14x256x3x3x256_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_1x14x14x256x3x3x256_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index 34c5996..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_1x14x14x256x3x3x256_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_1x14x14x256x3x3x256_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_1x14x14x256x3x3x256_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index ec04687..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_1x14x14x256x3x3x256_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_1x28x28x128x3x3x128_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_1x28x28x128x3x3x128_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index 69355da..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_1x28x28x128x3x3x128_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_1x28x28x128x3x3x128_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_1x28x28x128x3x3x128_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index f804da6..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_1x28x28x128x3x3x128_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_1x28x28x512x1x1x256_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_1x28x28x512x1x1x256_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index bf5f2d4..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_1x28x28x512x1x1x256_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_1x56x56x64x3x3x64_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_1x56x56x64x3x3x64_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index 24b2d30..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_1x56x56x64x3x3x64_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_1x7x7x2048x1x1x1024_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_1x7x7x2048x1x1x1024_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 18fafce..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_1x7x7x2048x1x1x1024_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_1x7x7x512x3x3x512_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_1x7x7x512x3x3x512_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index a035c49..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_1x7x7x512x3x3x512_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_1x7x7x512x3x3x512_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_1x7x7x512x3x3x512_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 85886ea..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_1x7x7x512x3x3x512_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_2x112x112x64x7x7x3_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_2x112x112x64x7x7x3_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 07c6054..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_2x112x112x64x7x7x3_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_2x14x14x1024x1x1x512_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_2x14x14x1024x1x1x512_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 8034479..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_2x14x14x1024x1x1x512_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_2x14x14x256x3x3x256_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_2x14x14x256x3x3x256_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index 7dbface..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_2x14x14x256x3x3x256_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_2x14x14x256x3x3x256_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_2x14x14x256x3x3x256_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 0c86b7c..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_2x14x14x256x3x3x256_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_2x28x28x128x3x3x128_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_2x28x28x128x3x3x128_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index 3b27184..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_2x28x28x128x3x3x128_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_2x28x28x128x3x3x128_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_2x28x28x128x3x3x128_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 70d2b31..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_2x28x28x128x3x3x128_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_2x28x28x512x1x1x256_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_2x28x28x512x1x1x256_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 4a17a4d..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_2x28x28x512x1x1x256_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_2x56x56x64x3x3x64_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_2x56x56x64x3x3x64_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index a180673..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_2x56x56x64x3x3x64_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_2x7x7x2048x1x1x1024_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_2x7x7x2048x1x1x1024_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 6b646a4..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_2x7x7x2048x1x1x1024_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_2x7x7x512x3x3x512_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_2x7x7x512x3x3x512_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index 6138c98..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_2x7x7x512x3x3x512_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_2x7x7x512x3x3x512_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_2x7x7x512x3x3x512_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 30fe861..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_2x7x7x512x3x3x512_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_32x112x112x64x7x7x3_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_32x112x112x64x7x7x3_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index b14dcdd..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_32x112x112x64x7x7x3_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_32x14x14x1024x1x1x512_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_32x14x14x1024x1x1x512_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index f7a494a..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_32x14x14x1024x1x1x512_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_32x14x14x256x3x3x256_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_32x14x14x256x3x3x256_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index 400c9b7..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_32x14x14x256x3x3x256_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_32x14x14x256x3x3x256_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_32x14x14x256x3x3x256_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 58c0ce5..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_32x14x14x256x3x3x256_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_32x28x28x128x3x3x128_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_32x28x28x128x3x3x128_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index a1b88d2..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_32x28x28x128x3x3x128_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_32x28x28x128x3x3x128_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_32x28x28x128x3x3x128_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index b123ad7..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_32x28x28x128x3x3x128_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_32x28x28x512x1x1x256_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_32x28x28x512x1x1x256_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 3a7439e..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_32x28x28x512x1x1x256_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_32x56x56x64x3x3x64_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_32x56x56x64x3x3x64_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index 43ffeb9..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_32x56x56x64x3x3x64_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_32x7x7x2048x1x1x1024_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_32x7x7x2048x1x1x1024_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index d8efef1..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_32x7x7x2048x1x1x1024_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_32x7x7x512x3x3x512_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_32x7x7x512x3x3x512_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index 9aa0b09..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_32x7x7x512x3x3x512_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_32x7x7x512x3x3x512_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_32x7x7x512x3x3x512_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 02b26af..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_32x7x7x512x3x3x512_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_48x112x112x64x7x7x3_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_48x112x112x64x7x7x3_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 735c1c7..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_48x112x112x64x7x7x3_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_48x14x14x1024x1x1x512_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_48x14x14x1024x1x1x512_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 83c2dfe..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_48x14x14x1024x1x1x512_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_48x14x14x256x3x3x256_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_48x14x14x256x3x3x256_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index 887dd4a..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_48x14x14x256x3x3x256_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_48x14x14x256x3x3x256_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_48x14x14x256x3x3x256_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index d6826f8..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_48x14x14x256x3x3x256_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_48x28x28x128x3x3x128_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_48x28x28x128x3x3x128_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index 92ed8ab..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_48x28x28x128x3x3x128_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_48x28x28x128x3x3x128_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_48x28x28x128x3x3x128_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 9dc35c7..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_48x28x28x128x3x3x128_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_48x28x28x512x1x1x256_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_48x28x28x512x1x1x256_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 1def039..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_48x28x28x512x1x1x256_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_48x56x56x64x3x3x64_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_48x56x56x64x3x3x64_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index e22986b..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_48x56x56x64x3x3x64_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_48x7x7x2048x1x1x1024_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_48x7x7x2048x1x1x1024_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 19b2d16..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_48x7x7x2048x1x1x1024_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_48x7x7x512x3x3x512_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_48x7x7x512x3x3x512_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index 6be3307..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_48x7x7x512x3x3x512_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_48x7x7x512x3x3x512_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_48x7x7x512x3x3x512_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index ba6e7d1..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_48x7x7x512x3x3x512_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_4x112x112x64x7x7x3_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_4x112x112x64x7x7x3_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index c12f222..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_4x112x112x64x7x7x3_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_4x14x14x1024x1x1x512_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_4x14x14x1024x1x1x512_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 34e251f..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_4x14x14x1024x1x1x512_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_4x14x14x256x3x3x256_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_4x14x14x256x3x3x256_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index 503eb32..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_4x14x14x256x3x3x256_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_4x14x14x256x3x3x256_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_4x14x14x256x3x3x256_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 3837457..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_4x14x14x256x3x3x256_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_4x28x28x128x3x3x128_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_4x28x28x128x3x3x128_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index 8eb8287..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_4x28x28x128x3x3x128_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_4x28x28x128x3x3x128_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_4x28x28x128x3x3x128_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 5fa495b..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_4x28x28x128x3x3x128_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_4x28x28x512x1x1x256_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_4x28x28x512x1x1x256_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 4d84e19..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_4x28x28x512x1x1x256_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_4x56x56x64x3x3x64_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_4x56x56x64x3x3x64_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index 757cb62..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_4x56x56x64x3x3x64_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_4x7x7x2048x1x1x1024_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_4x7x7x2048x1x1x1024_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index c43acfa..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_4x7x7x2048x1x1x1024_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_4x7x7x512x3x3x512_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_4x7x7x512x3x3x512_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index f7999f8..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_4x7x7x512x3x3x512_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_4x7x7x512x3x3x512_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_4x7x7x512x3x3x512_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 4e3f847..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_4x7x7x512x3x3x512_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_8x112x112x64x7x7x3_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_8x112x112x64x7x7x3_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 4ad5015..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_8x112x112x64x7x7x3_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_8x14x14x1024x1x1x512_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_8x14x14x1024x1x1x512_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 0bb8c42..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_8x14x14x1024x1x1x512_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_8x14x14x256x3x3x256_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_8x14x14x256x3x3x256_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index 2f5380e..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_8x14x14x256x3x3x256_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_8x14x14x256x3x3x256_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_8x14x14x256x3x3x256_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 800fff5..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_8x14x14x256x3x3x256_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_8x28x28x128x3x3x128_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_8x28x28x128x3x3x128_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index 20a91fa..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_8x28x28x128x3x3x128_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_8x28x28x128x3x3x128_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_8x28x28x128x3x3x128_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 569db13..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_8x28x28x128x3x3x128_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_8x28x28x512x1x1x256_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_8x28x28x512x1x1x256_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index 519a1e3..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_8x28x28x512x1x1x256_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_8x56x56x64x3x3x64_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_8x56x56x64x3x3x64_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index 34cd2fa..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_8x56x56x64x3x3x64_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_8x7x7x2048x1x1x1024_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_8x7x7x2048x1x1x1024_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index f7c98dd..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_8x7x7x2048x1x1x1024_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_8x7x7x512x3x3x512_f32xf32xf32_stride1.vmfb b/conv/vmfb/conv_2d_nchw_fchw_8x7x7x512x3x3x512_f32xf32xf32_stride1.vmfb
deleted file mode 100644
index 542df15..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_8x7x7x512x3x3x512_f32xf32xf32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nchw_fchw_8x7x7x512x3x3x512_f32xf32xf32_stride2.vmfb b/conv/vmfb/conv_2d_nchw_fchw_8x7x7x512x3x3x512_f32xf32xf32_stride2.vmfb
deleted file mode 100644
index af7836e..0000000
Binary files a/conv/vmfb/conv_2d_nchw_fchw_8x7x7x512x3x3x512_f32xf32xf32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_16x112x112x64x7x7x3_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_16x112x112x64x7x7x3_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index 94902a0..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_16x112x112x64x7x7x3_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_16x14x14x1024x1x1x512_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_16x14x14x1024x1x1x512_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index 4c6b315..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_16x14x14x1024x1x1x512_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_16x14x14x256x3x3x256_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_16x14x14x256x3x3x256_i8xi8xi32_stride1.vmfb
deleted file mode 100644
index fc6d0f0..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_16x14x14x256x3x3x256_i8xi8xi32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_16x14x14x256x3x3x256_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_16x14x14x256x3x3x256_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index 9ed6b72..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_16x14x14x256x3x3x256_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_16x28x28x128x3x3x128_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_16x28x28x128x3x3x128_i8xi8xi32_stride1.vmfb
deleted file mode 100644
index 8b6c7c5..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_16x28x28x128x3x3x128_i8xi8xi32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_16x28x28x128x3x3x128_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_16x28x28x128x3x3x128_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index 105fa6a..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_16x28x28x128x3x3x128_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_16x28x28x512x1x1x256_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_16x28x28x512x1x1x256_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index 13930f0..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_16x28x28x512x1x1x256_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_16x56x56x64x3x3x64_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_16x56x56x64x3x3x64_i8xi8xi32_stride1.vmfb
deleted file mode 100644
index 3859b23..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_16x56x56x64x3x3x64_i8xi8xi32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_16x7x7x2048x1x1x1024_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_16x7x7x2048x1x1x1024_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index 9cda9bd..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_16x7x7x2048x1x1x1024_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_16x7x7x512x3x3x512_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_16x7x7x512x3x3x512_i8xi8xi32_stride1.vmfb
deleted file mode 100644
index 686f2ce..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_16x7x7x512x3x3x512_i8xi8xi32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_16x7x7x512x3x3x512_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_16x7x7x512x3x3x512_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index 95e8ac8..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_16x7x7x512x3x3x512_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_1x112x112x64x7x7x3_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_1x112x112x64x7x7x3_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index 6c6de3a..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_1x112x112x64x7x7x3_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_1x14x14x1024x1x1x512_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_1x14x14x1024x1x1x512_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index b1f1839..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_1x14x14x1024x1x1x512_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_1x14x14x256x3x3x256_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_1x14x14x256x3x3x256_i8xi8xi32_stride1.vmfb
deleted file mode 100644
index 5cd5b56..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_1x14x14x256x3x3x256_i8xi8xi32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_1x14x14x256x3x3x256_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_1x14x14x256x3x3x256_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index 10477f9..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_1x14x14x256x3x3x256_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_1x28x28x128x3x3x128_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_1x28x28x128x3x3x128_i8xi8xi32_stride1.vmfb
deleted file mode 100644
index e2688ae..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_1x28x28x128x3x3x128_i8xi8xi32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_1x28x28x128x3x3x128_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_1x28x28x128x3x3x128_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index a5ad4cd..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_1x28x28x128x3x3x128_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_1x28x28x512x1x1x256_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_1x28x28x512x1x1x256_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index ee9a095..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_1x28x28x512x1x1x256_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_1x56x56x64x3x3x64_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_1x56x56x64x3x3x64_i8xi8xi32_stride1.vmfb
deleted file mode 100644
index 9f0d831..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_1x56x56x64x3x3x64_i8xi8xi32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_1x7x7x2048x1x1x1024_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_1x7x7x2048x1x1x1024_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index f7ed27d..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_1x7x7x2048x1x1x1024_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_1x7x7x512x3x3x512_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_1x7x7x512x3x3x512_i8xi8xi32_stride1.vmfb
deleted file mode 100644
index ab65c78..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_1x7x7x512x3x3x512_i8xi8xi32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_1x7x7x512x3x3x512_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_1x7x7x512x3x3x512_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index 7c667b6..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_1x7x7x512x3x3x512_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_2x112x112x64x7x7x3_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_2x112x112x64x7x7x3_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index 08f5257..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_2x112x112x64x7x7x3_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_2x14x14x1024x1x1x512_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_2x14x14x1024x1x1x512_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index 69bad46..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_2x14x14x1024x1x1x512_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_2x14x14x256x3x3x256_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_2x14x14x256x3x3x256_i8xi8xi32_stride1.vmfb
deleted file mode 100644
index 1bffc8c..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_2x14x14x256x3x3x256_i8xi8xi32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_2x14x14x256x3x3x256_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_2x14x14x256x3x3x256_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index c8d1650..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_2x14x14x256x3x3x256_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_2x28x28x128x3x3x128_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_2x28x28x128x3x3x128_i8xi8xi32_stride1.vmfb
deleted file mode 100644
index 71c6649..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_2x28x28x128x3x3x128_i8xi8xi32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_2x28x28x128x3x3x128_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_2x28x28x128x3x3x128_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index 804c6f3..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_2x28x28x128x3x3x128_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_2x28x28x512x1x1x256_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_2x28x28x512x1x1x256_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index 127d334..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_2x28x28x512x1x1x256_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_2x56x56x64x3x3x64_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_2x56x56x64x3x3x64_i8xi8xi32_stride1.vmfb
deleted file mode 100644
index cb91ea6..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_2x56x56x64x3x3x64_i8xi8xi32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_2x7x7x2048x1x1x1024_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_2x7x7x2048x1x1x1024_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index 2a16e17..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_2x7x7x2048x1x1x1024_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_2x7x7x512x3x3x512_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_2x7x7x512x3x3x512_i8xi8xi32_stride1.vmfb
deleted file mode 100644
index 02fbba2..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_2x7x7x512x3x3x512_i8xi8xi32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_2x7x7x512x3x3x512_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_2x7x7x512x3x3x512_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index b3e94c5..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_2x7x7x512x3x3x512_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_32x112x112x64x7x7x3_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_32x112x112x64x7x7x3_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index 86f0c25..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_32x112x112x64x7x7x3_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_32x14x14x1024x1x1x512_i8xi8xi32_stride2.vmfb
b/conv/vmfb/conv_2d_nhwc_hwcf_q_32x14x14x1024x1x1x512_i8xi8xi32_stride2.vmfb deleted file mode 100644 index a3d903f..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_32x14x14x1024x1x1x512_i8xi8xi32_stride2.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_32x14x14x256x3x3x256_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_32x14x14x256x3x3x256_i8xi8xi32_stride1.vmfb deleted file mode 100644 index bb9b90d..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_32x14x14x256x3x3x256_i8xi8xi32_stride1.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_32x14x14x256x3x3x256_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_32x14x14x256x3x3x256_i8xi8xi32_stride2.vmfb deleted file mode 100644 index 228ec1c..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_32x14x14x256x3x3x256_i8xi8xi32_stride2.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_32x28x28x128x3x3x128_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_32x28x28x128x3x3x128_i8xi8xi32_stride1.vmfb deleted file mode 100644 index 3864a71..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_32x28x28x128x3x3x128_i8xi8xi32_stride1.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_32x28x28x128x3x3x128_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_32x28x28x128x3x3x128_i8xi8xi32_stride2.vmfb deleted file mode 100644 index 94a8a71..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_32x28x28x128x3x3x128_i8xi8xi32_stride2.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_32x28x28x512x1x1x256_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_32x28x28x512x1x1x256_i8xi8xi32_stride2.vmfb deleted file mode 100644 index 6d88c07..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_32x28x28x512x1x1x256_i8xi8xi32_stride2.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_32x56x56x64x3x3x64_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_32x56x56x64x3x3x64_i8xi8xi32_stride1.vmfb deleted file mode 100644 index 8ce8a4a..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_32x56x56x64x3x3x64_i8xi8xi32_stride1.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_32x7x7x2048x1x1x1024_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_32x7x7x2048x1x1x1024_i8xi8xi32_stride2.vmfb deleted file mode 100644 index e3ceb41..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_32x7x7x2048x1x1x1024_i8xi8xi32_stride2.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_32x7x7x512x3x3x512_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_32x7x7x512x3x3x512_i8xi8xi32_stride1.vmfb deleted file mode 100644 index b203b43..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_32x7x7x512x3x3x512_i8xi8xi32_stride1.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_32x7x7x512x3x3x512_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_32x7x7x512x3x3x512_i8xi8xi32_stride2.vmfb deleted file mode 100644 index 30dce78..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_32x7x7x512x3x3x512_i8xi8xi32_stride2.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_48x112x112x64x7x7x3_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_48x112x112x64x7x7x3_i8xi8xi32_stride2.vmfb deleted file mode 100644 index 7de2f43..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_48x112x112x64x7x7x3_i8xi8xi32_stride2.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_48x14x14x1024x1x1x512_i8xi8xi32_stride2.vmfb 
b/conv/vmfb/conv_2d_nhwc_hwcf_q_48x14x14x1024x1x1x512_i8xi8xi32_stride2.vmfb deleted file mode 100644 index b9c2920..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_48x14x14x1024x1x1x512_i8xi8xi32_stride2.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_48x14x14x256x3x3x256_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_48x14x14x256x3x3x256_i8xi8xi32_stride1.vmfb deleted file mode 100644 index 2a7db58..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_48x14x14x256x3x3x256_i8xi8xi32_stride1.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_48x14x14x256x3x3x256_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_48x14x14x256x3x3x256_i8xi8xi32_stride2.vmfb deleted file mode 100644 index c054fac..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_48x14x14x256x3x3x256_i8xi8xi32_stride2.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_48x28x28x128x3x3x128_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_48x28x28x128x3x3x128_i8xi8xi32_stride1.vmfb deleted file mode 100644 index f992692..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_48x28x28x128x3x3x128_i8xi8xi32_stride1.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_48x28x28x128x3x3x128_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_48x28x28x128x3x3x128_i8xi8xi32_stride2.vmfb deleted file mode 100644 index a20c26f..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_48x28x28x128x3x3x128_i8xi8xi32_stride2.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_48x28x28x512x1x1x256_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_48x28x28x512x1x1x256_i8xi8xi32_stride2.vmfb deleted file mode 100644 index 8153614..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_48x28x28x512x1x1x256_i8xi8xi32_stride2.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_48x56x56x64x3x3x64_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_48x56x56x64x3x3x64_i8xi8xi32_stride1.vmfb deleted file mode 100644 index 0790864..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_48x56x56x64x3x3x64_i8xi8xi32_stride1.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_48x7x7x2048x1x1x1024_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_48x7x7x2048x1x1x1024_i8xi8xi32_stride2.vmfb deleted file mode 100644 index a086b8d..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_48x7x7x2048x1x1x1024_i8xi8xi32_stride2.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_48x7x7x512x3x3x512_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_48x7x7x512x3x3x512_i8xi8xi32_stride1.vmfb deleted file mode 100644 index 2f5c4d8..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_48x7x7x512x3x3x512_i8xi8xi32_stride1.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_48x7x7x512x3x3x512_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_48x7x7x512x3x3x512_i8xi8xi32_stride2.vmfb deleted file mode 100644 index 985f486..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_48x7x7x512x3x3x512_i8xi8xi32_stride2.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_4x112x112x64x7x7x3_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_4x112x112x64x7x7x3_i8xi8xi32_stride2.vmfb deleted file mode 100644 index 434e5bc..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_4x112x112x64x7x7x3_i8xi8xi32_stride2.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_4x14x14x1024x1x1x512_i8xi8xi32_stride2.vmfb 
b/conv/vmfb/conv_2d_nhwc_hwcf_q_4x14x14x1024x1x1x512_i8xi8xi32_stride2.vmfb deleted file mode 100644 index 65c472c..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_4x14x14x1024x1x1x512_i8xi8xi32_stride2.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_4x14x14x256x3x3x256_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_4x14x14x256x3x3x256_i8xi8xi32_stride1.vmfb deleted file mode 100644 index ccc863f..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_4x14x14x256x3x3x256_i8xi8xi32_stride1.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_4x14x14x256x3x3x256_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_4x14x14x256x3x3x256_i8xi8xi32_stride2.vmfb deleted file mode 100644 index fbbcf57..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_4x14x14x256x3x3x256_i8xi8xi32_stride2.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_4x28x28x128x3x3x128_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_4x28x28x128x3x3x128_i8xi8xi32_stride1.vmfb deleted file mode 100644 index 6c71955..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_4x28x28x128x3x3x128_i8xi8xi32_stride1.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_4x28x28x128x3x3x128_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_4x28x28x128x3x3x128_i8xi8xi32_stride2.vmfb deleted file mode 100644 index 51892d2..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_4x28x28x128x3x3x128_i8xi8xi32_stride2.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_4x28x28x512x1x1x256_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_4x28x28x512x1x1x256_i8xi8xi32_stride2.vmfb deleted file mode 100644 index a1ca1f7..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_4x28x28x512x1x1x256_i8xi8xi32_stride2.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_4x56x56x64x3x3x64_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_4x56x56x64x3x3x64_i8xi8xi32_stride1.vmfb deleted file mode 100644 index b43d422..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_4x56x56x64x3x3x64_i8xi8xi32_stride1.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_4x7x7x2048x1x1x1024_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_4x7x7x2048x1x1x1024_i8xi8xi32_stride2.vmfb deleted file mode 100644 index 75c6026..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_4x7x7x2048x1x1x1024_i8xi8xi32_stride2.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_4x7x7x512x3x3x512_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_4x7x7x512x3x3x512_i8xi8xi32_stride1.vmfb deleted file mode 100644 index 7cd222a..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_4x7x7x512x3x3x512_i8xi8xi32_stride1.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_4x7x7x512x3x3x512_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_4x7x7x512x3x3x512_i8xi8xi32_stride2.vmfb deleted file mode 100644 index 9747602..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_4x7x7x512x3x3x512_i8xi8xi32_stride2.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_8x112x112x64x7x7x3_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_8x112x112x64x7x7x3_i8xi8xi32_stride2.vmfb deleted file mode 100644 index 3f85f6d..0000000 Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_8x112x112x64x7x7x3_i8xi8xi32_stride2.vmfb and /dev/null differ diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_8x14x14x1024x1x1x512_i8xi8xi32_stride2.vmfb 
deleted file mode 100644
index 0dca3e7..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_8x14x14x1024x1x1x512_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_8x14x14x256x3x3x256_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_8x14x14x256x3x3x256_i8xi8xi32_stride1.vmfb
deleted file mode 100644
index 9758f0c..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_8x14x14x256x3x3x256_i8xi8xi32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_8x14x14x256x3x3x256_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_8x14x14x256x3x3x256_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index b4aac89..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_8x14x14x256x3x3x256_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_8x28x28x128x3x3x128_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_8x28x28x128x3x3x128_i8xi8xi32_stride1.vmfb
deleted file mode 100644
index 9198024..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_8x28x28x128x3x3x128_i8xi8xi32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_8x28x28x128x3x3x128_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_8x28x28x128x3x3x128_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index f1f34d6..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_8x28x28x128x3x3x128_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_8x28x28x512x1x1x256_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_8x28x28x512x1x1x256_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index 7e2d852..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_8x28x28x512x1x1x256_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_8x56x56x64x3x3x64_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_8x56x56x64x3x3x64_i8xi8xi32_stride1.vmfb
deleted file mode 100644
index a4fd2ce..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_8x56x56x64x3x3x64_i8xi8xi32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_8x7x7x2048x1x1x1024_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_8x7x7x2048x1x1x1024_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index f9353cb..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_8x7x7x2048x1x1x1024_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_8x7x7x512x3x3x512_i8xi8xi32_stride1.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_8x7x7x512x3x3x512_i8xi8xi32_stride1.vmfb
deleted file mode 100644
index 6d49435..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_8x7x7x512x3x3x512_i8xi8xi32_stride1.vmfb and /dev/null differ
diff --git a/conv/vmfb/conv_2d_nhwc_hwcf_q_8x7x7x512x3x3x512_i8xi8xi32_stride2.vmfb b/conv/vmfb/conv_2d_nhwc_hwcf_q_8x7x7x512x3x3x512_i8xi8xi32_stride2.vmfb
deleted file mode 100644
index 1c85020..0000000
Binary files a/conv/vmfb/conv_2d_nhwc_hwcf_q_8x7x7x512x3x3x512_i8xi8xi32_stride2.vmfb and /dev/null differ
diff --git a/gemm/mlir/gemm_10240_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_10240_16_8192_bf16_tA.mlir
index 6518245..2f56e73 100644
--- a/gemm/mlir/gemm_10240_16_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_10240_16_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<10240x16xbf16> {
+  func.func @main(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<10240x16xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<10240x16xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<10240x16xbf16>) -> tensor<10240x16xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xbf16>, tensor<8192x16xbf16>)
       outs(%1 : tensor<10240x16xbf16>) -> tensor<10240x16xbf16>
     return %2 : tensor<10240x16xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_10240_16_8192_f16_tA.mlir b/gemm/mlir/gemm_10240_16_8192_f16_tA.mlir
index b205b6b..78c8d49 100644
--- a/gemm/mlir/gemm_10240_16_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_10240_16_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x16xf16>) -> tensor<10240x16xf16> {
+  func.func @main(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x16xf16>) -> tensor<10240x16xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<10240x16xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<10240x16xf16>) -> tensor<10240x16xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xf16>, tensor<8192x16xf16>)
       outs(%1 : tensor<10240x16xf16>) -> tensor<10240x16xf16>
     return %2 : tensor<10240x16xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_10240_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_10240_1_8192_bf16_tA.mlir
index c8f11c7..ad452e2 100644
--- a/gemm/mlir/gemm_10240_1_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_10240_1_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<10240x1xbf16> {
+  func.func @main(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<10240x1xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<10240x1xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<10240x1xbf16>) -> tensor<10240x1xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xbf16>, tensor<8192x1xbf16>)
       outs(%1 : tensor<10240x1xbf16>) -> tensor<10240x1xbf16>
     return %2 : tensor<10240x1xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_10240_1_8192_f16_tA.mlir b/gemm/mlir/gemm_10240_1_8192_f16_tA.mlir
index 6e209c5..71b8145 100644
--- a/gemm/mlir/gemm_10240_1_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_10240_1_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x1xf16>) -> tensor<10240x1xf16> {
+  func.func @main(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x1xf16>) -> tensor<10240x1xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<10240x1xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<10240x1xf16>) -> tensor<10240x1xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xf16>, tensor<8192x1xf16>)
       outs(%1 : tensor<10240x1xf16>) -> tensor<10240x1xf16>
     return %2 : tensor<10240x1xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_10240_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_10240_2_8192_bf16_tA.mlir
index 81b7648..b0f1298 100644
--- a/gemm/mlir/gemm_10240_2_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_10240_2_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<10240x2xbf16> {
+  func.func @main(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<10240x2xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<10240x2xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<10240x2xbf16>) -> tensor<10240x2xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xbf16>, tensor<8192x2xbf16>)
       outs(%1 : tensor<10240x2xbf16>) -> tensor<10240x2xbf16>
     return %2 : tensor<10240x2xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_10240_2_8192_f16_tA.mlir b/gemm/mlir/gemm_10240_2_8192_f16_tA.mlir
index 356bbee..273354c 100644
--- a/gemm/mlir/gemm_10240_2_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_10240_2_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x2xf16>) -> tensor<10240x2xf16> {
+  func.func @main(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x2xf16>) -> tensor<10240x2xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<10240x2xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<10240x2xf16>) -> tensor<10240x2xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xf16>, tensor<8192x2xf16>)
       outs(%1 : tensor<10240x2xf16>) -> tensor<10240x2xf16>
     return %2 : tensor<10240x2xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_10240_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_10240_32_8192_bf16_tA.mlir
index cf172cd..f1ec0ed 100644
--- a/gemm/mlir/gemm_10240_32_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_10240_32_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<10240x32xbf16> {
+  func.func @main(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<10240x32xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<10240x32xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<10240x32xbf16>) -> tensor<10240x32xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xbf16>, tensor<8192x32xbf16>)
       outs(%1 : tensor<10240x32xbf16>) -> tensor<10240x32xbf16>
     return %2 : tensor<10240x32xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_10240_32_8192_f16_tA.mlir b/gemm/mlir/gemm_10240_32_8192_f16_tA.mlir
index 2b86e9d..3a3e10a 100644
--- a/gemm/mlir/gemm_10240_32_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_10240_32_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x32xf16>) -> tensor<10240x32xf16> {
+  func.func @main(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x32xf16>) -> tensor<10240x32xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<10240x32xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<10240x32xf16>) -> tensor<10240x32xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xf16>, tensor<8192x32xf16>)
       outs(%1 : tensor<10240x32xf16>) -> tensor<10240x32xf16>
     return %2 : tensor<10240x32xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_10240_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_10240_4_8192_bf16_tA.mlir
index 5190491..2b73883 100644
--- a/gemm/mlir/gemm_10240_4_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_10240_4_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<10240x4xbf16> {
+  func.func @main(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<10240x4xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<10240x4xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<10240x4xbf16>) -> tensor<10240x4xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xbf16>, tensor<8192x4xbf16>)
       outs(%1 : tensor<10240x4xbf16>) -> tensor<10240x4xbf16>
     return %2 : tensor<10240x4xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_10240_4_8192_f16_tA.mlir b/gemm/mlir/gemm_10240_4_8192_f16_tA.mlir
index e220fe4..2a97ec8 100644
--- a/gemm/mlir/gemm_10240_4_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_10240_4_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x4xf16>) -> tensor<10240x4xf16> {
+  func.func @main(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x4xf16>) -> tensor<10240x4xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<10240x4xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<10240x4xf16>) -> tensor<10240x4xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xf16>, tensor<8192x4xf16>)
       outs(%1 : tensor<10240x4xf16>) -> tensor<10240x4xf16>
     return %2 : tensor<10240x4xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_10240_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_10240_8_8192_bf16_tA.mlir
index 7d7d21f..a5c4f70 100644
--- a/gemm/mlir/gemm_10240_8_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_10240_8_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<10240x8xbf16> {
+  func.func @main(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<10240x8xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<10240x8xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<10240x8xbf16>) -> tensor<10240x8xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xbf16>, tensor<8192x8xbf16>)
       outs(%1 : tensor<10240x8xbf16>) -> tensor<10240x8xbf16>
     return %2 : tensor<10240x8xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_10240_8_8192_f16_tA.mlir b/gemm/mlir/gemm_10240_8_8192_f16_tA.mlir
index 774eb17..96ca8f3 100644
--- a/gemm/mlir/gemm_10240_8_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_10240_8_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x8xf16>) -> tensor<10240x8xf16> {
+  func.func @main(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x8xf16>) -> tensor<10240x8xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<10240x8xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<10240x8xf16>) -> tensor<10240x8xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xf16>, tensor<8192x8xf16>)
       outs(%1 : tensor<10240x8xf16>) -> tensor<10240x8xf16>
     return %2 : tensor<10240x8xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_1280_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_1280_16_8192_bf16_tA.mlir
index cce0498..3baa555 100644
--- a/gemm/mlir/gemm_1280_16_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_1280_16_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<1280x16xbf16> {
+  func.func @main(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<1280x16xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<1280x16xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1280x16xbf16>) -> tensor<1280x16xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xbf16>, tensor<8192x16xbf16>)
       outs(%1 : tensor<1280x16xbf16>) -> tensor<1280x16xbf16>
     return %2 : tensor<1280x16xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_1280_16_8192_f16_tA.mlir b/gemm/mlir/gemm_1280_16_8192_f16_tA.mlir
index a9bcd82..3fe4759 100644
--- a/gemm/mlir/gemm_1280_16_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_1280_16_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x16xf16>) -> tensor<1280x16xf16> {
+  func.func @main(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x16xf16>) -> tensor<1280x16xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<1280x16xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1280x16xf16>) -> tensor<1280x16xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xf16>, tensor<8192x16xf16>)
       outs(%1 : tensor<1280x16xf16>) -> tensor<1280x16xf16>
     return %2 : tensor<1280x16xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_1280_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_1280_1_8192_bf16_tA.mlir
index 13eb35d..3d2ccc5 100644
--- a/gemm/mlir/gemm_1280_1_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_1280_1_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<1280x1xbf16> {
+  func.func @main(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<1280x1xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<1280x1xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1280x1xbf16>) -> tensor<1280x1xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xbf16>, tensor<8192x1xbf16>)
       outs(%1 : tensor<1280x1xbf16>) -> tensor<1280x1xbf16>
     return %2 : tensor<1280x1xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_1280_1_8192_f16_tA.mlir b/gemm/mlir/gemm_1280_1_8192_f16_tA.mlir
index 3c68737..b723290 100644
--- a/gemm/mlir/gemm_1280_1_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_1280_1_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x1xf16>) -> tensor<1280x1xf16> {
+  func.func @main(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x1xf16>) -> tensor<1280x1xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<1280x1xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1280x1xf16>) -> tensor<1280x1xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xf16>, tensor<8192x1xf16>)
       outs(%1 : tensor<1280x1xf16>) -> tensor<1280x1xf16>
     return %2 : tensor<1280x1xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_1280_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_1280_2_8192_bf16_tA.mlir
index 081118a..3f23515 100644
--- a/gemm/mlir/gemm_1280_2_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_1280_2_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<1280x2xbf16> {
+  func.func @main(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<1280x2xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<1280x2xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1280x2xbf16>) -> tensor<1280x2xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xbf16>, tensor<8192x2xbf16>)
       outs(%1 : tensor<1280x2xbf16>) -> tensor<1280x2xbf16>
     return %2 : tensor<1280x2xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_1280_2_8192_f16_tA.mlir b/gemm/mlir/gemm_1280_2_8192_f16_tA.mlir
index f111865..32fdd34 100644
--- a/gemm/mlir/gemm_1280_2_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_1280_2_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x2xf16>) -> tensor<1280x2xf16> {
+  func.func @main(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x2xf16>) -> tensor<1280x2xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<1280x2xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1280x2xf16>) -> tensor<1280x2xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xf16>, tensor<8192x2xf16>)
       outs(%1 : tensor<1280x2xf16>) -> tensor<1280x2xf16>
     return %2 : tensor<1280x2xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_1280_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_1280_32_8192_bf16_tA.mlir
index e0cd5f1..e9bf063 100644
--- a/gemm/mlir/gemm_1280_32_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_1280_32_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<1280x32xbf16> {
+  func.func @main(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<1280x32xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<1280x32xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1280x32xbf16>) -> tensor<1280x32xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xbf16>, tensor<8192x32xbf16>)
       outs(%1 : tensor<1280x32xbf16>) -> tensor<1280x32xbf16>
     return %2 : tensor<1280x32xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_1280_32_8192_f16_tA.mlir b/gemm/mlir/gemm_1280_32_8192_f16_tA.mlir
index 014bc6e..faf8f1a 100644
--- a/gemm/mlir/gemm_1280_32_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_1280_32_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x32xf16>) -> tensor<1280x32xf16> {
+  func.func @main(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x32xf16>) -> tensor<1280x32xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<1280x32xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1280x32xf16>) -> tensor<1280x32xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xf16>, tensor<8192x32xf16>)
       outs(%1 : tensor<1280x32xf16>) -> tensor<1280x32xf16>
     return %2 : tensor<1280x32xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_1280_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_1280_4_8192_bf16_tA.mlir
index 6f92786..d844019 100644
--- a/gemm/mlir/gemm_1280_4_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_1280_4_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<1280x4xbf16> {
+  func.func @main(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<1280x4xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<1280x4xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1280x4xbf16>) -> tensor<1280x4xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xbf16>, tensor<8192x4xbf16>)
       outs(%1 : tensor<1280x4xbf16>) -> tensor<1280x4xbf16>
     return %2 : tensor<1280x4xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_1280_4_8192_f16_tA.mlir b/gemm/mlir/gemm_1280_4_8192_f16_tA.mlir
index 9f50653..f7ead50 100644
--- a/gemm/mlir/gemm_1280_4_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_1280_4_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x4xf16>) -> tensor<1280x4xf16> {
+  func.func @main(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x4xf16>) -> tensor<1280x4xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<1280x4xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1280x4xf16>) -> tensor<1280x4xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xf16>, tensor<8192x4xf16>)
       outs(%1 : tensor<1280x4xf16>) -> tensor<1280x4xf16>
     return %2 : tensor<1280x4xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_1280_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_1280_8_8192_bf16_tA.mlir
index 8654770..8f2da95 100644
--- a/gemm/mlir/gemm_1280_8_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_1280_8_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<1280x8xbf16> {
+  func.func @main(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<1280x8xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<1280x8xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1280x8xbf16>) -> tensor<1280x8xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xbf16>, tensor<8192x8xbf16>)
       outs(%1 : tensor<1280x8xbf16>) -> tensor<1280x8xbf16>
     return %2 : tensor<1280x8xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_1280_8_8192_f16_tA.mlir b/gemm/mlir/gemm_1280_8_8192_f16_tA.mlir
index 4a87cab..4c96f74 100644
--- a/gemm/mlir/gemm_1280_8_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_1280_8_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x8xf16>) -> tensor<1280x8xf16> {
+  func.func @main(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x8xf16>) -> tensor<1280x8xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<1280x8xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1280x8xf16>) -> tensor<1280x8xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xf16>, tensor<8192x8xf16>)
       outs(%1 : tensor<1280x8xf16>) -> tensor<1280x8xf16>
     return %2 : tensor<1280x8xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_128_1280_2048_bf16.mlir b/gemm/mlir/gemm_128_1280_2048_bf16.mlir
index f113fc4..c758c9d 100644
--- a/gemm/mlir/gemm_128_1280_2048_bf16.mlir
+++ b/gemm/mlir/gemm_128_1280_2048_bf16.mlir
@@ -1,9 +1,9 @@
 module {
-  func.func @main_0(%arg0: tensor<128x2048xbf16>, %arg1: tensor<2048x1280xbf16>) -> tensor<128x1280xbf16> {
+  func.func @main(%arg0: tensor<128x2048xbf16>, %arg1: tensor<2048x1280xbf16>) -> tensor<128x1280xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<128x1280xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<128x1280xbf16>) -> tensor<128x1280xbf16>
     %2 = linalg.matmul ins(%arg0, %arg1 : tensor<128x2048xbf16>, tensor<2048x1280xbf16>) outs(%1 : tensor<128x1280xbf16>) -> tensor<128x1280xbf16>
     return %2 : tensor<128x1280xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_128_1280_2048_bf16_tA.mlir b/gemm/mlir/gemm_128_1280_2048_bf16_tA.mlir
index 5880178..0cb012c 100644
--- a/gemm/mlir/gemm_128_1280_2048_bf16_tA.mlir
+++ b/gemm/mlir/gemm_128_1280_2048_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<2048x128xbf16>, %arg1: tensor<2048x1280xbf16>) -> tensor<128x1280xbf16> {
+  func.func @main(%arg0: tensor<2048x128xbf16>, %arg1: tensor<2048x1280xbf16>) -> tensor<128x1280xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<128x1280xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<128x1280xbf16>) -> tensor<128x1280xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x128xbf16>, tensor<2048x1280xbf16>)
       outs(%1 : tensor<128x1280xbf16>) -> tensor<128x1280xbf16>
     return %2 : tensor<128x1280xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_128_1280_2048_bf16_tB.mlir b/gemm/mlir/gemm_128_1280_2048_bf16_tB.mlir
index 4163d4c..32f5e6f 100644
--- a/gemm/mlir/gemm_128_1280_2048_bf16_tB.mlir
+++ b/gemm/mlir/gemm_128_1280_2048_bf16_tB.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<128x2048xbf16>, %arg1: tensor<1280x2048xbf16>) -> tensor<128x1280xbf16> {
+  func.func @main(%arg0: tensor<128x2048xbf16>, %arg1: tensor<1280x2048xbf16>) -> tensor<128x1280xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<128x1280xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<128x1280xbf16>) -> tensor<128x1280xbf16>
     %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<128x2048xbf16>, tensor<1280x2048xbf16>)
       outs(%1 : tensor<128x1280xbf16>) -> tensor<128x1280xbf16>
     return %2 : tensor<128x1280xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_128_1280_2048_f16.mlir b/gemm/mlir/gemm_128_1280_2048_f16.mlir
index 39e0785..84ea04a 100644
--- a/gemm/mlir/gemm_128_1280_2048_f16.mlir
+++ b/gemm/mlir/gemm_128_1280_2048_f16.mlir
@@ -1,9 +1,9 @@
 module {
-  func.func @main_0(%arg0: tensor<128x2048xf16>, %arg1: tensor<2048x1280xf16>) -> tensor<128x1280xf16> {
+  func.func @main(%arg0: tensor<128x2048xf16>, %arg1: tensor<2048x1280xf16>) -> tensor<128x1280xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<128x1280xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<128x1280xf16>) -> tensor<128x1280xf16>
     %2 = linalg.matmul ins(%arg0, %arg1 : tensor<128x2048xf16>, tensor<2048x1280xf16>) outs(%1 : tensor<128x1280xf16>) -> tensor<128x1280xf16>
     return %2 : tensor<128x1280xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_128_1280_2048_f16_tA.mlir b/gemm/mlir/gemm_128_1280_2048_f16_tA.mlir
index f6328f9..45cda80 100644
--- a/gemm/mlir/gemm_128_1280_2048_f16_tA.mlir
+++ b/gemm/mlir/gemm_128_1280_2048_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<2048x128xf16>, %arg1: tensor<2048x1280xf16>) -> tensor<128x1280xf16> {
+  func.func @main(%arg0: tensor<2048x128xf16>, %arg1: tensor<2048x1280xf16>) -> tensor<128x1280xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<128x1280xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<128x1280xf16>) -> tensor<128x1280xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x128xf16>, tensor<2048x1280xf16>)
       outs(%1 : tensor<128x1280xf16>) -> tensor<128x1280xf16>
     return %2 : tensor<128x1280xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_128_1280_2048_f16_tB.mlir b/gemm/mlir/gemm_128_1280_2048_f16_tB.mlir
index de333b7..408620b 100644
--- a/gemm/mlir/gemm_128_1280_2048_f16_tB.mlir
+++ b/gemm/mlir/gemm_128_1280_2048_f16_tB.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<128x2048xf16>, %arg1: tensor<1280x2048xf16>) -> tensor<128x1280xf16> {
+  func.func @main(%arg0: tensor<128x2048xf16>, %arg1: tensor<1280x2048xf16>) -> tensor<128x1280xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<128x1280xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<128x1280xf16>) -> tensor<128x1280xf16>
     %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<128x2048xf16>, tensor<1280x2048xf16>)
       outs(%1 : tensor<128x1280xf16>) -> tensor<128x1280xf16>
     return %2 : tensor<128x1280xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_13824_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_13824_16_5120_bf16_tA.mlir
index 5587c3e..bf06e53 100644
--- a/gemm/mlir/gemm_13824_16_5120_bf16_tA.mlir
+++ b/gemm/mlir/gemm_13824_16_5120_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x13824xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<13824x16xbf16> {
+  func.func @main(%arg0: tensor<5120x13824xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<13824x16xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<13824x16xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<13824x16xbf16>) -> tensor<13824x16xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xbf16>, tensor<5120x16xbf16>)
       outs(%1 : tensor<13824x16xbf16>) -> tensor<13824x16xbf16>
     return %2 : tensor<13824x16xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_13824_16_5120_f16_tA.mlir b/gemm/mlir/gemm_13824_16_5120_f16_tA.mlir
index 6904c26..6820445 100644
--- a/gemm/mlir/gemm_13824_16_5120_f16_tA.mlir
+++ b/gemm/mlir/gemm_13824_16_5120_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x13824xf16>, %arg1: tensor<5120x16xf16>) -> tensor<13824x16xf16> {
+  func.func @main(%arg0: tensor<5120x13824xf16>, %arg1: tensor<5120x16xf16>) -> tensor<13824x16xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<13824x16xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<13824x16xf16>) -> tensor<13824x16xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xf16>, tensor<5120x16xf16>)
       outs(%1 : tensor<13824x16xf16>) -> tensor<13824x16xf16>
     return %2 : tensor<13824x16xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_13824_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_13824_1_5120_bf16_tA.mlir
index 67242a6..bddc513 100644
--- a/gemm/mlir/gemm_13824_1_5120_bf16_tA.mlir
+++ b/gemm/mlir/gemm_13824_1_5120_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x13824xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<13824x1xbf16> {
+  func.func @main(%arg0: tensor<5120x13824xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<13824x1xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<13824x1xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<13824x1xbf16>) -> tensor<13824x1xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xbf16>, tensor<5120x1xbf16>)
       outs(%1 : tensor<13824x1xbf16>) -> tensor<13824x1xbf16>
     return %2 : tensor<13824x1xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_13824_1_5120_f16_tA.mlir b/gemm/mlir/gemm_13824_1_5120_f16_tA.mlir
index 6c8d6f6..de51690 100644
--- a/gemm/mlir/gemm_13824_1_5120_f16_tA.mlir
+++ b/gemm/mlir/gemm_13824_1_5120_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x13824xf16>, %arg1: tensor<5120x1xf16>) -> tensor<13824x1xf16> {
+  func.func @main(%arg0: tensor<5120x13824xf16>, %arg1: tensor<5120x1xf16>) -> tensor<13824x1xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<13824x1xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<13824x1xf16>) -> tensor<13824x1xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xf16>, tensor<5120x1xf16>)
       outs(%1 : tensor<13824x1xf16>) -> tensor<13824x1xf16>
     return %2 : tensor<13824x1xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_13824_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_13824_2_5120_bf16_tA.mlir
index 4887c3c..b73977b 100644
--- a/gemm/mlir/gemm_13824_2_5120_bf16_tA.mlir
+++ b/gemm/mlir/gemm_13824_2_5120_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x13824xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<13824x2xbf16> {
+  func.func @main(%arg0: tensor<5120x13824xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<13824x2xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<13824x2xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<13824x2xbf16>) -> tensor<13824x2xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xbf16>, tensor<5120x2xbf16>)
       outs(%1 : tensor<13824x2xbf16>) -> tensor<13824x2xbf16>
     return %2 : tensor<13824x2xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_13824_2_5120_f16_tA.mlir b/gemm/mlir/gemm_13824_2_5120_f16_tA.mlir
index ca0a0c3..b763847 100644
--- a/gemm/mlir/gemm_13824_2_5120_f16_tA.mlir
+++ b/gemm/mlir/gemm_13824_2_5120_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x13824xf16>, %arg1: tensor<5120x2xf16>) -> tensor<13824x2xf16> {
+  func.func @main(%arg0: tensor<5120x13824xf16>, %arg1: tensor<5120x2xf16>) -> tensor<13824x2xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<13824x2xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<13824x2xf16>) -> tensor<13824x2xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xf16>, tensor<5120x2xf16>)
       outs(%1 : tensor<13824x2xf16>) -> tensor<13824x2xf16>
     return %2 : tensor<13824x2xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_13824_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_13824_32_5120_bf16_tA.mlir
index 0df7805..3be8ecf 100644
--- a/gemm/mlir/gemm_13824_32_5120_bf16_tA.mlir
+++ b/gemm/mlir/gemm_13824_32_5120_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x13824xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<13824x32xbf16> {
+  func.func @main(%arg0: tensor<5120x13824xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<13824x32xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<13824x32xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<13824x32xbf16>) -> tensor<13824x32xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xbf16>, tensor<5120x32xbf16>)
       outs(%1 : tensor<13824x32xbf16>) -> tensor<13824x32xbf16>
     return %2 : tensor<13824x32xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_13824_32_5120_f16_tA.mlir b/gemm/mlir/gemm_13824_32_5120_f16_tA.mlir
index 4b59dd3..2069eef 100644
--- a/gemm/mlir/gemm_13824_32_5120_f16_tA.mlir
+++ b/gemm/mlir/gemm_13824_32_5120_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x13824xf16>, %arg1: tensor<5120x32xf16>) -> tensor<13824x32xf16> {
+  func.func @main(%arg0: tensor<5120x13824xf16>, %arg1: tensor<5120x32xf16>) -> tensor<13824x32xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<13824x32xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<13824x32xf16>) -> tensor<13824x32xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xf16>, tensor<5120x32xf16>)
       outs(%1 : tensor<13824x32xf16>) -> tensor<13824x32xf16>
     return %2 : tensor<13824x32xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_13824_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_13824_4_5120_bf16_tA.mlir
index 552d8df..3ac974f 100644
--- a/gemm/mlir/gemm_13824_4_5120_bf16_tA.mlir
+++ b/gemm/mlir/gemm_13824_4_5120_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x13824xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<13824x4xbf16> {
+  func.func @main(%arg0: tensor<5120x13824xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<13824x4xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<13824x4xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<13824x4xbf16>) -> tensor<13824x4xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xbf16>, tensor<5120x4xbf16>)
       outs(%1 : tensor<13824x4xbf16>) -> tensor<13824x4xbf16>
     return %2 : tensor<13824x4xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_13824_4_5120_f16_tA.mlir b/gemm/mlir/gemm_13824_4_5120_f16_tA.mlir
index 7b8d299..2d2dbaf 100644
--- a/gemm/mlir/gemm_13824_4_5120_f16_tA.mlir
+++ b/gemm/mlir/gemm_13824_4_5120_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x13824xf16>, %arg1: tensor<5120x4xf16>) -> tensor<13824x4xf16> {
+  func.func @main(%arg0: tensor<5120x13824xf16>, %arg1: tensor<5120x4xf16>) -> tensor<13824x4xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<13824x4xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<13824x4xf16>) -> tensor<13824x4xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xf16>, tensor<5120x4xf16>)
       outs(%1 : tensor<13824x4xf16>) -> tensor<13824x4xf16>
     return %2 : tensor<13824x4xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_13824_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_13824_8_5120_bf16_tA.mlir
index 4c0d745..30c7d55 100644
--- a/gemm/mlir/gemm_13824_8_5120_bf16_tA.mlir
+++ b/gemm/mlir/gemm_13824_8_5120_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x13824xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<13824x8xbf16> {
+  func.func @main(%arg0: tensor<5120x13824xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<13824x8xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<13824x8xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<13824x8xbf16>) -> tensor<13824x8xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xbf16>, tensor<5120x8xbf16>)
       outs(%1 : tensor<13824x8xbf16>) -> tensor<13824x8xbf16>
     return %2 : tensor<13824x8xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_13824_8_5120_f16_tA.mlir b/gemm/mlir/gemm_13824_8_5120_f16_tA.mlir
index 2109f18..96d5e3c 100644
--- a/gemm/mlir/gemm_13824_8_5120_f16_tA.mlir
+++ b/gemm/mlir/gemm_13824_8_5120_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x13824xf16>, %arg1: tensor<5120x8xf16>) -> tensor<13824x8xf16> {
+  func.func @main(%arg0: tensor<5120x13824xf16>, %arg1: tensor<5120x8xf16>) -> tensor<13824x8xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<13824x8xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<13824x8xf16>) -> tensor<13824x8xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xf16>, tensor<5120x8xf16>)
       outs(%1 : tensor<13824x8xf16>) -> tensor<13824x8xf16>
     return %2 : tensor<13824x8xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_14336_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_14336_16_8192_bf16_tA.mlir
index 9fdd075..ebb53bd 100644
--- a/gemm/mlir/gemm_14336_16_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_14336_16_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x14336xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<14336x16xbf16> {
+  func.func @main(%arg0: tensor<8192x14336xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<14336x16xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<14336x16xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<14336x16xbf16>) -> tensor<14336x16xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xbf16>, tensor<8192x16xbf16>)
       outs(%1 : tensor<14336x16xbf16>) -> tensor<14336x16xbf16>
     return %2 : tensor<14336x16xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_14336_16_8192_f16_tA.mlir b/gemm/mlir/gemm_14336_16_8192_f16_tA.mlir
index a5627cb..1c62bae 100644
--- a/gemm/mlir/gemm_14336_16_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_14336_16_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x14336xf16>, %arg1: tensor<8192x16xf16>) -> tensor<14336x16xf16> {
+  func.func @main(%arg0: tensor<8192x14336xf16>, %arg1: tensor<8192x16xf16>) -> tensor<14336x16xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<14336x16xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<14336x16xf16>) -> tensor<14336x16xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xf16>, tensor<8192x16xf16>)
       outs(%1 : tensor<14336x16xf16>) -> tensor<14336x16xf16>
     return %2 : tensor<14336x16xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_14336_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_14336_1_8192_bf16_tA.mlir
index c4eddd3..12e1750 100644
--- a/gemm/mlir/gemm_14336_1_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_14336_1_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x14336xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<14336x1xbf16> {
+  func.func @main(%arg0: tensor<8192x14336xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<14336x1xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<14336x1xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<14336x1xbf16>) -> tensor<14336x1xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xbf16>, tensor<8192x1xbf16>)
       outs(%1 : tensor<14336x1xbf16>) -> tensor<14336x1xbf16>
     return %2 : tensor<14336x1xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_14336_1_8192_f16_tA.mlir b/gemm/mlir/gemm_14336_1_8192_f16_tA.mlir
index f6623f1..b3cee07 100644
--- a/gemm/mlir/gemm_14336_1_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_14336_1_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x14336xf16>, %arg1: tensor<8192x1xf16>) -> tensor<14336x1xf16> {
+  func.func @main(%arg0: tensor<8192x14336xf16>, %arg1: tensor<8192x1xf16>) -> tensor<14336x1xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<14336x1xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<14336x1xf16>) -> tensor<14336x1xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xf16>, tensor<8192x1xf16>)
       outs(%1 : tensor<14336x1xf16>) -> tensor<14336x1xf16>
     return %2 : tensor<14336x1xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_14336_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_14336_2_8192_bf16_tA.mlir
index 31ff061..ce3f701 100644
--- a/gemm/mlir/gemm_14336_2_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_14336_2_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x14336xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<14336x2xbf16> {
+  func.func @main(%arg0: tensor<8192x14336xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<14336x2xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<14336x2xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<14336x2xbf16>) -> tensor<14336x2xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xbf16>, tensor<8192x2xbf16>)
       outs(%1 : tensor<14336x2xbf16>) -> tensor<14336x2xbf16>
     return %2 : tensor<14336x2xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_14336_2_8192_f16_tA.mlir b/gemm/mlir/gemm_14336_2_8192_f16_tA.mlir
index 19b60e7..100d62f 100644
--- a/gemm/mlir/gemm_14336_2_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_14336_2_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x14336xf16>, %arg1: tensor<8192x2xf16>) -> tensor<14336x2xf16> {
+  func.func @main(%arg0: tensor<8192x14336xf16>, %arg1: tensor<8192x2xf16>) -> tensor<14336x2xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<14336x2xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<14336x2xf16>) -> tensor<14336x2xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xf16>, tensor<8192x2xf16>)
       outs(%1 : tensor<14336x2xf16>) -> tensor<14336x2xf16>
     return %2 : tensor<14336x2xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_14336_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_14336_32_8192_bf16_tA.mlir
index 5233c40..39a012e 100644
--- a/gemm/mlir/gemm_14336_32_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_14336_32_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x14336xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<14336x32xbf16> {
+  func.func @main(%arg0: tensor<8192x14336xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<14336x32xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<14336x32xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<14336x32xbf16>) -> tensor<14336x32xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xbf16>, tensor<8192x32xbf16>)
       outs(%1 : tensor<14336x32xbf16>) -> tensor<14336x32xbf16>
     return %2 : tensor<14336x32xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_14336_32_8192_f16_tA.mlir b/gemm/mlir/gemm_14336_32_8192_f16_tA.mlir
index 79dc048..6457a07 100644
--- a/gemm/mlir/gemm_14336_32_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_14336_32_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x14336xf16>, %arg1: tensor<8192x32xf16>) -> tensor<14336x32xf16> {
+  func.func @main(%arg0: tensor<8192x14336xf16>, %arg1: tensor<8192x32xf16>) -> tensor<14336x32xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<14336x32xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<14336x32xf16>) -> tensor<14336x32xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xf16>, tensor<8192x32xf16>)
       outs(%1 : tensor<14336x32xf16>) -> tensor<14336x32xf16>
     return %2 : tensor<14336x32xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_14336_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_14336_4_8192_bf16_tA.mlir
index da10c2b..99bcffb 100644
--- a/gemm/mlir/gemm_14336_4_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_14336_4_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x14336xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<14336x4xbf16> {
+  func.func @main(%arg0: tensor<8192x14336xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<14336x4xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<14336x4xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<14336x4xbf16>) -> tensor<14336x4xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xbf16>, tensor<8192x4xbf16>)
       outs(%1 : tensor<14336x4xbf16>) -> tensor<14336x4xbf16>
     return %2 : tensor<14336x4xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_14336_4_8192_f16_tA.mlir b/gemm/mlir/gemm_14336_4_8192_f16_tA.mlir
index f1b0d2e..6c93d68 100644
--- a/gemm/mlir/gemm_14336_4_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_14336_4_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x14336xf16>, %arg1: tensor<8192x4xf16>) -> tensor<14336x4xf16> {
+  func.func @main(%arg0: tensor<8192x14336xf16>, %arg1: tensor<8192x4xf16>) -> tensor<14336x4xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<14336x4xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<14336x4xf16>) -> tensor<14336x4xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xf16>, tensor<8192x4xf16>)
       outs(%1 : tensor<14336x4xf16>) -> tensor<14336x4xf16>
     return %2 : tensor<14336x4xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_14336_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_14336_8_8192_bf16_tA.mlir
index b2ee9d2..22146cb 100644
--- a/gemm/mlir/gemm_14336_8_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_14336_8_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x14336xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<14336x8xbf16> {
+  func.func @main(%arg0: tensor<8192x14336xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<14336x8xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<14336x8xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<14336x8xbf16>) -> tensor<14336x8xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xbf16>, tensor<8192x8xbf16>)
       outs(%1 : tensor<14336x8xbf16>) -> tensor<14336x8xbf16>
     return %2 : tensor<14336x8xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_14336_8_8192_f16_tA.mlir b/gemm/mlir/gemm_14336_8_8192_f16_tA.mlir
index 8bcf588..452edf9 100644
--- a/gemm/mlir/gemm_14336_8_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_14336_8_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x14336xf16>, %arg1: tensor<8192x8xf16>) -> tensor<14336x8xf16> {
+  func.func @main(%arg0: tensor<8192x14336xf16>, %arg1: tensor<8192x8xf16>) -> tensor<14336x8xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<14336x8xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<14336x8xf16>) -> tensor<14336x8xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xf16>, tensor<8192x8xf16>)
       outs(%1 : tensor<14336x8xf16>) -> tensor<14336x8xf16>
     return %2 : tensor<14336x8xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_15360_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_15360_16_5120_bf16_tA.mlir
index 60c13e0..da57d0c 100644
--- a/gemm/mlir/gemm_15360_16_5120_bf16_tA.mlir
+++ b/gemm/mlir/gemm_15360_16_5120_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x15360xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<15360x16xbf16> {
+  func.func @main(%arg0: tensor<5120x15360xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<15360x16xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<15360x16xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<15360x16xbf16>) -> tensor<15360x16xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xbf16>, tensor<5120x16xbf16>)
       outs(%1 : tensor<15360x16xbf16>) -> tensor<15360x16xbf16>
     return %2 : tensor<15360x16xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_15360_16_5120_f16_tA.mlir b/gemm/mlir/gemm_15360_16_5120_f16_tA.mlir
index 8748c90..b15d265 100644
--- a/gemm/mlir/gemm_15360_16_5120_f16_tA.mlir
+++ b/gemm/mlir/gemm_15360_16_5120_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x15360xf16>, %arg1: tensor<5120x16xf16>) -> tensor<15360x16xf16> {
+  func.func @main(%arg0: tensor<5120x15360xf16>, %arg1: tensor<5120x16xf16>) -> tensor<15360x16xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<15360x16xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<15360x16xf16>) -> tensor<15360x16xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xf16>, tensor<5120x16xf16>)
       outs(%1 : tensor<15360x16xf16>) -> tensor<15360x16xf16>
     return %2 : tensor<15360x16xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_15360_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_15360_1_5120_bf16_tA.mlir
index 0b1567d..b0d9c92 100644
--- a/gemm/mlir/gemm_15360_1_5120_bf16_tA.mlir
+++ b/gemm/mlir/gemm_15360_1_5120_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x15360xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<15360x1xbf16> {
+  func.func @main(%arg0: tensor<5120x15360xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<15360x1xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<15360x1xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<15360x1xbf16>) -> tensor<15360x1xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xbf16>, tensor<5120x1xbf16>)
       outs(%1 :
tensor<15360x1xbf16>) -> tensor<15360x1xbf16> return %2 : tensor<15360x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_15360_1_5120_f16_tA.mlir b/gemm/mlir/gemm_15360_1_5120_f16_tA.mlir index e985d8a..d458ee9 100644 --- a/gemm/mlir/gemm_15360_1_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_15360_1_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x15360xf16>, %arg1: tensor<5120x1xf16>) -> tensor<15360x1xf16> { + func.func @main(%arg0: tensor<5120x15360xf16>, %arg1: tensor<5120x1xf16>) -> tensor<15360x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<15360x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<15360x1xf16>) -> tensor<15360x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xf16>, tensor<5120x1xf16>) outs(%1 : tensor<15360x1xf16>) -> tensor<15360x1xf16> return %2 : tensor<15360x1xf16> } -} +} diff --git a/gemm/mlir/gemm_15360_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_15360_2_5120_bf16_tA.mlir index 5f3266e..032eae5 100644 --- a/gemm/mlir/gemm_15360_2_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_15360_2_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x15360xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<15360x2xbf16> { + func.func @main(%arg0: tensor<5120x15360xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<15360x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<15360x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<15360x2xbf16>) -> tensor<15360x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<15360x2xbf16>) -> tensor<15360x2xbf16> return %2 : tensor<15360x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_15360_2_5120_f16_tA.mlir b/gemm/mlir/gemm_15360_2_5120_f16_tA.mlir index d4dbe8b..18a0d50 100644 --- a/gemm/mlir/gemm_15360_2_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_15360_2_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x15360xf16>, %arg1: tensor<5120x2xf16>) -> tensor<15360x2xf16> { + func.func @main(%arg0: tensor<5120x15360xf16>, %arg1: tensor<5120x2xf16>) -> tensor<15360x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<15360x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<15360x2xf16>) -> tensor<15360x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xf16>, tensor<5120x2xf16>) outs(%1 : tensor<15360x2xf16>) -> tensor<15360x2xf16> return %2 : tensor<15360x2xf16> } -} +} diff --git a/gemm/mlir/gemm_15360_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_15360_32_5120_bf16_tA.mlir index a3bd858..8f7fa25 100644 --- a/gemm/mlir/gemm_15360_32_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_15360_32_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x15360xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<15360x32xbf16> { + func.func @main(%arg0: tensor<5120x15360xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<15360x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<15360x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<15360x32xbf16>) -> tensor<15360x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<15360x32xbf16>) -> tensor<15360x32xbf16> return %2 : tensor<15360x32xbf16> } -} +} diff --git a/gemm/mlir/gemm_15360_32_5120_f16_tA.mlir b/gemm/mlir/gemm_15360_32_5120_f16_tA.mlir index 1b7f012..fc86593 100644 --- a/gemm/mlir/gemm_15360_32_5120_f16_tA.mlir +++ 
b/gemm/mlir/gemm_15360_32_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x15360xf16>, %arg1: tensor<5120x32xf16>) -> tensor<15360x32xf16> { + func.func @main(%arg0: tensor<5120x15360xf16>, %arg1: tensor<5120x32xf16>) -> tensor<15360x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<15360x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<15360x32xf16>) -> tensor<15360x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xf16>, tensor<5120x32xf16>) outs(%1 : tensor<15360x32xf16>) -> tensor<15360x32xf16> return %2 : tensor<15360x32xf16> } -} +} diff --git a/gemm/mlir/gemm_15360_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_15360_4_5120_bf16_tA.mlir index f95cedd..f388bfc 100644 --- a/gemm/mlir/gemm_15360_4_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_15360_4_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x15360xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<15360x4xbf16> { + func.func @main(%arg0: tensor<5120x15360xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<15360x4xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<15360x4xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<15360x4xbf16>) -> tensor<15360x4xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<15360x4xbf16>) -> tensor<15360x4xbf16> return %2 : tensor<15360x4xbf16> } -} +} diff --git a/gemm/mlir/gemm_15360_4_5120_f16_tA.mlir b/gemm/mlir/gemm_15360_4_5120_f16_tA.mlir index 5ce3aa7..c8666aa 100644 --- a/gemm/mlir/gemm_15360_4_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_15360_4_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x15360xf16>, %arg1: tensor<5120x4xf16>) -> tensor<15360x4xf16> { + func.func @main(%arg0: tensor<5120x15360xf16>, %arg1: tensor<5120x4xf16>) -> tensor<15360x4xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<15360x4xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<15360x4xf16>) -> tensor<15360x4xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xf16>, tensor<5120x4xf16>) outs(%1 : tensor<15360x4xf16>) -> tensor<15360x4xf16> return %2 : tensor<15360x4xf16> } -} +} diff --git a/gemm/mlir/gemm_15360_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_15360_8_5120_bf16_tA.mlir index bfe420d..813f5a1 100644 --- a/gemm/mlir/gemm_15360_8_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_15360_8_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x15360xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<15360x8xbf16> { + func.func @main(%arg0: tensor<5120x15360xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<15360x8xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<15360x8xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<15360x8xbf16>) -> tensor<15360x8xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<15360x8xbf16>) -> tensor<15360x8xbf16> return %2 : tensor<15360x8xbf16> } -} +} diff --git a/gemm/mlir/gemm_15360_8_5120_f16_tA.mlir b/gemm/mlir/gemm_15360_8_5120_f16_tA.mlir index 7f0dc72..5df7526 100644 --- a/gemm/mlir/gemm_15360_8_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_15360_8_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x15360xf16>, %arg1: tensor<5120x8xf16>) -> tensor<15360x8xf16> { + func.func @main(%arg0: tensor<5120x15360xf16>, %arg1: tensor<5120x8xf16>) -> tensor<15360x8xf16> { %cst 
= arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<15360x8xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<15360x8xf16>) -> tensor<15360x8xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xf16>, tensor<5120x8xf16>) outs(%1 : tensor<15360x8xf16>) -> tensor<15360x8xf16> return %2 : tensor<15360x8xf16> } -} +} diff --git a/gemm/mlir/gemm_16000_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_16000_16_5120_bf16_tA.mlir index 9a886db..50136f8 100644 --- a/gemm/mlir/gemm_16000_16_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_16000_16_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x16000xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<16000x16xbf16> { + func.func @main(%arg0: tensor<5120x16000xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<16000x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<16000x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x16xbf16>) -> tensor<16000x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<16000x16xbf16>) -> tensor<16000x16xbf16> return %2 : tensor<16000x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_16000_16_5120_f16_tA.mlir b/gemm/mlir/gemm_16000_16_5120_f16_tA.mlir index b013989..e0ebb71 100644 --- a/gemm/mlir/gemm_16000_16_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_16000_16_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x16000xf16>, %arg1: tensor<5120x16xf16>) -> tensor<16000x16xf16> { + func.func @main(%arg0: tensor<5120x16000xf16>, %arg1: tensor<5120x16xf16>) -> tensor<16000x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<16000x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x16xf16>) -> tensor<16000x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xf16>, tensor<5120x16xf16>) outs(%1 : tensor<16000x16xf16>) -> tensor<16000x16xf16> return %2 : tensor<16000x16xf16> } -} +} diff --git a/gemm/mlir/gemm_16000_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_16000_16_8192_bf16_tA.mlir index 10ddd1d..95ae5e6 100644 --- a/gemm/mlir/gemm_16000_16_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_16000_16_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x16000xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<16000x16xbf16> { + func.func @main(%arg0: tensor<8192x16000xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<16000x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<16000x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x16xbf16>) -> tensor<16000x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x16000xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<16000x16xbf16>) -> tensor<16000x16xbf16> return %2 : tensor<16000x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_16000_16_8192_f16_tA.mlir b/gemm/mlir/gemm_16000_16_8192_f16_tA.mlir index 35b7b27..c1107cc 100644 --- a/gemm/mlir/gemm_16000_16_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_16000_16_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x16000xf16>, %arg1: tensor<8192x16xf16>) -> tensor<16000x16xf16> { + func.func @main(%arg0: tensor<8192x16000xf16>, %arg1: tensor<8192x16xf16>) -> tensor<16000x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<16000x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x16xf16>) -> tensor<16000x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : 
tensor<8192x16000xf16>, tensor<8192x16xf16>) outs(%1 : tensor<16000x16xf16>) -> tensor<16000x16xf16> return %2 : tensor<16000x16xf16> } -} +} diff --git a/gemm/mlir/gemm_16000_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_16000_1_5120_bf16_tA.mlir index 9c53fc4..d0fc2f2 100644 --- a/gemm/mlir/gemm_16000_1_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_16000_1_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x16000xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<16000x1xbf16> { + func.func @main(%arg0: tensor<5120x16000xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<16000x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<16000x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x1xbf16>) -> tensor<16000x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<16000x1xbf16>) -> tensor<16000x1xbf16> return %2 : tensor<16000x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_16000_1_5120_f16_tA.mlir b/gemm/mlir/gemm_16000_1_5120_f16_tA.mlir index 70c5c26..7182791 100644 --- a/gemm/mlir/gemm_16000_1_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_16000_1_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x16000xf16>, %arg1: tensor<5120x1xf16>) -> tensor<16000x1xf16> { + func.func @main(%arg0: tensor<5120x16000xf16>, %arg1: tensor<5120x1xf16>) -> tensor<16000x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<16000x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x1xf16>) -> tensor<16000x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xf16>, tensor<5120x1xf16>) outs(%1 : tensor<16000x1xf16>) -> tensor<16000x1xf16> return %2 : tensor<16000x1xf16> } -} +} diff --git a/gemm/mlir/gemm_16000_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_16000_1_8192_bf16_tA.mlir index 8930976..8258663 100644 --- a/gemm/mlir/gemm_16000_1_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_16000_1_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x16000xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<16000x1xbf16> { + func.func @main(%arg0: tensor<8192x16000xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<16000x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<16000x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x1xbf16>) -> tensor<16000x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x16000xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<16000x1xbf16>) -> tensor<16000x1xbf16> return %2 : tensor<16000x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_16000_1_8192_f16_tA.mlir b/gemm/mlir/gemm_16000_1_8192_f16_tA.mlir index e91b93e..8186ad5 100644 --- a/gemm/mlir/gemm_16000_1_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_16000_1_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x16000xf16>, %arg1: tensor<8192x1xf16>) -> tensor<16000x1xf16> { + func.func @main(%arg0: tensor<8192x16000xf16>, %arg1: tensor<8192x1xf16>) -> tensor<16000x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<16000x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x1xf16>) -> tensor<16000x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x16000xf16>, tensor<8192x1xf16>) outs(%1 : tensor<16000x1xf16>) -> tensor<16000x1xf16> return %2 : tensor<16000x1xf16> } -} +} diff --git a/gemm/mlir/gemm_16000_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_16000_2_5120_bf16_tA.mlir index 57d5461..11c07f2 100644 --- 
a/gemm/mlir/gemm_16000_2_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_16000_2_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x16000xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<16000x2xbf16> { + func.func @main(%arg0: tensor<5120x16000xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<16000x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<16000x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x2xbf16>) -> tensor<16000x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<16000x2xbf16>) -> tensor<16000x2xbf16> return %2 : tensor<16000x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_16000_2_5120_f16_tA.mlir b/gemm/mlir/gemm_16000_2_5120_f16_tA.mlir index 39d6ed2..3efeb6a 100644 --- a/gemm/mlir/gemm_16000_2_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_16000_2_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x16000xf16>, %arg1: tensor<5120x2xf16>) -> tensor<16000x2xf16> { + func.func @main(%arg0: tensor<5120x16000xf16>, %arg1: tensor<5120x2xf16>) -> tensor<16000x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<16000x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x2xf16>) -> tensor<16000x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xf16>, tensor<5120x2xf16>) outs(%1 : tensor<16000x2xf16>) -> tensor<16000x2xf16> return %2 : tensor<16000x2xf16> } -} +} diff --git a/gemm/mlir/gemm_16000_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_16000_2_8192_bf16_tA.mlir index ca32231..28e4d63 100644 --- a/gemm/mlir/gemm_16000_2_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_16000_2_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x16000xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<16000x2xbf16> { + func.func @main(%arg0: tensor<8192x16000xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<16000x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<16000x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x2xbf16>) -> tensor<16000x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x16000xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<16000x2xbf16>) -> tensor<16000x2xbf16> return %2 : tensor<16000x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_16000_2_8192_f16_tA.mlir b/gemm/mlir/gemm_16000_2_8192_f16_tA.mlir index c2a7e2c..8c125de 100644 --- a/gemm/mlir/gemm_16000_2_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_16000_2_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x16000xf16>, %arg1: tensor<8192x2xf16>) -> tensor<16000x2xf16> { + func.func @main(%arg0: tensor<8192x16000xf16>, %arg1: tensor<8192x2xf16>) -> tensor<16000x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<16000x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x2xf16>) -> tensor<16000x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x16000xf16>, tensor<8192x2xf16>) outs(%1 : tensor<16000x2xf16>) -> tensor<16000x2xf16> return %2 : tensor<16000x2xf16> } -} +} diff --git a/gemm/mlir/gemm_16000_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_16000_32_5120_bf16_tA.mlir index c631bc7..a47ce25 100644 --- a/gemm/mlir/gemm_16000_32_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_16000_32_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x16000xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<16000x32xbf16> { + func.func @main(%arg0: tensor<5120x16000xbf16>, %arg1: 
tensor<5120x32xbf16>) -> tensor<16000x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<16000x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x32xbf16>) -> tensor<16000x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<16000x32xbf16>) -> tensor<16000x32xbf16> return %2 : tensor<16000x32xbf16> } -} +} diff --git a/gemm/mlir/gemm_16000_32_5120_f16_tA.mlir b/gemm/mlir/gemm_16000_32_5120_f16_tA.mlir index 983b2bf..5ea27d7 100644 --- a/gemm/mlir/gemm_16000_32_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_16000_32_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x16000xf16>, %arg1: tensor<5120x32xf16>) -> tensor<16000x32xf16> { + func.func @main(%arg0: tensor<5120x16000xf16>, %arg1: tensor<5120x32xf16>) -> tensor<16000x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<16000x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x32xf16>) -> tensor<16000x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xf16>, tensor<5120x32xf16>) outs(%1 : tensor<16000x32xf16>) -> tensor<16000x32xf16> return %2 : tensor<16000x32xf16> } -} +} diff --git a/gemm/mlir/gemm_16000_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_16000_32_8192_bf16_tA.mlir index 04085df..72308e0 100644 --- a/gemm/mlir/gemm_16000_32_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_16000_32_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x16000xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<16000x32xbf16> { + func.func @main(%arg0: tensor<8192x16000xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<16000x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<16000x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x32xbf16>) -> tensor<16000x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x16000xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<16000x32xbf16>) -> tensor<16000x32xbf16> return %2 : tensor<16000x32xbf16> } -} +} diff --git a/gemm/mlir/gemm_16000_32_8192_f16_tA.mlir b/gemm/mlir/gemm_16000_32_8192_f16_tA.mlir index 274f470..e5f6d3b 100644 --- a/gemm/mlir/gemm_16000_32_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_16000_32_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x16000xf16>, %arg1: tensor<8192x32xf16>) -> tensor<16000x32xf16> { + func.func @main(%arg0: tensor<8192x16000xf16>, %arg1: tensor<8192x32xf16>) -> tensor<16000x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<16000x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x32xf16>) -> tensor<16000x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x16000xf16>, tensor<8192x32xf16>) outs(%1 : tensor<16000x32xf16>) -> tensor<16000x32xf16> return %2 : tensor<16000x32xf16> } -} +} diff --git a/gemm/mlir/gemm_16000_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_16000_4_5120_bf16_tA.mlir index 077e87c..a514a47 100644 --- a/gemm/mlir/gemm_16000_4_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_16000_4_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x16000xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<16000x4xbf16> { + func.func @main(%arg0: tensor<5120x16000xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<16000x4xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<16000x4xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x4xbf16>) -> tensor<16000x4xbf16> %2 = 
linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<16000x4xbf16>) -> tensor<16000x4xbf16> return %2 : tensor<16000x4xbf16> } -} +} diff --git a/gemm/mlir/gemm_16000_4_5120_f16_tA.mlir b/gemm/mlir/gemm_16000_4_5120_f16_tA.mlir index e30e392..1b73c07 100644 --- a/gemm/mlir/gemm_16000_4_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_16000_4_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x16000xf16>, %arg1: tensor<5120x4xf16>) -> tensor<16000x4xf16> { + func.func @main(%arg0: tensor<5120x16000xf16>, %arg1: tensor<5120x4xf16>) -> tensor<16000x4xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<16000x4xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x4xf16>) -> tensor<16000x4xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xf16>, tensor<5120x4xf16>) outs(%1 : tensor<16000x4xf16>) -> tensor<16000x4xf16> return %2 : tensor<16000x4xf16> } -} +} diff --git a/gemm/mlir/gemm_16000_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_16000_4_8192_bf16_tA.mlir index b345a5f..1de70e2 100644 --- a/gemm/mlir/gemm_16000_4_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_16000_4_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x16000xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<16000x4xbf16> { + func.func @main(%arg0: tensor<8192x16000xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<16000x4xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<16000x4xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x4xbf16>) -> tensor<16000x4xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x16000xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<16000x4xbf16>) -> tensor<16000x4xbf16> return %2 : tensor<16000x4xbf16> } -} +} diff --git a/gemm/mlir/gemm_16000_4_8192_f16_tA.mlir b/gemm/mlir/gemm_16000_4_8192_f16_tA.mlir index 8d4d4e7..a035de1 100644 --- a/gemm/mlir/gemm_16000_4_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_16000_4_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x16000xf16>, %arg1: tensor<8192x4xf16>) -> tensor<16000x4xf16> { + func.func @main(%arg0: tensor<8192x16000xf16>, %arg1: tensor<8192x4xf16>) -> tensor<16000x4xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<16000x4xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x4xf16>) -> tensor<16000x4xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x16000xf16>, tensor<8192x4xf16>) outs(%1 : tensor<16000x4xf16>) -> tensor<16000x4xf16> return %2 : tensor<16000x4xf16> } -} +} diff --git a/gemm/mlir/gemm_16000_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_16000_8_5120_bf16_tA.mlir index 43a46c9..23c98e5 100644 --- a/gemm/mlir/gemm_16000_8_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_16000_8_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x16000xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<16000x8xbf16> { + func.func @main(%arg0: tensor<5120x16000xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<16000x8xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<16000x8xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x8xbf16>) -> tensor<16000x8xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<16000x8xbf16>) -> tensor<16000x8xbf16> return %2 : tensor<16000x8xbf16> } -} +} diff --git a/gemm/mlir/gemm_16000_8_5120_f16_tA.mlir b/gemm/mlir/gemm_16000_8_5120_f16_tA.mlir index 
1eab178..25ea2f2 100644 --- a/gemm/mlir/gemm_16000_8_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_16000_8_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x16000xf16>, %arg1: tensor<5120x8xf16>) -> tensor<16000x8xf16> { + func.func @main(%arg0: tensor<5120x16000xf16>, %arg1: tensor<5120x8xf16>) -> tensor<16000x8xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<16000x8xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x8xf16>) -> tensor<16000x8xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xf16>, tensor<5120x8xf16>) outs(%1 : tensor<16000x8xf16>) -> tensor<16000x8xf16> return %2 : tensor<16000x8xf16> } -} +} diff --git a/gemm/mlir/gemm_16000_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_16000_8_8192_bf16_tA.mlir index 463e418..8b5ce5a 100644 --- a/gemm/mlir/gemm_16000_8_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_16000_8_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x16000xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<16000x8xbf16> { + func.func @main(%arg0: tensor<8192x16000xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<16000x8xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<16000x8xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x8xbf16>) -> tensor<16000x8xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x16000xbf16>, tensor<8192x8xbf16>) outs(%1 : tensor<16000x8xbf16>) -> tensor<16000x8xbf16> return %2 : tensor<16000x8xbf16> } -} +} diff --git a/gemm/mlir/gemm_16000_8_8192_f16_tA.mlir b/gemm/mlir/gemm_16000_8_8192_f16_tA.mlir index 2ecfebf..b53f1c0 100644 --- a/gemm/mlir/gemm_16000_8_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_16000_8_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x16000xf16>, %arg1: tensor<8192x8xf16>) -> tensor<16000x8xf16> { + func.func @main(%arg0: tensor<8192x16000xf16>, %arg1: tensor<8192x8xf16>) -> tensor<16000x8xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<16000x8xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x8xf16>) -> tensor<16000x8xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x16000xf16>, tensor<8192x8xf16>) outs(%1 : tensor<16000x8xf16>) -> tensor<16000x8xf16> return %2 : tensor<16000x8xf16> } -} +} diff --git a/gemm/mlir/gemm_1920_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_1920_16_5120_bf16_tA.mlir index 0ad1566..0498cb4 100644 --- a/gemm/mlir/gemm_1920_16_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_1920_16_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x1920xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<1920x16xbf16> { + func.func @main(%arg0: tensor<5120x1920xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<1920x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<1920x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1920x16xbf16>) -> tensor<1920x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<1920x16xbf16>) -> tensor<1920x16xbf16> return %2 : tensor<1920x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_1920_16_5120_f16_tA.mlir b/gemm/mlir/gemm_1920_16_5120_f16_tA.mlir index 253e857..7a26a60 100644 --- a/gemm/mlir/gemm_1920_16_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_1920_16_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x1920xf16>, %arg1: tensor<5120x16xf16>) -> tensor<1920x16xf16> { + func.func @main(%arg0: 
tensor<5120x1920xf16>, %arg1: tensor<5120x16xf16>) -> tensor<1920x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<1920x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1920x16xf16>) -> tensor<1920x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xf16>, tensor<5120x16xf16>) outs(%1 : tensor<1920x16xf16>) -> tensor<1920x16xf16> return %2 : tensor<1920x16xf16> } -} +} diff --git a/gemm/mlir/gemm_1920_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_1920_1_5120_bf16_tA.mlir index c96d9a7..69a8142 100644 --- a/gemm/mlir/gemm_1920_1_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_1920_1_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x1920xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<1920x1xbf16> { + func.func @main(%arg0: tensor<5120x1920xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<1920x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<1920x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1920x1xbf16>) -> tensor<1920x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<1920x1xbf16>) -> tensor<1920x1xbf16> return %2 : tensor<1920x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_1920_1_5120_f16_tA.mlir b/gemm/mlir/gemm_1920_1_5120_f16_tA.mlir index 0444dd8..7f56072 100644 --- a/gemm/mlir/gemm_1920_1_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_1920_1_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x1920xf16>, %arg1: tensor<5120x1xf16>) -> tensor<1920x1xf16> { + func.func @main(%arg0: tensor<5120x1920xf16>, %arg1: tensor<5120x1xf16>) -> tensor<1920x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<1920x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1920x1xf16>) -> tensor<1920x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xf16>, tensor<5120x1xf16>) outs(%1 : tensor<1920x1xf16>) -> tensor<1920x1xf16> return %2 : tensor<1920x1xf16> } -} +} diff --git a/gemm/mlir/gemm_1920_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_1920_2_5120_bf16_tA.mlir index 508a6e5..8241b87 100644 --- a/gemm/mlir/gemm_1920_2_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_1920_2_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x1920xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<1920x2xbf16> { + func.func @main(%arg0: tensor<5120x1920xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<1920x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<1920x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1920x2xbf16>) -> tensor<1920x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<1920x2xbf16>) -> tensor<1920x2xbf16> return %2 : tensor<1920x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_1920_2_5120_f16_tA.mlir b/gemm/mlir/gemm_1920_2_5120_f16_tA.mlir index cc7ec7c..8410b70 100644 --- a/gemm/mlir/gemm_1920_2_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_1920_2_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x1920xf16>, %arg1: tensor<5120x2xf16>) -> tensor<1920x2xf16> { + func.func @main(%arg0: tensor<5120x1920xf16>, %arg1: tensor<5120x2xf16>) -> tensor<1920x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<1920x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1920x2xf16>) -> tensor<1920x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xf16>, tensor<5120x2xf16>) outs(%1 
: tensor<1920x2xf16>) -> tensor<1920x2xf16> return %2 : tensor<1920x2xf16> } -} +} diff --git a/gemm/mlir/gemm_1920_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_1920_32_5120_bf16_tA.mlir index 0a74e00..fb33ba0 100644 --- a/gemm/mlir/gemm_1920_32_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_1920_32_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x1920xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<1920x32xbf16> { + func.func @main(%arg0: tensor<5120x1920xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<1920x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<1920x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1920x32xbf16>) -> tensor<1920x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<1920x32xbf16>) -> tensor<1920x32xbf16> return %2 : tensor<1920x32xbf16> } -} +} diff --git a/gemm/mlir/gemm_1920_32_5120_f16_tA.mlir b/gemm/mlir/gemm_1920_32_5120_f16_tA.mlir index f12339f..17e9ebc 100644 --- a/gemm/mlir/gemm_1920_32_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_1920_32_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x1920xf16>, %arg1: tensor<5120x32xf16>) -> tensor<1920x32xf16> { + func.func @main(%arg0: tensor<5120x1920xf16>, %arg1: tensor<5120x32xf16>) -> tensor<1920x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<1920x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1920x32xf16>) -> tensor<1920x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xf16>, tensor<5120x32xf16>) outs(%1 : tensor<1920x32xf16>) -> tensor<1920x32xf16> return %2 : tensor<1920x32xf16> } -} +} diff --git a/gemm/mlir/gemm_1920_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_1920_4_5120_bf16_tA.mlir index 8d9a5f0..5f1c806 100644 --- a/gemm/mlir/gemm_1920_4_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_1920_4_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x1920xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<1920x4xbf16> { + func.func @main(%arg0: tensor<5120x1920xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<1920x4xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<1920x4xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1920x4xbf16>) -> tensor<1920x4xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<1920x4xbf16>) -> tensor<1920x4xbf16> return %2 : tensor<1920x4xbf16> } -} +} diff --git a/gemm/mlir/gemm_1920_4_5120_f16_tA.mlir b/gemm/mlir/gemm_1920_4_5120_f16_tA.mlir index abb25bf..cd45416 100644 --- a/gemm/mlir/gemm_1920_4_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_1920_4_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x1920xf16>, %arg1: tensor<5120x4xf16>) -> tensor<1920x4xf16> { + func.func @main(%arg0: tensor<5120x1920xf16>, %arg1: tensor<5120x4xf16>) -> tensor<1920x4xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<1920x4xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1920x4xf16>) -> tensor<1920x4xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xf16>, tensor<5120x4xf16>) outs(%1 : tensor<1920x4xf16>) -> tensor<1920x4xf16> return %2 : tensor<1920x4xf16> } -} +} diff --git a/gemm/mlir/gemm_1920_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_1920_8_5120_bf16_tA.mlir index ee1a352..bb5ee3c 100644 --- a/gemm/mlir/gemm_1920_8_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_1920_8_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ 
module { - func.func @main_0(%arg0: tensor<5120x1920xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<1920x8xbf16> { + func.func @main(%arg0: tensor<5120x1920xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<1920x8xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<1920x8xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1920x8xbf16>) -> tensor<1920x8xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<1920x8xbf16>) -> tensor<1920x8xbf16> return %2 : tensor<1920x8xbf16> } -} +} diff --git a/gemm/mlir/gemm_1920_8_5120_f16_tA.mlir b/gemm/mlir/gemm_1920_8_5120_f16_tA.mlir index 2c1faa3..7f94a48 100644 --- a/gemm/mlir/gemm_1920_8_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_1920_8_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x1920xf16>, %arg1: tensor<5120x8xf16>) -> tensor<1920x8xf16> { + func.func @main(%arg0: tensor<5120x1920xf16>, %arg1: tensor<5120x8xf16>) -> tensor<1920x8xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<1920x8xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1920x8xf16>) -> tensor<1920x8xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xf16>, tensor<5120x8xf16>) outs(%1 : tensor<1920x8xf16>) -> tensor<1920x8xf16> return %2 : tensor<1920x8xf16> } -} +} diff --git a/gemm/mlir/gemm_2048_10240_1280_bf16.mlir b/gemm/mlir/gemm_2048_10240_1280_bf16.mlir index 74b20a4..d65d3a7 100644 --- a/gemm/mlir/gemm_2048_10240_1280_bf16.mlir +++ b/gemm/mlir/gemm_2048_10240_1280_bf16.mlir @@ -1,9 +1,9 @@ module { - func.func @main_0(%arg0: tensor<2048x1280xbf16>, %arg1: tensor<1280x10240xbf16>) -> tensor<2048x10240xbf16> { + func.func @main(%arg0: tensor<2048x1280xbf16>, %arg1: tensor<1280x10240xbf16>) -> tensor<2048x10240xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<2048x10240xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2048x10240xbf16>) -> tensor<2048x10240xbf16> %2 = linalg.matmul ins(%arg0, %arg1 : tensor<2048x1280xbf16>, tensor<1280x10240xbf16>) outs(%1 : tensor<2048x10240xbf16>) -> tensor<2048x10240xbf16> return %2 : tensor<2048x10240xbf16> } -} +} diff --git a/gemm/mlir/gemm_2048_10240_1280_bf16_tA.mlir b/gemm/mlir/gemm_2048_10240_1280_bf16_tA.mlir index 5e8be84..84241c7 100644 --- a/gemm/mlir/gemm_2048_10240_1280_bf16_tA.mlir +++ b/gemm/mlir/gemm_2048_10240_1280_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<1280x2048xbf16>, %arg1: tensor<1280x10240xbf16>) -> tensor<2048x10240xbf16> { + func.func @main(%arg0: tensor<1280x2048xbf16>, %arg1: tensor<1280x10240xbf16>) -> tensor<2048x10240xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<2048x10240xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2048x10240xbf16>) -> tensor<2048x10240xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x2048xbf16>, tensor<1280x10240xbf16>) outs(%1 : tensor<2048x10240xbf16>) -> tensor<2048x10240xbf16> return %2 : tensor<2048x10240xbf16> } -} +} diff --git a/gemm/mlir/gemm_2048_10240_1280_bf16_tB.mlir b/gemm/mlir/gemm_2048_10240_1280_bf16_tB.mlir index e103cc1..28e61ff 100644 --- a/gemm/mlir/gemm_2048_10240_1280_bf16_tB.mlir +++ b/gemm/mlir/gemm_2048_10240_1280_bf16_tB.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<2048x1280xbf16>, %arg1: tensor<10240x1280xbf16>) -> tensor<2048x10240xbf16> { + func.func @main(%arg0: tensor<2048x1280xbf16>, %arg1: tensor<10240x1280xbf16>) -> 
tensor<2048x10240xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<2048x10240xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2048x10240xbf16>) -> tensor<2048x10240xbf16> %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<2048x1280xbf16>, tensor<10240x1280xbf16>) outs(%1 : tensor<2048x10240xbf16>) -> tensor<2048x10240xbf16> return %2 : tensor<2048x10240xbf16> } -} +} diff --git a/gemm/mlir/gemm_2048_10240_1280_f16.mlir b/gemm/mlir/gemm_2048_10240_1280_f16.mlir index 76ff7bb..e3bbec5 100644 --- a/gemm/mlir/gemm_2048_10240_1280_f16.mlir +++ b/gemm/mlir/gemm_2048_10240_1280_f16.mlir @@ -1,9 +1,9 @@ module { - func.func @main_0(%arg0: tensor<2048x1280xf16>, %arg1: tensor<1280x10240xf16>) -> tensor<2048x10240xf16> { + func.func @main(%arg0: tensor<2048x1280xf16>, %arg1: tensor<1280x10240xf16>) -> tensor<2048x10240xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<2048x10240xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x10240xf16>) -> tensor<2048x10240xf16> %2 = linalg.matmul ins(%arg0, %arg1 : tensor<2048x1280xf16>, tensor<1280x10240xf16>) outs(%1 : tensor<2048x10240xf16>) -> tensor<2048x10240xf16> return %2 : tensor<2048x10240xf16> } -} +} diff --git a/gemm/mlir/gemm_2048_10240_1280_f16_tA.mlir b/gemm/mlir/gemm_2048_10240_1280_f16_tA.mlir index cea341b..6a0033c 100644 --- a/gemm/mlir/gemm_2048_10240_1280_f16_tA.mlir +++ b/gemm/mlir/gemm_2048_10240_1280_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<1280x2048xf16>, %arg1: tensor<1280x10240xf16>) -> tensor<2048x10240xf16> { + func.func @main(%arg0: tensor<1280x2048xf16>, %arg1: tensor<1280x10240xf16>) -> tensor<2048x10240xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<2048x10240xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x10240xf16>) -> tensor<2048x10240xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x2048xf16>, tensor<1280x10240xf16>) outs(%1 : tensor<2048x10240xf16>) -> tensor<2048x10240xf16> return %2 : tensor<2048x10240xf16> } -} +} diff --git a/gemm/mlir/gemm_2048_10240_1280_f16_tB.mlir b/gemm/mlir/gemm_2048_10240_1280_f16_tB.mlir index 69f813b..065e02f 100644 --- a/gemm/mlir/gemm_2048_10240_1280_f16_tB.mlir +++ b/gemm/mlir/gemm_2048_10240_1280_f16_tB.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<2048x1280xf16>, %arg1: tensor<10240x1280xf16>) -> tensor<2048x10240xf16> { + func.func @main(%arg0: tensor<2048x1280xf16>, %arg1: tensor<10240x1280xf16>) -> tensor<2048x10240xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<2048x10240xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x10240xf16>) -> tensor<2048x10240xf16> %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<2048x1280xf16>, tensor<10240x1280xf16>) outs(%1 : tensor<2048x10240xf16>) -> tensor<2048x10240xf16> return %2 : tensor<2048x10240xf16> } -} +} diff --git a/gemm/mlir/gemm_2048_1280_1280_bf16.mlir b/gemm/mlir/gemm_2048_1280_1280_bf16.mlir index c5b2018..91c8ae2 100644 --- a/gemm/mlir/gemm_2048_1280_1280_bf16.mlir +++ b/gemm/mlir/gemm_2048_1280_1280_bf16.mlir @@ -1,9 +1,9 @@ module { - func.func @main_0(%arg0: tensor<2048x1280xbf16>, %arg1: tensor<1280x1280xbf16>) -> tensor<2048x1280xbf16> { + func.func @main(%arg0: tensor<2048x1280xbf16>, %arg1: tensor<1280x1280xbf16>) -> tensor<2048x1280xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<2048x1280xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : 
tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> %2 = linalg.matmul ins(%arg0, %arg1 : tensor<2048x1280xbf16>, tensor<1280x1280xbf16>) outs(%1 : tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> return %2 : tensor<2048x1280xbf16> } -} +} diff --git a/gemm/mlir/gemm_2048_1280_1280_bf16_tA.mlir b/gemm/mlir/gemm_2048_1280_1280_bf16_tA.mlir index 9c68853..a155776 100644 --- a/gemm/mlir/gemm_2048_1280_1280_bf16_tA.mlir +++ b/gemm/mlir/gemm_2048_1280_1280_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<1280x2048xbf16>, %arg1: tensor<1280x1280xbf16>) -> tensor<2048x1280xbf16> { + func.func @main(%arg0: tensor<1280x2048xbf16>, %arg1: tensor<1280x1280xbf16>) -> tensor<2048x1280xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<2048x1280xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x2048xbf16>, tensor<1280x1280xbf16>) outs(%1 : tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> return %2 : tensor<2048x1280xbf16> } -} +} diff --git a/gemm/mlir/gemm_2048_1280_1280_bf16_tB.mlir b/gemm/mlir/gemm_2048_1280_1280_bf16_tB.mlir index 6879238..2087786 100644 --- a/gemm/mlir/gemm_2048_1280_1280_bf16_tB.mlir +++ b/gemm/mlir/gemm_2048_1280_1280_bf16_tB.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<2048x1280xbf16>, %arg1: tensor<1280x1280xbf16>) -> tensor<2048x1280xbf16> { + func.func @main(%arg0: tensor<2048x1280xbf16>, %arg1: tensor<1280x1280xbf16>) -> tensor<2048x1280xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<2048x1280xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<2048x1280xbf16>, tensor<1280x1280xbf16>) outs(%1 : tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> return %2 : tensor<2048x1280xbf16> } -} +} diff --git a/gemm/mlir/gemm_2048_1280_1280_f16.mlir b/gemm/mlir/gemm_2048_1280_1280_f16.mlir index c505e3f..ceb58a2 100644 --- a/gemm/mlir/gemm_2048_1280_1280_f16.mlir +++ b/gemm/mlir/gemm_2048_1280_1280_f16.mlir @@ -1,9 +1,9 @@ module { - func.func @main_0(%arg0: tensor<2048x1280xf16>, %arg1: tensor<1280x1280xf16>) -> tensor<2048x1280xf16> { + func.func @main(%arg0: tensor<2048x1280xf16>, %arg1: tensor<1280x1280xf16>) -> tensor<2048x1280xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<2048x1280xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x1280xf16>) -> tensor<2048x1280xf16> %2 = linalg.matmul ins(%arg0, %arg1 : tensor<2048x1280xf16>, tensor<1280x1280xf16>) outs(%1 : tensor<2048x1280xf16>) -> tensor<2048x1280xf16> return %2 : tensor<2048x1280xf16> } -} +} diff --git a/gemm/mlir/gemm_2048_1280_1280_f16_tA.mlir b/gemm/mlir/gemm_2048_1280_1280_f16_tA.mlir index df9a150..13b6466 100644 --- a/gemm/mlir/gemm_2048_1280_1280_f16_tA.mlir +++ b/gemm/mlir/gemm_2048_1280_1280_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<1280x2048xf16>, %arg1: tensor<1280x1280xf16>) -> tensor<2048x1280xf16> { + func.func @main(%arg0: tensor<1280x2048xf16>, %arg1: tensor<1280x1280xf16>) -> tensor<2048x1280xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<2048x1280xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x1280xf16>) -> tensor<2048x1280xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x2048xf16>, tensor<1280x1280xf16>) outs(%1 : tensor<2048x1280xf16>) -> tensor<2048x1280xf16> 
return %2 : tensor<2048x1280xf16> } -} +} diff --git a/gemm/mlir/gemm_2048_1280_1280_f16_tB.mlir b/gemm/mlir/gemm_2048_1280_1280_f16_tB.mlir index 436c879..92384aa 100644 --- a/gemm/mlir/gemm_2048_1280_1280_f16_tB.mlir +++ b/gemm/mlir/gemm_2048_1280_1280_f16_tB.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<2048x1280xf16>, %arg1: tensor<1280x1280xf16>) -> tensor<2048x1280xf16> { + func.func @main(%arg0: tensor<2048x1280xf16>, %arg1: tensor<1280x1280xf16>) -> tensor<2048x1280xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<2048x1280xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x1280xf16>) -> tensor<2048x1280xf16> %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<2048x1280xf16>, tensor<1280x1280xf16>) outs(%1 : tensor<2048x1280xf16>) -> tensor<2048x1280xf16> return %2 : tensor<2048x1280xf16> } -} +} diff --git a/gemm/mlir/gemm_2048_1280_5120_bf16.mlir b/gemm/mlir/gemm_2048_1280_5120_bf16.mlir index 97e7e4a..6739dcc 100644 --- a/gemm/mlir/gemm_2048_1280_5120_bf16.mlir +++ b/gemm/mlir/gemm_2048_1280_5120_bf16.mlir @@ -1,9 +1,9 @@ module { - func.func @main_0(%arg0: tensor<2048x5120xbf16>, %arg1: tensor<5120x1280xbf16>) -> tensor<2048x1280xbf16> { + func.func @main(%arg0: tensor<2048x5120xbf16>, %arg1: tensor<5120x1280xbf16>) -> tensor<2048x1280xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<2048x1280xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> %2 = linalg.matmul ins(%arg0, %arg1 : tensor<2048x5120xbf16>, tensor<5120x1280xbf16>) outs(%1 : tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> return %2 : tensor<2048x1280xbf16> } -} +} diff --git a/gemm/mlir/gemm_2048_1280_5120_bf16_tA.mlir b/gemm/mlir/gemm_2048_1280_5120_bf16_tA.mlir index bb19ae7..d99f327 100644 --- a/gemm/mlir/gemm_2048_1280_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_2048_1280_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x2048xbf16>, %arg1: tensor<5120x1280xbf16>) -> tensor<2048x1280xbf16> { + func.func @main(%arg0: tensor<5120x2048xbf16>, %arg1: tensor<5120x1280xbf16>) -> tensor<2048x1280xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<2048x1280xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x2048xbf16>, tensor<5120x1280xbf16>) outs(%1 : tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> return %2 : tensor<2048x1280xbf16> } -} +} diff --git a/gemm/mlir/gemm_2048_1280_5120_bf16_tB.mlir b/gemm/mlir/gemm_2048_1280_5120_bf16_tB.mlir index bd9d8c7..ef0bd8e 100644 --- a/gemm/mlir/gemm_2048_1280_5120_bf16_tB.mlir +++ b/gemm/mlir/gemm_2048_1280_5120_bf16_tB.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<2048x5120xbf16>, %arg1: tensor<1280x5120xbf16>) -> tensor<2048x1280xbf16> { + func.func @main(%arg0: tensor<2048x5120xbf16>, %arg1: tensor<1280x5120xbf16>) -> tensor<2048x1280xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<2048x1280xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<2048x5120xbf16>, tensor<1280x5120xbf16>) outs(%1 : tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> return %2 : tensor<2048x1280xbf16> } -} +} diff --git a/gemm/mlir/gemm_2048_1280_5120_f16.mlir b/gemm/mlir/gemm_2048_1280_5120_f16.mlir index bd29efa..c75885c 100644 --- 
a/gemm/mlir/gemm_2048_1280_5120_f16.mlir +++ b/gemm/mlir/gemm_2048_1280_5120_f16.mlir @@ -1,9 +1,9 @@ module { - func.func @main_0(%arg0: tensor<2048x5120xf16>, %arg1: tensor<5120x1280xf16>) -> tensor<2048x1280xf16> { + func.func @main(%arg0: tensor<2048x5120xf16>, %arg1: tensor<5120x1280xf16>) -> tensor<2048x1280xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<2048x1280xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x1280xf16>) -> tensor<2048x1280xf16> %2 = linalg.matmul ins(%arg0, %arg1 : tensor<2048x5120xf16>, tensor<5120x1280xf16>) outs(%1 : tensor<2048x1280xf16>) -> tensor<2048x1280xf16> return %2 : tensor<2048x1280xf16> } -} +} diff --git a/gemm/mlir/gemm_2048_1280_5120_f16_tA.mlir b/gemm/mlir/gemm_2048_1280_5120_f16_tA.mlir index 4b63108..3d6fa99 100644 --- a/gemm/mlir/gemm_2048_1280_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_2048_1280_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x2048xf16>, %arg1: tensor<5120x1280xf16>) -> tensor<2048x1280xf16> { + func.func @main(%arg0: tensor<5120x2048xf16>, %arg1: tensor<5120x1280xf16>) -> tensor<2048x1280xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<2048x1280xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x1280xf16>) -> tensor<2048x1280xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x2048xf16>, tensor<5120x1280xf16>) outs(%1 : tensor<2048x1280xf16>) -> tensor<2048x1280xf16> return %2 : tensor<2048x1280xf16> } -} +} diff --git a/gemm/mlir/gemm_2048_1280_5120_f16_tB.mlir b/gemm/mlir/gemm_2048_1280_5120_f16_tB.mlir index aa4b90e..9ccaabb 100644 --- a/gemm/mlir/gemm_2048_1280_5120_f16_tB.mlir +++ b/gemm/mlir/gemm_2048_1280_5120_f16_tB.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<2048x5120xf16>, %arg1: tensor<1280x5120xf16>) -> tensor<2048x1280xf16> { + func.func @main(%arg0: tensor<2048x5120xf16>, %arg1: tensor<1280x5120xf16>) -> tensor<2048x1280xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<2048x1280xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x1280xf16>) -> tensor<2048x1280xf16> %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<2048x5120xf16>, tensor<1280x5120xf16>) outs(%1 : tensor<2048x1280xf16>) -> tensor<2048x1280xf16> return %2 : tensor<2048x1280xf16> } -} +} diff --git a/gemm/mlir/gemm_2048_2048_1024_f16.mlir b/gemm/mlir/gemm_2048_2048_1024_f16.mlir index a88bfce..cc77455 100644 --- a/gemm/mlir/gemm_2048_2048_1024_f16.mlir +++ b/gemm/mlir/gemm_2048_2048_1024_f16.mlir @@ -1,9 +1,9 @@ module { - func.func @main_0(%arg0: tensor<2048x1024xf16>, %arg1: tensor<1024x2048xf16>) -> tensor<2048x2048xf16> { + func.func @main(%arg0: tensor<2048x1024xf16>, %arg1: tensor<1024x2048xf16>) -> tensor<2048x2048xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<2048x2048xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x2048xf16>) -> tensor<2048x2048xf16> %2 = linalg.matmul ins(%arg0, %arg1 : tensor<2048x1024xf16>, tensor<1024x2048xf16>) outs(%1 : tensor<2048x2048xf16>) -> tensor<2048x2048xf16> return %2 : tensor<2048x2048xf16> } -} +} diff --git a/gemm/mlir/gemm_2048_2048_65536_f16.mlir b/gemm/mlir/gemm_2048_2048_65536_f16.mlir index dec419c..34b9849 100644 --- a/gemm/mlir/gemm_2048_2048_65536_f16.mlir +++ b/gemm/mlir/gemm_2048_2048_65536_f16.mlir @@ -1,9 +1,9 @@ module { - func.func @main_0(%arg0: tensor<2048x65536xf16>, %arg1: tensor<65536x2048xf16>) -> tensor<2048x2048xf16> { + func.func @main(%arg0: 
tensor<2048x65536xf16>, %arg1: tensor<65536x2048xf16>) -> tensor<2048x2048xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<2048x2048xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x2048xf16>) -> tensor<2048x2048xf16> %2 = linalg.matmul ins(%arg0, %arg1 : tensor<2048x65536xf16>, tensor<65536x2048xf16>) outs(%1 : tensor<2048x2048xf16>) -> tensor<2048x2048xf16> return %2 : tensor<2048x2048xf16> } -} +} diff --git a/gemm/mlir/gemm_2048_2048_8192_f16.mlir b/gemm/mlir/gemm_2048_2048_8192_f16.mlir index ca66186..e9f3dd8 100644 --- a/gemm/mlir/gemm_2048_2048_8192_f16.mlir +++ b/gemm/mlir/gemm_2048_2048_8192_f16.mlir @@ -1,9 +1,9 @@ module { - func.func @main_0(%arg0: tensor<2048x8192xf16>, %arg1: tensor<8192x2048xf16>) -> tensor<2048x2048xf16> { + func.func @main(%arg0: tensor<2048x8192xf16>, %arg1: tensor<8192x2048xf16>) -> tensor<2048x2048xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<2048x2048xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x2048xf16>) -> tensor<2048x2048xf16> %2 = linalg.matmul ins(%arg0, %arg1 : tensor<2048x8192xf16>, tensor<8192x2048xf16>) outs(%1 : tensor<2048x2048xf16>) -> tensor<2048x2048xf16> return %2 : tensor<2048x2048xf16> } -} +} diff --git a/gemm/mlir/gemm_2048_8192_1024_f16.mlir b/gemm/mlir/gemm_2048_8192_1024_f16.mlir index 71c0ec6..edfa213 100644 --- a/gemm/mlir/gemm_2048_8192_1024_f16.mlir +++ b/gemm/mlir/gemm_2048_8192_1024_f16.mlir @@ -1,9 +1,9 @@ module { - func.func @main_0(%arg0: tensor<2048x1024xf16>, %arg1: tensor<1024x8192xf16>) -> tensor<2048x8192xf16> { + func.func @main(%arg0: tensor<2048x1024xf16>, %arg1: tensor<1024x8192xf16>) -> tensor<2048x8192xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<2048x8192xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x8192xf16>) -> tensor<2048x8192xf16> %2 = linalg.matmul ins(%arg0, %arg1 : tensor<2048x1024xf16>, tensor<1024x8192xf16>) outs(%1 : tensor<2048x8192xf16>) -> tensor<2048x8192xf16> return %2 : tensor<2048x8192xf16> } -} +} diff --git a/gemm/mlir/gemm_2048_8192_65536_f16.mlir b/gemm/mlir/gemm_2048_8192_65536_f16.mlir index 7f0e2f0..e419b78 100644 --- a/gemm/mlir/gemm_2048_8192_65536_f16.mlir +++ b/gemm/mlir/gemm_2048_8192_65536_f16.mlir @@ -1,9 +1,9 @@ module { - func.func @main_0(%arg0: tensor<2048x65536xf16>, %arg1: tensor<65536x8192xf16>) -> tensor<2048x8192xf16> { + func.func @main(%arg0: tensor<2048x65536xf16>, %arg1: tensor<65536x8192xf16>) -> tensor<2048x8192xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<2048x8192xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x8192xf16>) -> tensor<2048x8192xf16> %2 = linalg.matmul ins(%arg0, %arg1 : tensor<2048x65536xf16>, tensor<65536x8192xf16>) outs(%1 : tensor<2048x8192xf16>) -> tensor<2048x8192xf16> return %2 : tensor<2048x8192xf16> } -} +} diff --git a/gemm/mlir/gemm_2048_8192_8192_f16.mlir b/gemm/mlir/gemm_2048_8192_8192_f16.mlir index 09a023a..cc93de1 100644 --- a/gemm/mlir/gemm_2048_8192_8192_f16.mlir +++ b/gemm/mlir/gemm_2048_8192_8192_f16.mlir @@ -1,9 +1,9 @@ module { - func.func @main_0(%arg0: tensor<2048x8192xf16>, %arg1: tensor<8192x8192xf16>) -> tensor<2048x8192xf16> { + func.func @main(%arg0: tensor<2048x8192xf16>, %arg1: tensor<8192x8192xf16>) -> tensor<2048x8192xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<2048x8192xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x8192xf16>) -> tensor<2048x8192xf16> %2 = linalg.matmul ins(%arg0, %arg1 : 
tensor<2048x8192xf16>, tensor<8192x8192xf16>) outs(%1 : tensor<2048x8192xf16>) -> tensor<2048x8192xf16> return %2 : tensor<2048x8192xf16> } -} +} diff --git a/gemm/mlir/gemm_2560_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_2560_16_8192_bf16_tA.mlir index f7aace2..1ab9cc0 100644 --- a/gemm/mlir/gemm_2560_16_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_2560_16_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x2560xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<2560x16xbf16> { + func.func @main(%arg0: tensor<8192x2560xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<2560x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<2560x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2560x16xbf16>) -> tensor<2560x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<2560x16xbf16>) -> tensor<2560x16xbf16> return %2 : tensor<2560x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_2560_16_8192_f16_tA.mlir b/gemm/mlir/gemm_2560_16_8192_f16_tA.mlir index ec78c38..fd4d377 100644 --- a/gemm/mlir/gemm_2560_16_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_2560_16_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x2560xf16>, %arg1: tensor<8192x16xf16>) -> tensor<2560x16xf16> { + func.func @main(%arg0: tensor<8192x2560xf16>, %arg1: tensor<8192x16xf16>) -> tensor<2560x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<2560x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2560x16xf16>) -> tensor<2560x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xf16>, tensor<8192x16xf16>) outs(%1 : tensor<2560x16xf16>) -> tensor<2560x16xf16> return %2 : tensor<2560x16xf16> } -} +} diff --git a/gemm/mlir/gemm_2560_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_2560_1_8192_bf16_tA.mlir index c45082b..bf23aca 100644 --- a/gemm/mlir/gemm_2560_1_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_2560_1_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x2560xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<2560x1xbf16> { + func.func @main(%arg0: tensor<8192x2560xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<2560x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<2560x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2560x1xbf16>) -> tensor<2560x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<2560x1xbf16>) -> tensor<2560x1xbf16> return %2 : tensor<2560x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_2560_1_8192_f16_tA.mlir b/gemm/mlir/gemm_2560_1_8192_f16_tA.mlir index 13ef082..e6b86b4 100644 --- a/gemm/mlir/gemm_2560_1_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_2560_1_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x2560xf16>, %arg1: tensor<8192x1xf16>) -> tensor<2560x1xf16> { + func.func @main(%arg0: tensor<8192x2560xf16>, %arg1: tensor<8192x1xf16>) -> tensor<2560x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<2560x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2560x1xf16>) -> tensor<2560x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xf16>, tensor<8192x1xf16>) outs(%1 : tensor<2560x1xf16>) -> tensor<2560x1xf16> return %2 : tensor<2560x1xf16> } -} +} diff --git a/gemm/mlir/gemm_2560_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_2560_2_8192_bf16_tA.mlir index 4f3eb6b..de185be 100644 --- a/gemm/mlir/gemm_2560_2_8192_bf16_tA.mlir +++ 
b/gemm/mlir/gemm_2560_2_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x2560xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<2560x2xbf16> { + func.func @main(%arg0: tensor<8192x2560xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<2560x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<2560x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2560x2xbf16>) -> tensor<2560x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<2560x2xbf16>) -> tensor<2560x2xbf16> return %2 : tensor<2560x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_2560_2_8192_f16_tA.mlir b/gemm/mlir/gemm_2560_2_8192_f16_tA.mlir index c82ae7a..3e6664e 100644 --- a/gemm/mlir/gemm_2560_2_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_2560_2_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x2560xf16>, %arg1: tensor<8192x2xf16>) -> tensor<2560x2xf16> { + func.func @main(%arg0: tensor<8192x2560xf16>, %arg1: tensor<8192x2xf16>) -> tensor<2560x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<2560x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2560x2xf16>) -> tensor<2560x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xf16>, tensor<8192x2xf16>) outs(%1 : tensor<2560x2xf16>) -> tensor<2560x2xf16> return %2 : tensor<2560x2xf16> } -} +} diff --git a/gemm/mlir/gemm_2560_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_2560_32_8192_bf16_tA.mlir index a25b3b1..45d0840 100644 --- a/gemm/mlir/gemm_2560_32_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_2560_32_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x2560xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<2560x32xbf16> { + func.func @main(%arg0: tensor<8192x2560xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<2560x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<2560x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2560x32xbf16>) -> tensor<2560x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<2560x32xbf16>) -> tensor<2560x32xbf16> return %2 : tensor<2560x32xbf16> } -} +} diff --git a/gemm/mlir/gemm_2560_32_8192_f16_tA.mlir b/gemm/mlir/gemm_2560_32_8192_f16_tA.mlir index 961e0f4..456b6c6 100644 --- a/gemm/mlir/gemm_2560_32_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_2560_32_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x2560xf16>, %arg1: tensor<8192x32xf16>) -> tensor<2560x32xf16> { + func.func @main(%arg0: tensor<8192x2560xf16>, %arg1: tensor<8192x32xf16>) -> tensor<2560x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<2560x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2560x32xf16>) -> tensor<2560x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xf16>, tensor<8192x32xf16>) outs(%1 : tensor<2560x32xf16>) -> tensor<2560x32xf16> return %2 : tensor<2560x32xf16> } -} +} diff --git a/gemm/mlir/gemm_2560_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_2560_4_8192_bf16_tA.mlir index ac70b7d..d377ec1 100644 --- a/gemm/mlir/gemm_2560_4_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_2560_4_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x2560xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<2560x4xbf16> { + func.func @main(%arg0: tensor<8192x2560xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<2560x4xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 
= tensor.empty() : tensor<2560x4xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2560x4xbf16>) -> tensor<2560x4xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<2560x4xbf16>) -> tensor<2560x4xbf16> return %2 : tensor<2560x4xbf16> } -} +} diff --git a/gemm/mlir/gemm_2560_4_8192_f16_tA.mlir b/gemm/mlir/gemm_2560_4_8192_f16_tA.mlir index 0832a98..a152ec3 100644 --- a/gemm/mlir/gemm_2560_4_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_2560_4_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x2560xf16>, %arg1: tensor<8192x4xf16>) -> tensor<2560x4xf16> { + func.func @main(%arg0: tensor<8192x2560xf16>, %arg1: tensor<8192x4xf16>) -> tensor<2560x4xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<2560x4xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2560x4xf16>) -> tensor<2560x4xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xf16>, tensor<8192x4xf16>) outs(%1 : tensor<2560x4xf16>) -> tensor<2560x4xf16> return %2 : tensor<2560x4xf16> } -} +} diff --git a/gemm/mlir/gemm_2560_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_2560_8_8192_bf16_tA.mlir index e46598a..76c1250 100644 --- a/gemm/mlir/gemm_2560_8_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_2560_8_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x2560xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<2560x8xbf16> { + func.func @main(%arg0: tensor<8192x2560xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<2560x8xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<2560x8xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2560x8xbf16>) -> tensor<2560x8xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xbf16>, tensor<8192x8xbf16>) outs(%1 : tensor<2560x8xbf16>) -> tensor<2560x8xbf16> return %2 : tensor<2560x8xbf16> } -} +} diff --git a/gemm/mlir/gemm_2560_8_8192_f16_tA.mlir b/gemm/mlir/gemm_2560_8_8192_f16_tA.mlir index 5ce7a8f..fff4a68 100644 --- a/gemm/mlir/gemm_2560_8_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_2560_8_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x2560xf16>, %arg1: tensor<8192x8xf16>) -> tensor<2560x8xf16> { + func.func @main(%arg0: tensor<8192x2560xf16>, %arg1: tensor<8192x8xf16>) -> tensor<2560x8xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<2560x8xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2560x8xf16>) -> tensor<2560x8xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xf16>, tensor<8192x8xf16>) outs(%1 : tensor<2560x8xf16>) -> tensor<2560x8xf16> return %2 : tensor<2560x8xf16> } -} +} diff --git a/gemm/mlir/gemm_27648_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_27648_16_5120_bf16_tA.mlir index 58da7c2..e06171a 100644 --- a/gemm/mlir/gemm_27648_16_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_27648_16_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x27648xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<27648x16xbf16> { + func.func @main(%arg0: tensor<5120x27648xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<27648x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<27648x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<27648x16xbf16>) -> tensor<27648x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<27648x16xbf16>) -> tensor<27648x16xbf16> return %2 : tensor<27648x16xbf16> } -} +} diff 
--git a/gemm/mlir/gemm_27648_16_5120_f16_tA.mlir b/gemm/mlir/gemm_27648_16_5120_f16_tA.mlir index e7431ec..9af970d 100644 --- a/gemm/mlir/gemm_27648_16_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_27648_16_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x27648xf16>, %arg1: tensor<5120x16xf16>) -> tensor<27648x16xf16> { + func.func @main(%arg0: tensor<5120x27648xf16>, %arg1: tensor<5120x16xf16>) -> tensor<27648x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<27648x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<27648x16xf16>) -> tensor<27648x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xf16>, tensor<5120x16xf16>) outs(%1 : tensor<27648x16xf16>) -> tensor<27648x16xf16> return %2 : tensor<27648x16xf16> } -} +} diff --git a/gemm/mlir/gemm_27648_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_27648_1_5120_bf16_tA.mlir index c2f34a7..dda9b15 100644 --- a/gemm/mlir/gemm_27648_1_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_27648_1_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x27648xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<27648x1xbf16> { + func.func @main(%arg0: tensor<5120x27648xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<27648x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<27648x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<27648x1xbf16>) -> tensor<27648x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<27648x1xbf16>) -> tensor<27648x1xbf16> return %2 : tensor<27648x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_27648_1_5120_f16_tA.mlir b/gemm/mlir/gemm_27648_1_5120_f16_tA.mlir index 5e83ea2..f2d5c42 100644 --- a/gemm/mlir/gemm_27648_1_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_27648_1_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x27648xf16>, %arg1: tensor<5120x1xf16>) -> tensor<27648x1xf16> { + func.func @main(%arg0: tensor<5120x27648xf16>, %arg1: tensor<5120x1xf16>) -> tensor<27648x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<27648x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<27648x1xf16>) -> tensor<27648x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xf16>, tensor<5120x1xf16>) outs(%1 : tensor<27648x1xf16>) -> tensor<27648x1xf16> return %2 : tensor<27648x1xf16> } -} +} diff --git a/gemm/mlir/gemm_27648_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_27648_2_5120_bf16_tA.mlir index b0dd205..e16cd24 100644 --- a/gemm/mlir/gemm_27648_2_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_27648_2_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x27648xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<27648x2xbf16> { + func.func @main(%arg0: tensor<5120x27648xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<27648x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<27648x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<27648x2xbf16>) -> tensor<27648x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<27648x2xbf16>) -> tensor<27648x2xbf16> return %2 : tensor<27648x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_27648_2_5120_f16_tA.mlir b/gemm/mlir/gemm_27648_2_5120_f16_tA.mlir index 82058eb..dcf4508 100644 --- a/gemm/mlir/gemm_27648_2_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_27648_2_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: 
tensor<5120x27648xf16>, %arg1: tensor<5120x2xf16>) -> tensor<27648x2xf16> { + func.func @main(%arg0: tensor<5120x27648xf16>, %arg1: tensor<5120x2xf16>) -> tensor<27648x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<27648x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<27648x2xf16>) -> tensor<27648x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xf16>, tensor<5120x2xf16>) outs(%1 : tensor<27648x2xf16>) -> tensor<27648x2xf16> return %2 : tensor<27648x2xf16> } -} +} diff --git a/gemm/mlir/gemm_27648_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_27648_32_5120_bf16_tA.mlir index 32277d3..0a408fd 100644 --- a/gemm/mlir/gemm_27648_32_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_27648_32_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x27648xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<27648x32xbf16> { + func.func @main(%arg0: tensor<5120x27648xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<27648x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<27648x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<27648x32xbf16>) -> tensor<27648x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<27648x32xbf16>) -> tensor<27648x32xbf16> return %2 : tensor<27648x32xbf16> } -} +} diff --git a/gemm/mlir/gemm_27648_32_5120_f16_tA.mlir b/gemm/mlir/gemm_27648_32_5120_f16_tA.mlir index 71aacf2..90927a3 100644 --- a/gemm/mlir/gemm_27648_32_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_27648_32_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x27648xf16>, %arg1: tensor<5120x32xf16>) -> tensor<27648x32xf16> { + func.func @main(%arg0: tensor<5120x27648xf16>, %arg1: tensor<5120x32xf16>) -> tensor<27648x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<27648x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<27648x32xf16>) -> tensor<27648x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xf16>, tensor<5120x32xf16>) outs(%1 : tensor<27648x32xf16>) -> tensor<27648x32xf16> return %2 : tensor<27648x32xf16> } -} +} diff --git a/gemm/mlir/gemm_27648_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_27648_4_5120_bf16_tA.mlir index b0357df..20f2150 100644 --- a/gemm/mlir/gemm_27648_4_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_27648_4_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x27648xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<27648x4xbf16> { + func.func @main(%arg0: tensor<5120x27648xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<27648x4xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<27648x4xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<27648x4xbf16>) -> tensor<27648x4xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<27648x4xbf16>) -> tensor<27648x4xbf16> return %2 : tensor<27648x4xbf16> } -} +} diff --git a/gemm/mlir/gemm_27648_4_5120_f16_tA.mlir b/gemm/mlir/gemm_27648_4_5120_f16_tA.mlir index 1801fc5..af948b5 100644 --- a/gemm/mlir/gemm_27648_4_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_27648_4_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x27648xf16>, %arg1: tensor<5120x4xf16>) -> tensor<27648x4xf16> { + func.func @main(%arg0: tensor<5120x27648xf16>, %arg1: tensor<5120x4xf16>) -> tensor<27648x4xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<27648x4xf16> 
%1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<27648x4xf16>) -> tensor<27648x4xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xf16>, tensor<5120x4xf16>) outs(%1 : tensor<27648x4xf16>) -> tensor<27648x4xf16> return %2 : tensor<27648x4xf16> } -} +} diff --git a/gemm/mlir/gemm_27648_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_27648_8_5120_bf16_tA.mlir index 0a79fc5..fd43a3e 100644 --- a/gemm/mlir/gemm_27648_8_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_27648_8_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x27648xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<27648x8xbf16> { + func.func @main(%arg0: tensor<5120x27648xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<27648x8xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<27648x8xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<27648x8xbf16>) -> tensor<27648x8xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<27648x8xbf16>) -> tensor<27648x8xbf16> return %2 : tensor<27648x8xbf16> } -} +} diff --git a/gemm/mlir/gemm_27648_8_5120_f16_tA.mlir b/gemm/mlir/gemm_27648_8_5120_f16_tA.mlir index fd5f73a..6d0ec2e 100644 --- a/gemm/mlir/gemm_27648_8_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_27648_8_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x27648xf16>, %arg1: tensor<5120x8xf16>) -> tensor<27648x8xf16> { + func.func @main(%arg0: tensor<5120x27648xf16>, %arg1: tensor<5120x8xf16>) -> tensor<27648x8xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<27648x8xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<27648x8xf16>) -> tensor<27648x8xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xf16>, tensor<5120x8xf16>) outs(%1 : tensor<27648x8xf16>) -> tensor<27648x8xf16> return %2 : tensor<27648x8xf16> } -} +} diff --git a/gemm/mlir/gemm_28672_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_28672_16_8192_bf16_tA.mlir index 8099b7e..10c20ee 100644 --- a/gemm/mlir/gemm_28672_16_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_28672_16_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x28672xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<28672x16xbf16> { + func.func @main(%arg0: tensor<8192x28672xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<28672x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<28672x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<28672x16xbf16>) -> tensor<28672x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<28672x16xbf16>) -> tensor<28672x16xbf16> return %2 : tensor<28672x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_28672_16_8192_f16_tA.mlir b/gemm/mlir/gemm_28672_16_8192_f16_tA.mlir index 83e15ae..f923157 100644 --- a/gemm/mlir/gemm_28672_16_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_28672_16_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x28672xf16>, %arg1: tensor<8192x16xf16>) -> tensor<28672x16xf16> { + func.func @main(%arg0: tensor<8192x28672xf16>, %arg1: tensor<8192x16xf16>) -> tensor<28672x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<28672x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<28672x16xf16>) -> tensor<28672x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xf16>, tensor<8192x16xf16>) outs(%1 : tensor<28672x16xf16>) -> tensor<28672x16xf16> return %2 : 
tensor<28672x16xf16> } -} +} diff --git a/gemm/mlir/gemm_28672_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_28672_1_8192_bf16_tA.mlir index e5bad51..6a24568 100644 --- a/gemm/mlir/gemm_28672_1_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_28672_1_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x28672xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<28672x1xbf16> { + func.func @main(%arg0: tensor<8192x28672xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<28672x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<28672x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<28672x1xbf16>) -> tensor<28672x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<28672x1xbf16>) -> tensor<28672x1xbf16> return %2 : tensor<28672x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_28672_1_8192_f16_tA.mlir b/gemm/mlir/gemm_28672_1_8192_f16_tA.mlir index 76b2743..a4bb37c 100644 --- a/gemm/mlir/gemm_28672_1_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_28672_1_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x28672xf16>, %arg1: tensor<8192x1xf16>) -> tensor<28672x1xf16> { + func.func @main(%arg0: tensor<8192x28672xf16>, %arg1: tensor<8192x1xf16>) -> tensor<28672x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<28672x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<28672x1xf16>) -> tensor<28672x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xf16>, tensor<8192x1xf16>) outs(%1 : tensor<28672x1xf16>) -> tensor<28672x1xf16> return %2 : tensor<28672x1xf16> } -} +} diff --git a/gemm/mlir/gemm_28672_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_28672_2_8192_bf16_tA.mlir index 280a4e1..24fd156 100644 --- a/gemm/mlir/gemm_28672_2_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_28672_2_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x28672xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<28672x2xbf16> { + func.func @main(%arg0: tensor<8192x28672xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<28672x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<28672x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<28672x2xbf16>) -> tensor<28672x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<28672x2xbf16>) -> tensor<28672x2xbf16> return %2 : tensor<28672x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_28672_2_8192_f16_tA.mlir b/gemm/mlir/gemm_28672_2_8192_f16_tA.mlir index 715fb2f..85df0ac 100644 --- a/gemm/mlir/gemm_28672_2_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_28672_2_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x28672xf16>, %arg1: tensor<8192x2xf16>) -> tensor<28672x2xf16> { + func.func @main(%arg0: tensor<8192x28672xf16>, %arg1: tensor<8192x2xf16>) -> tensor<28672x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<28672x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<28672x2xf16>) -> tensor<28672x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xf16>, tensor<8192x2xf16>) outs(%1 : tensor<28672x2xf16>) -> tensor<28672x2xf16> return %2 : tensor<28672x2xf16> } -} +} diff --git a/gemm/mlir/gemm_28672_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_28672_32_8192_bf16_tA.mlir index 37bcc48..e920955 100644 --- a/gemm/mlir/gemm_28672_32_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_28672_32_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - 
func.func @main_0(%arg0: tensor<8192x28672xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<28672x32xbf16> { + func.func @main(%arg0: tensor<8192x28672xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<28672x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<28672x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<28672x32xbf16>) -> tensor<28672x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<28672x32xbf16>) -> tensor<28672x32xbf16> return %2 : tensor<28672x32xbf16> } -} +} diff --git a/gemm/mlir/gemm_28672_32_8192_f16_tA.mlir b/gemm/mlir/gemm_28672_32_8192_f16_tA.mlir index 1537d79..44a1361 100644 --- a/gemm/mlir/gemm_28672_32_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_28672_32_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x28672xf16>, %arg1: tensor<8192x32xf16>) -> tensor<28672x32xf16> { + func.func @main(%arg0: tensor<8192x28672xf16>, %arg1: tensor<8192x32xf16>) -> tensor<28672x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<28672x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<28672x32xf16>) -> tensor<28672x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xf16>, tensor<8192x32xf16>) outs(%1 : tensor<28672x32xf16>) -> tensor<28672x32xf16> return %2 : tensor<28672x32xf16> } -} +} diff --git a/gemm/mlir/gemm_28672_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_28672_4_8192_bf16_tA.mlir index 7470e52..7ce0353 100644 --- a/gemm/mlir/gemm_28672_4_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_28672_4_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x28672xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<28672x4xbf16> { + func.func @main(%arg0: tensor<8192x28672xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<28672x4xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<28672x4xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<28672x4xbf16>) -> tensor<28672x4xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<28672x4xbf16>) -> tensor<28672x4xbf16> return %2 : tensor<28672x4xbf16> } -} +} diff --git a/gemm/mlir/gemm_28672_4_8192_f16_tA.mlir b/gemm/mlir/gemm_28672_4_8192_f16_tA.mlir index ed276d3..a773111 100644 --- a/gemm/mlir/gemm_28672_4_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_28672_4_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x28672xf16>, %arg1: tensor<8192x4xf16>) -> tensor<28672x4xf16> { + func.func @main(%arg0: tensor<8192x28672xf16>, %arg1: tensor<8192x4xf16>) -> tensor<28672x4xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<28672x4xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<28672x4xf16>) -> tensor<28672x4xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xf16>, tensor<8192x4xf16>) outs(%1 : tensor<28672x4xf16>) -> tensor<28672x4xf16> return %2 : tensor<28672x4xf16> } -} +} diff --git a/gemm/mlir/gemm_28672_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_28672_8_8192_bf16_tA.mlir index 2db383b..5a2541f 100644 --- a/gemm/mlir/gemm_28672_8_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_28672_8_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x28672xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<28672x8xbf16> { + func.func @main(%arg0: tensor<8192x28672xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<28672x8xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = 
tensor.empty() : tensor<28672x8xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<28672x8xbf16>) -> tensor<28672x8xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xbf16>, tensor<8192x8xbf16>) outs(%1 : tensor<28672x8xbf16>) -> tensor<28672x8xbf16> return %2 : tensor<28672x8xbf16> } -} +} diff --git a/gemm/mlir/gemm_28672_8_8192_f16_tA.mlir b/gemm/mlir/gemm_28672_8_8192_f16_tA.mlir index 94a2919..9226cfe 100644 --- a/gemm/mlir/gemm_28672_8_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_28672_8_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x28672xf16>, %arg1: tensor<8192x8xf16>) -> tensor<28672x8xf16> { + func.func @main(%arg0: tensor<8192x28672xf16>, %arg1: tensor<8192x8xf16>) -> tensor<28672x8xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<28672x8xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<28672x8xf16>) -> tensor<28672x8xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xf16>, tensor<8192x8xf16>) outs(%1 : tensor<28672x8xf16>) -> tensor<28672x8xf16> return %2 : tensor<28672x8xf16> } -} +} diff --git a/gemm/mlir/gemm_2_1280_8192_bf16_tB.mlir b/gemm/mlir/gemm_2_1280_8192_bf16_tB.mlir index ad41cc8..1040350 100644 --- a/gemm/mlir/gemm_2_1280_8192_bf16_tB.mlir +++ b/gemm/mlir/gemm_2_1280_8192_bf16_tB.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<2x8192xbf16>, %arg1: tensor<1280x8192xbf16>) -> tensor<2x1280xbf16> { + func.func @main(%arg0: tensor<2x8192xbf16>, %arg1: tensor<1280x8192xbf16>) -> tensor<2x1280xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<2x1280xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2x1280xbf16>) -> tensor<2x1280xbf16> %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<2x8192xbf16>, tensor<1280x8192xbf16>) outs(%1 : tensor<2x1280xbf16>) -> tensor<2x1280xbf16> return %2 : tensor<2x1280xbf16> } -} +} diff --git a/gemm/mlir/gemm_2_3584_8192_bf16_tB.mlir b/gemm/mlir/gemm_2_3584_8192_bf16_tB.mlir index c25a2e1..7f6b6ea 100644 --- a/gemm/mlir/gemm_2_3584_8192_bf16_tB.mlir +++ b/gemm/mlir/gemm_2_3584_8192_bf16_tB.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<2x8192xbf16>, %arg1: tensor<3584x8192xbf16>) -> tensor<2x3584xbf16> { + func.func @main(%arg0: tensor<2x8192xbf16>, %arg1: tensor<3584x8192xbf16>) -> tensor<2x3584xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<2x3584xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2x3584xbf16>) -> tensor<2x3584xbf16> %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<2x8192xbf16>, tensor<3584x8192xbf16>) outs(%1 : tensor<2x3584xbf16>) -> tensor<2x3584xbf16> return %2 : tensor<2x3584xbf16> } -} +} diff --git a/gemm/mlir/gemm_2_7168_8192_bf16_tB.mlir b/gemm/mlir/gemm_2_7168_8192_bf16_tB.mlir index 25ca618..6ac8002 100644 --- a/gemm/mlir/gemm_2_7168_8192_bf16_tB.mlir +++ b/gemm/mlir/gemm_2_7168_8192_bf16_tB.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<2x8192xbf16>, %arg1: tensor<7168x8192xbf16>) -> tensor<2x7168xbf16> { + func.func @main(%arg0: tensor<2x8192xbf16>, %arg1: tensor<7168x8192xbf16>) -> tensor<2x7168xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<2x7168xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2x7168xbf16>) -> tensor<2x7168xbf16> %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<2x8192xbf16>, tensor<7168x8192xbf16>) outs(%1 : tensor<2x7168xbf16>) -> tensor<2x7168xbf16> return %2 : tensor<2x7168xbf16> } 
-} +} diff --git a/gemm/mlir/gemm_32000_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_32000_16_5120_bf16_tA.mlir index 6b11ff1..986fbe3 100644 --- a/gemm/mlir/gemm_32000_16_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_32000_16_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x32000xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<32000x16xbf16> { + func.func @main(%arg0: tensor<5120x32000xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<32000x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<32000x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x16xbf16>) -> tensor<32000x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<32000x16xbf16>) -> tensor<32000x16xbf16> return %2 : tensor<32000x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_32000_16_5120_f16_tA.mlir b/gemm/mlir/gemm_32000_16_5120_f16_tA.mlir index b6d25de..bb83872 100644 --- a/gemm/mlir/gemm_32000_16_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_32000_16_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x32000xf16>, %arg1: tensor<5120x16xf16>) -> tensor<32000x16xf16> { + func.func @main(%arg0: tensor<5120x32000xf16>, %arg1: tensor<5120x16xf16>) -> tensor<32000x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<32000x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x16xf16>) -> tensor<32000x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xf16>, tensor<5120x16xf16>) outs(%1 : tensor<32000x16xf16>) -> tensor<32000x16xf16> return %2 : tensor<32000x16xf16> } -} +} diff --git a/gemm/mlir/gemm_32000_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_32000_16_8192_bf16_tA.mlir index d2ad418..af63a99 100644 --- a/gemm/mlir/gemm_32000_16_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_32000_16_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x32000xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<32000x16xbf16> { + func.func @main(%arg0: tensor<8192x32000xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<32000x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<32000x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x16xbf16>) -> tensor<32000x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<32000x16xbf16>) -> tensor<32000x16xbf16> return %2 : tensor<32000x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_32000_16_8192_f16_tA.mlir b/gemm/mlir/gemm_32000_16_8192_f16_tA.mlir index 5802b26..9881c6e 100644 --- a/gemm/mlir/gemm_32000_16_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_32000_16_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x32000xf16>, %arg1: tensor<8192x16xf16>) -> tensor<32000x16xf16> { + func.func @main(%arg0: tensor<8192x32000xf16>, %arg1: tensor<8192x16xf16>) -> tensor<32000x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<32000x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x16xf16>) -> tensor<32000x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xf16>, tensor<8192x16xf16>) outs(%1 : tensor<32000x16xf16>) -> tensor<32000x16xf16> return %2 : tensor<32000x16xf16> } -} +} diff --git a/gemm/mlir/gemm_32000_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_32000_1_5120_bf16_tA.mlir index 75a9deb..4d33257 100644 --- a/gemm/mlir/gemm_32000_1_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_32000_1_5120_bf16_tA.mlir @@ 
-1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x32000xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<32000x1xbf16> { + func.func @main(%arg0: tensor<5120x32000xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<32000x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<32000x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x1xbf16>) -> tensor<32000x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<32000x1xbf16>) -> tensor<32000x1xbf16> return %2 : tensor<32000x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_32000_1_5120_f16_tA.mlir b/gemm/mlir/gemm_32000_1_5120_f16_tA.mlir index 8805125..9849f9c 100644 --- a/gemm/mlir/gemm_32000_1_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_32000_1_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x32000xf16>, %arg1: tensor<5120x1xf16>) -> tensor<32000x1xf16> { + func.func @main(%arg0: tensor<5120x32000xf16>, %arg1: tensor<5120x1xf16>) -> tensor<32000x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<32000x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x1xf16>) -> tensor<32000x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xf16>, tensor<5120x1xf16>) outs(%1 : tensor<32000x1xf16>) -> tensor<32000x1xf16> return %2 : tensor<32000x1xf16> } -} +} diff --git a/gemm/mlir/gemm_32000_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_32000_1_8192_bf16_tA.mlir index dd5abe0..cdf30e8 100644 --- a/gemm/mlir/gemm_32000_1_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_32000_1_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x32000xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<32000x1xbf16> { + func.func @main(%arg0: tensor<8192x32000xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<32000x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<32000x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x1xbf16>) -> tensor<32000x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<32000x1xbf16>) -> tensor<32000x1xbf16> return %2 : tensor<32000x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_32000_1_8192_f16_tA.mlir b/gemm/mlir/gemm_32000_1_8192_f16_tA.mlir index 1d25619..fb063c9 100644 --- a/gemm/mlir/gemm_32000_1_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_32000_1_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x32000xf16>, %arg1: tensor<8192x1xf16>) -> tensor<32000x1xf16> { + func.func @main(%arg0: tensor<8192x32000xf16>, %arg1: tensor<8192x1xf16>) -> tensor<32000x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<32000x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x1xf16>) -> tensor<32000x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xf16>, tensor<8192x1xf16>) outs(%1 : tensor<32000x1xf16>) -> tensor<32000x1xf16> return %2 : tensor<32000x1xf16> } -} +} diff --git a/gemm/mlir/gemm_32000_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_32000_2_5120_bf16_tA.mlir index 0cbecc4..ffcff1f 100644 --- a/gemm/mlir/gemm_32000_2_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_32000_2_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x32000xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<32000x2xbf16> { + func.func @main(%arg0: tensor<5120x32000xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<32000x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = 
tensor.empty() : tensor<32000x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x2xbf16>) -> tensor<32000x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<32000x2xbf16>) -> tensor<32000x2xbf16> return %2 : tensor<32000x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_32000_2_5120_f16_tA.mlir b/gemm/mlir/gemm_32000_2_5120_f16_tA.mlir index b7112b7..74b1e6a 100644 --- a/gemm/mlir/gemm_32000_2_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_32000_2_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x32000xf16>, %arg1: tensor<5120x2xf16>) -> tensor<32000x2xf16> { + func.func @main(%arg0: tensor<5120x32000xf16>, %arg1: tensor<5120x2xf16>) -> tensor<32000x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<32000x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x2xf16>) -> tensor<32000x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xf16>, tensor<5120x2xf16>) outs(%1 : tensor<32000x2xf16>) -> tensor<32000x2xf16> return %2 : tensor<32000x2xf16> } -} +} diff --git a/gemm/mlir/gemm_32000_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_32000_2_8192_bf16_tA.mlir index f614264..5c6b46d 100644 --- a/gemm/mlir/gemm_32000_2_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_32000_2_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x32000xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<32000x2xbf16> { + func.func @main(%arg0: tensor<8192x32000xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<32000x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<32000x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x2xbf16>) -> tensor<32000x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<32000x2xbf16>) -> tensor<32000x2xbf16> return %2 : tensor<32000x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_32000_2_8192_f16_tA.mlir b/gemm/mlir/gemm_32000_2_8192_f16_tA.mlir index f6d736a..5623d69 100644 --- a/gemm/mlir/gemm_32000_2_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_32000_2_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x32000xf16>, %arg1: tensor<8192x2xf16>) -> tensor<32000x2xf16> { + func.func @main(%arg0: tensor<8192x32000xf16>, %arg1: tensor<8192x2xf16>) -> tensor<32000x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<32000x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x2xf16>) -> tensor<32000x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xf16>, tensor<8192x2xf16>) outs(%1 : tensor<32000x2xf16>) -> tensor<32000x2xf16> return %2 : tensor<32000x2xf16> } -} +} diff --git a/gemm/mlir/gemm_32000_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_32000_32_5120_bf16_tA.mlir index 8dce8b5..6585842 100644 --- a/gemm/mlir/gemm_32000_32_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_32000_32_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x32000xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<32000x32xbf16> { + func.func @main(%arg0: tensor<5120x32000xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<32000x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<32000x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x32xbf16>) -> tensor<32000x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<32000x32xbf16>) -> 
tensor<32000x32xbf16> return %2 : tensor<32000x32xbf16> } -} +} diff --git a/gemm/mlir/gemm_32000_32_5120_f16_tA.mlir b/gemm/mlir/gemm_32000_32_5120_f16_tA.mlir index 0447404..dfc38c7 100644 --- a/gemm/mlir/gemm_32000_32_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_32000_32_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x32000xf16>, %arg1: tensor<5120x32xf16>) -> tensor<32000x32xf16> { + func.func @main(%arg0: tensor<5120x32000xf16>, %arg1: tensor<5120x32xf16>) -> tensor<32000x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<32000x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x32xf16>) -> tensor<32000x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xf16>, tensor<5120x32xf16>) outs(%1 : tensor<32000x32xf16>) -> tensor<32000x32xf16> return %2 : tensor<32000x32xf16> } -} +} diff --git a/gemm/mlir/gemm_32000_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_32000_32_8192_bf16_tA.mlir index e3247ed..efaefd2 100644 --- a/gemm/mlir/gemm_32000_32_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_32000_32_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x32000xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<32000x32xbf16> { + func.func @main(%arg0: tensor<8192x32000xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<32000x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<32000x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x32xbf16>) -> tensor<32000x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<32000x32xbf16>) -> tensor<32000x32xbf16> return %2 : tensor<32000x32xbf16> } -} +} diff --git a/gemm/mlir/gemm_32000_32_8192_f16_tA.mlir b/gemm/mlir/gemm_32000_32_8192_f16_tA.mlir index 50202db..d82b086 100644 --- a/gemm/mlir/gemm_32000_32_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_32000_32_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x32000xf16>, %arg1: tensor<8192x32xf16>) -> tensor<32000x32xf16> { + func.func @main(%arg0: tensor<8192x32000xf16>, %arg1: tensor<8192x32xf16>) -> tensor<32000x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<32000x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x32xf16>) -> tensor<32000x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xf16>, tensor<8192x32xf16>) outs(%1 : tensor<32000x32xf16>) -> tensor<32000x32xf16> return %2 : tensor<32000x32xf16> } -} +} diff --git a/gemm/mlir/gemm_32000_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_32000_4_5120_bf16_tA.mlir index 792f2bf..f52612c 100644 --- a/gemm/mlir/gemm_32000_4_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_32000_4_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x32000xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<32000x4xbf16> { + func.func @main(%arg0: tensor<5120x32000xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<32000x4xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<32000x4xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x4xbf16>) -> tensor<32000x4xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<32000x4xbf16>) -> tensor<32000x4xbf16> return %2 : tensor<32000x4xbf16> } -} +} diff --git a/gemm/mlir/gemm_32000_4_5120_f16_tA.mlir b/gemm/mlir/gemm_32000_4_5120_f16_tA.mlir index 4a22243..43e179b 100644 --- a/gemm/mlir/gemm_32000_4_5120_f16_tA.mlir +++ 
b/gemm/mlir/gemm_32000_4_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x32000xf16>, %arg1: tensor<5120x4xf16>) -> tensor<32000x4xf16> { + func.func @main(%arg0: tensor<5120x32000xf16>, %arg1: tensor<5120x4xf16>) -> tensor<32000x4xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<32000x4xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x4xf16>) -> tensor<32000x4xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xf16>, tensor<5120x4xf16>) outs(%1 : tensor<32000x4xf16>) -> tensor<32000x4xf16> return %2 : tensor<32000x4xf16> } -} +} diff --git a/gemm/mlir/gemm_32000_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_32000_4_8192_bf16_tA.mlir index fe9b7b4..e3a7fcc 100644 --- a/gemm/mlir/gemm_32000_4_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_32000_4_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x32000xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<32000x4xbf16> { + func.func @main(%arg0: tensor<8192x32000xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<32000x4xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<32000x4xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x4xbf16>) -> tensor<32000x4xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<32000x4xbf16>) -> tensor<32000x4xbf16> return %2 : tensor<32000x4xbf16> } -} +} diff --git a/gemm/mlir/gemm_32000_4_8192_f16_tA.mlir b/gemm/mlir/gemm_32000_4_8192_f16_tA.mlir index 881ece7..c430b43 100644 --- a/gemm/mlir/gemm_32000_4_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_32000_4_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x32000xf16>, %arg1: tensor<8192x4xf16>) -> tensor<32000x4xf16> { + func.func @main(%arg0: tensor<8192x32000xf16>, %arg1: tensor<8192x4xf16>) -> tensor<32000x4xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<32000x4xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x4xf16>) -> tensor<32000x4xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xf16>, tensor<8192x4xf16>) outs(%1 : tensor<32000x4xf16>) -> tensor<32000x4xf16> return %2 : tensor<32000x4xf16> } -} +} diff --git a/gemm/mlir/gemm_32000_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_32000_8_5120_bf16_tA.mlir index 7a8eda9..c3082b6 100644 --- a/gemm/mlir/gemm_32000_8_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_32000_8_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x32000xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<32000x8xbf16> { + func.func @main(%arg0: tensor<5120x32000xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<32000x8xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<32000x8xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x8xbf16>) -> tensor<32000x8xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<32000x8xbf16>) -> tensor<32000x8xbf16> return %2 : tensor<32000x8xbf16> } -} +} diff --git a/gemm/mlir/gemm_32000_8_5120_f16_tA.mlir b/gemm/mlir/gemm_32000_8_5120_f16_tA.mlir index 5258a32..84959d3 100644 --- a/gemm/mlir/gemm_32000_8_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_32000_8_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x32000xf16>, %arg1: tensor<5120x8xf16>) -> tensor<32000x8xf16> { + func.func @main(%arg0: tensor<5120x32000xf16>, %arg1: tensor<5120x8xf16>) -> tensor<32000x8xf16> { %cst = 
arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<32000x8xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x8xf16>) -> tensor<32000x8xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xf16>, tensor<5120x8xf16>) outs(%1 : tensor<32000x8xf16>) -> tensor<32000x8xf16> return %2 : tensor<32000x8xf16> } -} +} diff --git a/gemm/mlir/gemm_32000_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_32000_8_8192_bf16_tA.mlir index b23902c..7cbee49 100644 --- a/gemm/mlir/gemm_32000_8_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_32000_8_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x32000xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<32000x8xbf16> { + func.func @main(%arg0: tensor<8192x32000xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<32000x8xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<32000x8xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x8xbf16>) -> tensor<32000x8xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xbf16>, tensor<8192x8xbf16>) outs(%1 : tensor<32000x8xbf16>) -> tensor<32000x8xbf16> return %2 : tensor<32000x8xbf16> } -} +} diff --git a/gemm/mlir/gemm_32000_8_8192_f16_tA.mlir b/gemm/mlir/gemm_32000_8_8192_f16_tA.mlir index 675af52..67d245e 100644 --- a/gemm/mlir/gemm_32000_8_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_32000_8_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x32000xf16>, %arg1: tensor<8192x8xf16>) -> tensor<32000x8xf16> { + func.func @main(%arg0: tensor<8192x32000xf16>, %arg1: tensor<8192x8xf16>) -> tensor<32000x8xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<32000x8xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x8xf16>) -> tensor<32000x8xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xf16>, tensor<8192x8xf16>) outs(%1 : tensor<32000x8xf16>) -> tensor<32000x8xf16> return %2 : tensor<32000x8xf16> } -} +} diff --git a/gemm/mlir/gemm_3456_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_3456_16_5120_bf16_tA.mlir index d15ae4e..ab4fa46 100644 --- a/gemm/mlir/gemm_3456_16_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_3456_16_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x3456xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<3456x16xbf16> { + func.func @main(%arg0: tensor<5120x3456xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<3456x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<3456x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<3456x16xbf16>) -> tensor<3456x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<3456x16xbf16>) -> tensor<3456x16xbf16> return %2 : tensor<3456x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_3456_16_5120_f16_tA.mlir b/gemm/mlir/gemm_3456_16_5120_f16_tA.mlir index 196b277..0c15001 100644 --- a/gemm/mlir/gemm_3456_16_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_3456_16_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x3456xf16>, %arg1: tensor<5120x16xf16>) -> tensor<3456x16xf16> { + func.func @main(%arg0: tensor<5120x3456xf16>, %arg1: tensor<5120x16xf16>) -> tensor<3456x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<3456x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3456x16xf16>) -> tensor<3456x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xf16>, tensor<5120x16xf16>) outs(%1 : tensor<3456x16xf16>) -> 
tensor<3456x16xf16> return %2 : tensor<3456x16xf16> } -} +} diff --git a/gemm/mlir/gemm_3456_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_3456_1_5120_bf16_tA.mlir index 1f33608..754923e 100644 --- a/gemm/mlir/gemm_3456_1_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_3456_1_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x3456xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<3456x1xbf16> { + func.func @main(%arg0: tensor<5120x3456xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<3456x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<3456x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<3456x1xbf16>) -> tensor<3456x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<3456x1xbf16>) -> tensor<3456x1xbf16> return %2 : tensor<3456x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_3456_1_5120_f16_tA.mlir b/gemm/mlir/gemm_3456_1_5120_f16_tA.mlir index e80c247..a179e69 100644 --- a/gemm/mlir/gemm_3456_1_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_3456_1_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x3456xf16>, %arg1: tensor<5120x1xf16>) -> tensor<3456x1xf16> { + func.func @main(%arg0: tensor<5120x3456xf16>, %arg1: tensor<5120x1xf16>) -> tensor<3456x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<3456x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3456x1xf16>) -> tensor<3456x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xf16>, tensor<5120x1xf16>) outs(%1 : tensor<3456x1xf16>) -> tensor<3456x1xf16> return %2 : tensor<3456x1xf16> } -} +} diff --git a/gemm/mlir/gemm_3456_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_3456_2_5120_bf16_tA.mlir index bcc53a2..68afe12 100644 --- a/gemm/mlir/gemm_3456_2_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_3456_2_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x3456xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<3456x2xbf16> { + func.func @main(%arg0: tensor<5120x3456xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<3456x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<3456x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<3456x2xbf16>) -> tensor<3456x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<3456x2xbf16>) -> tensor<3456x2xbf16> return %2 : tensor<3456x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_3456_2_5120_f16_tA.mlir b/gemm/mlir/gemm_3456_2_5120_f16_tA.mlir index 6ce2677..c0fe5f9 100644 --- a/gemm/mlir/gemm_3456_2_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_3456_2_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x3456xf16>, %arg1: tensor<5120x2xf16>) -> tensor<3456x2xf16> { + func.func @main(%arg0: tensor<5120x3456xf16>, %arg1: tensor<5120x2xf16>) -> tensor<3456x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<3456x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3456x2xf16>) -> tensor<3456x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xf16>, tensor<5120x2xf16>) outs(%1 : tensor<3456x2xf16>) -> tensor<3456x2xf16> return %2 : tensor<3456x2xf16> } -} +} diff --git a/gemm/mlir/gemm_3456_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_3456_32_5120_bf16_tA.mlir index f203e2d..9b8159a 100644 --- a/gemm/mlir/gemm_3456_32_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_3456_32_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: 
tensor<5120x3456xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<3456x32xbf16> { + func.func @main(%arg0: tensor<5120x3456xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<3456x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<3456x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<3456x32xbf16>) -> tensor<3456x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<3456x32xbf16>) -> tensor<3456x32xbf16> return %2 : tensor<3456x32xbf16> } -} +} diff --git a/gemm/mlir/gemm_3456_32_5120_f16_tA.mlir b/gemm/mlir/gemm_3456_32_5120_f16_tA.mlir index ee6f1aa..fe43487 100644 --- a/gemm/mlir/gemm_3456_32_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_3456_32_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x3456xf16>, %arg1: tensor<5120x32xf16>) -> tensor<3456x32xf16> { + func.func @main(%arg0: tensor<5120x3456xf16>, %arg1: tensor<5120x32xf16>) -> tensor<3456x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<3456x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3456x32xf16>) -> tensor<3456x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xf16>, tensor<5120x32xf16>) outs(%1 : tensor<3456x32xf16>) -> tensor<3456x32xf16> return %2 : tensor<3456x32xf16> } -} +} diff --git a/gemm/mlir/gemm_3456_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_3456_4_5120_bf16_tA.mlir index 540c119..d6bbdaa 100644 --- a/gemm/mlir/gemm_3456_4_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_3456_4_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x3456xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<3456x4xbf16> { + func.func @main(%arg0: tensor<5120x3456xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<3456x4xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<3456x4xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<3456x4xbf16>) -> tensor<3456x4xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<3456x4xbf16>) -> tensor<3456x4xbf16> return %2 : tensor<3456x4xbf16> } -} +} diff --git a/gemm/mlir/gemm_3456_4_5120_f16_tA.mlir b/gemm/mlir/gemm_3456_4_5120_f16_tA.mlir index 46490bf..d1ba93e 100644 --- a/gemm/mlir/gemm_3456_4_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_3456_4_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x3456xf16>, %arg1: tensor<5120x4xf16>) -> tensor<3456x4xf16> { + func.func @main(%arg0: tensor<5120x3456xf16>, %arg1: tensor<5120x4xf16>) -> tensor<3456x4xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<3456x4xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3456x4xf16>) -> tensor<3456x4xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xf16>, tensor<5120x4xf16>) outs(%1 : tensor<3456x4xf16>) -> tensor<3456x4xf16> return %2 : tensor<3456x4xf16> } -} +} diff --git a/gemm/mlir/gemm_3456_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_3456_8_5120_bf16_tA.mlir index f616913..b7b3a1e 100644 --- a/gemm/mlir/gemm_3456_8_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_3456_8_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x3456xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<3456x8xbf16> { + func.func @main(%arg0: tensor<5120x3456xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<3456x8xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<3456x8xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : 
tensor<3456x8xbf16>) -> tensor<3456x8xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<3456x8xbf16>) -> tensor<3456x8xbf16> return %2 : tensor<3456x8xbf16> } -} +} diff --git a/gemm/mlir/gemm_3456_8_5120_f16_tA.mlir b/gemm/mlir/gemm_3456_8_5120_f16_tA.mlir index e661d74..60f9e0c 100644 --- a/gemm/mlir/gemm_3456_8_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_3456_8_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x3456xf16>, %arg1: tensor<5120x8xf16>) -> tensor<3456x8xf16> { + func.func @main(%arg0: tensor<5120x3456xf16>, %arg1: tensor<5120x8xf16>) -> tensor<3456x8xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<3456x8xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3456x8xf16>) -> tensor<3456x8xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xf16>, tensor<5120x8xf16>) outs(%1 : tensor<3456x8xf16>) -> tensor<3456x8xf16> return %2 : tensor<3456x8xf16> } -} +} diff --git a/gemm/mlir/gemm_3840_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_3840_16_5120_bf16_tA.mlir index c970daf..63c122d 100644 --- a/gemm/mlir/gemm_3840_16_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_3840_16_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x3840xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<3840x16xbf16> { + func.func @main(%arg0: tensor<5120x3840xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<3840x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<3840x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<3840x16xbf16>) -> tensor<3840x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<3840x16xbf16>) -> tensor<3840x16xbf16> return %2 : tensor<3840x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_3840_16_5120_f16_tA.mlir b/gemm/mlir/gemm_3840_16_5120_f16_tA.mlir index 3af2ad7..5ed7814 100644 --- a/gemm/mlir/gemm_3840_16_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_3840_16_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x3840xf16>, %arg1: tensor<5120x16xf16>) -> tensor<3840x16xf16> { + func.func @main(%arg0: tensor<5120x3840xf16>, %arg1: tensor<5120x16xf16>) -> tensor<3840x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<3840x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3840x16xf16>) -> tensor<3840x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xf16>, tensor<5120x16xf16>) outs(%1 : tensor<3840x16xf16>) -> tensor<3840x16xf16> return %2 : tensor<3840x16xf16> } -} +} diff --git a/gemm/mlir/gemm_3840_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_3840_1_5120_bf16_tA.mlir index bd9295a..30fce43 100644 --- a/gemm/mlir/gemm_3840_1_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_3840_1_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x3840xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<3840x1xbf16> { + func.func @main(%arg0: tensor<5120x3840xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<3840x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<3840x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<3840x1xbf16>) -> tensor<3840x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<3840x1xbf16>) -> tensor<3840x1xbf16> return %2 : tensor<3840x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_3840_1_5120_f16_tA.mlir 
b/gemm/mlir/gemm_3840_1_5120_f16_tA.mlir index 2e5ad52..c83b20c 100644 --- a/gemm/mlir/gemm_3840_1_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_3840_1_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x3840xf16>, %arg1: tensor<5120x1xf16>) -> tensor<3840x1xf16> { + func.func @main(%arg0: tensor<5120x3840xf16>, %arg1: tensor<5120x1xf16>) -> tensor<3840x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<3840x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3840x1xf16>) -> tensor<3840x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xf16>, tensor<5120x1xf16>) outs(%1 : tensor<3840x1xf16>) -> tensor<3840x1xf16> return %2 : tensor<3840x1xf16> } -} +} diff --git a/gemm/mlir/gemm_3840_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_3840_2_5120_bf16_tA.mlir index e851a6b..fde61e4 100644 --- a/gemm/mlir/gemm_3840_2_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_3840_2_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x3840xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<3840x2xbf16> { + func.func @main(%arg0: tensor<5120x3840xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<3840x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<3840x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<3840x2xbf16>) -> tensor<3840x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<3840x2xbf16>) -> tensor<3840x2xbf16> return %2 : tensor<3840x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_3840_2_5120_f16_tA.mlir b/gemm/mlir/gemm_3840_2_5120_f16_tA.mlir index 2e1f931..3526c21 100644 --- a/gemm/mlir/gemm_3840_2_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_3840_2_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x3840xf16>, %arg1: tensor<5120x2xf16>) -> tensor<3840x2xf16> { + func.func @main(%arg0: tensor<5120x3840xf16>, %arg1: tensor<5120x2xf16>) -> tensor<3840x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<3840x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3840x2xf16>) -> tensor<3840x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xf16>, tensor<5120x2xf16>) outs(%1 : tensor<3840x2xf16>) -> tensor<3840x2xf16> return %2 : tensor<3840x2xf16> } -} +} diff --git a/gemm/mlir/gemm_3840_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_3840_32_5120_bf16_tA.mlir index 75c89de..aae821a 100644 --- a/gemm/mlir/gemm_3840_32_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_3840_32_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x3840xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<3840x32xbf16> { + func.func @main(%arg0: tensor<5120x3840xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<3840x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<3840x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<3840x32xbf16>) -> tensor<3840x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<3840x32xbf16>) -> tensor<3840x32xbf16> return %2 : tensor<3840x32xbf16> } -} +} diff --git a/gemm/mlir/gemm_3840_32_5120_f16_tA.mlir b/gemm/mlir/gemm_3840_32_5120_f16_tA.mlir index cba4e56..1491630 100644 --- a/gemm/mlir/gemm_3840_32_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_3840_32_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x3840xf16>, %arg1: tensor<5120x32xf16>) -> tensor<3840x32xf16> { + func.func @main(%arg0: 
tensor<5120x3840xf16>, %arg1: tensor<5120x32xf16>) -> tensor<3840x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<3840x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3840x32xf16>) -> tensor<3840x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xf16>, tensor<5120x32xf16>) outs(%1 : tensor<3840x32xf16>) -> tensor<3840x32xf16> return %2 : tensor<3840x32xf16> } -} +} diff --git a/gemm/mlir/gemm_3840_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_3840_4_5120_bf16_tA.mlir index 19b8835..fe34d3f 100644 --- a/gemm/mlir/gemm_3840_4_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_3840_4_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x3840xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<3840x4xbf16> { + func.func @main(%arg0: tensor<5120x3840xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<3840x4xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<3840x4xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<3840x4xbf16>) -> tensor<3840x4xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<3840x4xbf16>) -> tensor<3840x4xbf16> return %2 : tensor<3840x4xbf16> } -} +} diff --git a/gemm/mlir/gemm_3840_4_5120_f16_tA.mlir b/gemm/mlir/gemm_3840_4_5120_f16_tA.mlir index 02a0213..eab6a7c 100644 --- a/gemm/mlir/gemm_3840_4_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_3840_4_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x3840xf16>, %arg1: tensor<5120x4xf16>) -> tensor<3840x4xf16> { + func.func @main(%arg0: tensor<5120x3840xf16>, %arg1: tensor<5120x4xf16>) -> tensor<3840x4xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<3840x4xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3840x4xf16>) -> tensor<3840x4xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xf16>, tensor<5120x4xf16>) outs(%1 : tensor<3840x4xf16>) -> tensor<3840x4xf16> return %2 : tensor<3840x4xf16> } -} +} diff --git a/gemm/mlir/gemm_3840_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_3840_8_5120_bf16_tA.mlir index 04f3bc3..84bb52a 100644 --- a/gemm/mlir/gemm_3840_8_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_3840_8_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x3840xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<3840x8xbf16> { + func.func @main(%arg0: tensor<5120x3840xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<3840x8xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<3840x8xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<3840x8xbf16>) -> tensor<3840x8xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<3840x8xbf16>) -> tensor<3840x8xbf16> return %2 : tensor<3840x8xbf16> } -} +} diff --git a/gemm/mlir/gemm_3840_8_5120_f16_tA.mlir b/gemm/mlir/gemm_3840_8_5120_f16_tA.mlir index 3565fd0..8c91198 100644 --- a/gemm/mlir/gemm_3840_8_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_3840_8_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x3840xf16>, %arg1: tensor<5120x8xf16>) -> tensor<3840x8xf16> { + func.func @main(%arg0: tensor<5120x3840xf16>, %arg1: tensor<5120x8xf16>) -> tensor<3840x8xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<3840x8xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3840x8xf16>) -> tensor<3840x8xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xf16>, tensor<5120x8xf16>) outs(%1 
: tensor<3840x8xf16>) -> tensor<3840x8xf16> return %2 : tensor<3840x8xf16> } -} +} diff --git a/gemm/mlir/gemm_4000_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_4000_16_5120_bf16_tA.mlir index abab876..01c0a78 100644 --- a/gemm/mlir/gemm_4000_16_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_4000_16_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x4000xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<4000x16xbf16> { + func.func @main(%arg0: tensor<5120x4000xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<4000x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<4000x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4000x16xbf16>) -> tensor<4000x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x4000xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<4000x16xbf16>) -> tensor<4000x16xbf16> return %2 : tensor<4000x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_4000_16_5120_f16_tA.mlir b/gemm/mlir/gemm_4000_16_5120_f16_tA.mlir index a8be651..3eb9fe7 100644 --- a/gemm/mlir/gemm_4000_16_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_4000_16_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x4000xf16>, %arg1: tensor<5120x16xf16>) -> tensor<4000x16xf16> { + func.func @main(%arg0: tensor<5120x4000xf16>, %arg1: tensor<5120x16xf16>) -> tensor<4000x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<4000x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4000x16xf16>) -> tensor<4000x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x4000xf16>, tensor<5120x16xf16>) outs(%1 : tensor<4000x16xf16>) -> tensor<4000x16xf16> return %2 : tensor<4000x16xf16> } -} +} diff --git a/gemm/mlir/gemm_4000_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_4000_16_8192_bf16_tA.mlir index 4a9f61a..a64464a 100644 --- a/gemm/mlir/gemm_4000_16_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_4000_16_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x4000xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<4000x16xbf16> { + func.func @main(%arg0: tensor<8192x4000xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<4000x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<4000x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4000x16xbf16>) -> tensor<4000x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<4000x16xbf16>) -> tensor<4000x16xbf16> return %2 : tensor<4000x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_4000_16_8192_f16_tA.mlir b/gemm/mlir/gemm_4000_16_8192_f16_tA.mlir index 0782415..68f9cda 100644 --- a/gemm/mlir/gemm_4000_16_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_4000_16_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x4000xf16>, %arg1: tensor<8192x16xf16>) -> tensor<4000x16xf16> { + func.func @main(%arg0: tensor<8192x4000xf16>, %arg1: tensor<8192x16xf16>) -> tensor<4000x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<4000x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4000x16xf16>) -> tensor<4000x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xf16>, tensor<8192x16xf16>) outs(%1 : tensor<4000x16xf16>) -> tensor<4000x16xf16> return %2 : tensor<4000x16xf16> } -} +} diff --git a/gemm/mlir/gemm_4000_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_4000_1_5120_bf16_tA.mlir index 308e330..857de41 100644 --- a/gemm/mlir/gemm_4000_1_5120_bf16_tA.mlir +++ 
b/gemm/mlir/gemm_4000_1_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x4000xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<4000x1xbf16> { + func.func @main(%arg0: tensor<5120x4000xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<4000x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<4000x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4000x1xbf16>) -> tensor<4000x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x4000xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<4000x1xbf16>) -> tensor<4000x1xbf16> return %2 : tensor<4000x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_4000_1_5120_f16_tA.mlir b/gemm/mlir/gemm_4000_1_5120_f16_tA.mlir index 32ba2b6..f64c226 100644 --- a/gemm/mlir/gemm_4000_1_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_4000_1_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x4000xf16>, %arg1: tensor<5120x1xf16>) -> tensor<4000x1xf16> { + func.func @main(%arg0: tensor<5120x4000xf16>, %arg1: tensor<5120x1xf16>) -> tensor<4000x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<4000x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4000x1xf16>) -> tensor<4000x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x4000xf16>, tensor<5120x1xf16>) outs(%1 : tensor<4000x1xf16>) -> tensor<4000x1xf16> return %2 : tensor<4000x1xf16> } -} +} diff --git a/gemm/mlir/gemm_4000_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_4000_1_8192_bf16_tA.mlir index 7d55ed4..c98f58c 100644 --- a/gemm/mlir/gemm_4000_1_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_4000_1_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x4000xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<4000x1xbf16> { + func.func @main(%arg0: tensor<8192x4000xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<4000x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<4000x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4000x1xbf16>) -> tensor<4000x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<4000x1xbf16>) -> tensor<4000x1xbf16> return %2 : tensor<4000x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_4000_1_8192_f16_tA.mlir b/gemm/mlir/gemm_4000_1_8192_f16_tA.mlir index 4058c6b..5aaef53 100644 --- a/gemm/mlir/gemm_4000_1_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_4000_1_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x4000xf16>, %arg1: tensor<8192x1xf16>) -> tensor<4000x1xf16> { + func.func @main(%arg0: tensor<8192x4000xf16>, %arg1: tensor<8192x1xf16>) -> tensor<4000x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<4000x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4000x1xf16>) -> tensor<4000x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xf16>, tensor<8192x1xf16>) outs(%1 : tensor<4000x1xf16>) -> tensor<4000x1xf16> return %2 : tensor<4000x1xf16> } -} +} diff --git a/gemm/mlir/gemm_4000_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_4000_2_5120_bf16_tA.mlir index e6acf12..cf6d890 100644 --- a/gemm/mlir/gemm_4000_2_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_4000_2_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x4000xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<4000x2xbf16> { + func.func @main(%arg0: tensor<5120x4000xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<4000x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : 
tensor<4000x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4000x2xbf16>) -> tensor<4000x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x4000xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<4000x2xbf16>) -> tensor<4000x2xbf16> return %2 : tensor<4000x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_4000_2_5120_f16_tA.mlir b/gemm/mlir/gemm_4000_2_5120_f16_tA.mlir index 1b4232f..1d7ef35 100644 --- a/gemm/mlir/gemm_4000_2_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_4000_2_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x4000xf16>, %arg1: tensor<5120x2xf16>) -> tensor<4000x2xf16> { + func.func @main(%arg0: tensor<5120x4000xf16>, %arg1: tensor<5120x2xf16>) -> tensor<4000x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<4000x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4000x2xf16>) -> tensor<4000x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x4000xf16>, tensor<5120x2xf16>) outs(%1 : tensor<4000x2xf16>) -> tensor<4000x2xf16> return %2 : tensor<4000x2xf16> } -} +} diff --git a/gemm/mlir/gemm_4000_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_4000_2_8192_bf16_tA.mlir index ab005d9..1081115 100644 --- a/gemm/mlir/gemm_4000_2_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_4000_2_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x4000xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<4000x2xbf16> { + func.func @main(%arg0: tensor<8192x4000xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<4000x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<4000x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4000x2xbf16>) -> tensor<4000x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<4000x2xbf16>) -> tensor<4000x2xbf16> return %2 : tensor<4000x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_4000_2_8192_f16_tA.mlir b/gemm/mlir/gemm_4000_2_8192_f16_tA.mlir index f760b77..5d645df 100644 --- a/gemm/mlir/gemm_4000_2_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_4000_2_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x4000xf16>, %arg1: tensor<8192x2xf16>) -> tensor<4000x2xf16> { + func.func @main(%arg0: tensor<8192x4000xf16>, %arg1: tensor<8192x2xf16>) -> tensor<4000x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<4000x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4000x2xf16>) -> tensor<4000x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xf16>, tensor<8192x2xf16>) outs(%1 : tensor<4000x2xf16>) -> tensor<4000x2xf16> return %2 : tensor<4000x2xf16> } -} +} diff --git a/gemm/mlir/gemm_4000_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_4000_32_5120_bf16_tA.mlir index 84b5c7e..faa22ff 100644 --- a/gemm/mlir/gemm_4000_32_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_4000_32_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x4000xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<4000x32xbf16> { + func.func @main(%arg0: tensor<5120x4000xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<4000x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<4000x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4000x32xbf16>) -> tensor<4000x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x4000xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<4000x32xbf16>) -> tensor<4000x32xbf16> return %2 : tensor<4000x32xbf16> } -} +} diff --git 
a/gemm/mlir/gemm_4000_32_5120_f16_tA.mlir b/gemm/mlir/gemm_4000_32_5120_f16_tA.mlir index 6647868..eb8e87e 100644 --- a/gemm/mlir/gemm_4000_32_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_4000_32_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x4000xf16>, %arg1: tensor<5120x32xf16>) -> tensor<4000x32xf16> { + func.func @main(%arg0: tensor<5120x4000xf16>, %arg1: tensor<5120x32xf16>) -> tensor<4000x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<4000x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4000x32xf16>) -> tensor<4000x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x4000xf16>, tensor<5120x32xf16>) outs(%1 : tensor<4000x32xf16>) -> tensor<4000x32xf16> return %2 : tensor<4000x32xf16> } -} +} diff --git a/gemm/mlir/gemm_4000_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_4000_32_8192_bf16_tA.mlir index ea59621..0688fe2 100644 --- a/gemm/mlir/gemm_4000_32_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_4000_32_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x4000xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<4000x32xbf16> { + func.func @main(%arg0: tensor<8192x4000xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<4000x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<4000x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4000x32xbf16>) -> tensor<4000x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<4000x32xbf16>) -> tensor<4000x32xbf16> return %2 : tensor<4000x32xbf16> } -} +} diff --git a/gemm/mlir/gemm_4000_32_8192_f16_tA.mlir b/gemm/mlir/gemm_4000_32_8192_f16_tA.mlir index 0eb7bad..d261394 100644 --- a/gemm/mlir/gemm_4000_32_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_4000_32_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x4000xf16>, %arg1: tensor<8192x32xf16>) -> tensor<4000x32xf16> { + func.func @main(%arg0: tensor<8192x4000xf16>, %arg1: tensor<8192x32xf16>) -> tensor<4000x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<4000x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4000x32xf16>) -> tensor<4000x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xf16>, tensor<8192x32xf16>) outs(%1 : tensor<4000x32xf16>) -> tensor<4000x32xf16> return %2 : tensor<4000x32xf16> } -} +} diff --git a/gemm/mlir/gemm_4000_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_4000_4_5120_bf16_tA.mlir index 8930578..ee32dc1 100644 --- a/gemm/mlir/gemm_4000_4_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_4000_4_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x4000xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<4000x4xbf16> { + func.func @main(%arg0: tensor<5120x4000xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<4000x4xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<4000x4xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4000x4xbf16>) -> tensor<4000x4xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x4000xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<4000x4xbf16>) -> tensor<4000x4xbf16> return %2 : tensor<4000x4xbf16> } -} +} diff --git a/gemm/mlir/gemm_4000_4_5120_f16_tA.mlir b/gemm/mlir/gemm_4000_4_5120_f16_tA.mlir index 6189801..61b5e3d 100644 --- a/gemm/mlir/gemm_4000_4_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_4000_4_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x4000xf16>, %arg1: tensor<5120x4xf16>) 
-> tensor<4000x4xf16> { + func.func @main(%arg0: tensor<5120x4000xf16>, %arg1: tensor<5120x4xf16>) -> tensor<4000x4xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<4000x4xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4000x4xf16>) -> tensor<4000x4xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x4000xf16>, tensor<5120x4xf16>) outs(%1 : tensor<4000x4xf16>) -> tensor<4000x4xf16> return %2 : tensor<4000x4xf16> } -} +} diff --git a/gemm/mlir/gemm_4000_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_4000_4_8192_bf16_tA.mlir index a26e946..1f73b7e 100644 --- a/gemm/mlir/gemm_4000_4_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_4000_4_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x4000xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<4000x4xbf16> { + func.func @main(%arg0: tensor<8192x4000xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<4000x4xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<4000x4xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4000x4xbf16>) -> tensor<4000x4xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<4000x4xbf16>) -> tensor<4000x4xbf16> return %2 : tensor<4000x4xbf16> } -} +} diff --git a/gemm/mlir/gemm_4000_4_8192_f16_tA.mlir b/gemm/mlir/gemm_4000_4_8192_f16_tA.mlir index 03b39b8..f85ff47 100644 --- a/gemm/mlir/gemm_4000_4_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_4000_4_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x4000xf16>, %arg1: tensor<8192x4xf16>) -> tensor<4000x4xf16> { + func.func @main(%arg0: tensor<8192x4000xf16>, %arg1: tensor<8192x4xf16>) -> tensor<4000x4xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<4000x4xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4000x4xf16>) -> tensor<4000x4xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xf16>, tensor<8192x4xf16>) outs(%1 : tensor<4000x4xf16>) -> tensor<4000x4xf16> return %2 : tensor<4000x4xf16> } -} +} diff --git a/gemm/mlir/gemm_4000_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_4000_8_5120_bf16_tA.mlir index 89a98f6..a59e9b6 100644 --- a/gemm/mlir/gemm_4000_8_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_4000_8_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x4000xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<4000x8xbf16> { + func.func @main(%arg0: tensor<5120x4000xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<4000x8xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<4000x8xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4000x8xbf16>) -> tensor<4000x8xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x4000xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<4000x8xbf16>) -> tensor<4000x8xbf16> return %2 : tensor<4000x8xbf16> } -} +} diff --git a/gemm/mlir/gemm_4000_8_5120_f16_tA.mlir b/gemm/mlir/gemm_4000_8_5120_f16_tA.mlir index 1801f29..2821933 100644 --- a/gemm/mlir/gemm_4000_8_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_4000_8_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x4000xf16>, %arg1: tensor<5120x8xf16>) -> tensor<4000x8xf16> { + func.func @main(%arg0: tensor<5120x4000xf16>, %arg1: tensor<5120x8xf16>) -> tensor<4000x8xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<4000x8xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4000x8xf16>) -> tensor<4000x8xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : 
tensor<5120x4000xf16>, tensor<5120x8xf16>) outs(%1 : tensor<4000x8xf16>) -> tensor<4000x8xf16> return %2 : tensor<4000x8xf16> } -} +} diff --git a/gemm/mlir/gemm_4000_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_4000_8_8192_bf16_tA.mlir index 8ad4e74..bbaeb69 100644 --- a/gemm/mlir/gemm_4000_8_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_4000_8_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x4000xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<4000x8xbf16> { + func.func @main(%arg0: tensor<8192x4000xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<4000x8xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<4000x8xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4000x8xbf16>) -> tensor<4000x8xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xbf16>, tensor<8192x8xbf16>) outs(%1 : tensor<4000x8xbf16>) -> tensor<4000x8xbf16> return %2 : tensor<4000x8xbf16> } -} +} diff --git a/gemm/mlir/gemm_4000_8_8192_f16_tA.mlir b/gemm/mlir/gemm_4000_8_8192_f16_tA.mlir index 316083d..3bd900f 100644 --- a/gemm/mlir/gemm_4000_8_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_4000_8_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x4000xf16>, %arg1: tensor<8192x8xf16>) -> tensor<4000x8xf16> { + func.func @main(%arg0: tensor<8192x4000xf16>, %arg1: tensor<8192x8xf16>) -> tensor<4000x8xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<4000x8xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4000x8xf16>) -> tensor<4000x8xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xf16>, tensor<8192x8xf16>) outs(%1 : tensor<4000x8xf16>) -> tensor<4000x8xf16> return %2 : tensor<4000x8xf16> } -} +} diff --git a/gemm/mlir/gemm_4096_4096_8192_bf16.mlir b/gemm/mlir/gemm_4096_4096_8192_bf16.mlir index eaf1be6..da783d2 100644 --- a/gemm/mlir/gemm_4096_4096_8192_bf16.mlir +++ b/gemm/mlir/gemm_4096_4096_8192_bf16.mlir @@ -1,9 +1,9 @@ module { - func.func @main_0(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<8192x4096xbf16>) -> tensor<4096x4096xbf16> { + func.func @main(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<8192x4096xbf16>) -> tensor<4096x4096xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<4096x4096xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4096x4096xbf16>) -> tensor<4096x4096xbf16> %2 = linalg.matmul ins(%arg0, %arg1 : tensor<4096x8192xbf16>, tensor<8192x4096xbf16>) outs(%1 : tensor<4096x4096xbf16>) -> tensor<4096x4096xbf16> return %2 : tensor<4096x4096xbf16> } -} +} diff --git a/gemm/mlir/gemm_4096_4096_8192_bf16_tA.mlir b/gemm/mlir/gemm_4096_4096_8192_bf16_tA.mlir index dfacc5e..f9c0df8 100644 --- a/gemm/mlir/gemm_4096_4096_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_4096_4096_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x4096xbf16>, %arg1: tensor<8192x4096xbf16>) -> tensor<4096x4096xbf16> { + func.func @main(%arg0: tensor<8192x4096xbf16>, %arg1: tensor<8192x4096xbf16>) -> tensor<4096x4096xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<4096x4096xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4096x4096xbf16>) -> tensor<4096x4096xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4096xbf16>, tensor<8192x4096xbf16>) outs(%1 : tensor<4096x4096xbf16>) -> tensor<4096x4096xbf16> return %2 : tensor<4096x4096xbf16> } -} +} diff --git a/gemm/mlir/gemm_4096_4096_8192_bf16_tB.mlir b/gemm/mlir/gemm_4096_4096_8192_bf16_tB.mlir index 651bbcc..ff2a1ac 100644 
--- a/gemm/mlir/gemm_4096_4096_8192_bf16_tB.mlir +++ b/gemm/mlir/gemm_4096_4096_8192_bf16_tB.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<4096x8192xbf16>) -> tensor<4096x4096xbf16> { + func.func @main(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<4096x8192xbf16>) -> tensor<4096x4096xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<4096x4096xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4096x4096xbf16>) -> tensor<4096x4096xbf16> %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<4096x8192xbf16>, tensor<4096x8192xbf16>) outs(%1 : tensor<4096x4096xbf16>) -> tensor<4096x4096xbf16> return %2 : tensor<4096x4096xbf16> } -} +} diff --git a/gemm/mlir/gemm_4096_4096_8192_f16.mlir b/gemm/mlir/gemm_4096_4096_8192_f16.mlir index fa2f268..d21690a 100644 --- a/gemm/mlir/gemm_4096_4096_8192_f16.mlir +++ b/gemm/mlir/gemm_4096_4096_8192_f16.mlir @@ -1,9 +1,9 @@ module { - func.func @main_0(%arg0: tensor<4096x8192xf16>, %arg1: tensor<8192x4096xf16>) -> tensor<4096x4096xf16> { + func.func @main(%arg0: tensor<4096x8192xf16>, %arg1: tensor<8192x4096xf16>) -> tensor<4096x4096xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<4096x4096xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4096x4096xf16>) -> tensor<4096x4096xf16> %2 = linalg.matmul ins(%arg0, %arg1 : tensor<4096x8192xf16>, tensor<8192x4096xf16>) outs(%1 : tensor<4096x4096xf16>) -> tensor<4096x4096xf16> return %2 : tensor<4096x4096xf16> } -} +} diff --git a/gemm/mlir/gemm_4096_4096_8192_f16_tA.mlir b/gemm/mlir/gemm_4096_4096_8192_f16_tA.mlir index 86c37bf..f4ba892 100644 --- a/gemm/mlir/gemm_4096_4096_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_4096_4096_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x4096xf16>, %arg1: tensor<8192x4096xf16>) -> tensor<4096x4096xf16> { + func.func @main(%arg0: tensor<8192x4096xf16>, %arg1: tensor<8192x4096xf16>) -> tensor<4096x4096xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<4096x4096xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4096x4096xf16>) -> tensor<4096x4096xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4096xf16>, tensor<8192x4096xf16>) outs(%1 : tensor<4096x4096xf16>) -> tensor<4096x4096xf16> return %2 : tensor<4096x4096xf16> } -} +} diff --git a/gemm/mlir/gemm_4096_4096_8192_f16_tB.mlir b/gemm/mlir/gemm_4096_4096_8192_f16_tB.mlir index da4a938..d96e00f 100644 --- a/gemm/mlir/gemm_4096_4096_8192_f16_tB.mlir +++ b/gemm/mlir/gemm_4096_4096_8192_f16_tB.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<4096x8192xf16>, %arg1: tensor<4096x8192xf16>) -> tensor<4096x4096xf16> { + func.func @main(%arg0: tensor<4096x8192xf16>, %arg1: tensor<4096x8192xf16>) -> tensor<4096x4096xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<4096x4096xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4096x4096xf16>) -> tensor<4096x4096xf16> %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<4096x8192xf16>, tensor<4096x8192xf16>) outs(%1 : tensor<4096x4096xf16>) -> tensor<4096x4096xf16> return %2 : tensor<4096x4096xf16> } -} +} diff --git a/gemm/mlir/gemm_5120_16_1280_bf16_tA.mlir b/gemm/mlir/gemm_5120_16_1280_bf16_tA.mlir index 7d55b6a..7e21b10 100644 --- a/gemm/mlir/gemm_5120_16_1280_bf16_tA.mlir +++ b/gemm/mlir/gemm_5120_16_1280_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<1280x5120xbf16>, %arg1: tensor<1280x16xbf16>) -> 
tensor<5120x16xbf16> { + func.func @main(%arg0: tensor<1280x5120xbf16>, %arg1: tensor<1280x16xbf16>) -> tensor<5120x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<5120x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xbf16>, tensor<1280x16xbf16>) outs(%1 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> return %2 : tensor<5120x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_5120_16_1280_f16_tA.mlir b/gemm/mlir/gemm_5120_16_1280_f16_tA.mlir index 38ec94b..e777fe8 100644 --- a/gemm/mlir/gemm_5120_16_1280_f16_tA.mlir +++ b/gemm/mlir/gemm_5120_16_1280_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<1280x5120xf16>, %arg1: tensor<1280x16xf16>) -> tensor<5120x16xf16> { + func.func @main(%arg0: tensor<1280x5120xf16>, %arg1: tensor<1280x16xf16>) -> tensor<5120x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<5120x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x16xf16>) -> tensor<5120x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xf16>, tensor<1280x16xf16>) outs(%1 : tensor<5120x16xf16>) -> tensor<5120x16xf16> return %2 : tensor<5120x16xf16> } -} +} diff --git a/gemm/mlir/gemm_5120_16_13824_bf16_tA.mlir b/gemm/mlir/gemm_5120_16_13824_bf16_tA.mlir index a05a431..712a5a3 100644 --- a/gemm/mlir/gemm_5120_16_13824_bf16_tA.mlir +++ b/gemm/mlir/gemm_5120_16_13824_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<13824x5120xbf16>, %arg1: tensor<13824x16xbf16>) -> tensor<5120x16xbf16> { + func.func @main(%arg0: tensor<13824x5120xbf16>, %arg1: tensor<13824x16xbf16>) -> tensor<5120x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<5120x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xbf16>, tensor<13824x16xbf16>) outs(%1 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> return %2 : tensor<5120x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_5120_16_13824_f16_tA.mlir b/gemm/mlir/gemm_5120_16_13824_f16_tA.mlir index db46cbf..e95a174 100644 --- a/gemm/mlir/gemm_5120_16_13824_f16_tA.mlir +++ b/gemm/mlir/gemm_5120_16_13824_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<13824x5120xf16>, %arg1: tensor<13824x16xf16>) -> tensor<5120x16xf16> { + func.func @main(%arg0: tensor<13824x5120xf16>, %arg1: tensor<13824x16xf16>) -> tensor<5120x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<5120x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x16xf16>) -> tensor<5120x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xf16>, tensor<13824x16xf16>) outs(%1 : tensor<5120x16xf16>) -> tensor<5120x16xf16> return %2 : tensor<5120x16xf16> } -} +} diff --git a/gemm/mlir/gemm_5120_16_1728_bf16_tA.mlir b/gemm/mlir/gemm_5120_16_1728_bf16_tA.mlir index 1a5833f..1f0b6cf 100644 --- a/gemm/mlir/gemm_5120_16_1728_bf16_tA.mlir +++ b/gemm/mlir/gemm_5120_16_1728_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<1728x5120xbf16>, %arg1: tensor<1728x16xbf16>) -> tensor<5120x16xbf16> { + func.func @main(%arg0: tensor<1728x5120xbf16>, %arg1: tensor<1728x16xbf16>) -> tensor<5120x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<5120x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : 
tensor<5120x16xbf16>) -> tensor<5120x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xbf16>, tensor<1728x16xbf16>) outs(%1 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> return %2 : tensor<5120x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_5120_16_1728_f16_tA.mlir b/gemm/mlir/gemm_5120_16_1728_f16_tA.mlir index 0b4e6aa..c0efaf2 100644 --- a/gemm/mlir/gemm_5120_16_1728_f16_tA.mlir +++ b/gemm/mlir/gemm_5120_16_1728_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<1728x5120xf16>, %arg1: tensor<1728x16xf16>) -> tensor<5120x16xf16> { + func.func @main(%arg0: tensor<1728x5120xf16>, %arg1: tensor<1728x16xf16>) -> tensor<5120x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<5120x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x16xf16>) -> tensor<5120x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xf16>, tensor<1728x16xf16>) outs(%1 : tensor<5120x16xf16>) -> tensor<5120x16xf16> return %2 : tensor<5120x16xf16> } -} +} diff --git a/gemm/mlir/gemm_5120_16_2560_bf16_tA.mlir b/gemm/mlir/gemm_5120_16_2560_bf16_tA.mlir index 4e70387..d850d73 100644 --- a/gemm/mlir/gemm_5120_16_2560_bf16_tA.mlir +++ b/gemm/mlir/gemm_5120_16_2560_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<2560x5120xbf16>, %arg1: tensor<2560x16xbf16>) -> tensor<5120x16xbf16> { + func.func @main(%arg0: tensor<2560x5120xbf16>, %arg1: tensor<2560x16xbf16>) -> tensor<5120x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<5120x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xbf16>, tensor<2560x16xbf16>) outs(%1 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> return %2 : tensor<5120x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_5120_16_2560_f16_tA.mlir b/gemm/mlir/gemm_5120_16_2560_f16_tA.mlir index 38f4d8e..e4183f4 100644 --- a/gemm/mlir/gemm_5120_16_2560_f16_tA.mlir +++ b/gemm/mlir/gemm_5120_16_2560_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<2560x5120xf16>, %arg1: tensor<2560x16xf16>) -> tensor<5120x16xf16> { + func.func @main(%arg0: tensor<2560x5120xf16>, %arg1: tensor<2560x16xf16>) -> tensor<5120x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<5120x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x16xf16>) -> tensor<5120x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xf16>, tensor<2560x16xf16>) outs(%1 : tensor<5120x16xf16>) -> tensor<5120x16xf16> return %2 : tensor<5120x16xf16> } -} +} diff --git a/gemm/mlir/gemm_5120_16_3456_bf16_tA.mlir b/gemm/mlir/gemm_5120_16_3456_bf16_tA.mlir index 3322b49..dab5177 100644 --- a/gemm/mlir/gemm_5120_16_3456_bf16_tA.mlir +++ b/gemm/mlir/gemm_5120_16_3456_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<3456x5120xbf16>, %arg1: tensor<3456x16xbf16>) -> tensor<5120x16xbf16> { + func.func @main(%arg0: tensor<3456x5120xbf16>, %arg1: tensor<3456x16xbf16>) -> tensor<5120x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<5120x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xbf16>, tensor<3456x16xbf16>) outs(%1 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> return %2 : tensor<5120x16xbf16> } -} +} diff --git 
a/gemm/mlir/gemm_5120_16_3456_f16_tA.mlir b/gemm/mlir/gemm_5120_16_3456_f16_tA.mlir index 2f73f40..e4d9277 100644 --- a/gemm/mlir/gemm_5120_16_3456_f16_tA.mlir +++ b/gemm/mlir/gemm_5120_16_3456_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<3456x5120xf16>, %arg1: tensor<3456x16xf16>) -> tensor<5120x16xf16> { + func.func @main(%arg0: tensor<3456x5120xf16>, %arg1: tensor<3456x16xf16>) -> tensor<5120x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<5120x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x16xf16>) -> tensor<5120x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xf16>, tensor<3456x16xf16>) outs(%1 : tensor<5120x16xf16>) -> tensor<5120x16xf16> return %2 : tensor<5120x16xf16> } -} +} diff --git a/gemm/mlir/gemm_5120_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_5120_16_5120_bf16_tA.mlir index d11355f..f5dfe26 100644 --- a/gemm/mlir/gemm_5120_16_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_5120_16_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x5120xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<5120x16xbf16> { + func.func @main(%arg0: tensor<5120x5120xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<5120x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<5120x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> return %2 : tensor<5120x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_5120_16_5120_f16_tA.mlir b/gemm/mlir/gemm_5120_16_5120_f16_tA.mlir index 509426e..71c7f1f 100644 --- a/gemm/mlir/gemm_5120_16_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_5120_16_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x5120xf16>, %arg1: tensor<5120x16xf16>) -> tensor<5120x16xf16> { + func.func @main(%arg0: tensor<5120x5120xf16>, %arg1: tensor<5120x16xf16>) -> tensor<5120x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<5120x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x16xf16>) -> tensor<5120x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xf16>, tensor<5120x16xf16>) outs(%1 : tensor<5120x16xf16>) -> tensor<5120x16xf16> return %2 : tensor<5120x16xf16> } -} +} diff --git a/gemm/mlir/gemm_5120_16_640_bf16_tA.mlir b/gemm/mlir/gemm_5120_16_640_bf16_tA.mlir index 2aff311..20d9a68 100644 --- a/gemm/mlir/gemm_5120_16_640_bf16_tA.mlir +++ b/gemm/mlir/gemm_5120_16_640_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<640x5120xbf16>, %arg1: tensor<640x16xbf16>) -> tensor<5120x16xbf16> { + func.func @main(%arg0: tensor<640x5120xbf16>, %arg1: tensor<640x16xbf16>) -> tensor<5120x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<5120x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xbf16>, tensor<640x16xbf16>) outs(%1 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> return %2 : tensor<5120x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_5120_16_640_f16_tA.mlir b/gemm/mlir/gemm_5120_16_640_f16_tA.mlir index 9f5483e..bf06141 100644 --- a/gemm/mlir/gemm_5120_16_640_f16_tA.mlir +++ b/gemm/mlir/gemm_5120_16_640_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<640x5120xf16>, %arg1: 
tensor<640x16xf16>) -> tensor<5120x16xf16> { + func.func @main(%arg0: tensor<640x5120xf16>, %arg1: tensor<640x16xf16>) -> tensor<5120x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<5120x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x16xf16>) -> tensor<5120x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xf16>, tensor<640x16xf16>) outs(%1 : tensor<5120x16xf16>) -> tensor<5120x16xf16> return %2 : tensor<5120x16xf16> } -} +} diff --git a/gemm/mlir/gemm_5120_16_6912_bf16_tA.mlir b/gemm/mlir/gemm_5120_16_6912_bf16_tA.mlir index 0ecdb14..4ab4378 100644 --- a/gemm/mlir/gemm_5120_16_6912_bf16_tA.mlir +++ b/gemm/mlir/gemm_5120_16_6912_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<6912x5120xbf16>, %arg1: tensor<6912x16xbf16>) -> tensor<5120x16xbf16> { + func.func @main(%arg0: tensor<6912x5120xbf16>, %arg1: tensor<6912x16xbf16>) -> tensor<5120x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<5120x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<6912x5120xbf16>, tensor<6912x16xbf16>) outs(%1 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> return %2 : tensor<5120x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_5120_16_6912_f16_tA.mlir b/gemm/mlir/gemm_5120_16_6912_f16_tA.mlir index fd21611..476253e 100644 --- a/gemm/mlir/gemm_5120_16_6912_f16_tA.mlir +++ b/gemm/mlir/gemm_5120_16_6912_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<6912x5120xf16>, %arg1: tensor<6912x16xf16>) -> tensor<5120x16xf16> { + func.func @main(%arg0: tensor<6912x5120xf16>, %arg1: tensor<6912x16xf16>) -> tensor<5120x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<5120x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x16xf16>) -> tensor<5120x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<6912x5120xf16>, tensor<6912x16xf16>) outs(%1 : tensor<5120x16xf16>) -> tensor<5120x16xf16> return %2 : tensor<5120x16xf16> } -} +} diff --git a/gemm/mlir/gemm_5120_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_5120_16_8192_bf16_tA.mlir index b031807..af65c87 100644 --- a/gemm/mlir/gemm_5120_16_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_5120_16_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x5120xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<5120x16xbf16> { + func.func @main(%arg0: tensor<8192x5120xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<5120x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<5120x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> return %2 : tensor<5120x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_5120_16_8192_f16_tA.mlir b/gemm/mlir/gemm_5120_16_8192_f16_tA.mlir index 7f8df36..9acb611 100644 --- a/gemm/mlir/gemm_5120_16_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_5120_16_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x5120xf16>, %arg1: tensor<8192x16xf16>) -> tensor<5120x16xf16> { + func.func @main(%arg0: tensor<8192x5120xf16>, %arg1: tensor<8192x16xf16>) -> tensor<5120x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<5120x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x16xf16>) 
-> tensor<5120x16xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xf16>, tensor<8192x16xf16>) outs(%1 : tensor<5120x16xf16>) -> tensor<5120x16xf16>
     return %2 : tensor<5120x16xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_1_1280_bf16_tA.mlir b/gemm/mlir/gemm_5120_1_1280_bf16_tA.mlir
index 1499573..fbad7cb 100644
--- a/gemm/mlir/gemm_5120_1_1280_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_1_1280_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<1280x5120xbf16>, %arg1: tensor<1280x1xbf16>) -> tensor<5120x1xbf16> {
+  func.func @main(%arg0: tensor<1280x5120xbf16>, %arg1: tensor<1280x1xbf16>) -> tensor<5120x1xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x1xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xbf16>, tensor<1280x1xbf16>) outs(%1 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16>
     return %2 : tensor<5120x1xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_1_1280_f16_tA.mlir b/gemm/mlir/gemm_5120_1_1280_f16_tA.mlir
index 67867ae..a7e29cd 100644
--- a/gemm/mlir/gemm_5120_1_1280_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_1_1280_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<1280x5120xf16>, %arg1: tensor<1280x1xf16>) -> tensor<5120x1xf16> {
+  func.func @main(%arg0: tensor<1280x5120xf16>, %arg1: tensor<1280x1xf16>) -> tensor<5120x1xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x1xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x1xf16>) -> tensor<5120x1xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xf16>, tensor<1280x1xf16>) outs(%1 : tensor<5120x1xf16>) -> tensor<5120x1xf16>
     return %2 : tensor<5120x1xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_1_13824_bf16_tA.mlir b/gemm/mlir/gemm_5120_1_13824_bf16_tA.mlir
index 76e7072..d006ff7 100644
--- a/gemm/mlir/gemm_5120_1_13824_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_1_13824_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<13824x5120xbf16>, %arg1: tensor<13824x1xbf16>) -> tensor<5120x1xbf16> {
+  func.func @main(%arg0: tensor<13824x5120xbf16>, %arg1: tensor<13824x1xbf16>) -> tensor<5120x1xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x1xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xbf16>, tensor<13824x1xbf16>) outs(%1 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16>
     return %2 : tensor<5120x1xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_1_13824_f16_tA.mlir b/gemm/mlir/gemm_5120_1_13824_f16_tA.mlir
index 0bf2d47..a9fcf15 100644
--- a/gemm/mlir/gemm_5120_1_13824_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_1_13824_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<13824x5120xf16>, %arg1: tensor<13824x1xf16>) -> tensor<5120x1xf16> {
+  func.func @main(%arg0: tensor<13824x5120xf16>, %arg1: tensor<13824x1xf16>) -> tensor<5120x1xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x1xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x1xf16>) -> tensor<5120x1xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xf16>, tensor<13824x1xf16>) outs(%1 : tensor<5120x1xf16>) -> tensor<5120x1xf16>
     return %2 : tensor<5120x1xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_1_1728_bf16_tA.mlir b/gemm/mlir/gemm_5120_1_1728_bf16_tA.mlir
index 453e9e2..9417831 100644
--- a/gemm/mlir/gemm_5120_1_1728_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_1_1728_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<1728x5120xbf16>, %arg1: tensor<1728x1xbf16>) -> tensor<5120x1xbf16> {
+  func.func @main(%arg0: tensor<1728x5120xbf16>, %arg1: tensor<1728x1xbf16>) -> tensor<5120x1xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x1xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xbf16>, tensor<1728x1xbf16>) outs(%1 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16>
     return %2 : tensor<5120x1xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_1_1728_f16_tA.mlir b/gemm/mlir/gemm_5120_1_1728_f16_tA.mlir
index 69e0946..124f5a6 100644
--- a/gemm/mlir/gemm_5120_1_1728_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_1_1728_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<1728x5120xf16>, %arg1: tensor<1728x1xf16>) -> tensor<5120x1xf16> {
+  func.func @main(%arg0: tensor<1728x5120xf16>, %arg1: tensor<1728x1xf16>) -> tensor<5120x1xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x1xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x1xf16>) -> tensor<5120x1xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xf16>, tensor<1728x1xf16>) outs(%1 : tensor<5120x1xf16>) -> tensor<5120x1xf16>
     return %2 : tensor<5120x1xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_1_2560_bf16_tA.mlir b/gemm/mlir/gemm_5120_1_2560_bf16_tA.mlir
index 762e752..3779817 100644
--- a/gemm/mlir/gemm_5120_1_2560_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_1_2560_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<2560x5120xbf16>, %arg1: tensor<2560x1xbf16>) -> tensor<5120x1xbf16> {
+  func.func @main(%arg0: tensor<2560x5120xbf16>, %arg1: tensor<2560x1xbf16>) -> tensor<5120x1xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x1xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xbf16>, tensor<2560x1xbf16>) outs(%1 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16>
     return %2 : tensor<5120x1xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_1_2560_f16_tA.mlir b/gemm/mlir/gemm_5120_1_2560_f16_tA.mlir
index dc68ca7..6258f4f 100644
--- a/gemm/mlir/gemm_5120_1_2560_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_1_2560_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<2560x5120xf16>, %arg1: tensor<2560x1xf16>) -> tensor<5120x1xf16> {
+  func.func @main(%arg0: tensor<2560x5120xf16>, %arg1: tensor<2560x1xf16>) -> tensor<5120x1xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x1xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x1xf16>) -> tensor<5120x1xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xf16>, tensor<2560x1xf16>) outs(%1 : tensor<5120x1xf16>) -> tensor<5120x1xf16>
     return %2 : tensor<5120x1xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_1_3456_bf16_tA.mlir b/gemm/mlir/gemm_5120_1_3456_bf16_tA.mlir
index ecd1418..c2c0363 100644
--- a/gemm/mlir/gemm_5120_1_3456_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_1_3456_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<3456x5120xbf16>, %arg1: tensor<3456x1xbf16>) -> tensor<5120x1xbf16> {
+  func.func @main(%arg0: tensor<3456x5120xbf16>, %arg1: tensor<3456x1xbf16>) -> tensor<5120x1xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x1xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xbf16>, tensor<3456x1xbf16>) outs(%1 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16>
     return %2 : tensor<5120x1xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_1_3456_f16_tA.mlir b/gemm/mlir/gemm_5120_1_3456_f16_tA.mlir
index a635c3a..27728e7 100644
--- a/gemm/mlir/gemm_5120_1_3456_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_1_3456_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<3456x5120xf16>, %arg1: tensor<3456x1xf16>) -> tensor<5120x1xf16> {
+  func.func @main(%arg0: tensor<3456x5120xf16>, %arg1: tensor<3456x1xf16>) -> tensor<5120x1xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x1xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x1xf16>) -> tensor<5120x1xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xf16>, tensor<3456x1xf16>) outs(%1 : tensor<5120x1xf16>) -> tensor<5120x1xf16>
     return %2 : tensor<5120x1xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_5120_1_5120_bf16_tA.mlir
index 5dca089..e8652a1 100644
--- a/gemm/mlir/gemm_5120_1_5120_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_1_5120_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x5120xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<5120x1xbf16> {
+  func.func @main(%arg0: tensor<5120x5120xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<5120x1xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x1xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16>
     return %2 : tensor<5120x1xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_1_5120_f16_tA.mlir b/gemm/mlir/gemm_5120_1_5120_f16_tA.mlir
index 9ec5717..d36e54c 100644
--- a/gemm/mlir/gemm_5120_1_5120_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_1_5120_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x5120xf16>, %arg1: tensor<5120x1xf16>) -> tensor<5120x1xf16> {
+  func.func @main(%arg0: tensor<5120x5120xf16>, %arg1: tensor<5120x1xf16>) -> tensor<5120x1xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x1xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x1xf16>) -> tensor<5120x1xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xf16>, tensor<5120x1xf16>) outs(%1 : tensor<5120x1xf16>) -> tensor<5120x1xf16>
     return %2 : tensor<5120x1xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_1_640_bf16_tA.mlir b/gemm/mlir/gemm_5120_1_640_bf16_tA.mlir
index 731361c..3b414a8 100644
--- a/gemm/mlir/gemm_5120_1_640_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_1_640_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<640x5120xbf16>, %arg1: tensor<640x1xbf16>) -> tensor<5120x1xbf16> {
+  func.func @main(%arg0: tensor<640x5120xbf16>, %arg1: tensor<640x1xbf16>) -> tensor<5120x1xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x1xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xbf16>, tensor<640x1xbf16>) outs(%1 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16>
     return %2 : tensor<5120x1xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_1_640_f16_tA.mlir b/gemm/mlir/gemm_5120_1_640_f16_tA.mlir
index 6447cb1..f8bbbe2 100644
--- a/gemm/mlir/gemm_5120_1_640_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_1_640_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<640x5120xf16>, %arg1: tensor<640x1xf16>) -> tensor<5120x1xf16> {
+  func.func @main(%arg0: tensor<640x5120xf16>, %arg1: tensor<640x1xf16>) -> tensor<5120x1xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x1xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x1xf16>) -> tensor<5120x1xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xf16>, tensor<640x1xf16>) outs(%1 : tensor<5120x1xf16>) -> tensor<5120x1xf16>
     return %2 : tensor<5120x1xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_1_6912_bf16_tA.mlir b/gemm/mlir/gemm_5120_1_6912_bf16_tA.mlir
index ee4ac72..fdc2298 100644
--- a/gemm/mlir/gemm_5120_1_6912_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_1_6912_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<6912x5120xbf16>, %arg1: tensor<6912x1xbf16>) -> tensor<5120x1xbf16> {
+  func.func @main(%arg0: tensor<6912x5120xbf16>, %arg1: tensor<6912x1xbf16>) -> tensor<5120x1xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x1xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<6912x5120xbf16>, tensor<6912x1xbf16>) outs(%1 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16>
     return %2 : tensor<5120x1xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_1_6912_f16_tA.mlir b/gemm/mlir/gemm_5120_1_6912_f16_tA.mlir
index 9b18aad..be5c109 100644
--- a/gemm/mlir/gemm_5120_1_6912_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_1_6912_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<6912x5120xf16>, %arg1: tensor<6912x1xf16>) -> tensor<5120x1xf16> {
+  func.func @main(%arg0: tensor<6912x5120xf16>, %arg1: tensor<6912x1xf16>) -> tensor<5120x1xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x1xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x1xf16>) -> tensor<5120x1xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<6912x5120xf16>, tensor<6912x1xf16>) outs(%1 : tensor<5120x1xf16>) -> tensor<5120x1xf16>
     return %2 : tensor<5120x1xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_5120_1_8192_bf16_tA.mlir
index 4026c65..13e6f69 100644
--- a/gemm/mlir/gemm_5120_1_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_1_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x5120xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<5120x1xbf16> {
+  func.func @main(%arg0: tensor<8192x5120xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<5120x1xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x1xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16>
     return %2 : tensor<5120x1xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_1_8192_f16_tA.mlir b/gemm/mlir/gemm_5120_1_8192_f16_tA.mlir
index 3d64426..572ff85 100644
--- a/gemm/mlir/gemm_5120_1_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_1_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x5120xf16>, %arg1: tensor<8192x1xf16>) -> tensor<5120x1xf16> {
+  func.func @main(%arg0: tensor<8192x5120xf16>, %arg1: tensor<8192x1xf16>) -> tensor<5120x1xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x1xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x1xf16>) -> tensor<5120x1xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xf16>, tensor<8192x1xf16>) outs(%1 : tensor<5120x1xf16>) -> tensor<5120x1xf16>
     return %2 : tensor<5120x1xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_2_1280_bf16_tA.mlir b/gemm/mlir/gemm_5120_2_1280_bf16_tA.mlir
index b6f4c9b..07b6e62 100644
--- a/gemm/mlir/gemm_5120_2_1280_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_2_1280_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<1280x5120xbf16>, %arg1: tensor<1280x2xbf16>) -> tensor<5120x2xbf16> {
+  func.func @main(%arg0: tensor<1280x5120xbf16>, %arg1: tensor<1280x2xbf16>) -> tensor<5120x2xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x2xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xbf16>, tensor<1280x2xbf16>) outs(%1 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16>
     return %2 : tensor<5120x2xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_2_1280_f16_tA.mlir b/gemm/mlir/gemm_5120_2_1280_f16_tA.mlir
index b557fb9..70ad768 100644
--- a/gemm/mlir/gemm_5120_2_1280_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_2_1280_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<1280x5120xf16>, %arg1: tensor<1280x2xf16>) -> tensor<5120x2xf16> {
+  func.func @main(%arg0: tensor<1280x5120xf16>, %arg1: tensor<1280x2xf16>) -> tensor<5120x2xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x2xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x2xf16>) -> tensor<5120x2xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xf16>, tensor<1280x2xf16>) outs(%1 : tensor<5120x2xf16>) -> tensor<5120x2xf16>
     return %2 : tensor<5120x2xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_2_13824_bf16_tA.mlir b/gemm/mlir/gemm_5120_2_13824_bf16_tA.mlir
index eb48187..e83f65d 100644
--- a/gemm/mlir/gemm_5120_2_13824_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_2_13824_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<13824x5120xbf16>, %arg1: tensor<13824x2xbf16>) -> tensor<5120x2xbf16> {
+  func.func @main(%arg0: tensor<13824x5120xbf16>, %arg1: tensor<13824x2xbf16>) -> tensor<5120x2xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x2xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xbf16>, tensor<13824x2xbf16>) outs(%1 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16>
     return %2 : tensor<5120x2xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_2_13824_f16_tA.mlir b/gemm/mlir/gemm_5120_2_13824_f16_tA.mlir
index bd31359..e30738c 100644
--- a/gemm/mlir/gemm_5120_2_13824_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_2_13824_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<13824x5120xf16>, %arg1: tensor<13824x2xf16>) -> tensor<5120x2xf16> {
+  func.func @main(%arg0: tensor<13824x5120xf16>, %arg1: tensor<13824x2xf16>) -> tensor<5120x2xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x2xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x2xf16>) -> tensor<5120x2xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xf16>, tensor<13824x2xf16>) outs(%1 : tensor<5120x2xf16>) -> tensor<5120x2xf16>
     return %2 : tensor<5120x2xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_2_1728_bf16_tA.mlir b/gemm/mlir/gemm_5120_2_1728_bf16_tA.mlir
index 6f4c566..8a04fb2 100644
--- a/gemm/mlir/gemm_5120_2_1728_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_2_1728_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<1728x5120xbf16>, %arg1: tensor<1728x2xbf16>) -> tensor<5120x2xbf16> {
+  func.func @main(%arg0: tensor<1728x5120xbf16>, %arg1: tensor<1728x2xbf16>) -> tensor<5120x2xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x2xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xbf16>, tensor<1728x2xbf16>) outs(%1 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16>
     return %2 : tensor<5120x2xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_2_1728_f16_tA.mlir b/gemm/mlir/gemm_5120_2_1728_f16_tA.mlir
index 021d2b5..2c77846 100644
--- a/gemm/mlir/gemm_5120_2_1728_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_2_1728_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<1728x5120xf16>, %arg1: tensor<1728x2xf16>) -> tensor<5120x2xf16> {
+  func.func @main(%arg0: tensor<1728x5120xf16>, %arg1: tensor<1728x2xf16>) -> tensor<5120x2xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x2xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x2xf16>) -> tensor<5120x2xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xf16>, tensor<1728x2xf16>) outs(%1 : tensor<5120x2xf16>) -> tensor<5120x2xf16>
     return %2 : tensor<5120x2xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_2_2560_bf16_tA.mlir b/gemm/mlir/gemm_5120_2_2560_bf16_tA.mlir
index 7426ffb..25d142a 100644
--- a/gemm/mlir/gemm_5120_2_2560_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_2_2560_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<2560x5120xbf16>, %arg1: tensor<2560x2xbf16>) -> tensor<5120x2xbf16> {
+  func.func @main(%arg0: tensor<2560x5120xbf16>, %arg1: tensor<2560x2xbf16>) -> tensor<5120x2xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x2xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xbf16>, tensor<2560x2xbf16>) outs(%1 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16>
     return %2 : tensor<5120x2xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_2_2560_f16_tA.mlir b/gemm/mlir/gemm_5120_2_2560_f16_tA.mlir
index 4b1c18d..414bd86 100644
--- a/gemm/mlir/gemm_5120_2_2560_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_2_2560_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<2560x5120xf16>, %arg1: tensor<2560x2xf16>) -> tensor<5120x2xf16> {
+  func.func @main(%arg0: tensor<2560x5120xf16>, %arg1: tensor<2560x2xf16>) -> tensor<5120x2xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x2xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x2xf16>) -> tensor<5120x2xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xf16>, tensor<2560x2xf16>) outs(%1 : tensor<5120x2xf16>) -> tensor<5120x2xf16>
     return %2 : tensor<5120x2xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_2_3456_bf16_tA.mlir b/gemm/mlir/gemm_5120_2_3456_bf16_tA.mlir
index 1f5cb73..3b81d86 100644
--- a/gemm/mlir/gemm_5120_2_3456_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_2_3456_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<3456x5120xbf16>, %arg1: tensor<3456x2xbf16>) -> tensor<5120x2xbf16> {
+  func.func @main(%arg0: tensor<3456x5120xbf16>, %arg1: tensor<3456x2xbf16>) -> tensor<5120x2xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x2xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xbf16>, tensor<3456x2xbf16>) outs(%1 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16>
     return %2 : tensor<5120x2xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_2_3456_f16_tA.mlir b/gemm/mlir/gemm_5120_2_3456_f16_tA.mlir
index 23dcf3c..fe954d2 100644
--- a/gemm/mlir/gemm_5120_2_3456_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_2_3456_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<3456x5120xf16>, %arg1: tensor<3456x2xf16>) -> tensor<5120x2xf16> {
+  func.func @main(%arg0: tensor<3456x5120xf16>, %arg1: tensor<3456x2xf16>) -> tensor<5120x2xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x2xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x2xf16>) -> tensor<5120x2xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xf16>, tensor<3456x2xf16>) outs(%1 : tensor<5120x2xf16>) -> tensor<5120x2xf16>
     return %2 : tensor<5120x2xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_5120_2_5120_bf16_tA.mlir
index 8e45849..6599984 100644
--- a/gemm/mlir/gemm_5120_2_5120_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_2_5120_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x5120xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<5120x2xbf16> {
+  func.func @main(%arg0: tensor<5120x5120xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<5120x2xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x2xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16>
     return %2 : tensor<5120x2xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_2_5120_f16_tA.mlir b/gemm/mlir/gemm_5120_2_5120_f16_tA.mlir
index 3498510..f88163e 100644
--- a/gemm/mlir/gemm_5120_2_5120_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_2_5120_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x5120xf16>, %arg1: tensor<5120x2xf16>) -> tensor<5120x2xf16> {
+  func.func @main(%arg0: tensor<5120x5120xf16>, %arg1: tensor<5120x2xf16>) -> tensor<5120x2xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x2xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x2xf16>) -> tensor<5120x2xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xf16>, tensor<5120x2xf16>) outs(%1 : tensor<5120x2xf16>) -> tensor<5120x2xf16>
     return %2 : tensor<5120x2xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_2_640_bf16_tA.mlir b/gemm/mlir/gemm_5120_2_640_bf16_tA.mlir
index df66ea8..8ade0ca 100644
--- a/gemm/mlir/gemm_5120_2_640_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_2_640_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<640x5120xbf16>, %arg1: tensor<640x2xbf16>) -> tensor<5120x2xbf16> {
+  func.func @main(%arg0: tensor<640x5120xbf16>, %arg1: tensor<640x2xbf16>) -> tensor<5120x2xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x2xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xbf16>, tensor<640x2xbf16>) outs(%1 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16>
     return %2 : tensor<5120x2xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_2_640_f16_tA.mlir b/gemm/mlir/gemm_5120_2_640_f16_tA.mlir
index 7e92ece..3c50f2f 100644
--- a/gemm/mlir/gemm_5120_2_640_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_2_640_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<640x5120xf16>, %arg1: tensor<640x2xf16>) -> tensor<5120x2xf16> {
+  func.func @main(%arg0: tensor<640x5120xf16>, %arg1: tensor<640x2xf16>) -> tensor<5120x2xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x2xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x2xf16>) -> tensor<5120x2xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xf16>, tensor<640x2xf16>) outs(%1 : tensor<5120x2xf16>) -> tensor<5120x2xf16>
     return %2 : tensor<5120x2xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_2_6912_bf16_tA.mlir b/gemm/mlir/gemm_5120_2_6912_bf16_tA.mlir
index 2135217..5f8b20a 100644
--- a/gemm/mlir/gemm_5120_2_6912_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_2_6912_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<6912x5120xbf16>, %arg1: tensor<6912x2xbf16>) -> tensor<5120x2xbf16> {
+  func.func @main(%arg0: tensor<6912x5120xbf16>, %arg1: tensor<6912x2xbf16>) -> tensor<5120x2xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x2xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<6912x5120xbf16>, tensor<6912x2xbf16>) outs(%1 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16>
     return %2 : tensor<5120x2xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_2_6912_f16_tA.mlir b/gemm/mlir/gemm_5120_2_6912_f16_tA.mlir
index f90ec73..7fe73cd 100644
--- a/gemm/mlir/gemm_5120_2_6912_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_2_6912_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<6912x5120xf16>, %arg1: tensor<6912x2xf16>) -> tensor<5120x2xf16> {
+  func.func @main(%arg0: tensor<6912x5120xf16>, %arg1: tensor<6912x2xf16>) -> tensor<5120x2xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x2xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x2xf16>) -> tensor<5120x2xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<6912x5120xf16>, tensor<6912x2xf16>) outs(%1 : tensor<5120x2xf16>) -> tensor<5120x2xf16>
     return %2 : tensor<5120x2xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_5120_2_8192_bf16_tA.mlir
index 794914a..4460592 100644
--- a/gemm/mlir/gemm_5120_2_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_2_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x5120xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<5120x2xbf16> {
+  func.func @main(%arg0: tensor<8192x5120xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<5120x2xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x2xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16>
     return %2 : tensor<5120x2xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_2_8192_f16_tA.mlir b/gemm/mlir/gemm_5120_2_8192_f16_tA.mlir
index 8c6ecbb..6e9ac82 100644
--- a/gemm/mlir/gemm_5120_2_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_2_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x5120xf16>, %arg1: tensor<8192x2xf16>) -> tensor<5120x2xf16> {
+  func.func @main(%arg0: tensor<8192x5120xf16>, %arg1: tensor<8192x2xf16>) -> tensor<5120x2xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x2xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x2xf16>) -> tensor<5120x2xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xf16>, tensor<8192x2xf16>) outs(%1 : tensor<5120x2xf16>) -> tensor<5120x2xf16>
     return %2 : tensor<5120x2xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_32_1280_bf16_tA.mlir b/gemm/mlir/gemm_5120_32_1280_bf16_tA.mlir
index 0c54b39..256678e 100644
--- a/gemm/mlir/gemm_5120_32_1280_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_32_1280_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<1280x5120xbf16>, %arg1: tensor<1280x32xbf16>) -> tensor<5120x32xbf16> {
+  func.func @main(%arg0: tensor<1280x5120xbf16>, %arg1: tensor<1280x32xbf16>) -> tensor<5120x32xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x32xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xbf16>, tensor<1280x32xbf16>) outs(%1 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16>
     return %2 : tensor<5120x32xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_32_1280_f16_tA.mlir b/gemm/mlir/gemm_5120_32_1280_f16_tA.mlir
index 6123e81..e7f5580 100644
--- a/gemm/mlir/gemm_5120_32_1280_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_32_1280_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<1280x5120xf16>, %arg1: tensor<1280x32xf16>) -> tensor<5120x32xf16> {
+  func.func @main(%arg0: tensor<1280x5120xf16>, %arg1: tensor<1280x32xf16>) -> tensor<5120x32xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x32xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x32xf16>) -> tensor<5120x32xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xf16>, tensor<1280x32xf16>) outs(%1 : tensor<5120x32xf16>) -> tensor<5120x32xf16>
     return %2 : tensor<5120x32xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_32_13824_bf16_tA.mlir b/gemm/mlir/gemm_5120_32_13824_bf16_tA.mlir
index 265fdc1..d84ed24 100644
--- a/gemm/mlir/gemm_5120_32_13824_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_32_13824_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<13824x5120xbf16>, %arg1: tensor<13824x32xbf16>) -> tensor<5120x32xbf16> {
+  func.func @main(%arg0: tensor<13824x5120xbf16>, %arg1: tensor<13824x32xbf16>) -> tensor<5120x32xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x32xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xbf16>, tensor<13824x32xbf16>) outs(%1 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16>
     return %2 : tensor<5120x32xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_32_13824_f16_tA.mlir b/gemm/mlir/gemm_5120_32_13824_f16_tA.mlir
index 0ab9e18..f50d0d0 100644
--- a/gemm/mlir/gemm_5120_32_13824_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_32_13824_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<13824x5120xf16>, %arg1: tensor<13824x32xf16>) -> tensor<5120x32xf16> {
+  func.func @main(%arg0: tensor<13824x5120xf16>, %arg1: tensor<13824x32xf16>) -> tensor<5120x32xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x32xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x32xf16>) -> tensor<5120x32xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xf16>, tensor<13824x32xf16>) outs(%1 : tensor<5120x32xf16>) -> tensor<5120x32xf16>
     return %2 : tensor<5120x32xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_32_1728_bf16_tA.mlir b/gemm/mlir/gemm_5120_32_1728_bf16_tA.mlir
index 6687a7e..a4af4b4 100644
--- a/gemm/mlir/gemm_5120_32_1728_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_32_1728_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<1728x5120xbf16>, %arg1: tensor<1728x32xbf16>) -> tensor<5120x32xbf16> {
+  func.func @main(%arg0: tensor<1728x5120xbf16>, %arg1: tensor<1728x32xbf16>) -> tensor<5120x32xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x32xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xbf16>, tensor<1728x32xbf16>) outs(%1 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16>
     return %2 : tensor<5120x32xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_32_1728_f16_tA.mlir b/gemm/mlir/gemm_5120_32_1728_f16_tA.mlir
index e5e927e..16e7179 100644
--- a/gemm/mlir/gemm_5120_32_1728_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_32_1728_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<1728x5120xf16>, %arg1: tensor<1728x32xf16>) -> tensor<5120x32xf16> {
+  func.func @main(%arg0: tensor<1728x5120xf16>, %arg1: tensor<1728x32xf16>) -> tensor<5120x32xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x32xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x32xf16>) -> tensor<5120x32xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xf16>, tensor<1728x32xf16>) outs(%1 : tensor<5120x32xf16>) -> tensor<5120x32xf16>
     return %2 : tensor<5120x32xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_32_2560_bf16_tA.mlir b/gemm/mlir/gemm_5120_32_2560_bf16_tA.mlir
index a5e716f..bea8cb5 100644
--- a/gemm/mlir/gemm_5120_32_2560_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_32_2560_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<2560x5120xbf16>, %arg1: tensor<2560x32xbf16>) -> tensor<5120x32xbf16> {
+  func.func @main(%arg0: tensor<2560x5120xbf16>, %arg1: tensor<2560x32xbf16>) -> tensor<5120x32xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x32xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xbf16>, tensor<2560x32xbf16>) outs(%1 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16>
     return %2 : tensor<5120x32xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_32_2560_f16_tA.mlir b/gemm/mlir/gemm_5120_32_2560_f16_tA.mlir
index e0b47f0..d4d7491 100644
--- a/gemm/mlir/gemm_5120_32_2560_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_32_2560_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<2560x5120xf16>, %arg1: tensor<2560x32xf16>) -> tensor<5120x32xf16> {
+  func.func @main(%arg0: tensor<2560x5120xf16>, %arg1: tensor<2560x32xf16>) -> tensor<5120x32xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x32xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x32xf16>) -> tensor<5120x32xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xf16>, tensor<2560x32xf16>) outs(%1 : tensor<5120x32xf16>) -> tensor<5120x32xf16>
     return %2 : tensor<5120x32xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_32_3456_bf16_tA.mlir b/gemm/mlir/gemm_5120_32_3456_bf16_tA.mlir
index f48e631..a1ec40e 100644
--- a/gemm/mlir/gemm_5120_32_3456_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_32_3456_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<3456x5120xbf16>, %arg1: tensor<3456x32xbf16>) -> tensor<5120x32xbf16> {
+  func.func @main(%arg0: tensor<3456x5120xbf16>, %arg1: tensor<3456x32xbf16>) -> tensor<5120x32xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x32xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xbf16>, tensor<3456x32xbf16>) outs(%1 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16>
     return %2 : tensor<5120x32xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_32_3456_f16_tA.mlir b/gemm/mlir/gemm_5120_32_3456_f16_tA.mlir
index 4e674d1..8f6301c 100644
--- a/gemm/mlir/gemm_5120_32_3456_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_32_3456_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<3456x5120xf16>, %arg1: tensor<3456x32xf16>) -> tensor<5120x32xf16> {
+  func.func @main(%arg0: tensor<3456x5120xf16>, %arg1: tensor<3456x32xf16>) -> tensor<5120x32xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x32xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x32xf16>) -> tensor<5120x32xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xf16>, tensor<3456x32xf16>) outs(%1 : tensor<5120x32xf16>) -> tensor<5120x32xf16>
     return %2 : tensor<5120x32xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_5120_32_5120_bf16_tA.mlir
index 832c3c2..4c72158 100644
--- a/gemm/mlir/gemm_5120_32_5120_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_32_5120_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x5120xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<5120x32xbf16> {
+  func.func @main(%arg0: tensor<5120x5120xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<5120x32xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x32xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16>
     return %2 : tensor<5120x32xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_32_5120_f16_tA.mlir b/gemm/mlir/gemm_5120_32_5120_f16_tA.mlir
index fc13ee2..027a09f 100644
--- a/gemm/mlir/gemm_5120_32_5120_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_32_5120_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x5120xf16>, %arg1: tensor<5120x32xf16>) -> tensor<5120x32xf16> {
+  func.func @main(%arg0: tensor<5120x5120xf16>, %arg1: tensor<5120x32xf16>) -> tensor<5120x32xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x32xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x32xf16>) -> tensor<5120x32xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xf16>, tensor<5120x32xf16>) outs(%1 : tensor<5120x32xf16>) -> tensor<5120x32xf16>
     return %2 : tensor<5120x32xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_32_640_bf16_tA.mlir b/gemm/mlir/gemm_5120_32_640_bf16_tA.mlir
index 6684884..fec70cb 100644
--- a/gemm/mlir/gemm_5120_32_640_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_32_640_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<640x5120xbf16>, %arg1: tensor<640x32xbf16>) -> tensor<5120x32xbf16> {
+  func.func @main(%arg0: tensor<640x5120xbf16>, %arg1: tensor<640x32xbf16>) -> tensor<5120x32xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x32xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xbf16>, tensor<640x32xbf16>) outs(%1 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16>
     return %2 : tensor<5120x32xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_32_640_f16_tA.mlir b/gemm/mlir/gemm_5120_32_640_f16_tA.mlir
index 8d2c153..d2e3949 100644
--- a/gemm/mlir/gemm_5120_32_640_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_32_640_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<640x5120xf16>, %arg1: tensor<640x32xf16>) -> tensor<5120x32xf16> {
+  func.func @main(%arg0: tensor<640x5120xf16>, %arg1: tensor<640x32xf16>) -> tensor<5120x32xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x32xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x32xf16>) -> tensor<5120x32xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xf16>, tensor<640x32xf16>) outs(%1 : tensor<5120x32xf16>) -> tensor<5120x32xf16>
     return %2 : tensor<5120x32xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_32_6912_bf16_tA.mlir b/gemm/mlir/gemm_5120_32_6912_bf16_tA.mlir
index 9d389f1..7e22180 100644
--- a/gemm/mlir/gemm_5120_32_6912_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_32_6912_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<6912x5120xbf16>, %arg1: tensor<6912x32xbf16>) -> tensor<5120x32xbf16> {
+  func.func @main(%arg0: tensor<6912x5120xbf16>, %arg1: tensor<6912x32xbf16>) -> tensor<5120x32xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x32xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<6912x5120xbf16>, tensor<6912x32xbf16>) outs(%1 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16>
     return %2 : tensor<5120x32xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_32_6912_f16_tA.mlir b/gemm/mlir/gemm_5120_32_6912_f16_tA.mlir
index 5f3a76c..1d9947a 100644
--- a/gemm/mlir/gemm_5120_32_6912_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_32_6912_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<6912x5120xf16>, %arg1: tensor<6912x32xf16>) -> tensor<5120x32xf16> {
+  func.func @main(%arg0: tensor<6912x5120xf16>, %arg1: tensor<6912x32xf16>) -> tensor<5120x32xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x32xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x32xf16>) -> tensor<5120x32xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<6912x5120xf16>, tensor<6912x32xf16>) outs(%1 : tensor<5120x32xf16>) -> tensor<5120x32xf16>
     return %2 : tensor<5120x32xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_5120_32_8192_bf16_tA.mlir
index fa304ce..323437a 100644
--- a/gemm/mlir/gemm_5120_32_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_32_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x5120xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<5120x32xbf16> {
+  func.func @main(%arg0: tensor<8192x5120xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<5120x32xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x32xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16>
     return %2 : tensor<5120x32xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_32_8192_f16_tA.mlir b/gemm/mlir/gemm_5120_32_8192_f16_tA.mlir
index 13e4c8d..91e0026 100644
--- a/gemm/mlir/gemm_5120_32_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_32_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x5120xf16>, %arg1: tensor<8192x32xf16>) -> tensor<5120x32xf16> {
+  func.func @main(%arg0: tensor<8192x5120xf16>, %arg1: tensor<8192x32xf16>) -> tensor<5120x32xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x32xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x32xf16>) -> tensor<5120x32xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xf16>, tensor<8192x32xf16>) outs(%1 : tensor<5120x32xf16>) -> tensor<5120x32xf16>
     return %2 : tensor<5120x32xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_4_1280_bf16_tA.mlir b/gemm/mlir/gemm_5120_4_1280_bf16_tA.mlir
index fb0b017..b02b975 100644
--- a/gemm/mlir/gemm_5120_4_1280_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_4_1280_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<1280x5120xbf16>, %arg1: tensor<1280x4xbf16>) -> tensor<5120x4xbf16> {
+  func.func @main(%arg0: tensor<1280x5120xbf16>, %arg1: tensor<1280x4xbf16>) -> tensor<5120x4xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x4xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xbf16>, tensor<1280x4xbf16>) outs(%1 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16>
     return %2 : tensor<5120x4xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_4_1280_f16_tA.mlir b/gemm/mlir/gemm_5120_4_1280_f16_tA.mlir
index c2129bb..cdbe240 100644
--- a/gemm/mlir/gemm_5120_4_1280_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_4_1280_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<1280x5120xf16>, %arg1: tensor<1280x4xf16>) -> tensor<5120x4xf16> {
+  func.func @main(%arg0: tensor<1280x5120xf16>, %arg1: tensor<1280x4xf16>) -> tensor<5120x4xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x4xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x4xf16>) -> tensor<5120x4xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xf16>, tensor<1280x4xf16>) outs(%1 : tensor<5120x4xf16>) -> tensor<5120x4xf16>
     return %2 : tensor<5120x4xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_4_13824_bf16_tA.mlir b/gemm/mlir/gemm_5120_4_13824_bf16_tA.mlir
index 2f44985..c024c59 100644
--- a/gemm/mlir/gemm_5120_4_13824_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_4_13824_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<13824x5120xbf16>, %arg1: tensor<13824x4xbf16>) -> tensor<5120x4xbf16> {
+  func.func @main(%arg0: tensor<13824x5120xbf16>, %arg1: tensor<13824x4xbf16>) -> tensor<5120x4xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x4xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xbf16>, tensor<13824x4xbf16>) outs(%1 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16>
     return %2 : tensor<5120x4xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_4_13824_f16_tA.mlir b/gemm/mlir/gemm_5120_4_13824_f16_tA.mlir
index 8ba061f..1b355e9 100644
--- a/gemm/mlir/gemm_5120_4_13824_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_4_13824_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<13824x5120xf16>, %arg1: tensor<13824x4xf16>) -> tensor<5120x4xf16> {
+  func.func @main(%arg0: tensor<13824x5120xf16>, %arg1: tensor<13824x4xf16>) -> tensor<5120x4xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x4xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x4xf16>) -> tensor<5120x4xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xf16>, tensor<13824x4xf16>) outs(%1 : tensor<5120x4xf16>) -> tensor<5120x4xf16>
     return %2 : tensor<5120x4xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_4_1728_bf16_tA.mlir b/gemm/mlir/gemm_5120_4_1728_bf16_tA.mlir
index 7db150c..77d316d 100644
--- a/gemm/mlir/gemm_5120_4_1728_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_4_1728_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<1728x5120xbf16>, %arg1: tensor<1728x4xbf16>) -> tensor<5120x4xbf16> {
+  func.func @main(%arg0: tensor<1728x5120xbf16>, %arg1: tensor<1728x4xbf16>) -> tensor<5120x4xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x4xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xbf16>, tensor<1728x4xbf16>) outs(%1 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16>
     return %2 : tensor<5120x4xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_4_1728_f16_tA.mlir b/gemm/mlir/gemm_5120_4_1728_f16_tA.mlir
index 5b697a3..b77fd46 100644
--- a/gemm/mlir/gemm_5120_4_1728_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_4_1728_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<1728x5120xf16>, %arg1: tensor<1728x4xf16>) -> tensor<5120x4xf16> {
+  func.func @main(%arg0: tensor<1728x5120xf16>, %arg1: tensor<1728x4xf16>) -> tensor<5120x4xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x4xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x4xf16>) -> tensor<5120x4xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xf16>, tensor<1728x4xf16>) outs(%1 : tensor<5120x4xf16>) -> tensor<5120x4xf16>
     return %2 : tensor<5120x4xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_4_2560_bf16_tA.mlir b/gemm/mlir/gemm_5120_4_2560_bf16_tA.mlir
index e1775bd..b441065 100644
--- a/gemm/mlir/gemm_5120_4_2560_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_4_2560_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<2560x5120xbf16>, %arg1: tensor<2560x4xbf16>) -> tensor<5120x4xbf16> {
+  func.func @main(%arg0: tensor<2560x5120xbf16>, %arg1: tensor<2560x4xbf16>) -> tensor<5120x4xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x4xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xbf16>, tensor<2560x4xbf16>) outs(%1 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16>
     return %2 : tensor<5120x4xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_4_2560_f16_tA.mlir b/gemm/mlir/gemm_5120_4_2560_f16_tA.mlir
index 0fa3e27..78af1ae 100644
--- a/gemm/mlir/gemm_5120_4_2560_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_4_2560_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<2560x5120xf16>, %arg1: tensor<2560x4xf16>) -> tensor<5120x4xf16> {
+  func.func @main(%arg0: tensor<2560x5120xf16>, %arg1: tensor<2560x4xf16>) -> tensor<5120x4xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x4xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x4xf16>) -> tensor<5120x4xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xf16>, tensor<2560x4xf16>) outs(%1 : tensor<5120x4xf16>) -> tensor<5120x4xf16>
     return %2 : tensor<5120x4xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_4_3456_bf16_tA.mlir b/gemm/mlir/gemm_5120_4_3456_bf16_tA.mlir
index 9a7c145..65e3813 100644
--- a/gemm/mlir/gemm_5120_4_3456_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_4_3456_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<3456x5120xbf16>, %arg1: tensor<3456x4xbf16>) -> tensor<5120x4xbf16> {
+  func.func @main(%arg0: tensor<3456x5120xbf16>, %arg1: tensor<3456x4xbf16>) -> tensor<5120x4xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x4xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xbf16>, tensor<3456x4xbf16>) outs(%1 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16>
     return %2 : tensor<5120x4xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_4_3456_f16_tA.mlir b/gemm/mlir/gemm_5120_4_3456_f16_tA.mlir
index cb83bba..055a56a 100644
--- a/gemm/mlir/gemm_5120_4_3456_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_4_3456_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<3456x5120xf16>, %arg1: tensor<3456x4xf16>) -> tensor<5120x4xf16> {
+  func.func @main(%arg0: tensor<3456x5120xf16>, %arg1: tensor<3456x4xf16>) -> tensor<5120x4xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x4xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x4xf16>) -> tensor<5120x4xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xf16>, tensor<3456x4xf16>) outs(%1 : tensor<5120x4xf16>) -> tensor<5120x4xf16>
     return %2 : tensor<5120x4xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_5120_4_5120_bf16_tA.mlir
index 051d5e1..133c6e2 100644
--- a/gemm/mlir/gemm_5120_4_5120_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_4_5120_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x5120xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<5120x4xbf16> {
+  func.func @main(%arg0: tensor<5120x5120xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<5120x4xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x4xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16>
     return %2 : tensor<5120x4xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_4_5120_f16_tA.mlir b/gemm/mlir/gemm_5120_4_5120_f16_tA.mlir
index 24df30b..3b6cabf 100644
--- a/gemm/mlir/gemm_5120_4_5120_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_4_5120_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x5120xf16>, %arg1: tensor<5120x4xf16>) -> tensor<5120x4xf16> {
+  func.func @main(%arg0: tensor<5120x5120xf16>, %arg1: tensor<5120x4xf16>) -> tensor<5120x4xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x4xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x4xf16>) -> tensor<5120x4xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xf16>, tensor<5120x4xf16>) outs(%1 : tensor<5120x4xf16>) -> tensor<5120x4xf16>
     return %2 : tensor<5120x4xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_4_640_bf16_tA.mlir b/gemm/mlir/gemm_5120_4_640_bf16_tA.mlir
index cb11302..1e22dd9 100644
--- a/gemm/mlir/gemm_5120_4_640_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_4_640_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<640x5120xbf16>, %arg1: tensor<640x4xbf16>) -> tensor<5120x4xbf16> {
+  func.func @main(%arg0: tensor<640x5120xbf16>, %arg1: tensor<640x4xbf16>) -> tensor<5120x4xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x4xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xbf16>, tensor<640x4xbf16>) outs(%1 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16>
     return %2 : tensor<5120x4xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_4_640_f16_tA.mlir b/gemm/mlir/gemm_5120_4_640_f16_tA.mlir
index 1cbfd8b..f7459f4 100644
--- a/gemm/mlir/gemm_5120_4_640_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_4_640_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<640x5120xf16>, %arg1: tensor<640x4xf16>) -> tensor<5120x4xf16> {
+  func.func @main(%arg0: tensor<640x5120xf16>, %arg1: tensor<640x4xf16>) -> tensor<5120x4xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x4xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x4xf16>) -> tensor<5120x4xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xf16>, tensor<640x4xf16>) outs(%1 : tensor<5120x4xf16>) -> tensor<5120x4xf16>
     return %2 : tensor<5120x4xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_4_6912_bf16_tA.mlir b/gemm/mlir/gemm_5120_4_6912_bf16_tA.mlir
index 579b577..9244683 100644
--- a/gemm/mlir/gemm_5120_4_6912_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_4_6912_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<6912x5120xbf16>, %arg1: tensor<6912x4xbf16>) -> tensor<5120x4xbf16> {
+  func.func @main(%arg0: tensor<6912x5120xbf16>, %arg1: tensor<6912x4xbf16>) -> tensor<5120x4xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x4xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<6912x5120xbf16>, tensor<6912x4xbf16>) outs(%1 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16>
     return %2 : tensor<5120x4xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_4_6912_f16_tA.mlir b/gemm/mlir/gemm_5120_4_6912_f16_tA.mlir
index d16b66d..f3c0b6a 100644
--- a/gemm/mlir/gemm_5120_4_6912_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_4_6912_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<6912x5120xf16>, %arg1: tensor<6912x4xf16>) -> tensor<5120x4xf16> {
+  func.func @main(%arg0: tensor<6912x5120xf16>, %arg1: tensor<6912x4xf16>) -> tensor<5120x4xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x4xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x4xf16>) -> tensor<5120x4xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<6912x5120xf16>, tensor<6912x4xf16>) outs(%1 : tensor<5120x4xf16>) -> tensor<5120x4xf16>
     return %2 : tensor<5120x4xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_5120_4_8192_bf16_tA.mlir
index 678fff8..1e39bcc 100644
--- a/gemm/mlir/gemm_5120_4_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_4_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x5120xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<5120x4xbf16> {
+  func.func @main(%arg0: tensor<8192x5120xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<5120x4xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x4xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16>
     return %2 : tensor<5120x4xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_4_8192_f16_tA.mlir b/gemm/mlir/gemm_5120_4_8192_f16_tA.mlir
index 708ed77..59ff5c1 100644
--- a/gemm/mlir/gemm_5120_4_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_4_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x5120xf16>, %arg1: tensor<8192x4xf16>) -> tensor<5120x4xf16> {
+  func.func @main(%arg0: tensor<8192x5120xf16>, %arg1: tensor<8192x4xf16>) -> tensor<5120x4xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x4xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x4xf16>) -> tensor<5120x4xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xf16>, tensor<8192x4xf16>) outs(%1 : tensor<5120x4xf16>) -> tensor<5120x4xf16>
     return %2 : tensor<5120x4xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_8_1280_bf16_tA.mlir b/gemm/mlir/gemm_5120_8_1280_bf16_tA.mlir
index c9ec3dd..090d0a3 100644
--- a/gemm/mlir/gemm_5120_8_1280_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_8_1280_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<1280x5120xbf16>, %arg1: tensor<1280x8xbf16>) -> tensor<5120x8xbf16> {
+  func.func @main(%arg0: tensor<1280x5120xbf16>, %arg1: tensor<1280x8xbf16>) -> tensor<5120x8xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x8xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xbf16>, tensor<1280x8xbf16>) outs(%1 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16>
     return %2 : tensor<5120x8xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_8_1280_f16_tA.mlir b/gemm/mlir/gemm_5120_8_1280_f16_tA.mlir
index 332820c..68c2973 100644
--- a/gemm/mlir/gemm_5120_8_1280_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_8_1280_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<1280x5120xf16>, %arg1: tensor<1280x8xf16>) -> tensor<5120x8xf16> {
+  func.func @main(%arg0: tensor<1280x5120xf16>, %arg1: tensor<1280x8xf16>) -> tensor<5120x8xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x8xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x8xf16>) -> tensor<5120x8xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xf16>, tensor<1280x8xf16>) outs(%1 : tensor<5120x8xf16>) -> tensor<5120x8xf16>
     return %2 : tensor<5120x8xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_8_13824_bf16_tA.mlir b/gemm/mlir/gemm_5120_8_13824_bf16_tA.mlir
index 72d5e9e..b80c0d8 100644
--- a/gemm/mlir/gemm_5120_8_13824_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_8_13824_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<13824x5120xbf16>, %arg1: tensor<13824x8xbf16>) -> tensor<5120x8xbf16> {
+  func.func @main(%arg0: tensor<13824x5120xbf16>, %arg1: tensor<13824x8xbf16>) -> tensor<5120x8xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x8xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xbf16>, tensor<13824x8xbf16>) outs(%1 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16>
     return %2 : tensor<5120x8xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_8_13824_f16_tA.mlir b/gemm/mlir/gemm_5120_8_13824_f16_tA.mlir
index 15e392b..77658a9 100644
--- a/gemm/mlir/gemm_5120_8_13824_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_8_13824_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<13824x5120xf16>, %arg1: tensor<13824x8xf16>) -> tensor<5120x8xf16> {
+  func.func @main(%arg0: tensor<13824x5120xf16>, %arg1: tensor<13824x8xf16>) -> tensor<5120x8xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x8xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x8xf16>) -> tensor<5120x8xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xf16>, tensor<13824x8xf16>) outs(%1 : tensor<5120x8xf16>) -> tensor<5120x8xf16>
     return %2 : tensor<5120x8xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_8_1728_bf16_tA.mlir b/gemm/mlir/gemm_5120_8_1728_bf16_tA.mlir
index ee5abd5..3d405b3 100644
--- a/gemm/mlir/gemm_5120_8_1728_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_8_1728_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<1728x5120xbf16>, %arg1: tensor<1728x8xbf16>) -> tensor<5120x8xbf16> {
+  func.func @main(%arg0: tensor<1728x5120xbf16>, %arg1: tensor<1728x8xbf16>) -> tensor<5120x8xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x8xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xbf16>, tensor<1728x8xbf16>) outs(%1 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16>
     return %2 : tensor<5120x8xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_8_1728_f16_tA.mlir b/gemm/mlir/gemm_5120_8_1728_f16_tA.mlir
index 1be3b9b..9717a1c 100644
--- a/gemm/mlir/gemm_5120_8_1728_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_8_1728_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<1728x5120xf16>, %arg1: tensor<1728x8xf16>) -> tensor<5120x8xf16> {
+  func.func @main(%arg0: tensor<1728x5120xf16>, %arg1: tensor<1728x8xf16>) -> tensor<5120x8xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x8xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x8xf16>) -> tensor<5120x8xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xf16>, tensor<1728x8xf16>) outs(%1 : tensor<5120x8xf16>) -> tensor<5120x8xf16>
     return %2 : tensor<5120x8xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_8_2560_bf16_tA.mlir b/gemm/mlir/gemm_5120_8_2560_bf16_tA.mlir
index e4f6f55..e20b534 100644
--- a/gemm/mlir/gemm_5120_8_2560_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_8_2560_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<2560x5120xbf16>, %arg1: tensor<2560x8xbf16>) -> tensor<5120x8xbf16> {
+  func.func @main(%arg0: tensor<2560x5120xbf16>, %arg1: tensor<2560x8xbf16>) -> tensor<5120x8xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x8xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xbf16>, tensor<2560x8xbf16>) outs(%1 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16>
     return %2 : tensor<5120x8xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_8_2560_f16_tA.mlir b/gemm/mlir/gemm_5120_8_2560_f16_tA.mlir
index 8eb2094..fcb3692 100644
--- a/gemm/mlir/gemm_5120_8_2560_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_8_2560_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<2560x5120xf16>, %arg1: tensor<2560x8xf16>) -> tensor<5120x8xf16> {
+  func.func @main(%arg0: tensor<2560x5120xf16>, %arg1: tensor<2560x8xf16>) -> tensor<5120x8xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x8xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x8xf16>) -> tensor<5120x8xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xf16>, tensor<2560x8xf16>) outs(%1 : tensor<5120x8xf16>) -> tensor<5120x8xf16>
     return %2 : tensor<5120x8xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_8_3456_bf16_tA.mlir b/gemm/mlir/gemm_5120_8_3456_bf16_tA.mlir
index c8cecb5..e86a941 100644
--- a/gemm/mlir/gemm_5120_8_3456_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_8_3456_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<3456x5120xbf16>, %arg1: tensor<3456x8xbf16>) -> tensor<5120x8xbf16> {
+  func.func @main(%arg0: tensor<3456x5120xbf16>, %arg1: tensor<3456x8xbf16>) -> tensor<5120x8xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x8xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xbf16>, tensor<3456x8xbf16>) outs(%1 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16>
     return %2 : tensor<5120x8xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_8_3456_f16_tA.mlir b/gemm/mlir/gemm_5120_8_3456_f16_tA.mlir
index abd3026..b81b946 100644
--- a/gemm/mlir/gemm_5120_8_3456_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_8_3456_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<3456x5120xf16>, %arg1: tensor<3456x8xf16>) -> tensor<5120x8xf16> {
+  func.func @main(%arg0: tensor<3456x5120xf16>, %arg1: tensor<3456x8xf16>) -> tensor<5120x8xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x8xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x8xf16>) -> tensor<5120x8xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xf16>, tensor<3456x8xf16>) outs(%1 : tensor<5120x8xf16>) -> tensor<5120x8xf16>
     return %2 : tensor<5120x8xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_5120_8_5120_bf16_tA.mlir
index 06c159d..b66fabd 100644
--- a/gemm/mlir/gemm_5120_8_5120_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_8_5120_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x5120xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<5120x8xbf16> {
+  func.func @main(%arg0: tensor<5120x5120xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<5120x8xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x8xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16>
     return %2 : tensor<5120x8xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_8_5120_f16_tA.mlir b/gemm/mlir/gemm_5120_8_5120_f16_tA.mlir
index 45ee2f7..b42ef4d 100644
--- a/gemm/mlir/gemm_5120_8_5120_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_8_5120_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<5120x5120xf16>, %arg1: tensor<5120x8xf16>) -> tensor<5120x8xf16> {
+  func.func @main(%arg0: tensor<5120x5120xf16>, %arg1: tensor<5120x8xf16>) -> tensor<5120x8xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x8xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x8xf16>) -> tensor<5120x8xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xf16>, tensor<5120x8xf16>) outs(%1 : tensor<5120x8xf16>) -> tensor<5120x8xf16>
     return %2 : tensor<5120x8xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_8_640_bf16_tA.mlir b/gemm/mlir/gemm_5120_8_640_bf16_tA.mlir
index a171f4a..919f4aa 100644
--- a/gemm/mlir/gemm_5120_8_640_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_8_640_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<640x5120xbf16>, %arg1: tensor<640x8xbf16>) -> tensor<5120x8xbf16> {
+  func.func @main(%arg0: tensor<640x5120xbf16>, %arg1: tensor<640x8xbf16>) -> tensor<5120x8xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x8xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xbf16>, tensor<640x8xbf16>) outs(%1 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16>
     return %2 : tensor<5120x8xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_8_640_f16_tA.mlir b/gemm/mlir/gemm_5120_8_640_f16_tA.mlir
index e64ddeb..2667615 100644
--- a/gemm/mlir/gemm_5120_8_640_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_8_640_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<640x5120xf16>, %arg1: tensor<640x8xf16>) -> tensor<5120x8xf16> {
+  func.func @main(%arg0: tensor<640x5120xf16>, %arg1: tensor<640x8xf16>) -> tensor<5120x8xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x8xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x8xf16>) -> tensor<5120x8xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xf16>, tensor<640x8xf16>) outs(%1 : tensor<5120x8xf16>) -> tensor<5120x8xf16>
     return %2 : tensor<5120x8xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_8_6912_bf16_tA.mlir b/gemm/mlir/gemm_5120_8_6912_bf16_tA.mlir
index 025a5f7..68be7d6 100644
--- a/gemm/mlir/gemm_5120_8_6912_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_8_6912_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<6912x5120xbf16>, %arg1: tensor<6912x8xbf16>) -> tensor<5120x8xbf16> {
+  func.func @main(%arg0: tensor<6912x5120xbf16>, %arg1: tensor<6912x8xbf16>) -> tensor<5120x8xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x8xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<6912x5120xbf16>, tensor<6912x8xbf16>) outs(%1 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16>
     return %2 : tensor<5120x8xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_8_6912_f16_tA.mlir b/gemm/mlir/gemm_5120_8_6912_f16_tA.mlir
index 828bcb1..b423ad4 100644
--- a/gemm/mlir/gemm_5120_8_6912_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_8_6912_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<6912x5120xf16>, %arg1: tensor<6912x8xf16>) -> tensor<5120x8xf16> {
+  func.func @main(%arg0: tensor<6912x5120xf16>, %arg1: tensor<6912x8xf16>) -> tensor<5120x8xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x8xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x8xf16>) -> tensor<5120x8xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<6912x5120xf16>, tensor<6912x8xf16>) outs(%1 : tensor<5120x8xf16>) -> tensor<5120x8xf16>
     return %2 : tensor<5120x8xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_5120_8_8192_bf16_tA.mlir
index 5a7f3ab..70c44a9 100644
--- a/gemm/mlir/gemm_5120_8_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_5120_8_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x5120xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<5120x8xbf16> {
+  func.func @main(%arg0: tensor<8192x5120xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<5120x8xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<5120x8xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xbf16>, tensor<8192x8xbf16>) outs(%1 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16>
     return %2 : tensor<5120x8xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_5120_8_8192_f16_tA.mlir b/gemm/mlir/gemm_5120_8_8192_f16_tA.mlir
index 8245617..79a3420 100644
--- a/gemm/mlir/gemm_5120_8_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_5120_8_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x5120xf16>, %arg1: tensor<8192x8xf16>) -> tensor<5120x8xf16> {
+  func.func @main(%arg0: tensor<8192x5120xf16>, %arg1: tensor<8192x8xf16>) -> tensor<5120x8xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<5120x8xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x8xf16>) -> tensor<5120x8xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xf16>, tensor<8192x8xf16>) outs(%1 : tensor<5120x8xf16>) -> tensor<5120x8xf16>
     return %2 : tensor<5120x8xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_57344_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_57344_16_8192_bf16_tA.mlir
index eed5995..c05fd42 100644
--- a/gemm/mlir/gemm_57344_16_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_57344_16_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x57344xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<57344x16xbf16> {
+  func.func @main(%arg0: tensor<8192x57344xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<57344x16xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<57344x16xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<57344x16xbf16>) -> tensor<57344x16xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<57344x16xbf16>) -> tensor<57344x16xbf16>
     return %2 : tensor<57344x16xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_57344_16_8192_f16_tA.mlir b/gemm/mlir/gemm_57344_16_8192_f16_tA.mlir
index 88699d6..a27c3ca 100644
--- a/gemm/mlir/gemm_57344_16_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_57344_16_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x57344xf16>, %arg1: tensor<8192x16xf16>) -> tensor<57344x16xf16> {
+  func.func @main(%arg0: tensor<8192x57344xf16>, %arg1: tensor<8192x16xf16>) -> tensor<57344x16xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<57344x16xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<57344x16xf16>) -> tensor<57344x16xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xf16>, tensor<8192x16xf16>) outs(%1 : tensor<57344x16xf16>) -> tensor<57344x16xf16>
     return %2 : tensor<57344x16xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_57344_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_57344_1_8192_bf16_tA.mlir
index 09807d7..6b50b47 100644
--- a/gemm/mlir/gemm_57344_1_8192_bf16_tA.mlir
+++
b/gemm/mlir/gemm_57344_1_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x57344xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<57344x1xbf16> { + func.func @main(%arg0: tensor<8192x57344xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<57344x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<57344x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<57344x1xbf16>) -> tensor<57344x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<57344x1xbf16>) -> tensor<57344x1xbf16> return %2 : tensor<57344x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_57344_1_8192_f16_tA.mlir b/gemm/mlir/gemm_57344_1_8192_f16_tA.mlir index 49be87f..a391e24 100644 --- a/gemm/mlir/gemm_57344_1_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_57344_1_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x57344xf16>, %arg1: tensor<8192x1xf16>) -> tensor<57344x1xf16> { + func.func @main(%arg0: tensor<8192x57344xf16>, %arg1: tensor<8192x1xf16>) -> tensor<57344x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<57344x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<57344x1xf16>) -> tensor<57344x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xf16>, tensor<8192x1xf16>) outs(%1 : tensor<57344x1xf16>) -> tensor<57344x1xf16> return %2 : tensor<57344x1xf16> } -} +} diff --git a/gemm/mlir/gemm_57344_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_57344_2_8192_bf16_tA.mlir index db022a1..b176f2c 100644 --- a/gemm/mlir/gemm_57344_2_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_57344_2_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x57344xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<57344x2xbf16> { + func.func @main(%arg0: tensor<8192x57344xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<57344x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<57344x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<57344x2xbf16>) -> tensor<57344x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<57344x2xbf16>) -> tensor<57344x2xbf16> return %2 : tensor<57344x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_57344_2_8192_f16_tA.mlir b/gemm/mlir/gemm_57344_2_8192_f16_tA.mlir index b0b3085..ffac68f 100644 --- a/gemm/mlir/gemm_57344_2_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_57344_2_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x57344xf16>, %arg1: tensor<8192x2xf16>) -> tensor<57344x2xf16> { + func.func @main(%arg0: tensor<8192x57344xf16>, %arg1: tensor<8192x2xf16>) -> tensor<57344x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<57344x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<57344x2xf16>) -> tensor<57344x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xf16>, tensor<8192x2xf16>) outs(%1 : tensor<57344x2xf16>) -> tensor<57344x2xf16> return %2 : tensor<57344x2xf16> } -} +} diff --git a/gemm/mlir/gemm_57344_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_57344_32_8192_bf16_tA.mlir index 963021f..bbe0c75 100644 --- a/gemm/mlir/gemm_57344_32_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_57344_32_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x57344xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<57344x32xbf16> { + func.func @main(%arg0: tensor<8192x57344xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<57344x32xbf16> { 
%cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<57344x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<57344x32xbf16>) -> tensor<57344x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<57344x32xbf16>) -> tensor<57344x32xbf16> return %2 : tensor<57344x32xbf16> } -} +} diff --git a/gemm/mlir/gemm_57344_32_8192_f16_tA.mlir b/gemm/mlir/gemm_57344_32_8192_f16_tA.mlir index 828eedb..34675d0 100644 --- a/gemm/mlir/gemm_57344_32_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_57344_32_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x57344xf16>, %arg1: tensor<8192x32xf16>) -> tensor<57344x32xf16> { + func.func @main(%arg0: tensor<8192x57344xf16>, %arg1: tensor<8192x32xf16>) -> tensor<57344x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<57344x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<57344x32xf16>) -> tensor<57344x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xf16>, tensor<8192x32xf16>) outs(%1 : tensor<57344x32xf16>) -> tensor<57344x32xf16> return %2 : tensor<57344x32xf16> } -} +} diff --git a/gemm/mlir/gemm_57344_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_57344_4_8192_bf16_tA.mlir index 29d4cbf..2189c7e 100644 --- a/gemm/mlir/gemm_57344_4_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_57344_4_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x57344xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<57344x4xbf16> { + func.func @main(%arg0: tensor<8192x57344xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<57344x4xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<57344x4xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<57344x4xbf16>) -> tensor<57344x4xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<57344x4xbf16>) -> tensor<57344x4xbf16> return %2 : tensor<57344x4xbf16> } -} +} diff --git a/gemm/mlir/gemm_57344_4_8192_f16_tA.mlir b/gemm/mlir/gemm_57344_4_8192_f16_tA.mlir index ba6f0f2..5419137 100644 --- a/gemm/mlir/gemm_57344_4_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_57344_4_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x57344xf16>, %arg1: tensor<8192x4xf16>) -> tensor<57344x4xf16> { + func.func @main(%arg0: tensor<8192x57344xf16>, %arg1: tensor<8192x4xf16>) -> tensor<57344x4xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<57344x4xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<57344x4xf16>) -> tensor<57344x4xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xf16>, tensor<8192x4xf16>) outs(%1 : tensor<57344x4xf16>) -> tensor<57344x4xf16> return %2 : tensor<57344x4xf16> } -} +} diff --git a/gemm/mlir/gemm_57344_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_57344_8_8192_bf16_tA.mlir index 9334fb2..84785f7 100644 --- a/gemm/mlir/gemm_57344_8_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_57344_8_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x57344xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<57344x8xbf16> { + func.func @main(%arg0: tensor<8192x57344xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<57344x8xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<57344x8xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<57344x8xbf16>) -> tensor<57344x8xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xbf16>, 
tensor<8192x8xbf16>) outs(%1 : tensor<57344x8xbf16>) -> tensor<57344x8xbf16> return %2 : tensor<57344x8xbf16> } -} +} diff --git a/gemm/mlir/gemm_57344_8_8192_f16_tA.mlir b/gemm/mlir/gemm_57344_8_8192_f16_tA.mlir index e633414..58d7ded 100644 --- a/gemm/mlir/gemm_57344_8_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_57344_8_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x57344xf16>, %arg1: tensor<8192x8xf16>) -> tensor<57344x8xf16> { + func.func @main(%arg0: tensor<8192x57344xf16>, %arg1: tensor<8192x8xf16>) -> tensor<57344x8xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<57344x8xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<57344x8xf16>) -> tensor<57344x8xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xf16>, tensor<8192x8xf16>) outs(%1 : tensor<57344x8xf16>) -> tensor<57344x8xf16> return %2 : tensor<57344x8xf16> } -} +} diff --git a/gemm/mlir/gemm_6912_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_6912_16_5120_bf16_tA.mlir index f624390..a9180ad 100644 --- a/gemm/mlir/gemm_6912_16_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_6912_16_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x6912xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<6912x16xbf16> { + func.func @main(%arg0: tensor<5120x6912xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<6912x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<6912x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<6912x16xbf16>) -> tensor<6912x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<6912x16xbf16>) -> tensor<6912x16xbf16> return %2 : tensor<6912x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_6912_16_5120_f16_tA.mlir b/gemm/mlir/gemm_6912_16_5120_f16_tA.mlir index 771c6da..47aaf92 100644 --- a/gemm/mlir/gemm_6912_16_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_6912_16_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x6912xf16>, %arg1: tensor<5120x16xf16>) -> tensor<6912x16xf16> { + func.func @main(%arg0: tensor<5120x6912xf16>, %arg1: tensor<5120x16xf16>) -> tensor<6912x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<6912x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<6912x16xf16>) -> tensor<6912x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xf16>, tensor<5120x16xf16>) outs(%1 : tensor<6912x16xf16>) -> tensor<6912x16xf16> return %2 : tensor<6912x16xf16> } -} +} diff --git a/gemm/mlir/gemm_6912_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_6912_1_5120_bf16_tA.mlir index cc30e53..f087893 100644 --- a/gemm/mlir/gemm_6912_1_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_6912_1_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x6912xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<6912x1xbf16> { + func.func @main(%arg0: tensor<5120x6912xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<6912x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<6912x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<6912x1xbf16>) -> tensor<6912x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<6912x1xbf16>) -> tensor<6912x1xbf16> return %2 : tensor<6912x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_6912_1_5120_f16_tA.mlir b/gemm/mlir/gemm_6912_1_5120_f16_tA.mlir index 51cd9e1..beee00d 100644 --- a/gemm/mlir/gemm_6912_1_5120_f16_tA.mlir +++ 
b/gemm/mlir/gemm_6912_1_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x6912xf16>, %arg1: tensor<5120x1xf16>) -> tensor<6912x1xf16> { + func.func @main(%arg0: tensor<5120x6912xf16>, %arg1: tensor<5120x1xf16>) -> tensor<6912x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<6912x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<6912x1xf16>) -> tensor<6912x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xf16>, tensor<5120x1xf16>) outs(%1 : tensor<6912x1xf16>) -> tensor<6912x1xf16> return %2 : tensor<6912x1xf16> } -} +} diff --git a/gemm/mlir/gemm_6912_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_6912_2_5120_bf16_tA.mlir index 94f0fa1..441ec83 100644 --- a/gemm/mlir/gemm_6912_2_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_6912_2_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x6912xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<6912x2xbf16> { + func.func @main(%arg0: tensor<5120x6912xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<6912x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<6912x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<6912x2xbf16>) -> tensor<6912x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<6912x2xbf16>) -> tensor<6912x2xbf16> return %2 : tensor<6912x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_6912_2_5120_f16_tA.mlir b/gemm/mlir/gemm_6912_2_5120_f16_tA.mlir index b658eae..397c7b2 100644 --- a/gemm/mlir/gemm_6912_2_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_6912_2_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x6912xf16>, %arg1: tensor<5120x2xf16>) -> tensor<6912x2xf16> { + func.func @main(%arg0: tensor<5120x6912xf16>, %arg1: tensor<5120x2xf16>) -> tensor<6912x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<6912x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<6912x2xf16>) -> tensor<6912x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xf16>, tensor<5120x2xf16>) outs(%1 : tensor<6912x2xf16>) -> tensor<6912x2xf16> return %2 : tensor<6912x2xf16> } -} +} diff --git a/gemm/mlir/gemm_6912_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_6912_32_5120_bf16_tA.mlir index 73ee4e6..926a24a 100644 --- a/gemm/mlir/gemm_6912_32_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_6912_32_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x6912xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<6912x32xbf16> { + func.func @main(%arg0: tensor<5120x6912xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<6912x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<6912x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<6912x32xbf16>) -> tensor<6912x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<6912x32xbf16>) -> tensor<6912x32xbf16> return %2 : tensor<6912x32xbf16> } -} +} diff --git a/gemm/mlir/gemm_6912_32_5120_f16_tA.mlir b/gemm/mlir/gemm_6912_32_5120_f16_tA.mlir index 90ec1b6..75888ec 100644 --- a/gemm/mlir/gemm_6912_32_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_6912_32_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x6912xf16>, %arg1: tensor<5120x32xf16>) -> tensor<6912x32xf16> { + func.func @main(%arg0: tensor<5120x6912xf16>, %arg1: tensor<5120x32xf16>) -> tensor<6912x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = 
tensor.empty() : tensor<6912x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<6912x32xf16>) -> tensor<6912x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xf16>, tensor<5120x32xf16>) outs(%1 : tensor<6912x32xf16>) -> tensor<6912x32xf16> return %2 : tensor<6912x32xf16> } -} +} diff --git a/gemm/mlir/gemm_6912_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_6912_4_5120_bf16_tA.mlir index b68ea97..105402a 100644 --- a/gemm/mlir/gemm_6912_4_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_6912_4_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x6912xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<6912x4xbf16> { + func.func @main(%arg0: tensor<5120x6912xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<6912x4xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<6912x4xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<6912x4xbf16>) -> tensor<6912x4xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<6912x4xbf16>) -> tensor<6912x4xbf16> return %2 : tensor<6912x4xbf16> } -} +} diff --git a/gemm/mlir/gemm_6912_4_5120_f16_tA.mlir b/gemm/mlir/gemm_6912_4_5120_f16_tA.mlir index c1dfedf..2938490 100644 --- a/gemm/mlir/gemm_6912_4_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_6912_4_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x6912xf16>, %arg1: tensor<5120x4xf16>) -> tensor<6912x4xf16> { + func.func @main(%arg0: tensor<5120x6912xf16>, %arg1: tensor<5120x4xf16>) -> tensor<6912x4xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<6912x4xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<6912x4xf16>) -> tensor<6912x4xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xf16>, tensor<5120x4xf16>) outs(%1 : tensor<6912x4xf16>) -> tensor<6912x4xf16> return %2 : tensor<6912x4xf16> } -} +} diff --git a/gemm/mlir/gemm_6912_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_6912_8_5120_bf16_tA.mlir index 437677a..c62dc28 100644 --- a/gemm/mlir/gemm_6912_8_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_6912_8_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x6912xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<6912x8xbf16> { + func.func @main(%arg0: tensor<5120x6912xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<6912x8xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<6912x8xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<6912x8xbf16>) -> tensor<6912x8xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<6912x8xbf16>) -> tensor<6912x8xbf16> return %2 : tensor<6912x8xbf16> } -} +} diff --git a/gemm/mlir/gemm_6912_8_5120_f16_tA.mlir b/gemm/mlir/gemm_6912_8_5120_f16_tA.mlir index 1eee406..0fc7b88 100644 --- a/gemm/mlir/gemm_6912_8_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_6912_8_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x6912xf16>, %arg1: tensor<5120x8xf16>) -> tensor<6912x8xf16> { + func.func @main(%arg0: tensor<5120x6912xf16>, %arg1: tensor<5120x8xf16>) -> tensor<6912x8xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<6912x8xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<6912x8xf16>) -> tensor<6912x8xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xf16>, tensor<5120x8xf16>) outs(%1 : tensor<6912x8xf16>) -> tensor<6912x8xf16> return %2 : tensor<6912x8xf16> } -} +} diff --git 
a/gemm/mlir/gemm_7168_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_7168_16_8192_bf16_tA.mlir index 8f8f690..c7660f1 100644 --- a/gemm/mlir/gemm_7168_16_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_7168_16_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x7168xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<7168x16xbf16> { + func.func @main(%arg0: tensor<8192x7168xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<7168x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<7168x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7168x16xbf16>) -> tensor<7168x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<7168x16xbf16>) -> tensor<7168x16xbf16> return %2 : tensor<7168x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_7168_16_8192_f16_tA.mlir b/gemm/mlir/gemm_7168_16_8192_f16_tA.mlir index ac8a58c..3b4e48c 100644 --- a/gemm/mlir/gemm_7168_16_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_7168_16_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x7168xf16>, %arg1: tensor<8192x16xf16>) -> tensor<7168x16xf16> { + func.func @main(%arg0: tensor<8192x7168xf16>, %arg1: tensor<8192x16xf16>) -> tensor<7168x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<7168x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7168x16xf16>) -> tensor<7168x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xf16>, tensor<8192x16xf16>) outs(%1 : tensor<7168x16xf16>) -> tensor<7168x16xf16> return %2 : tensor<7168x16xf16> } -} +} diff --git a/gemm/mlir/gemm_7168_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_7168_1_8192_bf16_tA.mlir index a159ede..41d8ee8 100644 --- a/gemm/mlir/gemm_7168_1_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_7168_1_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x7168xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<7168x1xbf16> { + func.func @main(%arg0: tensor<8192x7168xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<7168x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<7168x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7168x1xbf16>) -> tensor<7168x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<7168x1xbf16>) -> tensor<7168x1xbf16> return %2 : tensor<7168x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_7168_1_8192_f16_tA.mlir b/gemm/mlir/gemm_7168_1_8192_f16_tA.mlir index 42bc406..93b1d5e 100644 --- a/gemm/mlir/gemm_7168_1_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_7168_1_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x7168xf16>, %arg1: tensor<8192x1xf16>) -> tensor<7168x1xf16> { + func.func @main(%arg0: tensor<8192x7168xf16>, %arg1: tensor<8192x1xf16>) -> tensor<7168x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<7168x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7168x1xf16>) -> tensor<7168x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xf16>, tensor<8192x1xf16>) outs(%1 : tensor<7168x1xf16>) -> tensor<7168x1xf16> return %2 : tensor<7168x1xf16> } -} +} diff --git a/gemm/mlir/gemm_7168_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_7168_2_8192_bf16_tA.mlir index 0850d0e..555cca9 100644 --- a/gemm/mlir/gemm_7168_2_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_7168_2_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x7168xbf16>, %arg1: tensor<8192x2xbf16>) -> 
tensor<7168x2xbf16> { + func.func @main(%arg0: tensor<8192x7168xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<7168x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<7168x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7168x2xbf16>) -> tensor<7168x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<7168x2xbf16>) -> tensor<7168x2xbf16> return %2 : tensor<7168x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_7168_2_8192_f16_tA.mlir b/gemm/mlir/gemm_7168_2_8192_f16_tA.mlir index 038def0..4ab13c2 100644 --- a/gemm/mlir/gemm_7168_2_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_7168_2_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x7168xf16>, %arg1: tensor<8192x2xf16>) -> tensor<7168x2xf16> { + func.func @main(%arg0: tensor<8192x7168xf16>, %arg1: tensor<8192x2xf16>) -> tensor<7168x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<7168x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7168x2xf16>) -> tensor<7168x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xf16>, tensor<8192x2xf16>) outs(%1 : tensor<7168x2xf16>) -> tensor<7168x2xf16> return %2 : tensor<7168x2xf16> } -} +} diff --git a/gemm/mlir/gemm_7168_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_7168_32_8192_bf16_tA.mlir index 63c7701..e6b536c 100644 --- a/gemm/mlir/gemm_7168_32_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_7168_32_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x7168xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<7168x32xbf16> { + func.func @main(%arg0: tensor<8192x7168xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<7168x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<7168x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7168x32xbf16>) -> tensor<7168x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<7168x32xbf16>) -> tensor<7168x32xbf16> return %2 : tensor<7168x32xbf16> } -} +} diff --git a/gemm/mlir/gemm_7168_32_8192_f16_tA.mlir b/gemm/mlir/gemm_7168_32_8192_f16_tA.mlir index 80bb61a..2d2744f 100644 --- a/gemm/mlir/gemm_7168_32_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_7168_32_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x7168xf16>, %arg1: tensor<8192x32xf16>) -> tensor<7168x32xf16> { + func.func @main(%arg0: tensor<8192x7168xf16>, %arg1: tensor<8192x32xf16>) -> tensor<7168x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<7168x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7168x32xf16>) -> tensor<7168x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xf16>, tensor<8192x32xf16>) outs(%1 : tensor<7168x32xf16>) -> tensor<7168x32xf16> return %2 : tensor<7168x32xf16> } -} +} diff --git a/gemm/mlir/gemm_7168_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_7168_4_8192_bf16_tA.mlir index 2d51b63..98c5839 100644 --- a/gemm/mlir/gemm_7168_4_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_7168_4_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x7168xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<7168x4xbf16> { + func.func @main(%arg0: tensor<8192x7168xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<7168x4xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<7168x4xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7168x4xbf16>) -> tensor<7168x4xbf16> %2 = 
linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<7168x4xbf16>) -> tensor<7168x4xbf16> return %2 : tensor<7168x4xbf16> } -} +} diff --git a/gemm/mlir/gemm_7168_4_8192_f16_tA.mlir b/gemm/mlir/gemm_7168_4_8192_f16_tA.mlir index bf655f6..1bf5e1c 100644 --- a/gemm/mlir/gemm_7168_4_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_7168_4_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x7168xf16>, %arg1: tensor<8192x4xf16>) -> tensor<7168x4xf16> { + func.func @main(%arg0: tensor<8192x7168xf16>, %arg1: tensor<8192x4xf16>) -> tensor<7168x4xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<7168x4xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7168x4xf16>) -> tensor<7168x4xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xf16>, tensor<8192x4xf16>) outs(%1 : tensor<7168x4xf16>) -> tensor<7168x4xf16> return %2 : tensor<7168x4xf16> } -} +} diff --git a/gemm/mlir/gemm_7168_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_7168_8_8192_bf16_tA.mlir index 20e0805..c7dbcb9 100644 --- a/gemm/mlir/gemm_7168_8_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_7168_8_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x7168xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<7168x8xbf16> { + func.func @main(%arg0: tensor<8192x7168xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<7168x8xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<7168x8xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7168x8xbf16>) -> tensor<7168x8xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xbf16>, tensor<8192x8xbf16>) outs(%1 : tensor<7168x8xbf16>) -> tensor<7168x8xbf16> return %2 : tensor<7168x8xbf16> } -} +} diff --git a/gemm/mlir/gemm_7168_8_8192_f16_tA.mlir b/gemm/mlir/gemm_7168_8_8192_f16_tA.mlir index ad72f3f..f36208c 100644 --- a/gemm/mlir/gemm_7168_8_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_7168_8_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x7168xf16>, %arg1: tensor<8192x8xf16>) -> tensor<7168x8xf16> { + func.func @main(%arg0: tensor<8192x7168xf16>, %arg1: tensor<8192x8xf16>) -> tensor<7168x8xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<7168x8xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7168x8xf16>) -> tensor<7168x8xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xf16>, tensor<8192x8xf16>) outs(%1 : tensor<7168x8xf16>) -> tensor<7168x8xf16> return %2 : tensor<7168x8xf16> } -} +} diff --git a/gemm/mlir/gemm_7680_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_7680_16_5120_bf16_tA.mlir index 5413ea7..db4ed5e 100644 --- a/gemm/mlir/gemm_7680_16_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_7680_16_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x7680xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<7680x16xbf16> { + func.func @main(%arg0: tensor<5120x7680xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<7680x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<7680x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7680x16xbf16>) -> tensor<7680x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<7680x16xbf16>) -> tensor<7680x16xbf16> return %2 : tensor<7680x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_7680_16_5120_f16_tA.mlir b/gemm/mlir/gemm_7680_16_5120_f16_tA.mlir index 68cbc88..884fae5 100644 --- 
a/gemm/mlir/gemm_7680_16_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_7680_16_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x7680xf16>, %arg1: tensor<5120x16xf16>) -> tensor<7680x16xf16> { + func.func @main(%arg0: tensor<5120x7680xf16>, %arg1: tensor<5120x16xf16>) -> tensor<7680x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<7680x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7680x16xf16>) -> tensor<7680x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xf16>, tensor<5120x16xf16>) outs(%1 : tensor<7680x16xf16>) -> tensor<7680x16xf16> return %2 : tensor<7680x16xf16> } -} +} diff --git a/gemm/mlir/gemm_7680_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_7680_1_5120_bf16_tA.mlir index d22bc41..3e9229a 100644 --- a/gemm/mlir/gemm_7680_1_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_7680_1_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x7680xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<7680x1xbf16> { + func.func @main(%arg0: tensor<5120x7680xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<7680x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<7680x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7680x1xbf16>) -> tensor<7680x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<7680x1xbf16>) -> tensor<7680x1xbf16> return %2 : tensor<7680x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_7680_1_5120_f16_tA.mlir b/gemm/mlir/gemm_7680_1_5120_f16_tA.mlir index b4b7ea4..8852272 100644 --- a/gemm/mlir/gemm_7680_1_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_7680_1_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x7680xf16>, %arg1: tensor<5120x1xf16>) -> tensor<7680x1xf16> { + func.func @main(%arg0: tensor<5120x7680xf16>, %arg1: tensor<5120x1xf16>) -> tensor<7680x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<7680x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7680x1xf16>) -> tensor<7680x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xf16>, tensor<5120x1xf16>) outs(%1 : tensor<7680x1xf16>) -> tensor<7680x1xf16> return %2 : tensor<7680x1xf16> } -} +} diff --git a/gemm/mlir/gemm_7680_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_7680_2_5120_bf16_tA.mlir index 1272238..91b162d 100644 --- a/gemm/mlir/gemm_7680_2_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_7680_2_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x7680xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<7680x2xbf16> { + func.func @main(%arg0: tensor<5120x7680xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<7680x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<7680x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7680x2xbf16>) -> tensor<7680x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<7680x2xbf16>) -> tensor<7680x2xbf16> return %2 : tensor<7680x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_7680_2_5120_f16_tA.mlir b/gemm/mlir/gemm_7680_2_5120_f16_tA.mlir index 061d2cd..0b11af3 100644 --- a/gemm/mlir/gemm_7680_2_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_7680_2_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x7680xf16>, %arg1: tensor<5120x2xf16>) -> tensor<7680x2xf16> { + func.func @main(%arg0: tensor<5120x7680xf16>, %arg1: tensor<5120x2xf16>) -> tensor<7680x2xf16> { %cst = arith.constant 
0.000000e+00 : f16 %0 = tensor.empty() : tensor<7680x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7680x2xf16>) -> tensor<7680x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xf16>, tensor<5120x2xf16>) outs(%1 : tensor<7680x2xf16>) -> tensor<7680x2xf16> return %2 : tensor<7680x2xf16> } -} +} diff --git a/gemm/mlir/gemm_7680_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_7680_32_5120_bf16_tA.mlir index e65a756..a89c462 100644 --- a/gemm/mlir/gemm_7680_32_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_7680_32_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x7680xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<7680x32xbf16> { + func.func @main(%arg0: tensor<5120x7680xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<7680x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<7680x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7680x32xbf16>) -> tensor<7680x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<7680x32xbf16>) -> tensor<7680x32xbf16> return %2 : tensor<7680x32xbf16> } -} +} diff --git a/gemm/mlir/gemm_7680_32_5120_f16_tA.mlir b/gemm/mlir/gemm_7680_32_5120_f16_tA.mlir index 11c6226..6dd24ce 100644 --- a/gemm/mlir/gemm_7680_32_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_7680_32_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x7680xf16>, %arg1: tensor<5120x32xf16>) -> tensor<7680x32xf16> { + func.func @main(%arg0: tensor<5120x7680xf16>, %arg1: tensor<5120x32xf16>) -> tensor<7680x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<7680x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7680x32xf16>) -> tensor<7680x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xf16>, tensor<5120x32xf16>) outs(%1 : tensor<7680x32xf16>) -> tensor<7680x32xf16> return %2 : tensor<7680x32xf16> } -} +} diff --git a/gemm/mlir/gemm_7680_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_7680_4_5120_bf16_tA.mlir index 9ab8446..b0334e4 100644 --- a/gemm/mlir/gemm_7680_4_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_7680_4_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x7680xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<7680x4xbf16> { + func.func @main(%arg0: tensor<5120x7680xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<7680x4xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<7680x4xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7680x4xbf16>) -> tensor<7680x4xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<7680x4xbf16>) -> tensor<7680x4xbf16> return %2 : tensor<7680x4xbf16> } -} +} diff --git a/gemm/mlir/gemm_7680_4_5120_f16_tA.mlir b/gemm/mlir/gemm_7680_4_5120_f16_tA.mlir index 18d769c..c927588 100644 --- a/gemm/mlir/gemm_7680_4_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_7680_4_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x7680xf16>, %arg1: tensor<5120x4xf16>) -> tensor<7680x4xf16> { + func.func @main(%arg0: tensor<5120x7680xf16>, %arg1: tensor<5120x4xf16>) -> tensor<7680x4xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<7680x4xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7680x4xf16>) -> tensor<7680x4xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xf16>, tensor<5120x4xf16>) outs(%1 : tensor<7680x4xf16>) -> tensor<7680x4xf16> return %2 : tensor<7680x4xf16> 
} -} +} diff --git a/gemm/mlir/gemm_7680_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_7680_8_5120_bf16_tA.mlir index c70637e..4d799fa 100644 --- a/gemm/mlir/gemm_7680_8_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_7680_8_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x7680xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<7680x8xbf16> { + func.func @main(%arg0: tensor<5120x7680xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<7680x8xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<7680x8xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7680x8xbf16>) -> tensor<7680x8xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<7680x8xbf16>) -> tensor<7680x8xbf16> return %2 : tensor<7680x8xbf16> } -} +} diff --git a/gemm/mlir/gemm_7680_8_5120_f16_tA.mlir b/gemm/mlir/gemm_7680_8_5120_f16_tA.mlir index 87eaf92..f817f9d 100644 --- a/gemm/mlir/gemm_7680_8_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_7680_8_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x7680xf16>, %arg1: tensor<5120x8xf16>) -> tensor<7680x8xf16> { + func.func @main(%arg0: tensor<5120x7680xf16>, %arg1: tensor<5120x8xf16>) -> tensor<7680x8xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<7680x8xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7680x8xf16>) -> tensor<7680x8xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xf16>, tensor<5120x8xf16>) outs(%1 : tensor<7680x8xf16>) -> tensor<7680x8xf16> return %2 : tensor<7680x8xf16> } -} +} diff --git a/gemm/mlir/gemm_8000_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_8000_16_5120_bf16_tA.mlir index a5ec137..50cb640 100644 --- a/gemm/mlir/gemm_8000_16_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_8000_16_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x8000xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<8000x16xbf16> { + func.func @main(%arg0: tensor<5120x8000xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<8000x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8000x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x16xbf16>) -> tensor<8000x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<8000x16xbf16>) -> tensor<8000x16xbf16> return %2 : tensor<8000x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_8000_16_5120_f16_tA.mlir b/gemm/mlir/gemm_8000_16_5120_f16_tA.mlir index e5a8643..53b5315 100644 --- a/gemm/mlir/gemm_8000_16_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_8000_16_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x8000xf16>, %arg1: tensor<5120x16xf16>) -> tensor<8000x16xf16> { + func.func @main(%arg0: tensor<5120x8000xf16>, %arg1: tensor<5120x16xf16>) -> tensor<8000x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8000x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x16xf16>) -> tensor<8000x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xf16>, tensor<5120x16xf16>) outs(%1 : tensor<8000x16xf16>) -> tensor<8000x16xf16> return %2 : tensor<8000x16xf16> } -} +} diff --git a/gemm/mlir/gemm_8000_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_8000_16_8192_bf16_tA.mlir index e9d1c6c..d61ae44 100644 --- a/gemm/mlir/gemm_8000_16_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_8000_16_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x8000xbf16>, %arg1: 
tensor<8192x16xbf16>) -> tensor<8000x16xbf16> { + func.func @main(%arg0: tensor<8192x8000xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<8000x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8000x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x16xbf16>) -> tensor<8000x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<8000x16xbf16>) -> tensor<8000x16xbf16> return %2 : tensor<8000x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_8000_16_8192_f16_tA.mlir b/gemm/mlir/gemm_8000_16_8192_f16_tA.mlir index 8d3948a..2fdaae0 100644 --- a/gemm/mlir/gemm_8000_16_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_8000_16_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x8000xf16>, %arg1: tensor<8192x16xf16>) -> tensor<8000x16xf16> { + func.func @main(%arg0: tensor<8192x8000xf16>, %arg1: tensor<8192x16xf16>) -> tensor<8000x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8000x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x16xf16>) -> tensor<8000x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xf16>, tensor<8192x16xf16>) outs(%1 : tensor<8000x16xf16>) -> tensor<8000x16xf16> return %2 : tensor<8000x16xf16> } -} +} diff --git a/gemm/mlir/gemm_8000_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_8000_1_5120_bf16_tA.mlir index 80a6737..2103508 100644 --- a/gemm/mlir/gemm_8000_1_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_8000_1_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x8000xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<8000x1xbf16> { + func.func @main(%arg0: tensor<5120x8000xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<8000x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8000x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x1xbf16>) -> tensor<8000x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<8000x1xbf16>) -> tensor<8000x1xbf16> return %2 : tensor<8000x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_8000_1_5120_f16_tA.mlir b/gemm/mlir/gemm_8000_1_5120_f16_tA.mlir index 3bd6144..d168465 100644 --- a/gemm/mlir/gemm_8000_1_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_8000_1_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x8000xf16>, %arg1: tensor<5120x1xf16>) -> tensor<8000x1xf16> { + func.func @main(%arg0: tensor<5120x8000xf16>, %arg1: tensor<5120x1xf16>) -> tensor<8000x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8000x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x1xf16>) -> tensor<8000x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xf16>, tensor<5120x1xf16>) outs(%1 : tensor<8000x1xf16>) -> tensor<8000x1xf16> return %2 : tensor<8000x1xf16> } -} +} diff --git a/gemm/mlir/gemm_8000_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_8000_1_8192_bf16_tA.mlir index ecbcf17..0f58095 100644 --- a/gemm/mlir/gemm_8000_1_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_8000_1_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x8000xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<8000x1xbf16> { + func.func @main(%arg0: tensor<8192x8000xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<8000x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8000x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x1xbf16>) -> tensor<8000x1xbf16> %2 = 
linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<8000x1xbf16>) -> tensor<8000x1xbf16> return %2 : tensor<8000x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_8000_1_8192_f16_tA.mlir b/gemm/mlir/gemm_8000_1_8192_f16_tA.mlir index 9c9dd7a..52e5c03 100644 --- a/gemm/mlir/gemm_8000_1_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_8000_1_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x8000xf16>, %arg1: tensor<8192x1xf16>) -> tensor<8000x1xf16> { + func.func @main(%arg0: tensor<8192x8000xf16>, %arg1: tensor<8192x1xf16>) -> tensor<8000x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8000x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x1xf16>) -> tensor<8000x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xf16>, tensor<8192x1xf16>) outs(%1 : tensor<8000x1xf16>) -> tensor<8000x1xf16> return %2 : tensor<8000x1xf16> } -} +} diff --git a/gemm/mlir/gemm_8000_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_8000_2_5120_bf16_tA.mlir index ace5d7d..668917b 100644 --- a/gemm/mlir/gemm_8000_2_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_8000_2_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x8000xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<8000x2xbf16> { + func.func @main(%arg0: tensor<5120x8000xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<8000x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8000x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x2xbf16>) -> tensor<8000x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<8000x2xbf16>) -> tensor<8000x2xbf16> return %2 : tensor<8000x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_8000_2_5120_f16_tA.mlir b/gemm/mlir/gemm_8000_2_5120_f16_tA.mlir index ea4bf75..d85abf9 100644 --- a/gemm/mlir/gemm_8000_2_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_8000_2_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x8000xf16>, %arg1: tensor<5120x2xf16>) -> tensor<8000x2xf16> { + func.func @main(%arg0: tensor<5120x8000xf16>, %arg1: tensor<5120x2xf16>) -> tensor<8000x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8000x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x2xf16>) -> tensor<8000x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xf16>, tensor<5120x2xf16>) outs(%1 : tensor<8000x2xf16>) -> tensor<8000x2xf16> return %2 : tensor<8000x2xf16> } -} +} diff --git a/gemm/mlir/gemm_8000_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_8000_2_8192_bf16_tA.mlir index d6f4f98..fa3aeca 100644 --- a/gemm/mlir/gemm_8000_2_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_8000_2_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x8000xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<8000x2xbf16> { + func.func @main(%arg0: tensor<8192x8000xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<8000x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8000x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x2xbf16>) -> tensor<8000x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<8000x2xbf16>) -> tensor<8000x2xbf16> return %2 : tensor<8000x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_8000_2_8192_f16_tA.mlir b/gemm/mlir/gemm_8000_2_8192_f16_tA.mlir index 9b566e7..4d8cb01 100644 --- 
a/gemm/mlir/gemm_8000_2_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_8000_2_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x8000xf16>, %arg1: tensor<8192x2xf16>) -> tensor<8000x2xf16> { + func.func @main(%arg0: tensor<8192x8000xf16>, %arg1: tensor<8192x2xf16>) -> tensor<8000x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8000x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x2xf16>) -> tensor<8000x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xf16>, tensor<8192x2xf16>) outs(%1 : tensor<8000x2xf16>) -> tensor<8000x2xf16> return %2 : tensor<8000x2xf16> } -} +} diff --git a/gemm/mlir/gemm_8000_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_8000_32_5120_bf16_tA.mlir index 6b08d4a..e76d224 100644 --- a/gemm/mlir/gemm_8000_32_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_8000_32_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x8000xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<8000x32xbf16> { + func.func @main(%arg0: tensor<5120x8000xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<8000x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8000x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x32xbf16>) -> tensor<8000x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<8000x32xbf16>) -> tensor<8000x32xbf16> return %2 : tensor<8000x32xbf16> } -} +} diff --git a/gemm/mlir/gemm_8000_32_5120_f16_tA.mlir b/gemm/mlir/gemm_8000_32_5120_f16_tA.mlir index e56cb29..5c226af 100644 --- a/gemm/mlir/gemm_8000_32_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_8000_32_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x8000xf16>, %arg1: tensor<5120x32xf16>) -> tensor<8000x32xf16> { + func.func @main(%arg0: tensor<5120x8000xf16>, %arg1: tensor<5120x32xf16>) -> tensor<8000x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8000x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x32xf16>) -> tensor<8000x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xf16>, tensor<5120x32xf16>) outs(%1 : tensor<8000x32xf16>) -> tensor<8000x32xf16> return %2 : tensor<8000x32xf16> } -} +} diff --git a/gemm/mlir/gemm_8000_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_8000_32_8192_bf16_tA.mlir index e918725..4df2655 100644 --- a/gemm/mlir/gemm_8000_32_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_8000_32_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x8000xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<8000x32xbf16> { + func.func @main(%arg0: tensor<8192x8000xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<8000x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8000x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x32xbf16>) -> tensor<8000x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<8000x32xbf16>) -> tensor<8000x32xbf16> return %2 : tensor<8000x32xbf16> } -} +} diff --git a/gemm/mlir/gemm_8000_32_8192_f16_tA.mlir b/gemm/mlir/gemm_8000_32_8192_f16_tA.mlir index bcf5ef2..656010a 100644 --- a/gemm/mlir/gemm_8000_32_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_8000_32_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x8000xf16>, %arg1: tensor<8192x32xf16>) -> tensor<8000x32xf16> { + func.func @main(%arg0: tensor<8192x8000xf16>, %arg1: tensor<8192x32xf16>) -> 
tensor<8000x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8000x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x32xf16>) -> tensor<8000x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xf16>, tensor<8192x32xf16>) outs(%1 : tensor<8000x32xf16>) -> tensor<8000x32xf16> return %2 : tensor<8000x32xf16> } -} +} diff --git a/gemm/mlir/gemm_8000_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_8000_4_5120_bf16_tA.mlir index 515fa72..f45eab2 100644 --- a/gemm/mlir/gemm_8000_4_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_8000_4_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x8000xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<8000x4xbf16> { + func.func @main(%arg0: tensor<5120x8000xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<8000x4xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8000x4xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x4xbf16>) -> tensor<8000x4xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<8000x4xbf16>) -> tensor<8000x4xbf16> return %2 : tensor<8000x4xbf16> } -} +} diff --git a/gemm/mlir/gemm_8000_4_5120_f16_tA.mlir b/gemm/mlir/gemm_8000_4_5120_f16_tA.mlir index 2ebec27..a715200 100644 --- a/gemm/mlir/gemm_8000_4_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_8000_4_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x8000xf16>, %arg1: tensor<5120x4xf16>) -> tensor<8000x4xf16> { + func.func @main(%arg0: tensor<5120x8000xf16>, %arg1: tensor<5120x4xf16>) -> tensor<8000x4xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8000x4xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x4xf16>) -> tensor<8000x4xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xf16>, tensor<5120x4xf16>) outs(%1 : tensor<8000x4xf16>) -> tensor<8000x4xf16> return %2 : tensor<8000x4xf16> } -} +} diff --git a/gemm/mlir/gemm_8000_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_8000_4_8192_bf16_tA.mlir index eebdaa6..a0bf7e7 100644 --- a/gemm/mlir/gemm_8000_4_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_8000_4_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x8000xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<8000x4xbf16> { + func.func @main(%arg0: tensor<8192x8000xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<8000x4xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8000x4xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x4xbf16>) -> tensor<8000x4xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<8000x4xbf16>) -> tensor<8000x4xbf16> return %2 : tensor<8000x4xbf16> } -} +} diff --git a/gemm/mlir/gemm_8000_4_8192_f16_tA.mlir b/gemm/mlir/gemm_8000_4_8192_f16_tA.mlir index e086491..4d5e8c5 100644 --- a/gemm/mlir/gemm_8000_4_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_8000_4_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x8000xf16>, %arg1: tensor<8192x4xf16>) -> tensor<8000x4xf16> { + func.func @main(%arg0: tensor<8192x8000xf16>, %arg1: tensor<8192x4xf16>) -> tensor<8000x4xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8000x4xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x4xf16>) -> tensor<8000x4xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xf16>, tensor<8192x4xf16>) outs(%1 : tensor<8000x4xf16>) -> tensor<8000x4xf16> return %2 
: tensor<8000x4xf16> } -} +} diff --git a/gemm/mlir/gemm_8000_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_8000_8_5120_bf16_tA.mlir index 7420d11..5a7f7e8 100644 --- a/gemm/mlir/gemm_8000_8_5120_bf16_tA.mlir +++ b/gemm/mlir/gemm_8000_8_5120_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x8000xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<8000x8xbf16> { + func.func @main(%arg0: tensor<5120x8000xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<8000x8xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8000x8xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x8xbf16>) -> tensor<8000x8xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<8000x8xbf16>) -> tensor<8000x8xbf16> return %2 : tensor<8000x8xbf16> } -} +} diff --git a/gemm/mlir/gemm_8000_8_5120_f16_tA.mlir b/gemm/mlir/gemm_8000_8_5120_f16_tA.mlir index 6684ea0..5552aa7 100644 --- a/gemm/mlir/gemm_8000_8_5120_f16_tA.mlir +++ b/gemm/mlir/gemm_8000_8_5120_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<5120x8000xf16>, %arg1: tensor<5120x8xf16>) -> tensor<8000x8xf16> { + func.func @main(%arg0: tensor<5120x8000xf16>, %arg1: tensor<5120x8xf16>) -> tensor<8000x8xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8000x8xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x8xf16>) -> tensor<8000x8xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xf16>, tensor<5120x8xf16>) outs(%1 : tensor<8000x8xf16>) -> tensor<8000x8xf16> return %2 : tensor<8000x8xf16> } -} +} diff --git a/gemm/mlir/gemm_8000_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_8000_8_8192_bf16_tA.mlir index bab0926..a79aac9 100644 --- a/gemm/mlir/gemm_8000_8_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_8000_8_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x8000xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<8000x8xbf16> { + func.func @main(%arg0: tensor<8192x8000xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<8000x8xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8000x8xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x8xbf16>) -> tensor<8000x8xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xbf16>, tensor<8192x8xbf16>) outs(%1 : tensor<8000x8xbf16>) -> tensor<8000x8xbf16> return %2 : tensor<8000x8xbf16> } -} +} diff --git a/gemm/mlir/gemm_8000_8_8192_f16_tA.mlir b/gemm/mlir/gemm_8000_8_8192_f16_tA.mlir index d8697c2..4f1ed4c 100644 --- a/gemm/mlir/gemm_8000_8_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_8000_8_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x8000xf16>, %arg1: tensor<8192x8xf16>) -> tensor<8000x8xf16> { + func.func @main(%arg0: tensor<8192x8000xf16>, %arg1: tensor<8192x8xf16>) -> tensor<8000x8xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8000x8xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x8xf16>) -> tensor<8000x8xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xf16>, tensor<8192x8xf16>) outs(%1 : tensor<8000x8xf16>) -> tensor<8000x8xf16> return %2 : tensor<8000x8xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_16_1024_bf16_tA.mlir b/gemm/mlir/gemm_8192_16_1024_bf16_tA.mlir index 038e187..665dc34 100644 --- a/gemm/mlir/gemm_8192_16_1024_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_16_1024_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<1024x8192xbf16>, %arg1: 
tensor<1024x16xbf16>) -> tensor<8192x16xbf16> { + func.func @main(%arg0: tensor<1024x8192xbf16>, %arg1: tensor<1024x16xbf16>) -> tensor<8192x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xbf16>, tensor<1024x16xbf16>) outs(%1 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16> return %2 : tensor<8192x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_16_1024_f16_tA.mlir b/gemm/mlir/gemm_8192_16_1024_f16_tA.mlir index eaf0d6c..b37806e 100644 --- a/gemm/mlir/gemm_8192_16_1024_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_16_1024_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<1024x8192xf16>, %arg1: tensor<1024x16xf16>) -> tensor<8192x16xf16> { + func.func @main(%arg0: tensor<1024x8192xf16>, %arg1: tensor<1024x16xf16>) -> tensor<8192x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x16xf16>) -> tensor<8192x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xf16>, tensor<1024x16xf16>) outs(%1 : tensor<8192x16xf16>) -> tensor<8192x16xf16> return %2 : tensor<8192x16xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_16_14336_bf16_tA.mlir b/gemm/mlir/gemm_8192_16_14336_bf16_tA.mlir index 6b7aff8..5a46495 100644 --- a/gemm/mlir/gemm_8192_16_14336_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_16_14336_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<14336x8192xbf16>, %arg1: tensor<14336x16xbf16>) -> tensor<8192x16xbf16> { + func.func @main(%arg0: tensor<14336x8192xbf16>, %arg1: tensor<14336x16xbf16>) -> tensor<8192x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<14336x8192xbf16>, tensor<14336x16xbf16>) outs(%1 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16> return %2 : tensor<8192x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_16_14336_f16_tA.mlir b/gemm/mlir/gemm_8192_16_14336_f16_tA.mlir index de7e11c..e9fab90 100644 --- a/gemm/mlir/gemm_8192_16_14336_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_16_14336_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<14336x8192xf16>, %arg1: tensor<14336x16xf16>) -> tensor<8192x16xf16> { + func.func @main(%arg0: tensor<14336x8192xf16>, %arg1: tensor<14336x16xf16>) -> tensor<8192x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x16xf16>) -> tensor<8192x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<14336x8192xf16>, tensor<14336x16xf16>) outs(%1 : tensor<8192x16xf16>) -> tensor<8192x16xf16> return %2 : tensor<8192x16xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_16_2048_bf16_tA.mlir b/gemm/mlir/gemm_8192_16_2048_bf16_tA.mlir index 0d691f4..d5390e1 100644 --- a/gemm/mlir/gemm_8192_16_2048_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_16_2048_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<2048x8192xbf16>, %arg1: tensor<2048x16xbf16>) -> tensor<8192x16xbf16> { + func.func @main(%arg0: tensor<2048x8192xbf16>, %arg1: tensor<2048x16xbf16>) -> tensor<8192x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x16xbf16> %1 = linalg.fill ins(%cst : bf16) 
outs(%0 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xbf16>, tensor<2048x16xbf16>) outs(%1 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16> return %2 : tensor<8192x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_16_2048_f16_tA.mlir b/gemm/mlir/gemm_8192_16_2048_f16_tA.mlir index dc80f68..899b396 100644 --- a/gemm/mlir/gemm_8192_16_2048_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_16_2048_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<2048x8192xf16>, %arg1: tensor<2048x16xf16>) -> tensor<8192x16xf16> { + func.func @main(%arg0: tensor<2048x8192xf16>, %arg1: tensor<2048x16xf16>) -> tensor<8192x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x16xf16>) -> tensor<8192x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xf16>, tensor<2048x16xf16>) outs(%1 : tensor<8192x16xf16>) -> tensor<8192x16xf16> return %2 : tensor<8192x16xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_16_28672_bf16_tA.mlir b/gemm/mlir/gemm_8192_16_28672_bf16_tA.mlir index 760aa13..a052c9d 100644 --- a/gemm/mlir/gemm_8192_16_28672_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_16_28672_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<28672x8192xbf16>, %arg1: tensor<28672x16xbf16>) -> tensor<8192x16xbf16> { + func.func @main(%arg0: tensor<28672x8192xbf16>, %arg1: tensor<28672x16xbf16>) -> tensor<8192x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<28672x8192xbf16>, tensor<28672x16xbf16>) outs(%1 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16> return %2 : tensor<8192x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_16_28672_f16_tA.mlir b/gemm/mlir/gemm_8192_16_28672_f16_tA.mlir index a161368..937f6ca 100644 --- a/gemm/mlir/gemm_8192_16_28672_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_16_28672_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<28672x8192xf16>, %arg1: tensor<28672x16xf16>) -> tensor<8192x16xf16> { + func.func @main(%arg0: tensor<28672x8192xf16>, %arg1: tensor<28672x16xf16>) -> tensor<8192x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x16xf16>) -> tensor<8192x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<28672x8192xf16>, tensor<28672x16xf16>) outs(%1 : tensor<8192x16xf16>) -> tensor<8192x16xf16> return %2 : tensor<8192x16xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_16_3584_bf16_tA.mlir b/gemm/mlir/gemm_8192_16_3584_bf16_tA.mlir index 532f6a4..956f501 100644 --- a/gemm/mlir/gemm_8192_16_3584_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_16_3584_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<3584x8192xbf16>, %arg1: tensor<3584x16xbf16>) -> tensor<8192x16xbf16> { + func.func @main(%arg0: tensor<3584x8192xbf16>, %arg1: tensor<3584x16xbf16>) -> tensor<8192x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xbf16>, tensor<3584x16xbf16>) outs(%1 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16> return %2 : tensor<8192x16xbf16> } -} +} diff --git 
a/gemm/mlir/gemm_8192_16_3584_f16_tA.mlir b/gemm/mlir/gemm_8192_16_3584_f16_tA.mlir index 54bb2bc..5182fe9 100644 --- a/gemm/mlir/gemm_8192_16_3584_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_16_3584_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<3584x8192xf16>, %arg1: tensor<3584x16xf16>) -> tensor<8192x16xf16> { + func.func @main(%arg0: tensor<3584x8192xf16>, %arg1: tensor<3584x16xf16>) -> tensor<8192x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x16xf16>) -> tensor<8192x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xf16>, tensor<3584x16xf16>) outs(%1 : tensor<8192x16xf16>) -> tensor<8192x16xf16> return %2 : tensor<8192x16xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_16_4096_bf16_tA.mlir b/gemm/mlir/gemm_8192_16_4096_bf16_tA.mlir index 3899546..ae4ae02 100644 --- a/gemm/mlir/gemm_8192_16_4096_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_16_4096_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<4096x16xbf16>) -> tensor<8192x16xbf16> { + func.func @main(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<4096x16xbf16>) -> tensor<8192x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xbf16>, tensor<4096x16xbf16>) outs(%1 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16> return %2 : tensor<8192x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_16_4096_f16_tA.mlir b/gemm/mlir/gemm_8192_16_4096_f16_tA.mlir index 7039123..1510fbb 100644 --- a/gemm/mlir/gemm_8192_16_4096_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_16_4096_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<4096x8192xf16>, %arg1: tensor<4096x16xf16>) -> tensor<8192x16xf16> { + func.func @main(%arg0: tensor<4096x8192xf16>, %arg1: tensor<4096x16xf16>) -> tensor<8192x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x16xf16>) -> tensor<8192x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xf16>, tensor<4096x16xf16>) outs(%1 : tensor<8192x16xf16>) -> tensor<8192x16xf16> return %2 : tensor<8192x16xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_16_7168_bf16_tA.mlir b/gemm/mlir/gemm_8192_16_7168_bf16_tA.mlir index e7b1414..ce83d31 100644 --- a/gemm/mlir/gemm_8192_16_7168_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_16_7168_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<7168x8192xbf16>, %arg1: tensor<7168x16xbf16>) -> tensor<8192x16xbf16> { + func.func @main(%arg0: tensor<7168x8192xbf16>, %arg1: tensor<7168x16xbf16>) -> tensor<8192x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<7168x8192xbf16>, tensor<7168x16xbf16>) outs(%1 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16> return %2 : tensor<8192x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_16_7168_f16_tA.mlir b/gemm/mlir/gemm_8192_16_7168_f16_tA.mlir index 6ac4cdd..63da0a2 100644 --- a/gemm/mlir/gemm_8192_16_7168_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_16_7168_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<7168x8192xf16>, %arg1: 
tensor<7168x16xf16>) -> tensor<8192x16xf16> { + func.func @main(%arg0: tensor<7168x8192xf16>, %arg1: tensor<7168x16xf16>) -> tensor<8192x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x16xf16>) -> tensor<8192x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<7168x8192xf16>, tensor<7168x16xf16>) outs(%1 : tensor<8192x16xf16>) -> tensor<8192x16xf16> return %2 : tensor<8192x16xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_8192_16_8192_bf16_tA.mlir index a749aa1..b269adb 100644 --- a/gemm/mlir/gemm_8192_16_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_16_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x8192xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<8192x16xbf16> { + func.func @main(%arg0: tensor<8192x8192xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<8192x16xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x16xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16> return %2 : tensor<8192x16xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_16_8192_f16_tA.mlir b/gemm/mlir/gemm_8192_16_8192_f16_tA.mlir index d9a8957..a00d5ed 100644 --- a/gemm/mlir/gemm_8192_16_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_16_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x16xf16>) -> tensor<8192x16xf16> { + func.func @main(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x16xf16>) -> tensor<8192x16xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x16xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x16xf16>) -> tensor<8192x16xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xf16>, tensor<8192x16xf16>) outs(%1 : tensor<8192x16xf16>) -> tensor<8192x16xf16> return %2 : tensor<8192x16xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_1_1024_bf16_tA.mlir b/gemm/mlir/gemm_8192_1_1024_bf16_tA.mlir index 6df831a..4dbf154 100644 --- a/gemm/mlir/gemm_8192_1_1024_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_1_1024_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<1024x8192xbf16>, %arg1: tensor<1024x1xbf16>) -> tensor<8192x1xbf16> { + func.func @main(%arg0: tensor<1024x8192xbf16>, %arg1: tensor<1024x1xbf16>) -> tensor<8192x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xbf16>, tensor<1024x1xbf16>) outs(%1 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16> return %2 : tensor<8192x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_1_1024_f16_tA.mlir b/gemm/mlir/gemm_8192_1_1024_f16_tA.mlir index 91e9ae3..cfe91b7 100644 --- a/gemm/mlir/gemm_8192_1_1024_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_1_1024_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<1024x8192xf16>, %arg1: tensor<1024x1xf16>) -> tensor<8192x1xf16> { + func.func @main(%arg0: tensor<1024x8192xf16>, %arg1: tensor<1024x1xf16>) -> tensor<8192x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x1xf16>) -> tensor<8192x1xf16> 
%2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xf16>, tensor<1024x1xf16>) outs(%1 : tensor<8192x1xf16>) -> tensor<8192x1xf16> return %2 : tensor<8192x1xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_1_14336_bf16_tA.mlir b/gemm/mlir/gemm_8192_1_14336_bf16_tA.mlir index 4e44496..bb3ee2c 100644 --- a/gemm/mlir/gemm_8192_1_14336_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_1_14336_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<14336x8192xbf16>, %arg1: tensor<14336x1xbf16>) -> tensor<8192x1xbf16> { + func.func @main(%arg0: tensor<14336x8192xbf16>, %arg1: tensor<14336x1xbf16>) -> tensor<8192x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<14336x8192xbf16>, tensor<14336x1xbf16>) outs(%1 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16> return %2 : tensor<8192x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_1_14336_f16_tA.mlir b/gemm/mlir/gemm_8192_1_14336_f16_tA.mlir index 13191f4..d1bf657 100644 --- a/gemm/mlir/gemm_8192_1_14336_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_1_14336_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<14336x8192xf16>, %arg1: tensor<14336x1xf16>) -> tensor<8192x1xf16> { + func.func @main(%arg0: tensor<14336x8192xf16>, %arg1: tensor<14336x1xf16>) -> tensor<8192x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x1xf16>) -> tensor<8192x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<14336x8192xf16>, tensor<14336x1xf16>) outs(%1 : tensor<8192x1xf16>) -> tensor<8192x1xf16> return %2 : tensor<8192x1xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_1_2048_bf16_tA.mlir b/gemm/mlir/gemm_8192_1_2048_bf16_tA.mlir index 7042489..dcb9440 100644 --- a/gemm/mlir/gemm_8192_1_2048_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_1_2048_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<2048x8192xbf16>, %arg1: tensor<2048x1xbf16>) -> tensor<8192x1xbf16> { + func.func @main(%arg0: tensor<2048x8192xbf16>, %arg1: tensor<2048x1xbf16>) -> tensor<8192x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xbf16>, tensor<2048x1xbf16>) outs(%1 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16> return %2 : tensor<8192x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_1_2048_f16_tA.mlir b/gemm/mlir/gemm_8192_1_2048_f16_tA.mlir index 2231418..bb68ab7 100644 --- a/gemm/mlir/gemm_8192_1_2048_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_1_2048_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<2048x8192xf16>, %arg1: tensor<2048x1xf16>) -> tensor<8192x1xf16> { + func.func @main(%arg0: tensor<2048x8192xf16>, %arg1: tensor<2048x1xf16>) -> tensor<8192x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x1xf16>) -> tensor<8192x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xf16>, tensor<2048x1xf16>) outs(%1 : tensor<8192x1xf16>) -> tensor<8192x1xf16> return %2 : tensor<8192x1xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_1_28672_bf16_tA.mlir b/gemm/mlir/gemm_8192_1_28672_bf16_tA.mlir index d0505c0..9d8ba4c 100644 --- 
a/gemm/mlir/gemm_8192_1_28672_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_1_28672_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<28672x8192xbf16>, %arg1: tensor<28672x1xbf16>) -> tensor<8192x1xbf16> { + func.func @main(%arg0: tensor<28672x8192xbf16>, %arg1: tensor<28672x1xbf16>) -> tensor<8192x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<28672x8192xbf16>, tensor<28672x1xbf16>) outs(%1 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16> return %2 : tensor<8192x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_1_28672_f16_tA.mlir b/gemm/mlir/gemm_8192_1_28672_f16_tA.mlir index 99b51fe..7f24658 100644 --- a/gemm/mlir/gemm_8192_1_28672_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_1_28672_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<28672x8192xf16>, %arg1: tensor<28672x1xf16>) -> tensor<8192x1xf16> { + func.func @main(%arg0: tensor<28672x8192xf16>, %arg1: tensor<28672x1xf16>) -> tensor<8192x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x1xf16>) -> tensor<8192x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<28672x8192xf16>, tensor<28672x1xf16>) outs(%1 : tensor<8192x1xf16>) -> tensor<8192x1xf16> return %2 : tensor<8192x1xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_1_3584_bf16_tA.mlir b/gemm/mlir/gemm_8192_1_3584_bf16_tA.mlir index 3dc75a9..db96ed7 100644 --- a/gemm/mlir/gemm_8192_1_3584_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_1_3584_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<3584x8192xbf16>, %arg1: tensor<3584x1xbf16>) -> tensor<8192x1xbf16> { + func.func @main(%arg0: tensor<3584x8192xbf16>, %arg1: tensor<3584x1xbf16>) -> tensor<8192x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xbf16>, tensor<3584x1xbf16>) outs(%1 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16> return %2 : tensor<8192x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_1_3584_f16_tA.mlir b/gemm/mlir/gemm_8192_1_3584_f16_tA.mlir index 56734e1..0c6617c 100644 --- a/gemm/mlir/gemm_8192_1_3584_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_1_3584_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<3584x8192xf16>, %arg1: tensor<3584x1xf16>) -> tensor<8192x1xf16> { + func.func @main(%arg0: tensor<3584x8192xf16>, %arg1: tensor<3584x1xf16>) -> tensor<8192x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x1xf16>) -> tensor<8192x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xf16>, tensor<3584x1xf16>) outs(%1 : tensor<8192x1xf16>) -> tensor<8192x1xf16> return %2 : tensor<8192x1xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_1_4096_bf16_tA.mlir b/gemm/mlir/gemm_8192_1_4096_bf16_tA.mlir index b46c7dd..fef2bfe 100644 --- a/gemm/mlir/gemm_8192_1_4096_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_1_4096_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<4096x1xbf16>) -> tensor<8192x1xbf16> { + func.func @main(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<4096x1xbf16>) -> tensor<8192x1xbf16> { %cst = 
arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xbf16>, tensor<4096x1xbf16>) outs(%1 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16> return %2 : tensor<8192x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_1_4096_f16_tA.mlir b/gemm/mlir/gemm_8192_1_4096_f16_tA.mlir index 39b2d9c..76cdec0 100644 --- a/gemm/mlir/gemm_8192_1_4096_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_1_4096_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<4096x8192xf16>, %arg1: tensor<4096x1xf16>) -> tensor<8192x1xf16> { + func.func @main(%arg0: tensor<4096x8192xf16>, %arg1: tensor<4096x1xf16>) -> tensor<8192x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x1xf16>) -> tensor<8192x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xf16>, tensor<4096x1xf16>) outs(%1 : tensor<8192x1xf16>) -> tensor<8192x1xf16> return %2 : tensor<8192x1xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_1_7168_bf16_tA.mlir b/gemm/mlir/gemm_8192_1_7168_bf16_tA.mlir index fbd4c9b..83f7005 100644 --- a/gemm/mlir/gemm_8192_1_7168_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_1_7168_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<7168x8192xbf16>, %arg1: tensor<7168x1xbf16>) -> tensor<8192x1xbf16> { + func.func @main(%arg0: tensor<7168x8192xbf16>, %arg1: tensor<7168x1xbf16>) -> tensor<8192x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<7168x8192xbf16>, tensor<7168x1xbf16>) outs(%1 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16> return %2 : tensor<8192x1xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_1_7168_f16_tA.mlir b/gemm/mlir/gemm_8192_1_7168_f16_tA.mlir index c31adcc..1eedb82 100644 --- a/gemm/mlir/gemm_8192_1_7168_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_1_7168_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<7168x8192xf16>, %arg1: tensor<7168x1xf16>) -> tensor<8192x1xf16> { + func.func @main(%arg0: tensor<7168x8192xf16>, %arg1: tensor<7168x1xf16>) -> tensor<8192x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x1xf16>) -> tensor<8192x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<7168x8192xf16>, tensor<7168x1xf16>) outs(%1 : tensor<8192x1xf16>) -> tensor<8192x1xf16> return %2 : tensor<8192x1xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_8192_1_8192_bf16_tA.mlir index 4ac0944..2ea5414 100644 --- a/gemm/mlir/gemm_8192_1_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_1_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x8192xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<8192x1xbf16> { + func.func @main(%arg0: tensor<8192x8192xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<8192x1xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x1xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16> return %2 : tensor<8192x1xbf16> } -} 
+} diff --git a/gemm/mlir/gemm_8192_1_8192_f16_tA.mlir b/gemm/mlir/gemm_8192_1_8192_f16_tA.mlir index ffdacd2..ea88565 100644 --- a/gemm/mlir/gemm_8192_1_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_1_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x1xf16>) -> tensor<8192x1xf16> { + func.func @main(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x1xf16>) -> tensor<8192x1xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x1xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x1xf16>) -> tensor<8192x1xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xf16>, tensor<8192x1xf16>) outs(%1 : tensor<8192x1xf16>) -> tensor<8192x1xf16> return %2 : tensor<8192x1xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_2048_1024_f16.mlir b/gemm/mlir/gemm_8192_2048_1024_f16.mlir index 6bad832..f42dfba 100644 --- a/gemm/mlir/gemm_8192_2048_1024_f16.mlir +++ b/gemm/mlir/gemm_8192_2048_1024_f16.mlir @@ -1,9 +1,9 @@ module { - func.func @main_0(%arg0: tensor<8192x1024xf16>, %arg1: tensor<1024x2048xf16>) -> tensor<8192x2048xf16> { + func.func @main(%arg0: tensor<8192x1024xf16>, %arg1: tensor<1024x2048xf16>) -> tensor<8192x2048xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x2048xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x2048xf16>) -> tensor<8192x2048xf16> %2 = linalg.matmul ins(%arg0, %arg1 : tensor<8192x1024xf16>, tensor<1024x2048xf16>) outs(%1 : tensor<8192x2048xf16>) -> tensor<8192x2048xf16> return %2 : tensor<8192x2048xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_2048_65536_f16.mlir b/gemm/mlir/gemm_8192_2048_65536_f16.mlir index 0593abc..59ae1bc 100644 --- a/gemm/mlir/gemm_8192_2048_65536_f16.mlir +++ b/gemm/mlir/gemm_8192_2048_65536_f16.mlir @@ -1,9 +1,9 @@ module { - func.func @main_0(%arg0: tensor<8192x65536xf16>, %arg1: tensor<65536x2048xf16>) -> tensor<8192x2048xf16> { + func.func @main(%arg0: tensor<8192x65536xf16>, %arg1: tensor<65536x2048xf16>) -> tensor<8192x2048xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x2048xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x2048xf16>) -> tensor<8192x2048xf16> %2 = linalg.matmul ins(%arg0, %arg1 : tensor<8192x65536xf16>, tensor<65536x2048xf16>) outs(%1 : tensor<8192x2048xf16>) -> tensor<8192x2048xf16> return %2 : tensor<8192x2048xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_2048_8192_f16.mlir b/gemm/mlir/gemm_8192_2048_8192_f16.mlir index 84e0fd9..8e0a0a6 100644 --- a/gemm/mlir/gemm_8192_2048_8192_f16.mlir +++ b/gemm/mlir/gemm_8192_2048_8192_f16.mlir @@ -1,9 +1,9 @@ module { - func.func @main_0(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x2048xf16>) -> tensor<8192x2048xf16> { + func.func @main(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x2048xf16>) -> tensor<8192x2048xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x2048xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x2048xf16>) -> tensor<8192x2048xf16> %2 = linalg.matmul ins(%arg0, %arg1 : tensor<8192x8192xf16>, tensor<8192x2048xf16>) outs(%1 : tensor<8192x2048xf16>) -> tensor<8192x2048xf16> return %2 : tensor<8192x2048xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_2_1024_bf16_tA.mlir b/gemm/mlir/gemm_8192_2_1024_bf16_tA.mlir index 4be7e93..7b07b58 100644 --- a/gemm/mlir/gemm_8192_2_1024_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_2_1024_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<1024x8192xbf16>, %arg1: 
tensor<1024x2xbf16>) -> tensor<8192x2xbf16> { + func.func @main(%arg0: tensor<1024x8192xbf16>, %arg1: tensor<1024x2xbf16>) -> tensor<8192x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xbf16>, tensor<1024x2xbf16>) outs(%1 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16> return %2 : tensor<8192x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_2_1024_f16_tA.mlir b/gemm/mlir/gemm_8192_2_1024_f16_tA.mlir index e88034f..fad9863 100644 --- a/gemm/mlir/gemm_8192_2_1024_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_2_1024_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<1024x8192xf16>, %arg1: tensor<1024x2xf16>) -> tensor<8192x2xf16> { + func.func @main(%arg0: tensor<1024x8192xf16>, %arg1: tensor<1024x2xf16>) -> tensor<8192x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x2xf16>) -> tensor<8192x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xf16>, tensor<1024x2xf16>) outs(%1 : tensor<8192x2xf16>) -> tensor<8192x2xf16> return %2 : tensor<8192x2xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_2_14336_bf16_tA.mlir b/gemm/mlir/gemm_8192_2_14336_bf16_tA.mlir index 8a4f3b5..c3ba4e7 100644 --- a/gemm/mlir/gemm_8192_2_14336_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_2_14336_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<14336x8192xbf16>, %arg1: tensor<14336x2xbf16>) -> tensor<8192x2xbf16> { + func.func @main(%arg0: tensor<14336x8192xbf16>, %arg1: tensor<14336x2xbf16>) -> tensor<8192x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<14336x8192xbf16>, tensor<14336x2xbf16>) outs(%1 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16> return %2 : tensor<8192x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_2_14336_f16_tA.mlir b/gemm/mlir/gemm_8192_2_14336_f16_tA.mlir index 25e2c86..ac6a2f1 100644 --- a/gemm/mlir/gemm_8192_2_14336_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_2_14336_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<14336x8192xf16>, %arg1: tensor<14336x2xf16>) -> tensor<8192x2xf16> { + func.func @main(%arg0: tensor<14336x8192xf16>, %arg1: tensor<14336x2xf16>) -> tensor<8192x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x2xf16>) -> tensor<8192x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<14336x8192xf16>, tensor<14336x2xf16>) outs(%1 : tensor<8192x2xf16>) -> tensor<8192x2xf16> return %2 : tensor<8192x2xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_2_2048_bf16_tA.mlir b/gemm/mlir/gemm_8192_2_2048_bf16_tA.mlir index 46864c9..154421f 100644 --- a/gemm/mlir/gemm_8192_2_2048_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_2_2048_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<2048x8192xbf16>, %arg1: tensor<2048x2xbf16>) -> tensor<8192x2xbf16> { + func.func @main(%arg0: tensor<2048x8192xbf16>, %arg1: tensor<2048x2xbf16>) -> tensor<8192x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16> %2 = 
linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xbf16>, tensor<2048x2xbf16>) outs(%1 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16> return %2 : tensor<8192x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_2_2048_f16_tA.mlir b/gemm/mlir/gemm_8192_2_2048_f16_tA.mlir index a044c11..531fb51 100644 --- a/gemm/mlir/gemm_8192_2_2048_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_2_2048_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<2048x8192xf16>, %arg1: tensor<2048x2xf16>) -> tensor<8192x2xf16> { + func.func @main(%arg0: tensor<2048x8192xf16>, %arg1: tensor<2048x2xf16>) -> tensor<8192x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x2xf16>) -> tensor<8192x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xf16>, tensor<2048x2xf16>) outs(%1 : tensor<8192x2xf16>) -> tensor<8192x2xf16> return %2 : tensor<8192x2xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_2_28672_bf16_tA.mlir b/gemm/mlir/gemm_8192_2_28672_bf16_tA.mlir index be33f85..a26e286 100644 --- a/gemm/mlir/gemm_8192_2_28672_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_2_28672_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<28672x8192xbf16>, %arg1: tensor<28672x2xbf16>) -> tensor<8192x2xbf16> { + func.func @main(%arg0: tensor<28672x8192xbf16>, %arg1: tensor<28672x2xbf16>) -> tensor<8192x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<28672x8192xbf16>, tensor<28672x2xbf16>) outs(%1 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16> return %2 : tensor<8192x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_2_28672_f16_tA.mlir b/gemm/mlir/gemm_8192_2_28672_f16_tA.mlir index 04bfd9d..fa64b0d 100644 --- a/gemm/mlir/gemm_8192_2_28672_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_2_28672_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<28672x8192xf16>, %arg1: tensor<28672x2xf16>) -> tensor<8192x2xf16> { + func.func @main(%arg0: tensor<28672x8192xf16>, %arg1: tensor<28672x2xf16>) -> tensor<8192x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x2xf16>) -> tensor<8192x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<28672x8192xf16>, tensor<28672x2xf16>) outs(%1 : tensor<8192x2xf16>) -> tensor<8192x2xf16> return %2 : tensor<8192x2xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_2_3584_bf16_tA.mlir b/gemm/mlir/gemm_8192_2_3584_bf16_tA.mlir index 9f708d8..6bfc9dc 100644 --- a/gemm/mlir/gemm_8192_2_3584_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_2_3584_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<3584x8192xbf16>, %arg1: tensor<3584x2xbf16>) -> tensor<8192x2xbf16> { + func.func @main(%arg0: tensor<3584x8192xbf16>, %arg1: tensor<3584x2xbf16>) -> tensor<8192x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xbf16>, tensor<3584x2xbf16>) outs(%1 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16> return %2 : tensor<8192x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_2_3584_f16_tA.mlir b/gemm/mlir/gemm_8192_2_3584_f16_tA.mlir index 3efba95..5891198 100644 --- 
a/gemm/mlir/gemm_8192_2_3584_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_2_3584_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<3584x8192xf16>, %arg1: tensor<3584x2xf16>) -> tensor<8192x2xf16> { + func.func @main(%arg0: tensor<3584x8192xf16>, %arg1: tensor<3584x2xf16>) -> tensor<8192x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x2xf16>) -> tensor<8192x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xf16>, tensor<3584x2xf16>) outs(%1 : tensor<8192x2xf16>) -> tensor<8192x2xf16> return %2 : tensor<8192x2xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_2_4096_bf16_tA.mlir b/gemm/mlir/gemm_8192_2_4096_bf16_tA.mlir index ac505cc..5bad65e 100644 --- a/gemm/mlir/gemm_8192_2_4096_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_2_4096_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<4096x2xbf16>) -> tensor<8192x2xbf16> { + func.func @main(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<4096x2xbf16>) -> tensor<8192x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xbf16>, tensor<4096x2xbf16>) outs(%1 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16> return %2 : tensor<8192x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_2_4096_f16_tA.mlir b/gemm/mlir/gemm_8192_2_4096_f16_tA.mlir index cf32596..2ff588d 100644 --- a/gemm/mlir/gemm_8192_2_4096_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_2_4096_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<4096x8192xf16>, %arg1: tensor<4096x2xf16>) -> tensor<8192x2xf16> { + func.func @main(%arg0: tensor<4096x8192xf16>, %arg1: tensor<4096x2xf16>) -> tensor<8192x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x2xf16>) -> tensor<8192x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xf16>, tensor<4096x2xf16>) outs(%1 : tensor<8192x2xf16>) -> tensor<8192x2xf16> return %2 : tensor<8192x2xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_2_7168_bf16_tA.mlir b/gemm/mlir/gemm_8192_2_7168_bf16_tA.mlir index 7cae46d..6017644 100644 --- a/gemm/mlir/gemm_8192_2_7168_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_2_7168_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<7168x8192xbf16>, %arg1: tensor<7168x2xbf16>) -> tensor<8192x2xbf16> { + func.func @main(%arg0: tensor<7168x8192xbf16>, %arg1: tensor<7168x2xbf16>) -> tensor<8192x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<7168x8192xbf16>, tensor<7168x2xbf16>) outs(%1 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16> return %2 : tensor<8192x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_2_7168_f16_tA.mlir b/gemm/mlir/gemm_8192_2_7168_f16_tA.mlir index 40d0de9..ada61d1 100644 --- a/gemm/mlir/gemm_8192_2_7168_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_2_7168_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<7168x8192xf16>, %arg1: tensor<7168x2xf16>) -> tensor<8192x2xf16> { + func.func @main(%arg0: tensor<7168x8192xf16>, %arg1: tensor<7168x2xf16>) -> tensor<8192x2xf16> { %cst = arith.constant 0.000000e+00 : 
f16 %0 = tensor.empty() : tensor<8192x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x2xf16>) -> tensor<8192x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<7168x8192xf16>, tensor<7168x2xf16>) outs(%1 : tensor<8192x2xf16>) -> tensor<8192x2xf16> return %2 : tensor<8192x2xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_8192_2_8192_bf16_tA.mlir index e11c1a2..4a5c210 100644 --- a/gemm/mlir/gemm_8192_2_8192_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_2_8192_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x8192xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<8192x2xbf16> { + func.func @main(%arg0: tensor<8192x8192xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<8192x2xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x2xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16> return %2 : tensor<8192x2xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_2_8192_f16_tA.mlir b/gemm/mlir/gemm_8192_2_8192_f16_tA.mlir index 4316b29..070a5ba 100644 --- a/gemm/mlir/gemm_8192_2_8192_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_2_8192_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x2xf16>) -> tensor<8192x2xf16> { + func.func @main(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x2xf16>) -> tensor<8192x2xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x2xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x2xf16>) -> tensor<8192x2xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xf16>, tensor<8192x2xf16>) outs(%1 : tensor<8192x2xf16>) -> tensor<8192x2xf16> return %2 : tensor<8192x2xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_32_1024_bf16_tA.mlir b/gemm/mlir/gemm_8192_32_1024_bf16_tA.mlir index 9d083b2..852e767 100644 --- a/gemm/mlir/gemm_8192_32_1024_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_32_1024_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<1024x8192xbf16>, %arg1: tensor<1024x32xbf16>) -> tensor<8192x32xbf16> { + func.func @main(%arg0: tensor<1024x8192xbf16>, %arg1: tensor<1024x32xbf16>) -> tensor<8192x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xbf16>, tensor<1024x32xbf16>) outs(%1 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> return %2 : tensor<8192x32xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_32_1024_f16_tA.mlir b/gemm/mlir/gemm_8192_32_1024_f16_tA.mlir index 1ffeb72..c5f5846 100644 --- a/gemm/mlir/gemm_8192_32_1024_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_32_1024_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<1024x8192xf16>, %arg1: tensor<1024x32xf16>) -> tensor<8192x32xf16> { + func.func @main(%arg0: tensor<1024x8192xf16>, %arg1: tensor<1024x32xf16>) -> tensor<8192x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x32xf16>) -> tensor<8192x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xf16>, tensor<1024x32xf16>) outs(%1 : tensor<8192x32xf16>) -> tensor<8192x32xf16> return %2 : tensor<8192x32xf16> } -} +} diff 
--git a/gemm/mlir/gemm_8192_32_14336_bf16_tA.mlir b/gemm/mlir/gemm_8192_32_14336_bf16_tA.mlir index ee481ff..7f3f684 100644 --- a/gemm/mlir/gemm_8192_32_14336_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_32_14336_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<14336x8192xbf16>, %arg1: tensor<14336x32xbf16>) -> tensor<8192x32xbf16> { + func.func @main(%arg0: tensor<14336x8192xbf16>, %arg1: tensor<14336x32xbf16>) -> tensor<8192x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<14336x8192xbf16>, tensor<14336x32xbf16>) outs(%1 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> return %2 : tensor<8192x32xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_32_14336_f16_tA.mlir b/gemm/mlir/gemm_8192_32_14336_f16_tA.mlir index ddd4547..4670ddd 100644 --- a/gemm/mlir/gemm_8192_32_14336_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_32_14336_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<14336x8192xf16>, %arg1: tensor<14336x32xf16>) -> tensor<8192x32xf16> { + func.func @main(%arg0: tensor<14336x8192xf16>, %arg1: tensor<14336x32xf16>) -> tensor<8192x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x32xf16>) -> tensor<8192x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<14336x8192xf16>, tensor<14336x32xf16>) outs(%1 : tensor<8192x32xf16>) -> tensor<8192x32xf16> return %2 : tensor<8192x32xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_32_2048_bf16_tA.mlir b/gemm/mlir/gemm_8192_32_2048_bf16_tA.mlir index 6a61969..9b7cb18 100644 --- a/gemm/mlir/gemm_8192_32_2048_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_32_2048_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<2048x8192xbf16>, %arg1: tensor<2048x32xbf16>) -> tensor<8192x32xbf16> { + func.func @main(%arg0: tensor<2048x8192xbf16>, %arg1: tensor<2048x32xbf16>) -> tensor<8192x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xbf16>, tensor<2048x32xbf16>) outs(%1 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> return %2 : tensor<8192x32xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_32_2048_f16_tA.mlir b/gemm/mlir/gemm_8192_32_2048_f16_tA.mlir index 98bfa9b..ad5ff98 100644 --- a/gemm/mlir/gemm_8192_32_2048_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_32_2048_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<2048x8192xf16>, %arg1: tensor<2048x32xf16>) -> tensor<8192x32xf16> { + func.func @main(%arg0: tensor<2048x8192xf16>, %arg1: tensor<2048x32xf16>) -> tensor<8192x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x32xf16>) -> tensor<8192x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xf16>, tensor<2048x32xf16>) outs(%1 : tensor<8192x32xf16>) -> tensor<8192x32xf16> return %2 : tensor<8192x32xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_32_28672_bf16_tA.mlir b/gemm/mlir/gemm_8192_32_28672_bf16_tA.mlir index 25eb462..0e75daf 100644 --- a/gemm/mlir/gemm_8192_32_28672_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_32_28672_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func 
@main_0(%arg0: tensor<28672x8192xbf16>, %arg1: tensor<28672x32xbf16>) -> tensor<8192x32xbf16> { + func.func @main(%arg0: tensor<28672x8192xbf16>, %arg1: tensor<28672x32xbf16>) -> tensor<8192x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<28672x8192xbf16>, tensor<28672x32xbf16>) outs(%1 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> return %2 : tensor<8192x32xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_32_28672_f16_tA.mlir b/gemm/mlir/gemm_8192_32_28672_f16_tA.mlir index 5f499b8..1809761 100644 --- a/gemm/mlir/gemm_8192_32_28672_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_32_28672_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<28672x8192xf16>, %arg1: tensor<28672x32xf16>) -> tensor<8192x32xf16> { + func.func @main(%arg0: tensor<28672x8192xf16>, %arg1: tensor<28672x32xf16>) -> tensor<8192x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x32xf16>) -> tensor<8192x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<28672x8192xf16>, tensor<28672x32xf16>) outs(%1 : tensor<8192x32xf16>) -> tensor<8192x32xf16> return %2 : tensor<8192x32xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_32_3584_bf16_tA.mlir b/gemm/mlir/gemm_8192_32_3584_bf16_tA.mlir index acc5d08..ddbba11 100644 --- a/gemm/mlir/gemm_8192_32_3584_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_32_3584_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<3584x8192xbf16>, %arg1: tensor<3584x32xbf16>) -> tensor<8192x32xbf16> { + func.func @main(%arg0: tensor<3584x8192xbf16>, %arg1: tensor<3584x32xbf16>) -> tensor<8192x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : tensor<8192x32xbf16> %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xbf16>, tensor<3584x32xbf16>) outs(%1 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> return %2 : tensor<8192x32xbf16> } -} +} diff --git a/gemm/mlir/gemm_8192_32_3584_f16_tA.mlir b/gemm/mlir/gemm_8192_32_3584_f16_tA.mlir index f4cead3..45b7ca4 100644 --- a/gemm/mlir/gemm_8192_32_3584_f16_tA.mlir +++ b/gemm/mlir/gemm_8192_32_3584_f16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<3584x8192xf16>, %arg1: tensor<3584x32xf16>) -> tensor<8192x32xf16> { + func.func @main(%arg0: tensor<3584x8192xf16>, %arg1: tensor<3584x32xf16>) -> tensor<8192x32xf16> { %cst = arith.constant 0.000000e+00 : f16 %0 = tensor.empty() : tensor<8192x32xf16> %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x32xf16>) -> tensor<8192x32xf16> %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xf16>, tensor<3584x32xf16>) outs(%1 : tensor<8192x32xf16>) -> tensor<8192x32xf16> return %2 : tensor<8192x32xf16> } -} +} diff --git a/gemm/mlir/gemm_8192_32_4096_bf16_tA.mlir b/gemm/mlir/gemm_8192_32_4096_bf16_tA.mlir index ae5974b..7134984 100644 --- a/gemm/mlir/gemm_8192_32_4096_bf16_tA.mlir +++ b/gemm/mlir/gemm_8192_32_4096_bf16_tA.mlir @@ -1,10 +1,10 @@ module { - func.func @main_0(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<4096x32xbf16>) -> tensor<8192x32xbf16> { + func.func @main(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<4096x32xbf16>) -> tensor<8192x32xbf16> { %cst = arith.constant 0.000000e+00 : bf16 %0 = tensor.empty() : 
tensor<8192x32xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xbf16>, tensor<4096x32xbf16>) outs(%1 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16>
     return %2 : tensor<8192x32xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_32_4096_f16_tA.mlir b/gemm/mlir/gemm_8192_32_4096_f16_tA.mlir
index dd43c21..7df2c92 100644
--- a/gemm/mlir/gemm_8192_32_4096_f16_tA.mlir
+++ b/gemm/mlir/gemm_8192_32_4096_f16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<4096x8192xf16>, %arg1: tensor<4096x32xf16>) -> tensor<8192x32xf16> {
+  func.func @main(%arg0: tensor<4096x8192xf16>, %arg1: tensor<4096x32xf16>) -> tensor<8192x32xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x32xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x32xf16>) -> tensor<8192x32xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xf16>, tensor<4096x32xf16>) outs(%1 : tensor<8192x32xf16>) -> tensor<8192x32xf16>
     return %2 : tensor<8192x32xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_32_7168_bf16_tA.mlir b/gemm/mlir/gemm_8192_32_7168_bf16_tA.mlir
index 199a6b0..672f613 100644
--- a/gemm/mlir/gemm_8192_32_7168_bf16_tA.mlir
+++ b/gemm/mlir/gemm_8192_32_7168_bf16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<7168x8192xbf16>, %arg1: tensor<7168x32xbf16>) -> tensor<8192x32xbf16> {
+  func.func @main(%arg0: tensor<7168x8192xbf16>, %arg1: tensor<7168x32xbf16>) -> tensor<8192x32xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<8192x32xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<7168x8192xbf16>, tensor<7168x32xbf16>) outs(%1 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16>
     return %2 : tensor<8192x32xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_32_7168_f16_tA.mlir b/gemm/mlir/gemm_8192_32_7168_f16_tA.mlir
index be300a5..aa39da1 100644
--- a/gemm/mlir/gemm_8192_32_7168_f16_tA.mlir
+++ b/gemm/mlir/gemm_8192_32_7168_f16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<7168x8192xf16>, %arg1: tensor<7168x32xf16>) -> tensor<8192x32xf16> {
+  func.func @main(%arg0: tensor<7168x8192xf16>, %arg1: tensor<7168x32xf16>) -> tensor<8192x32xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x32xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x32xf16>) -> tensor<8192x32xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<7168x8192xf16>, tensor<7168x32xf16>) outs(%1 : tensor<8192x32xf16>) -> tensor<8192x32xf16>
     return %2 : tensor<8192x32xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_8192_32_8192_bf16_tA.mlir
index 029e2eb..beeb9f6 100644
--- a/gemm/mlir/gemm_8192_32_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_8192_32_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<8192x8192xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<8192x32xbf16> {
+  func.func @main(%arg0: tensor<8192x8192xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<8192x32xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<8192x32xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16>
     return %2 : tensor<8192x32xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_32_8192_f16_tA.mlir b/gemm/mlir/gemm_8192_32_8192_f16_tA.mlir
index 3182444..538b2a5 100644
--- a/gemm/mlir/gemm_8192_32_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_8192_32_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x32xf16>) -> tensor<8192x32xf16> {
+  func.func @main(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x32xf16>) -> tensor<8192x32xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x32xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x32xf16>) -> tensor<8192x32xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xf16>, tensor<8192x32xf16>) outs(%1 : tensor<8192x32xf16>) -> tensor<8192x32xf16>
     return %2 : tensor<8192x32xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_4_1024_bf16_tA.mlir b/gemm/mlir/gemm_8192_4_1024_bf16_tA.mlir
index 7ee7a15..dadcc8c 100644
--- a/gemm/mlir/gemm_8192_4_1024_bf16_tA.mlir
+++ b/gemm/mlir/gemm_8192_4_1024_bf16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<1024x8192xbf16>, %arg1: tensor<1024x4xbf16>) -> tensor<8192x4xbf16> {
+  func.func @main(%arg0: tensor<1024x8192xbf16>, %arg1: tensor<1024x4xbf16>) -> tensor<8192x4xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<8192x4xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xbf16>, tensor<1024x4xbf16>) outs(%1 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16>
     return %2 : tensor<8192x4xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_4_1024_f16_tA.mlir b/gemm/mlir/gemm_8192_4_1024_f16_tA.mlir
index 3f3a1d9..ae01271 100644
--- a/gemm/mlir/gemm_8192_4_1024_f16_tA.mlir
+++ b/gemm/mlir/gemm_8192_4_1024_f16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<1024x8192xf16>, %arg1: tensor<1024x4xf16>) -> tensor<8192x4xf16> {
+  func.func @main(%arg0: tensor<1024x8192xf16>, %arg1: tensor<1024x4xf16>) -> tensor<8192x4xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x4xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x4xf16>) -> tensor<8192x4xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xf16>, tensor<1024x4xf16>) outs(%1 : tensor<8192x4xf16>) -> tensor<8192x4xf16>
     return %2 : tensor<8192x4xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_4_14336_bf16_tA.mlir b/gemm/mlir/gemm_8192_4_14336_bf16_tA.mlir
index 6ab6a37..a91f9bf 100644
--- a/gemm/mlir/gemm_8192_4_14336_bf16_tA.mlir
+++ b/gemm/mlir/gemm_8192_4_14336_bf16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<14336x8192xbf16>, %arg1: tensor<14336x4xbf16>) -> tensor<8192x4xbf16> {
+  func.func @main(%arg0: tensor<14336x8192xbf16>, %arg1: tensor<14336x4xbf16>) -> tensor<8192x4xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<8192x4xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<14336x8192xbf16>, tensor<14336x4xbf16>) outs(%1 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16>
     return %2 : tensor<8192x4xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_4_14336_f16_tA.mlir b/gemm/mlir/gemm_8192_4_14336_f16_tA.mlir
index 1acec1d..925676f 100644
--- a/gemm/mlir/gemm_8192_4_14336_f16_tA.mlir
+++ b/gemm/mlir/gemm_8192_4_14336_f16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<14336x8192xf16>, %arg1: tensor<14336x4xf16>) -> tensor<8192x4xf16> {
+  func.func @main(%arg0: tensor<14336x8192xf16>, %arg1: tensor<14336x4xf16>) -> tensor<8192x4xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x4xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x4xf16>) -> tensor<8192x4xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<14336x8192xf16>, tensor<14336x4xf16>) outs(%1 : tensor<8192x4xf16>) -> tensor<8192x4xf16>
     return %2 : tensor<8192x4xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_4_2048_bf16_tA.mlir b/gemm/mlir/gemm_8192_4_2048_bf16_tA.mlir
index a2f1152..63f589c 100644
--- a/gemm/mlir/gemm_8192_4_2048_bf16_tA.mlir
+++ b/gemm/mlir/gemm_8192_4_2048_bf16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<2048x8192xbf16>, %arg1: tensor<2048x4xbf16>) -> tensor<8192x4xbf16> {
+  func.func @main(%arg0: tensor<2048x8192xbf16>, %arg1: tensor<2048x4xbf16>) -> tensor<8192x4xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<8192x4xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xbf16>, tensor<2048x4xbf16>) outs(%1 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16>
     return %2 : tensor<8192x4xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_4_2048_f16_tA.mlir b/gemm/mlir/gemm_8192_4_2048_f16_tA.mlir
index bbb0827..043dba2 100644
--- a/gemm/mlir/gemm_8192_4_2048_f16_tA.mlir
+++ b/gemm/mlir/gemm_8192_4_2048_f16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<2048x8192xf16>, %arg1: tensor<2048x4xf16>) -> tensor<8192x4xf16> {
+  func.func @main(%arg0: tensor<2048x8192xf16>, %arg1: tensor<2048x4xf16>) -> tensor<8192x4xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x4xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x4xf16>) -> tensor<8192x4xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xf16>, tensor<2048x4xf16>) outs(%1 : tensor<8192x4xf16>) -> tensor<8192x4xf16>
     return %2 : tensor<8192x4xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_4_28672_bf16_tA.mlir b/gemm/mlir/gemm_8192_4_28672_bf16_tA.mlir
index ccceace..ffcc49d 100644
--- a/gemm/mlir/gemm_8192_4_28672_bf16_tA.mlir
+++ b/gemm/mlir/gemm_8192_4_28672_bf16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<28672x8192xbf16>, %arg1: tensor<28672x4xbf16>) -> tensor<8192x4xbf16> {
+  func.func @main(%arg0: tensor<28672x8192xbf16>, %arg1: tensor<28672x4xbf16>) -> tensor<8192x4xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<8192x4xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<28672x8192xbf16>, tensor<28672x4xbf16>) outs(%1 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16>
     return %2 : tensor<8192x4xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_4_28672_f16_tA.mlir b/gemm/mlir/gemm_8192_4_28672_f16_tA.mlir
index 9fed52d..3e66079 100644
--- a/gemm/mlir/gemm_8192_4_28672_f16_tA.mlir
+++ b/gemm/mlir/gemm_8192_4_28672_f16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<28672x8192xf16>, %arg1: tensor<28672x4xf16>) -> tensor<8192x4xf16> {
+  func.func @main(%arg0: tensor<28672x8192xf16>, %arg1: tensor<28672x4xf16>) -> tensor<8192x4xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x4xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x4xf16>) -> tensor<8192x4xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<28672x8192xf16>, tensor<28672x4xf16>) outs(%1 : tensor<8192x4xf16>) -> tensor<8192x4xf16>
     return %2 : tensor<8192x4xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_4_3584_bf16_tA.mlir b/gemm/mlir/gemm_8192_4_3584_bf16_tA.mlir
index f7a7302..13ea765 100644
--- a/gemm/mlir/gemm_8192_4_3584_bf16_tA.mlir
+++ b/gemm/mlir/gemm_8192_4_3584_bf16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<3584x8192xbf16>, %arg1: tensor<3584x4xbf16>) -> tensor<8192x4xbf16> {
+  func.func @main(%arg0: tensor<3584x8192xbf16>, %arg1: tensor<3584x4xbf16>) -> tensor<8192x4xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<8192x4xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xbf16>, tensor<3584x4xbf16>) outs(%1 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16>
     return %2 : tensor<8192x4xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_4_3584_f16_tA.mlir b/gemm/mlir/gemm_8192_4_3584_f16_tA.mlir
index a7192ae..b3a4aca 100644
--- a/gemm/mlir/gemm_8192_4_3584_f16_tA.mlir
+++ b/gemm/mlir/gemm_8192_4_3584_f16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<3584x8192xf16>, %arg1: tensor<3584x4xf16>) -> tensor<8192x4xf16> {
+  func.func @main(%arg0: tensor<3584x8192xf16>, %arg1: tensor<3584x4xf16>) -> tensor<8192x4xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x4xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x4xf16>) -> tensor<8192x4xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xf16>, tensor<3584x4xf16>) outs(%1 : tensor<8192x4xf16>) -> tensor<8192x4xf16>
     return %2 : tensor<8192x4xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_4_4096_bf16_tA.mlir b/gemm/mlir/gemm_8192_4_4096_bf16_tA.mlir
index 54161e0..111e1b9 100644
--- a/gemm/mlir/gemm_8192_4_4096_bf16_tA.mlir
+++ b/gemm/mlir/gemm_8192_4_4096_bf16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<4096x4xbf16>) -> tensor<8192x4xbf16> {
+  func.func @main(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<4096x4xbf16>) -> tensor<8192x4xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<8192x4xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xbf16>, tensor<4096x4xbf16>) outs(%1 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16>
     return %2 : tensor<8192x4xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_4_4096_f16_tA.mlir b/gemm/mlir/gemm_8192_4_4096_f16_tA.mlir
index 4586bc9..e9059bf 100644
--- a/gemm/mlir/gemm_8192_4_4096_f16_tA.mlir
+++ b/gemm/mlir/gemm_8192_4_4096_f16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<4096x8192xf16>, %arg1: tensor<4096x4xf16>) -> tensor<8192x4xf16> {
+  func.func @main(%arg0: tensor<4096x8192xf16>, %arg1: tensor<4096x4xf16>) -> tensor<8192x4xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x4xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x4xf16>) -> tensor<8192x4xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xf16>, tensor<4096x4xf16>) outs(%1 : tensor<8192x4xf16>) -> tensor<8192x4xf16>
     return %2 : tensor<8192x4xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_4_7168_bf16_tA.mlir b/gemm/mlir/gemm_8192_4_7168_bf16_tA.mlir
index fc001ab..7c140f3 100644
--- a/gemm/mlir/gemm_8192_4_7168_bf16_tA.mlir
+++ b/gemm/mlir/gemm_8192_4_7168_bf16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<7168x8192xbf16>, %arg1: tensor<7168x4xbf16>) -> tensor<8192x4xbf16> {
+  func.func @main(%arg0: tensor<7168x8192xbf16>, %arg1: tensor<7168x4xbf16>) -> tensor<8192x4xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<8192x4xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<7168x8192xbf16>, tensor<7168x4xbf16>) outs(%1 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16>
     return %2 : tensor<8192x4xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_4_7168_f16_tA.mlir b/gemm/mlir/gemm_8192_4_7168_f16_tA.mlir
index 68cf431..81e98c6 100644
--- a/gemm/mlir/gemm_8192_4_7168_f16_tA.mlir
+++ b/gemm/mlir/gemm_8192_4_7168_f16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<7168x8192xf16>, %arg1: tensor<7168x4xf16>) -> tensor<8192x4xf16> {
+  func.func @main(%arg0: tensor<7168x8192xf16>, %arg1: tensor<7168x4xf16>) -> tensor<8192x4xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x4xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x4xf16>) -> tensor<8192x4xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<7168x8192xf16>, tensor<7168x4xf16>) outs(%1 : tensor<8192x4xf16>) -> tensor<8192x4xf16>
     return %2 : tensor<8192x4xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_8192_4_8192_bf16_tA.mlir
index 9affedd..7ae31d7 100644
--- a/gemm/mlir/gemm_8192_4_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_8192_4_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<8192x8192xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<8192x4xbf16> {
+  func.func @main(%arg0: tensor<8192x8192xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<8192x4xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<8192x4xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16>
     return %2 : tensor<8192x4xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_4_8192_f16_tA.mlir b/gemm/mlir/gemm_8192_4_8192_f16_tA.mlir
index 95b9cd9..2378c0f 100644
--- a/gemm/mlir/gemm_8192_4_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_8192_4_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x4xf16>) -> tensor<8192x4xf16> {
+  func.func @main(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x4xf16>) -> tensor<8192x4xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x4xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x4xf16>) -> tensor<8192x4xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xf16>, tensor<8192x4xf16>) outs(%1 : tensor<8192x4xf16>) -> tensor<8192x4xf16>
     return %2 : tensor<8192x4xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_5120_640_bf16.mlir b/gemm/mlir/gemm_8192_5120_640_bf16.mlir
index 60e77f8..5f59098 100644
--- a/gemm/mlir/gemm_8192_5120_640_bf16.mlir
+++ b/gemm/mlir/gemm_8192_5120_640_bf16.mlir
@@ -1,9 +1,9 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x640xbf16>, %arg1: tensor<640x5120xbf16>) -> tensor<8192x5120xbf16> {
+  func.func @main(%arg0: tensor<8192x640xbf16>, %arg1: tensor<640x5120xbf16>) -> tensor<8192x5120xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<8192x5120xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x5120xbf16>) -> tensor<8192x5120xbf16>
     %2 = linalg.matmul ins(%arg0, %arg1 : tensor<8192x640xbf16>, tensor<640x5120xbf16>) outs(%1 : tensor<8192x5120xbf16>) -> tensor<8192x5120xbf16>
     return %2 : tensor<8192x5120xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_5120_640_bf16_tA.mlir b/gemm/mlir/gemm_8192_5120_640_bf16_tA.mlir
index 5cc2130..177684d 100644
--- a/gemm/mlir/gemm_8192_5120_640_bf16_tA.mlir
+++ b/gemm/mlir/gemm_8192_5120_640_bf16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<640x8192xbf16>, %arg1: tensor<640x5120xbf16>) -> tensor<8192x5120xbf16> {
+  func.func @main(%arg0: tensor<640x8192xbf16>, %arg1: tensor<640x5120xbf16>) -> tensor<8192x5120xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<8192x5120xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x5120xbf16>) -> tensor<8192x5120xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x8192xbf16>, tensor<640x5120xbf16>) outs(%1 : tensor<8192x5120xbf16>) -> tensor<8192x5120xbf16>
     return %2 : tensor<8192x5120xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_5120_640_bf16_tB.mlir b/gemm/mlir/gemm_8192_5120_640_bf16_tB.mlir
index 522054a..629f56f 100644
--- a/gemm/mlir/gemm_8192_5120_640_bf16_tB.mlir
+++ b/gemm/mlir/gemm_8192_5120_640_bf16_tB.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<8192x640xbf16>, %arg1: tensor<5120x640xbf16>) -> tensor<8192x5120xbf16> {
+  func.func @main(%arg0: tensor<8192x640xbf16>, %arg1: tensor<5120x640xbf16>) -> tensor<8192x5120xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<8192x5120xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x5120xbf16>) -> tensor<8192x5120xbf16>
     %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<8192x640xbf16>, tensor<5120x640xbf16>) outs(%1 : tensor<8192x5120xbf16>) -> tensor<8192x5120xbf16>
     return %2 : tensor<8192x5120xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_5120_640_f16.mlir b/gemm/mlir/gemm_8192_5120_640_f16.mlir
index 4f3838e..52be98c 100644
--- a/gemm/mlir/gemm_8192_5120_640_f16.mlir
+++ b/gemm/mlir/gemm_8192_5120_640_f16.mlir
@@ -1,9 +1,9 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x640xf16>, %arg1: tensor<640x5120xf16>) -> tensor<8192x5120xf16> {
+  func.func @main(%arg0: tensor<8192x640xf16>, %arg1: tensor<640x5120xf16>) -> tensor<8192x5120xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x5120xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x5120xf16>) -> tensor<8192x5120xf16>
     %2 = linalg.matmul ins(%arg0, %arg1 : tensor<8192x640xf16>, tensor<640x5120xf16>) outs(%1 : tensor<8192x5120xf16>) -> tensor<8192x5120xf16>
     return %2 : tensor<8192x5120xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_5120_640_f16_tA.mlir b/gemm/mlir/gemm_8192_5120_640_f16_tA.mlir
index 5d7300e..97875f6 100644
--- a/gemm/mlir/gemm_8192_5120_640_f16_tA.mlir
+++ b/gemm/mlir/gemm_8192_5120_640_f16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<640x8192xf16>, %arg1: tensor<640x5120xf16>) -> tensor<8192x5120xf16> {
+  func.func @main(%arg0: tensor<640x8192xf16>, %arg1: tensor<640x5120xf16>) -> tensor<8192x5120xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x5120xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x5120xf16>) -> tensor<8192x5120xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x8192xf16>, tensor<640x5120xf16>) outs(%1 : tensor<8192x5120xf16>) -> tensor<8192x5120xf16>
     return %2 : tensor<8192x5120xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_5120_640_f16_tB.mlir b/gemm/mlir/gemm_8192_5120_640_f16_tB.mlir
index c449f76..2f37fd5 100644
--- a/gemm/mlir/gemm_8192_5120_640_f16_tB.mlir
+++ b/gemm/mlir/gemm_8192_5120_640_f16_tB.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<8192x640xf16>, %arg1: tensor<5120x640xf16>) -> tensor<8192x5120xf16> {
+  func.func @main(%arg0: tensor<8192x640xf16>, %arg1: tensor<5120x640xf16>) -> tensor<8192x5120xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x5120xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x5120xf16>) -> tensor<8192x5120xf16>
     %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<8192x640xf16>, tensor<5120x640xf16>) outs(%1 : tensor<8192x5120xf16>) -> tensor<8192x5120xf16>
     return %2 : tensor<8192x5120xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_8192_1024_f16.mlir b/gemm/mlir/gemm_8192_8192_1024_f16.mlir
index 9a3648a..8c5ec54 100644
--- a/gemm/mlir/gemm_8192_8192_1024_f16.mlir
+++ b/gemm/mlir/gemm_8192_8192_1024_f16.mlir
@@ -1,9 +1,9 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x1024xf16>, %arg1: tensor<1024x8192xf16>) -> tensor<8192x8192xf16> {
+  func.func @main(%arg0: tensor<8192x1024xf16>, %arg1: tensor<1024x8192xf16>) -> tensor<8192x8192xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x8192xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x8192xf16>) -> tensor<8192x8192xf16>
     %2 = linalg.matmul ins(%arg0, %arg1 : tensor<8192x1024xf16>, tensor<1024x8192xf16>) outs(%1 : tensor<8192x8192xf16>) -> tensor<8192x8192xf16>
     return %2 : tensor<8192x8192xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_8192_65536_f16.mlir b/gemm/mlir/gemm_8192_8192_65536_f16.mlir
index 6560fd3..04bdc92 100644
--- a/gemm/mlir/gemm_8192_8192_65536_f16.mlir
+++ b/gemm/mlir/gemm_8192_8192_65536_f16.mlir
@@ -1,9 +1,9 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x65536xf16>, %arg1: tensor<65536x8192xf16>) -> tensor<8192x8192xf16> {
+  func.func @main(%arg0: tensor<8192x65536xf16>, %arg1: tensor<65536x8192xf16>) -> tensor<8192x8192xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x8192xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x8192xf16>) -> tensor<8192x8192xf16>
     %2 = linalg.matmul ins(%arg0, %arg1 : tensor<8192x65536xf16>, tensor<65536x8192xf16>) outs(%1 : tensor<8192x8192xf16>) -> tensor<8192x8192xf16>
     return %2 : tensor<8192x8192xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_8192_8192_f16.mlir b/gemm/mlir/gemm_8192_8192_8192_f16.mlir
index 82a2379..232fdb7 100644
--- a/gemm/mlir/gemm_8192_8192_8192_f16.mlir
+++ b/gemm/mlir/gemm_8192_8192_8192_f16.mlir
@@ -1,9 +1,9 @@
 module {
-  func.func @main_0(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x8192xf16>) -> tensor<8192x8192xf16> {
+  func.func @main(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x8192xf16>) -> tensor<8192x8192xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x8192xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x8192xf16>) -> tensor<8192x8192xf16>
     %2 = linalg.matmul ins(%arg0, %arg1 : tensor<8192x8192xf16>, tensor<8192x8192xf16>) outs(%1 : tensor<8192x8192xf16>) -> tensor<8192x8192xf16>
     return %2 : tensor<8192x8192xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_8_1024_bf16_tA.mlir b/gemm/mlir/gemm_8192_8_1024_bf16_tA.mlir
index d2db594..90fbed3 100644
--- a/gemm/mlir/gemm_8192_8_1024_bf16_tA.mlir
+++ b/gemm/mlir/gemm_8192_8_1024_bf16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<1024x8192xbf16>, %arg1: tensor<1024x8xbf16>) -> tensor<8192x8xbf16> {
+  func.func @main(%arg0: tensor<1024x8192xbf16>, %arg1: tensor<1024x8xbf16>) -> tensor<8192x8xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<8192x8xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xbf16>, tensor<1024x8xbf16>) outs(%1 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
     return %2 : tensor<8192x8xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_8_1024_f16_tA.mlir b/gemm/mlir/gemm_8192_8_1024_f16_tA.mlir
index b2a13c3..b3d0f26 100644
--- a/gemm/mlir/gemm_8192_8_1024_f16_tA.mlir
+++ b/gemm/mlir/gemm_8192_8_1024_f16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<1024x8192xf16>, %arg1: tensor<1024x8xf16>) -> tensor<8192x8xf16> {
+  func.func @main(%arg0: tensor<1024x8192xf16>, %arg1: tensor<1024x8xf16>) -> tensor<8192x8xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x8xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xf16>, tensor<1024x8xf16>) outs(%1 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
     return %2 : tensor<8192x8xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_8_14336_bf16_tA.mlir b/gemm/mlir/gemm_8192_8_14336_bf16_tA.mlir
index 943e95d..17fe727 100644
--- a/gemm/mlir/gemm_8192_8_14336_bf16_tA.mlir
+++ b/gemm/mlir/gemm_8192_8_14336_bf16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<14336x8192xbf16>, %arg1: tensor<14336x8xbf16>) -> tensor<8192x8xbf16> {
+  func.func @main(%arg0: tensor<14336x8192xbf16>, %arg1: tensor<14336x8xbf16>) -> tensor<8192x8xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<8192x8xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<14336x8192xbf16>, tensor<14336x8xbf16>) outs(%1 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
     return %2 : tensor<8192x8xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_8_14336_f16_tA.mlir b/gemm/mlir/gemm_8192_8_14336_f16_tA.mlir
index f690bbd..bbf21b1 100644
--- a/gemm/mlir/gemm_8192_8_14336_f16_tA.mlir
+++ b/gemm/mlir/gemm_8192_8_14336_f16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<14336x8192xf16>, %arg1: tensor<14336x8xf16>) -> tensor<8192x8xf16> {
+  func.func @main(%arg0: tensor<14336x8192xf16>, %arg1: tensor<14336x8xf16>) -> tensor<8192x8xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x8xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<14336x8192xf16>, tensor<14336x8xf16>) outs(%1 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
     return %2 : tensor<8192x8xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_8_2048_bf16_tA.mlir b/gemm/mlir/gemm_8192_8_2048_bf16_tA.mlir
index ea3351c..d46ec59 100644
--- a/gemm/mlir/gemm_8192_8_2048_bf16_tA.mlir
+++ b/gemm/mlir/gemm_8192_8_2048_bf16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<2048x8192xbf16>, %arg1: tensor<2048x8xbf16>) -> tensor<8192x8xbf16> {
+  func.func @main(%arg0: tensor<2048x8192xbf16>, %arg1: tensor<2048x8xbf16>) -> tensor<8192x8xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<8192x8xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xbf16>, tensor<2048x8xbf16>) outs(%1 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
     return %2 : tensor<8192x8xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_8_2048_f16_tA.mlir b/gemm/mlir/gemm_8192_8_2048_f16_tA.mlir
index 7bf708b..30f757f 100644
--- a/gemm/mlir/gemm_8192_8_2048_f16_tA.mlir
+++ b/gemm/mlir/gemm_8192_8_2048_f16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<2048x8192xf16>, %arg1: tensor<2048x8xf16>) -> tensor<8192x8xf16> {
+  func.func @main(%arg0: tensor<2048x8192xf16>, %arg1: tensor<2048x8xf16>) -> tensor<8192x8xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x8xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xf16>, tensor<2048x8xf16>) outs(%1 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
     return %2 : tensor<8192x8xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_8_28672_bf16_tA.mlir b/gemm/mlir/gemm_8192_8_28672_bf16_tA.mlir
index fd3d526..7fdd508 100644
--- a/gemm/mlir/gemm_8192_8_28672_bf16_tA.mlir
+++ b/gemm/mlir/gemm_8192_8_28672_bf16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<28672x8192xbf16>, %arg1: tensor<28672x8xbf16>) -> tensor<8192x8xbf16> {
+  func.func @main(%arg0: tensor<28672x8192xbf16>, %arg1: tensor<28672x8xbf16>) -> tensor<8192x8xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<8192x8xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<28672x8192xbf16>, tensor<28672x8xbf16>) outs(%1 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
     return %2 : tensor<8192x8xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_8_28672_f16_tA.mlir b/gemm/mlir/gemm_8192_8_28672_f16_tA.mlir
index f08e510..aafb576 100644
--- a/gemm/mlir/gemm_8192_8_28672_f16_tA.mlir
+++ b/gemm/mlir/gemm_8192_8_28672_f16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<28672x8192xf16>, %arg1: tensor<28672x8xf16>) -> tensor<8192x8xf16> {
+  func.func @main(%arg0: tensor<28672x8192xf16>, %arg1: tensor<28672x8xf16>) -> tensor<8192x8xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x8xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<28672x8192xf16>, tensor<28672x8xf16>) outs(%1 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
     return %2 : tensor<8192x8xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_8_3584_bf16_tA.mlir b/gemm/mlir/gemm_8192_8_3584_bf16_tA.mlir
index b04864b..caa3522 100644
--- a/gemm/mlir/gemm_8192_8_3584_bf16_tA.mlir
+++ b/gemm/mlir/gemm_8192_8_3584_bf16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<3584x8192xbf16>, %arg1: tensor<3584x8xbf16>) -> tensor<8192x8xbf16> {
+  func.func @main(%arg0: tensor<3584x8192xbf16>, %arg1: tensor<3584x8xbf16>) -> tensor<8192x8xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<8192x8xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xbf16>, tensor<3584x8xbf16>) outs(%1 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
     return %2 : tensor<8192x8xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_8_3584_f16_tA.mlir b/gemm/mlir/gemm_8192_8_3584_f16_tA.mlir
index 56afcbc..9964378 100644
--- a/gemm/mlir/gemm_8192_8_3584_f16_tA.mlir
+++ b/gemm/mlir/gemm_8192_8_3584_f16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<3584x8192xf16>, %arg1: tensor<3584x8xf16>) -> tensor<8192x8xf16> {
+  func.func @main(%arg0: tensor<3584x8192xf16>, %arg1: tensor<3584x8xf16>) -> tensor<8192x8xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x8xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xf16>, tensor<3584x8xf16>) outs(%1 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
     return %2 : tensor<8192x8xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_8_4096_bf16_tA.mlir b/gemm/mlir/gemm_8192_8_4096_bf16_tA.mlir
index ede7ce9..ed9262d 100644
--- a/gemm/mlir/gemm_8192_8_4096_bf16_tA.mlir
+++ b/gemm/mlir/gemm_8192_8_4096_bf16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<4096x8xbf16>) -> tensor<8192x8xbf16> {
+  func.func @main(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<4096x8xbf16>) -> tensor<8192x8xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<8192x8xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xbf16>, tensor<4096x8xbf16>) outs(%1 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
     return %2 : tensor<8192x8xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_8_4096_f16_tA.mlir b/gemm/mlir/gemm_8192_8_4096_f16_tA.mlir
index 03c6a9f..0fca3dc 100644
--- a/gemm/mlir/gemm_8192_8_4096_f16_tA.mlir
+++ b/gemm/mlir/gemm_8192_8_4096_f16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<4096x8192xf16>, %arg1: tensor<4096x8xf16>) -> tensor<8192x8xf16> {
+  func.func @main(%arg0: tensor<4096x8192xf16>, %arg1: tensor<4096x8xf16>) -> tensor<8192x8xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x8xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xf16>, tensor<4096x8xf16>) outs(%1 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
     return %2 : tensor<8192x8xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_8_7168_bf16_tA.mlir b/gemm/mlir/gemm_8192_8_7168_bf16_tA.mlir
index 9060013..b7f68ff 100644
--- a/gemm/mlir/gemm_8192_8_7168_bf16_tA.mlir
+++ b/gemm/mlir/gemm_8192_8_7168_bf16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<7168x8192xbf16>, %arg1: tensor<7168x8xbf16>) -> tensor<8192x8xbf16> {
+  func.func @main(%arg0: tensor<7168x8192xbf16>, %arg1: tensor<7168x8xbf16>) -> tensor<8192x8xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<8192x8xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<7168x8192xbf16>, tensor<7168x8xbf16>) outs(%1 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
     return %2 : tensor<8192x8xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_8_7168_f16_tA.mlir b/gemm/mlir/gemm_8192_8_7168_f16_tA.mlir
index 6e1d748..c143d7f 100644
--- a/gemm/mlir/gemm_8192_8_7168_f16_tA.mlir
+++ b/gemm/mlir/gemm_8192_8_7168_f16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<7168x8192xf16>, %arg1: tensor<7168x8xf16>) -> tensor<8192x8xf16> {
+  func.func @main(%arg0: tensor<7168x8192xf16>, %arg1: tensor<7168x8xf16>) -> tensor<8192x8xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x8xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<7168x8192xf16>, tensor<7168x8xf16>) outs(%1 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
     return %2 : tensor<8192x8xf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_8192_8_8192_bf16_tA.mlir
index 52b48fd..be2e86d 100644
--- a/gemm/mlir/gemm_8192_8_8192_bf16_tA.mlir
+++ b/gemm/mlir/gemm_8192_8_8192_bf16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<8192x8192xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<8192x8xbf16> {
+  func.func @main(%arg0: tensor<8192x8192xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<8192x8xbf16> {
     %cst = arith.constant 0.000000e+00 : bf16
     %0 = tensor.empty() : tensor<8192x8xbf16>
     %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xbf16>, tensor<8192x8xbf16>) outs(%1 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
     return %2 : tensor<8192x8xbf16>
   }
-}
+}
diff --git a/gemm/mlir/gemm_8192_8_8192_f16_tA.mlir b/gemm/mlir/gemm_8192_8_8192_f16_tA.mlir
index 932a8de..62431ce 100644
--- a/gemm/mlir/gemm_8192_8_8192_f16_tA.mlir
+++ b/gemm/mlir/gemm_8192_8_8192_f16_tA.mlir
@@ -1,10 +1,10 @@
 
 module {
-  func.func @main_0(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x8xf16>) -> tensor<8192x8xf16> {
+  func.func @main(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x8xf16>) -> tensor<8192x8xf16> {
     %cst = arith.constant 0.000000e+00 : f16
     %0 = tensor.empty() : tensor<8192x8xf16>
     %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
     %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xf16>, tensor<8192x8xf16>) outs(%1 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
     return %2 : tensor<8192x8xf16>
   }
-}
+}
diff --git a/gemmbench/gemm_bench.py b/gemmbench/gemm_bench.py
index c11d6ef..c7317f2 100644
--- a/gemmbench/gemm_bench.py
+++ b/gemmbench/gemm_bench.py
@@ -8,92 +8,20 @@
 from tqdm import tqdm
 from multiprocessing import Pool, cpu_count, Manager
 import logging
+import itertools
 from pathlib import Path
 import csv
 import argparse
 import sys
 from utils import *
-from problems import *
-
-def generate_mlir_content(M, N, K, tA, tB, dtype):
-
-    mlir_template_A = f"""
-module {{
-  func.func @main_0(%arg0: tensor<{K}x{M}x{dtype}>, %arg1: tensor<{K}x{N}x{dtype}>) -> tensor<{M}x{N}x{dtype}> {{
-    %cst = arith.constant 0.000000e+00 : {dtype}
-    %0 = tensor.empty() : tensor<{M}x{N}x{dtype}>
-    %1 = linalg.fill ins(%cst : {dtype}) outs(%0 : tensor<{M}x{N}x{dtype}>) -> tensor<{M}x{N}x{dtype}>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<{K}x{M}x{dtype}>, tensor<{K}x{N}x{dtype}>) outs(%1 : tensor<{M}x{N}x{dtype}>) -> tensor<{M}x{N}x{dtype}>
-    return %2 : tensor<{M}x{N}x{dtype}>
-  }}
-}}
-"""
-
-    mlir_template_B = f"""
-module {{
-  func.func @main_0(%arg0: tensor<{M}x{K}x{dtype}>, %arg1: tensor<{N}x{K}x{dtype}>) -> tensor<{M}x{N}x{dtype}> {{
-    %cst = arith.constant 0.000000e+00 : {dtype}
-    %0 = tensor.empty() : tensor<{M}x{N}x{dtype}>
-    %1 = linalg.fill ins(%cst : {dtype}) outs(%0 : tensor<{M}x{N}x{dtype}>) -> tensor<{M}x{N}x{dtype}>
-    %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<{M}x{K}x{dtype}>, tensor<{N}x{K}x{dtype}>) outs(%1 : tensor<{M}x{N}x{dtype}>) -> tensor<{M}x{N}x{dtype}>
-    return %2 : tensor<{M}x{N}x{dtype}>
-  }}
-}}
-"""
-
-    mlir_template = f"""module {{
-  func.func @main_0(%arg0: tensor<{M}x{K}x{dtype}>, %arg1: tensor<{K}x{N}x{dtype}>) -> tensor<{M}x{N}x{dtype}> {{
-    %cst = arith.constant 0.000000e+00 : {dtype}
-    %0 = tensor.empty() : tensor<{M}x{N}x{dtype}>
-    %1 = linalg.fill ins(%cst : {dtype}) outs(%0 : tensor<{M}x{N}x{dtype}>) -> tensor<{M}x{N}x{dtype}>
-    %2 = linalg.matmul ins(%arg0, %arg1 : tensor<{M}x{K}x{dtype}>, tensor<{K}x{N}x{dtype}>) outs(%1 : tensor<{M}x{N}x{dtype}>) -> tensor<{M}x{N}x{dtype}>
-    return %2 : tensor<{M}x{N}x{dtype}>
-  }}
-}}
-"""
-    if tA == "T":
-        return mlir_template_A
-    if tB == "T":
-        return mlir_template_B
-    return mlir_template
-
-
-def compile_shape(tag, M, N, K, tA, tB, dtype, target, extra_compiler_args, vmfb_dict):
-    if tA == "T" and tB == "T":
-        return f"Can't transpose both inputs"
-
-    # Generate MLIR content
-    mlir_content = generate_mlir_content(M, N, K, tA, tB, dtype)
-
-    # Generate filenames
-    filename = f"gemm/mlir/gemm_{M}_{N}_{K}_{dtype}"
-    if tA == "T":
-        filename += "_tA"
-    elif tB == "T":
-        filename += "_tB"
-    mlir_filename = filename + ".mlir"
-    filename = filename.replace("mlir", "vmfb")
-    vmfb_filename = filename + ".vmfb"
-
-    # Write MLIR content to file
-    with open(mlir_filename, 'w') as f:
-        f.write(mlir_content)
-
-    # Compile MLIR to VMFB
-    exec_args = [
-        "iree-compile",
-        f"{mlir_filename}",
-        "--iree-hal-target-backends=rocm",
-        f"--iree-hip-target={target}",
-        "--iree-llvmgpu-enable-prefetch=true",
-        "-o",
-        f"{vmfb_filename}",
-    ] + extra_compiler_args
-    ret_value, stdout = run_iree_command(exec_args)
-
-    vmfb_dict[vmfb_filename] = [tag, M, N, K, tA, tB, dtype]
-    if ret_value == 0:
-        return f"Successfully compiled {mlir_filename} to {vmfb_filename}"
+from gemm_utils import *
+from problems import get_gemm_configs
+
+
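+# Compile a single (tag, config) pair in a worker process and hand the resulting
+# file paths back to the parent for bookkeeping.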
+def compile_gemm(tag, config, kernel_dir, vmfb_dir, target, extra_compiler_args):
+    mlir_file, vmfb_file = compile_gemm_config(config, kernel_dir, vmfb_dir, target, extra_compiler_args)
+    return (tag, config, mlir_file, vmfb_file)
+
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Config file updater.")
@@ -125,94 +53,80 @@ def compile_shape(tag, M, N, K, tA, tB, dtype, target, extra_compiler_args, vmfb
         roofline(args.roofline, args.plot, args.batch, args.dtype, args.model)
         sys.exit()
 
-    shapes = []
-    print(f"Generated {len(shapes)} gemm shapes.")
+    configs = get_gemm_configs()
+    print(f"Generated {len(configs)} gemm configs.")
 
     num_cpus = max(1, cpu_count() - 20)
     print(f"Using {num_cpus} CPUs for parallel processing.")
 
     manager = Manager()
     vmfb_dict = manager.dict()
 
-    all(shapes)
-    shape_idx = 0
-    for shape in shapes:
-        shape += (args.target, list(args.Xiree_compile), vmfb_dict,)
-        shapes[shape_idx] = shape
-        shape_idx += 1
+    repo_root = Path(__file__).parent.parent
+    kernel_dir = repo_root / "gemm" / "mlir"
+    vmfb_dir = repo_root / "gemm" / "vmfb"
+    kernel_dir.mkdir(parents=True, exist_ok=True)
+    vmfb_dir.mkdir(parents=True, exist_ok=True)
+    target = args.target
+    extra_compiler_args = list(args.Xiree_compile)
+
+    compile_args = itertools.starmap(
+        lambda tag, config: (tag, config, kernel_dir, vmfb_dir, target, extra_compiler_args), configs
+    )
 
     with Pool(num_cpus) as pool:
-        results = list(tqdm(pool.starmap(compile_shape, shapes)))
+        compilation_results = list(tqdm(pool.starmap(compile_gemm, list(compile_args))))
 
     error_count = 0
-    for result in results:
-        if 'error' in result.lower():
-            # print(result)
+    for tag, config, mlir_file, vmfb_file in compilation_results:
+        if vmfb_file:
+            vmfb_dict[vmfb_file] = (tag, config)
+        else:
             error_count += 1
-    print(f'{len(shapes) - error_count} Success, {error_count} Failed out of {len(shapes)} shapes')
+    print(
+        f"{len(configs) - error_count} Success, {error_count} Failed out of {len(configs)} configs"
+    )
 
     print("Compilation process completed.")
 
-    repo_root = Path(__file__).parent.parent
-
-    vmfb_dir = repo_root / Path('gemm/vmfb')
-
     results = []
     index = 0
-    output_csv = "results/iree_gemm.csv"
+    output_csv = "results/iree_gemm_new.csv"
     csv_dir = os.path.dirname(output_csv)
     if not os.path.exists(csv_dir):
        os.makedirs(csv_dir)
 
-    for vmfb_filename, input_list in vmfb_dict.items():
-        tag = input_list[0]
-        vmfb_filename = vmfb_filename.split("/")[-1]
-        name = vmfb_filename.split(".")[0]
-        M = input_list[1]
-        N = input_list[2]
-        K = input_list[3]
-        tA = input_list[4]
-        tB = input_list[5]
-        dtype = input_list[6]
-
-        if tA == "T":
-            inp1 = f"{K}x{M}x{dtype}"
-            inp2 = f"{K}x{N}x{dtype}"
-        elif tB == "T":
-            inp1 = f"{M}x{K}x{dtype}"
-            inp2 = f"{N}x{K}x{dtype}"
-        else:
-            inp1 = f"{M}x{K}x{dtype}"
-            inp2 = f"{K}x{N}x{dtype}"
+    for vmfb_filename, value in vmfb_dict.items():
+        tag, config = value
+        name = config.get_name()
+
+        inp1 = config.get_inp1()
+        inp2 = config.get_inp2()
 
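+        # Shape strings such as "8192x32xf16" let iree-benchmark-module synthesize the inputs.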
         exec_args = [
             "iree-benchmark-module",
             f"--device=hip",
             "--device_allocator=caching",
-            f"--module={vmfb_dir}/{vmfb_filename}",
-            "--function=main_0",
+            f"--module={vmfb_filename}",
+            "--function=main",
             f"--input={inp1}",
             f"--input={inp2}",
             "--benchmark_repetitions=3",
         ]
 
-        # iree benchmark command for full sdxl pipeline
+        # iree benchmark kernels
         ret_value, cmd_out = run_iree_command(exec_args)
         ok = ret_value == 0
         benchmark_gemm_mean_time_ms = bench_summary_process(ret_value, cmd_out)
         benchmark_gemm_mean_time_us = benchmark_gemm_mean_time_ms * 1000
 
-        if "bf" in dtype:
-            bytes_per_input = int(dtype[2:]) / 8
-        else:
-            bytes_per_input = int(dtype[1:]) / 8
-        flops = 2 * M * N * K
-        byte_count = bytes_per_input * (M * K + N * K + M * N)
+        flops = config.get_flops()
+        byte_count = config.get_byte_count()
 
         arithmetic_intensity = flops / byte_count
         tflops_per_second = (flops / 1e12) / (benchmark_gemm_mean_time_us / 1e6)
 
         results.append((
-            index, tag, name, M, N, K, dtype, tA, tB,
+            index, tag, name, config.M, config.N, config.K, config.dtype, config.tA, config.tB,
             round(benchmark_gemm_mean_time_us, 4),
             round(arithmetic_intensity, 4),
             round(tflops_per_second, 4),
diff --git a/gemmbench/gemm_utils.py b/gemmbench/gemm_utils.py
new file mode 100644
index 0000000..551d0aa
--- /dev/null
+++ b/gemmbench/gemm_utils.py
@@ -0,0 +1,143 @@
+from utils import *
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Optional
+
+
+@dataclass
+class GemmConfig:
+    M: int
+    N: int
+    K: int
+    tA: str
+    tB: str
+    dtype: str
+
+    def get_name(self) -> str:
+        name = f"gemm_{self.M}_{self.N}_{self.K}_{self.dtype}"
+        if self.tA == "T":
+            name += "_tA"
+        elif self.tB == "T":
+            name += "_tB"
+        return name
+
+    def get_inp1(self) -> str:
+        if self.tA == "T":
+            inp1 = f"{self.K}x{self.M}x{self.dtype}"
+        else:
+            inp1 = f"{self.M}x{self.K}x{self.dtype}"
+        return inp1
+
+    def get_inp2(self) -> str:
+        if self.tB == "T":
+            inp2 = f"{self.N}x{self.K}x{self.dtype}"
+        else:
+            inp2 = f"{self.K}x{self.N}x{self.dtype}"
+        return inp2
+
+    def get_byte_count(self) -> int:
+        dtype_bits_map = {
+            "f32": 32,
+            "f16": 16,
+            "bf16": 16,
+            "f8E4M3FNUZ": 8,
+            "i8": 8,
+            "i32": 32,
+        }
+        bytes_per_element = dtype_bits_map[self.dtype] // 8
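+        # Data volume is A (M*K) + B (N*K) + C (M*N) elements, each bytes_per_element wide.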
+        element_count = self.M * self.K + self.N * self.K + self.M * self.N
+        byte_count = element_count * bytes_per_element
+        return byte_count
+
+    def get_flops(self) -> int:
+        flops = 2 * self.M * self.N * self.K
+        return flops
+
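+# Emit one of three linalg templates: transpose-A, transpose-B, or plain matmul.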
+def generate_mlir(config: GemmConfig):
+    K = config.K
+    M = config.M
+    N = config.N
+    dtype = config.dtype
+    tA = config.tA
+    tB = config.tB
+    mlir_template_A = f"""
+module {{
+  func.func @main(%arg0: tensor<{K}x{M}x{dtype}>, %arg1: tensor<{K}x{N}x{dtype}>) -> tensor<{M}x{N}x{dtype}> {{
+    %cst = arith.constant 0.000000e+00 : {dtype}
+    %0 = tensor.empty() : tensor<{M}x{N}x{dtype}>
+    %1 = linalg.fill ins(%cst : {dtype}) outs(%0 : tensor<{M}x{N}x{dtype}>) -> tensor<{M}x{N}x{dtype}>
+    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<{K}x{M}x{dtype}>, tensor<{K}x{N}x{dtype}>) outs(%1 : tensor<{M}x{N}x{dtype}>) -> tensor<{M}x{N}x{dtype}>
+    return %2 : tensor<{M}x{N}x{dtype}>
+  }}
+}}
+"""
+
+    mlir_template_B = f"""
+module {{
+  func.func @main(%arg0: tensor<{M}x{K}x{dtype}>, %arg1: tensor<{N}x{K}x{dtype}>) -> tensor<{M}x{N}x{dtype}> {{
+    %cst = arith.constant 0.000000e+00 : {dtype}
+    %0 = tensor.empty() : tensor<{M}x{N}x{dtype}>
+    %1 = linalg.fill ins(%cst : {dtype}) outs(%0 : tensor<{M}x{N}x{dtype}>) -> tensor<{M}x{N}x{dtype}>
+    %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<{M}x{K}x{dtype}>, tensor<{N}x{K}x{dtype}>) outs(%1 : tensor<{M}x{N}x{dtype}>) -> tensor<{M}x{N}x{dtype}>
+    return %2 : tensor<{M}x{N}x{dtype}>
+  }}
+}}
+"""
+
+    mlir_template = f"""module {{
+  func.func @main(%arg0: tensor<{M}x{K}x{dtype}>, %arg1: tensor<{K}x{N}x{dtype}>) -> tensor<{M}x{N}x{dtype}> {{
+    %cst = arith.constant 0.000000e+00 : {dtype}
+    %0 = tensor.empty() : tensor<{M}x{N}x{dtype}>
+    %1 = linalg.fill ins(%cst : {dtype}) outs(%0 : tensor<{M}x{N}x{dtype}>) -> tensor<{M}x{N}x{dtype}>
+    %2 = linalg.matmul ins(%arg0, %arg1 : tensor<{M}x{K}x{dtype}>, tensor<{K}x{N}x{dtype}>) outs(%1 : tensor<{M}x{N}x{dtype}>) -> tensor<{M}x{N}x{dtype}>
+    return %2 : tensor<{M}x{N}x{dtype}>
+  }}
+}}
+"""
+    if tA == "T":
+        return mlir_template_A
+    if tB == "T":
+        return mlir_template_B
+    return mlir_template
+
+
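+# Write the kernel to disk, then shell out to iree-compile; on failure the
+# compiler's stderr is dumped to a sidecar _error.txt file.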
+def compile_gemm_config(
+    config: GemmConfig, kernel_dir: Path, vmfb_dir: Path, target, extra_compiler_args
+) -> tuple[Path, Optional[Path]]:
+    mlir_file = kernel_dir / (config.get_name() + ".mlir")
+    vmfb_file = vmfb_dir / (config.get_name() + ".vmfb")
+
+    if not os.path.exists(vmfb_dir):
+        os.makedirs(vmfb_dir)
+
+    # Generate mlir content
+    mlir_content = generate_mlir(config)
+
+    # Write MLIR content to file
+    with open(mlir_file, "w") as f:
+        f.write(mlir_content)
+
+    # Compile MLIR to VMFB
+    exec_args = [
+        "iree-compile",
+        f"{mlir_file}",
+        "--iree-hal-target-backends=rocm",
+        f"--iree-hip-target={target}",
+        "--iree-llvmgpu-enable-prefetch=true",
+        "-o",
+        f"{vmfb_file}",
+    ] + extra_compiler_args
+
+    print(" ".join(exec_args))
+
+    ret_value, stderr = run_iree_command(exec_args)
+    if ret_value == 0:
+        print(f"Successfully compiled {mlir_file} to {vmfb_file}")
+    else:
+        error_file = vmfb_dir / (config.get_name() + "_error.txt")
+        print(f"Failed to compile {mlir_file}. Error dumped in {error_file}")
+        with open(error_file, "w") as f:
+            f.write(stderr.decode("utf-8"))
+        return mlir_file, None
+
+    return mlir_file, vmfb_file
diff --git a/gemmbench/problems.py b/gemmbench/problems.py
index 73847b8..8cdada4 100644
--- a/gemmbench/problems.py
+++ b/gemmbench/problems.py
@@ -4,6 +4,8 @@
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+from gemm_utils import GemmConfig
+
 def is_compute_bound(M, N, K, bpe):
     """Is this GEMM compute (or memory) bound?"""
     magic_ratio = 64
@@ -637,197 +639,232 @@ def is_compute_bound(M, N, K, bpe):
     (8192, 5120, 640),
 ]
 
-def llama13bmatvec(configs):
+def llama13bmatvec(dtype: str) -> list[GemmConfig]:
     """LLAMA 13b, single batch, FP16."""
+    configs = []
     for m, n, k, model, gcount in LLAMA:
         if n == 1 and model == "13b":
-            configs.append((
-                "llama13bmatvec",
+            configs.append(GemmConfig(
                 m,
                 n,
                 k,
                 "T",
                 "N",
-                "f16"
+                dtype
             ))
+    return configs
 
-def llama13bmatvecbf16(configs):
+def llama13bmatvecbf16(dtype: str) -> list[GemmConfig]:
     """LLAMA 13b, single batch, BF16."""
+    configs = []
     for m, n, k, model, gcount in LLAMA:
         if n == 1 and model == "13b":
-            configs.append((
-                "llama13bmatvecbf16",
+            configs.append(GemmConfig(
                 m,
                 n,
                 k,
                 "T",
                 "N",
-                "bf16"
+                dtype
            ))
+    return configs
 
-def llama70bmatvec(configs):
+def llama70bmatvec(dtype: str) -> list[GemmConfig]:
     """LLAMA 70b, single batch, FP16."""
+    configs = []
     for m, n, k, model, gcount in LLAMA:
         if n == 1 and model == "70b":
-            configs.append((
-                "llama70bmatvec",
+            configs.append(GemmConfig(
                 m,
                 n,
                 k,
                 "T",
                 "N",
-                "f16",
+                dtype
            ))
+    return configs
 
-def llama70bmatvecbf16(configs):
+def llama70bmatvecbf16(dtype: str) -> list[GemmConfig]:
     """LLAMA 70b, single batch, BF16."""
+    configs = []
     for m, n, k, model, gcount in LLAMA:
         if n == 1 and model == "70b":
-            configs.append((
-                "llama70bmatvecbf16",
+            configs.append(GemmConfig(
                 m,
                 n,
                 k,
                 "T",
                 "N",
-                "bf16",
+                dtype
            ))
+    return configs
 
-def llama13bskinny(configs):
+def llama13bskinny(dtype: str) -> list[GemmConfig]:
     """LLAMA 13b, multiple batches, FP16."""
+    configs = []
     for m, n, k, model, gcount in LLAMA:
         if n == 1 and model == "13b":
             for batch in [2, 4, 8, 16, 32]:
-                configs.append((
-                    "llama13bskinny",
+                configs.append(GemmConfig(
                     m,
                     batch,
                     k,
                     "T",
                     "N",
-                    "f16",
+                    dtype
                ))
+    return configs
 
-def llama13bskinnybf16(configs):
+def llama13bskinnybf16(dtype: str) -> list[GemmConfig]:
     """LLAMA 13b, multiple batches, BF16."""
+    configs = []
     for m, n, k, model, gcount in LLAMA:
         if n == 1 and model == "13b":
             for batch in [2, 4, 8, 16, 32]:
-                configs.append((
-                    "llama13bskinnybf16",
+                configs.append(GemmConfig(
                     m,
                     batch,
                     k,
                     "T",
                     "N",
-                    "bf16",
+                    dtype
                ))
+    return configs
 
-def llama70bskinny(configs):
+def llama70bskinny(dtype: str) -> list[GemmConfig]:
     """LLAMA 70b, multiple batches, FP16."""
+    configs = []
     for m, n, k, model, gcount in LLAMA:
         if n == 1 and model == "70b":
             for batch in [2, 4, 8, 16, 32]:
-                configs.append((
-                    "llama70bskinny",
+                configs.append(GemmConfig(
                     m,
                     batch,
                     k,
                     "T",
                     "N",
-                    "f16",
+                    dtype
                ))
+    return configs
 
-def llama70bskinnybf16(configs):
+def llama70bskinnybf16(dtype: str) -> list[GemmConfig]:
     """LLAMA 70b, multiple batches, BF16."""
+    configs = []
     for m, n, k, model, gcount in LLAMA:
         if n == 1 and model == "70b":
             for batch in [2, 4, 8, 16, 32]:
-                configs.append((
-                    "llama70bskinnybf16",
+                configs.append(GemmConfig(
                     m,
                     batch,
                     k,
                     "T",
                     "N",
-                    "bf16",
+                    dtype
                ))
+    return configs
 
-def gpt4memory(configs):
+def gpt4memory(dtype: str) -> list[GemmConfig]:
     """GPT4 memory bound GEMMs; FP16."""
+    configs = []
     for m, n, k in GPT4:
-        hgemm = ("gpt4memory", m, n, k, "N", "N", "f16")
+        hgemm = GemmConfig(m, n, k, "N", "N", dtype)
         if not is_compute_bound(m, n, k, 2):
-            yield configs.append(hgemm)
+            configs.append(hgemm)
+    return configs
 
-def gpt4compute(configs):
+def gpt4compute(dtype: str) -> list[GemmConfig]:
     """GPT4 compute bound GEMMs; FP16."""
+    configs = []
     for m, n, k in GPT4:
-        hgemm = ("gpt4compute", m, n, k, "N", "N", "f16")
+        hgemm = GemmConfig(m, n, k, "N", "N", dtype)
         if is_compute_bound(m, n, k, 2):
             configs.append(hgemm)
+    return configs
 
-def gpt4clocktest(configs):
+def gpt4clocktest(dtype: str) -> list[GemmConfig]:
     """GPT4 compute bound GEMMs; FP16."""
+    configs = []
     macM, macN = 128, 128
     M, N, K = 2048, 2048, 8192
-
     for mult in range(1, M//macM + 1):
-        configs.append(("clocktest", mult * macM, mult * macN, K, "N", "N", "f16"))
+        configs.append(GemmConfig(mult * macM, mult * macN, K, "N", "N", dtype))
+    return configs
 
-def test(configs):
+def test(dtype: str) -> list[GemmConfig]:
     """GPT4 compute bound GEMMs; FP16."""
     #M, N, K = 2048, 2048, 8192
+    configs = []
     M, N, K = 128, 128, 8192
-    configs.append(("test", M, N, K, "N", "N", "f16"))
+    configs.append(GemmConfig(M, N, K, "N", "N", dtype))
     M, N, K = 2048, 2048, 8192
-    configs.append(("test", M, N, K, "N", "N", "f16"))
+    configs.append(GemmConfig(M, N, K, "N", "N", dtype))
+    return configs
 
-def llama70bmemory(configs):
+def llama70bmemory(dtype: str) -> list[GemmConfig]:
     """LLAMA 70b memory bound GEMMs; NT; BF16."""
-
+    configs = []
     for n in [1280, 3584, 7168]:
-        configs.append(("llama70bmemory", 2, n, 8192, "N", "T", "bf16"))
+        configs.append(GemmConfig(2, n, 8192, "N", "T", dtype))
+    return configs
 
-def compute(configs):
+def compute(dtype: str) -> list[GemmConfig]:
     """Compute bound GEMMs."""
     #for dtype in ["fp16", "bf16", "fp8"]:
-    for dtype in ["f16", "bf16"]:
+    configs = []
+    for dtype in [dtype]:
         for tA in ["N", "T"]:
             for tB in ["N", "T"]:
-                configs.append(("compute", 4096, 4096, 8192, tA, tB, dtype))
+                if tA == "N" or tB == "N":
+                    configs.append(GemmConfig(4096, 4096, 8192, tA, tB, dtype))
+    return configs
 
-def unet(configs):
-    for dtype in ["f16", "bf16"]:
+def unet(dtype: str) -> list[GemmConfig]:
+    configs = []
+    for dtype in [dtype]:
         for tA in ["N", "T"]:
             for tB in ["N", "T"]:
                 for m, n, k in UNET:
-                    configs.append(("unet", m, n, k, tA, tB, dtype))
-
-def all(configs):
-    llama13bmatvec(configs)
-    llama13bmatvecbf16(configs)
-    llama70bmatvec(configs)
-    llama70bmatvecbf16(configs)
-    llama13bskinny(configs)
-    llama13bskinnybf16(configs)
-    llama70bskinny(configs)
-    llama70bskinnybf16(configs)
-    gpt4memory(configs)
-    gpt4compute(configs)
-    llama70bmemory(configs)
-    compute(configs)
-    unet(configs)
+                    if tA == "N" or tB == "N":
+                        configs.append(GemmConfig(m, n, k, tA, tB, dtype))
+    return configs
 
+def get_gemm_configs() -> list[tuple[str, GemmConfig]]:
+    configs: list[tuple[str, GemmConfig]] = []
+    llama13bmatvec_configs = llama13bmatvec("f16")
+    llama13bmatvec_configs += llama13bmatvecbf16("bf16")
+    llama70bmatvec_configs = llama70bmatvec("f16")
+    llama70bmatvec_configs += llama70bmatvecbf16("bf16")
+    llama13bskinny_configs = llama13bskinny("f16")
+    llama13bskinny_configs += llama13bskinnybf16("bf16")
+    llama70bskinny_configs = llama70bskinny("f16")
+    llama70bskinny_configs += llama70bskinnybf16("bf16")
+    gpt4compute_configs = gpt4compute("f16")
+    llama70bmemory_configs = llama70bmemory("bf16")
+    compute_configs = compute("f16")
+    compute_configs += compute("bf16")
+    unet_configs = unet("f16")
+    unet_configs += unet("bf16")
+
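+    # Pair every config with its workload tag; the tag is what lands in the results CSV.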
[("llama70bmemory", x) for x in llama70bmemory_configs] + configs += [("compute", x) for x in compute_configs] + configs += [("unet", x) for x in unet_configs] + + return configs