Skip to content

Introduce benchgc: a benchmark tool for correctness check #161

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,15 @@ jobs:
# Build the gc-check target in the existing `build` tree.
- name: Test
run: |
cmake --build build --target gc-check

# Build the benchgc wheel with ninja from inside `build`, then replace any
# previously installed benchgc with the freshly built wheel. `|| true` keeps
# the step going when `pip uninstall` returns a non-zero status.
- name: Build and install benchgc
working-directory: build
run: |
ninja benchgc
pip uninstall -y benchgc || true
pip install test/benchgc/dist/benchgc-*.whl
# Run scripts/correctness.sh with libomp5 preloaded through LD_PRELOAD.
# NOTE(review): the library path is hard-coded for an x86_64 multiarch layout;
# confirm it matches the runner image.
- name: Correctness Test
env:
LD_PRELOAD: /lib/x86_64-linux-gnu/libomp5.so
run: |
scripts/correctness.sh
21 changes: 21 additions & 0 deletions .github/workflows/style.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,24 @@ jobs:
run: |
clang-format --version
find . -name *.cpp -or -name *.hpp | xargs clang-format --dry-run --Werror -style=file

# Job: check the formatting of Python changes in the pull request with darker.
python_format:
runs-on: ubuntu-latest
steps:
# Check out the pull request's base commit with 100 commits of history.
# Presumably this makes the base commit available locally for the
# `git merge-base` call in the last step; confirm.
- name: checkout base version
uses: actions/checkout@v4
with:
fetch-depth: 100
ref: ${{ github.event.pull_request.base.sha }}

# Check out the pull request's head commit (also 100 commits deep); this is
# the tree darker inspects.
# NOTE(review): if base and head diverged more than 100 commits ago the
# merge-base may not be present in the shallow history; verify.
- name: checkout head version
uses: actions/checkout@v4
with:
fetch-depth: 100
ref: ${{ github.event.pull_request.head.sha }}

# Install darker together with its isort and flynt extras.
- name: install darker
run: "python3 -m pip install darker darker[isort] darker[flynt]"

# Run darker in check mode (`--check --diff`) with the `-i` and `-f` switches
# (isort / flynt per darker's CLI) over the revision range that starts at the
# merge-base of the base and head commits and ends at HEAD.
- name: check python format
run: "python3 -m darker --check -i -f --diff -r `git merge-base ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}`...HEAD ."
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
build/
externals/
compile_commands.json
__pycache__
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ option(GC_ENABLE_IMEX "Enable Intel® Extension for MLIR" OFF)
# Python bindings of the Graph Compiler; enabled by default.
option(GC_ENABLE_BINDINGS_PYTHON "Enable Graph Compiler Python Binding" ON)
# Developer-only switch: link LLVM/MLIR as dynamic libraries. Off by default
# and, per its help text, not to be used when packing the library.
option(GC_DEV_LINK_LLVM_DYLIB "Link dynamic libraries of LLVM and MLIR. For developers only. Do not use it in packing the library." OFF)
# Debug aid: use the naive BRGEMM implementation as the runtime backend.
option(GC_ENABLE_RUNTIME_NAIVE_BRGEMM "Use naive BRGEMM as runtime backend for debug purpose." OFF)
# Configure the benchgc benchmark/correctness tool (test/benchgc); on by default.
option(GC_BENCH_ENABLE "Build benchgc." ON)

if(GC_ENABLE_LEGACY)
add_subdirectory(legacy/core)
Expand Down
106 changes: 106 additions & 0 deletions scripts/correctness.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#! /bin/bash
#
# Runs the benchgc correctness cases one after another.
#
# The script resolves CASE_DIR relative to the current working directory, so
# it has to be started from the repository root (e.g. `scripts/correctness.sh`).
# Every case is executed even when an earlier one fails; the exit status is 1
# if at least one case failed and 0 otherwise.

# Assign first and export afterwards so the expansion stays quoted and a
# failing `pwd` is not masked by `export`.
CASE_DIR="$(pwd)/test/benchgc/cases"
export CASE_DIR

FAIL=0
# Note: every benchgc invocation below is guarded by `|| FAIL=1`, so errexit
# never aborts the run on a failing case.
set -e

# Runs a single-op case through the linalg driver.
#   $1    op name handed to --case
#   $2..  remaining benchgc arguments (--md, --strides, ...)
# A failing case is recorded in FAIL instead of stopping the script.
run_linalg() {
    python3 -m benchgc --verbose 0 --driver linalg --case "$@" || FAIL=1
}

# Runs a whole MLIR file through the mlir driver.
#   $1    path of the .mlir file handed to --case
# A failing case is recorded in FAIL instead of stopping the script.
run_mlir() {
    python3 -m benchgc --verbose 0 --driver mlir --case "$1" || FAIL=1
}

# bf16
run_linalg matmul --md 0:32x128xbf16 --md 1:128x64xbf16 --md 2:32x64xbf16 --cast cast_signed

# f32

# misc
run_linalg fill --md 0:f32 --md 1:32x4096xf32 --cmp 1:P:0:0
run_linalg copy --md 0:1024x1024xf32 --md 1:1024x1024xbf16
run_linalg broadcast --md 0:1024xf32 --md 1:2x32x1024xf32 --dimensions=0 --dimensions=1

# matmul
run_linalg batch_matmul --md 0:16x512x64xf32 --md 1:16x64x32xf32 --md 2:16x512x32xf32
run_linalg batch_matmul_transpose_a --md 0:16x512x64xf32 --md 1:16x512x32xf32 --md 2:16x64x32xf32
run_linalg batch_matmul_transpose_b --md 0:16x512x64xf32 --md 1:16x128x64xf32 --md 2:16x512x128xf32
run_linalg batch_matvec --md 0:16x512x64xf32 --md 1:16x64xf32 --md 2:16x512xf32
run_linalg batch_mmt4d --md 0:4x4x8x4x2xf32 --md 1:4x8x8x4x2xf32 --md 2:4x4x8x4x4xf32
run_linalg batch_reduce_matmul --md 0:16x512x64xf32 --md 1:16x64x32xf32 --md 2:512x32xf32
run_linalg batch_vecmat --md 0:16x64xf32 --md 1:16x64x512xf32 --md 2:16x512xf32
run_linalg dot --md 0:4096xf32 --md 1:4096xf32 --md 2:0xf32
run_linalg matmul --md 0:1024x512xf32 --md 1:512x512xf32 --md 2:1024x512xf32 --cast cast_signed
run_linalg matmul_transpose_a --md 0:1024x512xf32 --md 1:1024x512xf32 --md 2:512x512xf32 --cast cast_signed
run_linalg matmul_transpose_b --md 0:1024x512xf32 --md 1:1024x512xf32 --md 2:1024x1024xf32 --cast cast_signed
run_linalg matvec --md 0:512x64xf32 --md 1:64xf32 --md 2:512xf32
run_linalg mmt4d --md 0:4x8x4x2xf32 --md 1:8x8x4x2xf32 --md 2:4x8x4x4xf32
run_linalg vecmat --md 0:512xf32 --md 1:512x64xf32 --md 2:64xf32

# binary
run_linalg add --md 0:1x32x4096xf32 --md 1:1x32x4096xf32 --md 2:1x32x4096xf32
run_linalg sub --md 0:1x32x4096xf32 --md 1:1x32x4096xf32 --md 2:1x32x4096xf32
run_linalg mul --md 0:1x32x4096xf32 --md 1:1x32x4096xf32 --md 2:1x32x4096xf32
run_linalg div --md 0:1x32x4096xf32 --md 1:1x32x4096xf32 --md 2:1x32x4096xf32
run_linalg max --md 0:1024x1024xf32 --md 1:1024x1024xf32 --md 2:1024x1024xf32
run_linalg min --md 0:1024x1024xf32 --md 1:1024x1024xf32 --md 2:1024x1024xf32

# element wise
run_linalg abs --md 0:32x4096xf32 --md 1:32x4096xf32
run_linalg ceil --md 0:32x4096xf32 --md 1:32x4096xf32
run_linalg erf --md 0:1024x512xf32 --md 1:1024x512xf32
run_linalg floor --md 0:32x4096xf32 --md 1:32x4096xf32
run_linalg log --md 0:4096x32xf32 --md 1:4096x32xf32
run_linalg negf --md 0:32x4096xf32 --md 1:32x4096xf32
run_linalg exp --md 0:32x4096xf32 --md 1:32x4096xf32
run_linalg round --md 0:32x4096xf32 --md 1:32x4096xf32
# disabled: run_linalg rsqrt --md 0:32x4096xf32 --md 1:32x4096xf32
run_linalg sqrt --md 0:32x4096xf32 --md 1:32x4096xf32
run_linalg square --md 0:32x4096xf32 --md 1:32x4096xf32
run_linalg tanh --md 0:128x128xf32 --md 1:128x128xf32

# conv
run_linalg conv_1d_ncw_fcw --md 0:4x4x32xf32 --md 1:8x4x4xf32 --md 2:4x8x13xf32 --strides 2 --dilations 2
run_linalg conv_1d_nwc_wcf --md 0:4x32x4xf32 --md 1:4x4x8xf32 --md 2:4x13x8xf32 --strides 2 --dilations 2
run_linalg conv_1d --md 0:32xf32 --md 1:4xf32 --md 2:29xf32
run_linalg conv_2d_nchw_fchw --md 0:4x4x32x32xf32 --md 1:8x4x4x4xf32 --md 2:4x8x13x13xf32 --strides 2 --strides 2 --dilations 2 --dilations 2
run_linalg conv_2d_ngchw_fgchw --md 0:4x2x2x32x32xf32 --md 1:4x2x2x4x4xf32 --md 2:4x2x4x13x13xf32 --strides 2 --strides 2 --dilations 2 --dilations 2
run_linalg conv_2d_ngchw_gfchw --md 0:4x2x2x32x32xf32 --md 1:2x4x2x4x4xf32 --md 2:4x2x4x13x13xf32 --strides 2 --strides 2 --dilations 2 --dilations 2
run_linalg conv_2d_nhwc_fhwc --md 0:4x32x32x4xf32 --md 1:8x4x4x4xf32 --md 2:4x13x13x8xf32 --strides 2 --strides 2 --dilations 2 --dilations 2
run_linalg conv_2d_nhwc_hwcf --md 0:4x32x32x4xf32 --md 1:4x4x4x8xf32 --md 2:4x13x13x8xf32 --strides 2 --strides 2 --dilations 2 --dilations 2
run_linalg conv_2d --md 0:32x32xf32 --md 1:4x4xf32 --md 2:29x29xf32
run_linalg conv_3d_ncdhw_fcdhw --md 0:4x4x32x32x32xf32 --md 1:8x4x4x4x4xf32 --md 2:4x8x13x13x13xf32 --strides 2 --strides 2 --strides 2 --dilations 2 --dilations 2 --dilations 2
run_linalg conv_3d_ndhwc_dhwcf --md 0:4x32x32x32x4xf32 --md 1:4x4x4x4x8xf32 --md 2:4x13x13x13x8xf32 --strides 2 --strides 2 --strides 2 --dilations 2 --dilations 2 --dilations 2
run_linalg conv_3d --md 0:32x32x32xf32 --md 1:4x4x4xf32 --md 2:29x29x29xf32

# depthwise conv
run_linalg depthwise_conv_1d_ncw_cw --md 0:4x4x32xf32 --md 1:4x4xf32 --md 2:4x4x13xf32 --strides 2 --dilations 2
run_linalg depthwise_conv_1d_nwc_wc --md 0:4x32x4xf32 --md 1:4x4xf32 --md 2:4x13x4xf32 --strides 2 --dilations 2
run_linalg depthwise_conv_1d_nwc_wcm --md 0:4x32x4xf32 --md 1:4x4x3xf32 --md 2:4x13x4x3xf32 --strides 2 --dilations 2
run_linalg depthwise_conv_2d_nchw_chw --md 0:4x4x32x32xf32 --md 1:4x4x4xf32 --md 2:4x4x13x13xf32 --strides 2 --strides 2 --dilations 2 --dilations 2
run_linalg depthwise_conv_2d_nhwc_hwc --md 0:4x32x32x4xf32 --md 1:4x4x4xf32 --md 2:4x13x13x4xf32 --strides 2 --strides 2 --dilations 2 --dilations 2
run_linalg depthwise_conv_2d_nhwc_hwcm --md 0:4x32x32x4xf32 --md 1:4x4x4x3xf32 --md 2:4x13x13x4x3xf32 --strides 2 --strides 2 --dilations 2 --dilations 2
run_linalg depthwise_conv_3d_ncdhw_cdhw --md 0:4x4x32x32x32xf32 --md 1:4x4x4x4xf32 --md 2:4x4x13x13x13xf32 --strides 2 --strides 2 --strides 2 --dilations 2 --dilations 2 --dilations 2
run_linalg depthwise_conv_3d_ndhwc_dhwc --md 0:4x32x32x32x4xf32 --md 1:4x4x4x4xf32 --md 2:4x13x13x13x4xf32 --strides 2 --strides 2 --strides 2 --dilations 2 --dilations 2 --dilations 2
run_linalg depthwise_conv_3d_ndhwc_dhwcm --md 0:4x32x32x32x4xf32 --md 1:4x4x4x4x3xf32 --md 2:4x13x13x13x4x3xf32 --strides 2 --strides 2 --strides 2 --dilations 2 --dilations 2 --dilations 2

# pool
run_linalg pooling_nchw_max --md 0:4x4x32x32xf32 --md 1:4x4xf32 --md 2:4x4x13x13xf32 --strides 2 --strides 2 --dilations 2 --dilations 2
run_linalg pooling_nchw_sum --md 0:4x4x32x32xf32 --md 1:4x4xf32 --md 2:4x4x13x13xf32 --strides 2 --strides 2 --dilations 2 --dilations 2
run_linalg pooling_ncw_max --md 0:4x4x32xf32 --md 1:4xf32 --md 2:4x4x13xf32 --strides 2 --dilations 2
run_linalg pooling_ncw_sum --md 0:4x4x32xf32 --md 1:4xf32 --md 2:4x4x13xf32 --strides 2 --dilations 2
run_linalg pooling_ndhwc_max --md 0:4x32x32x32x4xf32 --md 1:4x4x4xf32 --md 2:4x13x13x13x4xf32 --strides 2 --strides 2 --strides 2 --dilations 2 --dilations 2 --dilations 2
run_linalg pooling_ndhwc_sum --md 0:4x32x32x32x4xf32 --md 1:4x4x4xf32 --md 2:4x13x13x13x4xf32 --strides 2 --strides 2 --strides 2 --dilations 2 --dilations 2 --dilations 2
run_linalg pooling_nhwc_max --md 0:4x32x32x4xf32 --md 1:4x4xf32 --md 2:4x13x13x4xf32 --strides 2 --strides 2 --dilations 2 --dilations 2
run_linalg pooling_nhwc_sum --md 0:4x32x32x4xf32 --md 1:4x4xf32 --md 2:4x13x13x4xf32 --strides 2 --strides 2 --dilations 2 --dilations 2
run_linalg pooling_nhwc_min --md 0:4x32x32x4xf32 --md 1:4x4xf32 --md 2:4x13x13x4xf32 --strides 2 --strides 2 --dilations 2 --dilations 2
run_linalg pooling_nwc_max --md 0:4x32x4xf32 --md 1:4xf32 --md 2:4x13x4xf32 --strides 2 --dilations 2
run_linalg pooling_nwc_sum --md 0:4x32x4xf32 --md 1:4xf32 --md 2:4x13x4xf32 --strides 2 --dilations 2
run_linalg pooling_nwc_min --md 0:4x32x4xf32 --md 1:4xf32 --md 2:4x13x4xf32 --strides 2 --dilations 2

# generic / reduce
# CASE_DIR is quoted so a checkout path containing whitespace still works.
run_mlir "${CASE_DIR}/generic.mlir"
run_mlir "${CASE_DIR}/reduce.mlir"

# softmax
# disabled: run_linalg softmax --md 0:32x4096xf32 --md 1:32x4096xf32 --dimension 1

# mlir
# disabled: run_mlir "${CASE_DIR}/llama2.mlir"

set +e
exit $FAIL
4 changes: 4 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,7 @@ endif ()
include(gtest)
add_subdirectory(dnnl)
add_subdirectory(mlir)

# Configure the benchgc subdirectory only when GC_BENCH_ENABLE is set.
if(GC_BENCH_ENABLE)
add_subdirectory(benchgc)
endif()
5 changes: 5 additions & 0 deletions test/benchgc/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
dist/
src/benchgc.egg-info/
build
benchgc.egg-info/
__pycache__
41 changes: 41 additions & 0 deletions test/benchgc/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
################################################################################
# Copyright (C) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
# SPDX-License-Identifier: Apache-2.0
################################################################################

# Nothing below is configured when benchgc is switched off.
if(NOT GC_BENCH_ENABLE)
  message(STATUS "Benchgc is not enabled")
  return()
endif()

# Stage setup.py in the build tree. Listed explicitly (in addition to the glob
# below) so that a missing setup.py is a hard configure error.
configure_file(
  "${CMAKE_CURRENT_SOURCE_DIR}/setup.py"
  "${CMAKE_BINARY_DIR}/test/benchgc/setup.py"
  COPYONLY)

# Stage every top-level Python script next to setup.py.
# - The destination is the test/benchgc staging directory rather than the root
#   of the build tree, so the scripts end up where setup.py is staged.
# - CONFIGURE_DEPENDS re-runs the glob at build time, so scripts added later
#   are picked up without a manual re-configure.
file(GLOB PYTHON_SCRIPTS CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/*.py")
foreach(PY_SCRIPT IN LISTS PYTHON_SCRIPTS)
  get_filename_component(PY_SCRIPT_NAME "${PY_SCRIPT}" NAME)
  configure_file(
    "${PY_SCRIPT}"
    "${CMAKE_BINARY_DIR}/test/benchgc/${PY_SCRIPT_NAME}"
    COPYONLY)
endforeach()

# Python >= 3.10 with the interpreter and NumPy components is mandatory.
find_package(Python 3.10 REQUIRED COMPONENTS Interpreter NumPy)
add_custom_target(benchgc

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the installation of benchgc enabled as a default target of CMake? I think it should only be triggered when the user explicitly requests it.

Copy link
Contributor Author

@WangJialei-A WangJialei-A Jul 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Menooker
I'm not quite sure about this. The -DGC_BENCH_ENABLE option is ON by default,
but ninja will not build benchgc by default.
I'm curious how to control this in CMake.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If running ninja will not build benchgc by default, that is what I wanted. We are good here. :)

COMMAND ${Python_EXECUTABLE} setup.py bdist_wheel
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/test/benchgc/"
DEPENDS GcPythonModules)

# Descend into each benchgc source directory, in this fixed order.
foreach(benchgc_subdir IN ITEMS
    "src/benchgc"
    "src/benchgc/arg"
    "src/benchgc/mlir"
    "src/benchgc/linalg"
    "src/benchgc/tensor"
    "src/benchgc/arith")
  add_subdirectory("${benchgc_subdir}")
endforeach()
Loading