diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml index e7dcb810e..17a122008 100644 --- a/.github/workflows/ci-linux.yml +++ b/.github/workflows/ci-linux.yml @@ -49,7 +49,10 @@ jobs: git remote add origin $REPO_ADDRESS git -c protocol.version=2 fetch --depth 1 origin $BRANCH_NAME git reset --hard FETCH_HEAD - git -c submodule."third_party/torch-mlir".update=none -c submodule."third_party/stablehlo".update=none -c submodule."src/runtime_src/core/common/aiebu".update=none submodule update --init --recursive --depth 1 --single-branch -j 10 + git -c submodule."third_party/torch-mlir".update=none \ + -c submodule."third_party/stablehlo".update=none \ + -c submodule."src/runtime_src/core/common/aiebu".update=none \ + submodule update --init --recursive --depth 1 --single-branch -j 10 - name: Install deps run: | @@ -61,6 +64,11 @@ jobs: run: | pip install "numpy<2" pyyaml "pybind11[global]==2.10.3" nanobind pytest + - name: Peano dep + run: | + bash build_tools/download_peano.sh + echo "PEANO_INSTALL_DIR=$PWD/llvm-aie" >> $GITHUB_ENV + - name: Run Pytest run: | pytest build_tools/ci diff --git a/.github/workflows/ci-windows.yml b/.github/workflows/ci-windows.yml index 2fcc702e7..c1b696d0a 100644 --- a/.github/workflows/ci-windows.yml +++ b/.github/workflows/ci-windows.yml @@ -60,7 +60,10 @@ jobs: git remote add origin $REPO_ADDRESS git -c protocol.version=2 fetch --depth 1 origin $BRANCH_NAME git reset --hard FETCH_HEAD - git -c submodule."third_party/torch-mlir".update=none -c submodule."third_party/stablehlo".update=none -c submodule."src/runtime_src/core/common/aiebu".update=none submodule update --init --recursive --depth 1 --single-branch -j 10 + git -c submodule."third_party/torch-mlir".update=none \ + -c submodule."third_party/stablehlo".update=none \ + -c submodule."src/runtime_src/core/common/aiebu".update=none \ + submodule update --init --recursive --depth 1 --single-branch -j 10 - name: Setup Cpp uses: aminya/setup-cpp@v1 @@ -86,6 +89,11 @@ jobs: key: ${{ env.CACHE_KEY }} restore-keys: windows-build-test-cpp- + - name: Peano dep + run: | + .\build_tools\download_peano.ps1 + Add-Content -Path $env:GITHUB_ENV -Value "PEANO_INSTALL_DIR=$PWD\llvm-aie" + - name: Build packages run: | $env:cache_dir = "${{ env.CACHE_DIR }}" diff --git a/build_tools/build_test_cpp.ps1 b/build_tools/build_test_cpp.ps1 index f7c3b7db2..4670dd9d5 100644 --- a/build_tools/build_test_cpp.ps1 +++ b/build_tools/build_test_cpp.ps1 @@ -80,10 +80,18 @@ $CMAKE_ARGS = @( "-DIREE_TARGET_BACKEND_DEFAULTS=OFF" "-DIREE_TARGET_BACKEND_LLVM_CPU=ON" "-DIREE_CMAKE_PLUGIN_PATHS=$repo_root" - "-DIREE_EXTERNAL_HAL_DRIVERS=xrt" + "-DIREE_EXTERNAL_HAL_DRIVERS=xrt;xrt-lite" "-DIREE_BUILD_PYTHON_BINDINGS=ON" + # iree/runtime/src/iree/hal/cts/cts_test_base.h:173:24: error: unused variable 'device_buffer' [-Werror,-Wunused-variable] + "-DIREE_ENABLE_WERROR_FLAG=OFF" ) +$peano_install_dir = "$env:PEANO_INSTALL_DIR" +if ($peano_install_dir -and (Test-Path "$peano_install_dir")) +{ + $CMAKE_ARGS += @("-DPEANO_INSTALL_DIR=$peano_install_dir") +} + if ($llvm_install_dir -and (Test-Path "$llvm_install_dir")) { echo "using existing llvm install @ $llvm_install_dir" @@ -121,7 +129,7 @@ echo "-----" # better have git-bash installed... $env:Path = "C:\Program Files\Git\bin;$env:Path" pushd $build_dir -& bash -l -c "ctest -R amd-aie --output-on-failure -j --repeat until-pass:5" +& bash -l -c "ctest -R amd-aie -E driver/xrt-lite --output-on-failure -j --repeat until-pass:5" popd if ($llvm_install_dir -and (Test-Path "$llvm_install_dir")) diff --git a/build_tools/build_test_cpp.sh b/build_tools/build_test_cpp.sh index d04bf8bb4..857c81f44 100644 --- a/build_tools/build_test_cpp.sh +++ b/build_tools/build_test_cpp.sh @@ -31,7 +31,7 @@ mkdir -p "${cache_dir}/pip" python="$(which python)" echo "Using python: $python" -if [[ "$OSTYPE" == "linux-gnu"* ]]; then +if [[ "$OSTYPE" == "linux"* ]]; then export CMAKE_TOOLCHAIN_FILE="$this_dir/linux_default_toolchain.cmake" export CC=clang export CXX=clang++ @@ -61,54 +61,73 @@ echo '{ }' > $iree_dir/CMakeUserPresets.json cd $iree_dir -CMAKE_ARGS="\ - -GNinja \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_INSTALL_PREFIX=$install_dir \ - -DCMAKE_INSTALL_LIBDIR=lib \ - -DIREE_ERROR_ON_MISSING_SUBMODULES=OFF \ - -DIREE_ENABLE_ASSERTIONS=ON \ - -DIREE_BUILD_SAMPLES=OFF \ - -DIREE_BUILD_PYTHON_BINDINGS=ON \ - -DIREE_BUILD_BINDINGS_TFLITE=OFF \ - -DIREE_HAL_DRIVER_DEFAULTS=OFF \ - -DIREE_HAL_DRIVER_LOCAL_SYNC=ON \ - -DIREE_HAL_DRIVER_LOCAL_TASK=ON \ - -DIREE_TARGET_BACKEND_DEFAULTS=OFF \ - -DIREE_TARGET_BACKEND_LLVM_CPU=ON \ - -DIREE_INPUT_TOSA=OFF \ - -DIREE_INPUT_STABLEHLO=OFF \ - -DIREE_INPUT_TORCH=OFF \ - -DCMAKE_OBJECT_PATH_MAX=4096 \ - -DIREE_CMAKE_PLUGIN_PATHS=$repo_root" - -if [ -d "${llvm_install_dir}" ]; then - CMAKE_ARGS="$CMAKE_ARGS \ - -DIREE_BUILD_BUNDLED_LLVM=OFF \ - -DClang_DIR=$llvm_install_dir/lib/cmake/clang \ - -DLLD_DIR=$llvm_install_dir/lib/cmake/lld \ - -DMLIR_DIR=$llvm_install_dir/lib/cmake/mlir \ - -DLLVM_DIR=$llvm_install_dir/lib/cmake/llvm" +CMAKE_ARGS=( + -GNinja + -DCMAKE_BUILD_TYPE=Release + -DCMAKE_INSTALL_PREFIX="$install_dir" + -DCMAKE_INSTALL_LIBDIR=lib + -DIREE_ERROR_ON_MISSING_SUBMODULES=OFF + -DIREE_ENABLE_ASSERTIONS=ON + -DIREE_BUILD_SAMPLES=OFF + -DIREE_BUILD_PYTHON_BINDINGS=ON + -DIREE_BUILD_BINDINGS_TFLITE=OFF + -DIREE_HAL_DRIVER_DEFAULTS=OFF + -DIREE_HAL_DRIVER_LOCAL_SYNC=ON + -DIREE_HAL_DRIVER_LOCAL_TASK=ON + -DIREE_TARGET_BACKEND_DEFAULTS=OFF + -DIREE_TARGET_BACKEND_LLVM_CPU=ON + -DIREE_INPUT_TOSA=OFF + -DIREE_INPUT_STABLEHLO=OFF + -DIREE_INPUT_TORCH=OFF + -DCMAKE_OBJECT_PATH_MAX=4096 + -DIREE_CMAKE_PLUGIN_PATHS="$repo_root" + # iree/runtime/src/iree/hal/cts/cts_test_base.h:173:24: error: unused variable 'device_buffer' [-Werror,-Wunused-variable] + -DIREE_ENABLE_WERROR_FLAG=OFF +) + +PEANO_INSTALL_DIR=${PEANO_INSTALL_DIR:-""} +if [ "$PEANO_INSTALL_DIR" != "" ] && [ -d "$PEANO_INSTALL_DIR" ]; then + CMAKE_ARGS+=(-DPEANO_INSTALL_DIR="$PEANO_INSTALL_DIR") fi -if [[ "$OSTYPE" == "linux-gnu"* ]]; then - cmake $CMAKE_ARGS \ - -DCMAKE_EXE_LINKER_FLAGS_INIT="-fuse-ld=lld" \ - -DCMAKE_SHARED_LINKER_FLAGS_INIT="-fuse-ld=lld" \ - -DCMAKE_MODULE_LINKER_FLAGS_INIT="-fuse-ld=lld" \ - -DCMAKE_C_COMPILER="${CC}" \ - -DCMAKE_CXX_COMPILER="${CXX}" \ - -DLLVM_TARGET_ARCH=X86 \ - -DLLVM_TARGETS_TO_BUILD=X86 \ - -DIREE_EXTERNAL_HAL_DRIVERS=xrt \ - -S $iree_dir -B $build_dir +if [ -d "$llvm_install_dir" ]; then + CMAKE_ARGS+=( + -DIREE_BUILD_BUNDLED_LLVM=OFF + -DClang_DIR="$llvm_install_dir/lib/cmake/clang" + -DLLD_DIR="$llvm_install_dir/lib/cmake/lld" + -DMLIR_DIR="$llvm_install_dir/lib/cmake/mlir" + -DLLVM_DIR="$llvm_install_dir/lib/cmake/llvm" + ) +fi + +if [[ "$OSTYPE" == "linux"* ]]; then + CMAKE_ARGS+=( + -DCMAKE_EXE_LINKER_FLAGS_INIT="-fuse-ld=lld" + -DCMAKE_SHARED_LINKER_FLAGS_INIT="-fuse-ld=lld" + -DCMAKE_MODULE_LINKER_FLAGS_INIT="-fuse-ld=lld" + -DCMAKE_C_COMPILER="${CC}" + -DCMAKE_CXX_COMPILER="${CXX}" + -DLLVM_TARGET_ARCH=X86 + -DLLVM_TARGETS_TO_BUILD=X86 + -DIREE_EXTERNAL_HAL_DRIVERS="xrt;xrt-lite" + -S + "$iree_dir" + -B + "$build_dir" + ) elif [[ "$OSTYPE" == "darwin"* ]]; then - cmake $CMAKE_ARGS \ - -DLLVM_TARGET_ARCH="X86;ARM" \ - -DLLVM_TARGETS_TO_BUILD="X86;ARM" \ - -S $iree_dir -B $build_dir + CMAKE_ARGS+=( + -DLLVM_TARGET_ARCH="X86;ARM" + -DLLVM_TARGETS_TO_BUILD="X86;ARM" + -S + "$iree_dir" + -B + "$build_dir" + ) fi +cmake "${CMAKE_ARGS[@]}" + echo "Building all" echo "------------" cmake --build "$build_dir" -- -k 0 @@ -123,8 +142,8 @@ cmake --build "$build_dir" --target iree-install-dist echo "CTest" echo "-----" -if [[ "$OSTYPE" == "linux-gnu"* ]]; then - ctest --test-dir "$build_dir" -R amd-aie --output-on-failure -j +if [[ "$OSTYPE" == "linux"* ]]; then + ctest --test-dir "$build_dir" -R amd-aie -E "driver/xrt-lite" --output-on-failure -j elif [[ "$OSTYPE" == "darwin"* ]]; then ctest --test-dir "$build_dir" -R amd-aie -E "matmul_pack_peel_air_e2e|matmul_elementwise_pack_peel_air_e2e|conv_fill_spec_pad" --output-on-failure -j --repeat until-pass:5 fi diff --git a/build_tools/download_peano.ps1 b/build_tools/download_peano.ps1 index 89bd6808f..6589cc562 100644 --- a/build_tools/download_peano.ps1 +++ b/build_tools/download_peano.ps1 @@ -9,4 +9,7 @@ $ErrorActionPreference = 'Stop' $this_dir = Split-Path -Path $MyInvocation.MyCommand.Path -Parent $RELEASE = (Get-Content -Path "$this_dir/peano_commit.txt") pip download llvm_aie==$RELEASE -f https://github.com/Xilinx/llvm-aie/releases/expanded_assets/nightly -Expand-Archive (Get-ChildItem -Filter llvm*.whl).FullName -DestinationPath $PWD.Path +$peano = (Get-ChildItem -Filter llvm*.whl) +$new_name = ($peano.Basename + ".zip") +Rename-Item -Path $peano.Name -NewName $new_name +Expand-Archive $new_name -DestinationPath $PWD.Path diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/amdxdna_accel.h b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/amdxdna_accel.h index e7f52afc3..d4d18d6b3 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/amdxdna_accel.h +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/amdxdna_accel.h @@ -132,11 +132,11 @@ struct amdxdna_cu_config { * @pad: MBZ * @cu_configs: Array of CU configurations of struct amdxdna_cu_config */ -struct amdxdna_hwctx_param_config_cu { - __u16 num_cus; - __u16 pad[3]; - struct amdxdna_cu_config cu_configs[] __counted_by(num_cus); -}; +// struct amdxdna_hwctx_param_config_cu { +// __u16 num_cus; +// __u16 pad[3]; +// struct amdxdna_cu_config cu_configs[] __counted_by(num_cus); +// }; enum amdxdna_drm_config_hwctx_param { DRM_AMDXDNA_HWCTX_CONFIG_CU, diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/bo.cpp b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/bo.cpp index 3076a386a..739456ccf 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/bo.cpp +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/bo.cpp @@ -9,6 +9,7 @@ #include #include +#include "pcidev.h" #include "shim_debug.h" namespace { @@ -16,7 +17,7 @@ namespace { uint32_t alloc_drm_bo(const shim_xdna::pdev& dev, amdxdna_bo_type type, void* buf, size_t size) { amdxdna_drm_create_bo cbo = { - .type = type, + .type = static_cast(type), .vaddr = reinterpret_cast(buf), .size = size, }; @@ -38,7 +39,7 @@ void get_drm_bo_info(const shim_xdna::pdev& dev, uint32_t boh, void* map_parent_range(size_t size) { auto p = ::mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (!p) shim_err(errno, "mmap(len=%ld) failed", size); + if (!p) shim_xdna::shim_err(errno, "mmap(len=%ld) failed", size); return p; } @@ -101,7 +102,7 @@ bool is_power_of_two(size_t x) { return (x > 0) && ((x & (x - 1)) == 0); } void* addr_align(void* p, size_t align) { if (!is_power_of_two(align)) - shim_err(EINVAL, "Alignment 0x%lx is not power of two", align); + shim_xdna::shim_err(EINVAL, "Alignment 0x%lx is not power of two", align); return (void*)(((uintptr_t)p + align) & ~(align - 1)); } @@ -129,7 +130,8 @@ inline void clflush_data(const void* base, size_t offset, size_t len) { if (!cacheline_size) { long sz = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); - if (sz <= 0) shim_err(EINVAL, "Invalid cache line size: %ld", sz); + if (sz <= 0) + shim_xdna::shim_err(EINVAL, "Invalid cache line size: %ld", sz); cacheline_size = sz; } @@ -313,8 +315,7 @@ std::unique_ptr bo::share() const { amdxdna_bo_type bo::get_type() const { return m_type; } -bo::bo(const device& device, hw_ctx::slot_id ctx_id, size_t size, - uint64_t flags) +bo::bo(const device& device, uint32_t ctx_id, size_t size, uint64_t flags) : bo(device, ctx_id, size, flags, flag_to_type(flags)) { if (m_type == AMDXDNA_BO_INVALID) shim_err(EINVAL, "Invalid BO flags: 0x%lx", flags); @@ -323,8 +324,8 @@ bo::bo(const device& device, hw_ctx::slot_id ctx_id, size_t size, bo::bo(const device& device, size_t size, amdxdna_bo_type type) : bo(device, AMDXDNA_INVALID_CTX_HANDLE, size, 0, type) {} -bo::bo(const device& device, hw_ctx::slot_id ctx_id, size_t size, - uint64_t flags, amdxdna_bo_type type) +bo::bo(const device& device, uint32_t ctx_id, size_t size, uint64_t flags, + amdxdna_bo_type type) : m_pdev(device.get_pdev()), m_aligned_size(size), m_flags(flags), diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/bo.h b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/bo.h index 2f513ae8d..ea163db45 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/bo.h +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/bo.h @@ -5,12 +5,11 @@ #define _BO_XDNA_H_ #include +#include +#include #include #include "amdxdna_accel.h" -#include "device.h" -#include "hwctx.h" -#include "pcidev.h" #include "shared.h" #include "shim_debug.h" @@ -62,6 +61,9 @@ struct xcl_bo_flags { }; }; +struct device; +struct pdev; + struct bo { // map_type - determines how a buffer is mapped enum class map_type { read, write }; @@ -87,10 +89,11 @@ struct bo { uint64_t kmhdl; // kernel mode handle }; - bo(const device& device, hw_ctx::slot_id ctx_id, size_t size, uint64_t flags, + using uint32_t = uint32_t; + bo(const device& device, uint32_t ctx_id, size_t size, uint64_t flags, amdxdna_bo_type type); - bo(const device& device, hw_ctx::slot_id ctx_id, size_t size, uint64_t flags); + bo(const device& device, uint32_t ctx_id, size_t size, uint64_t flags); bo(const device& device, shared_handle::export_handle ehdl); @@ -171,7 +174,7 @@ struct bo { // Used when exclusively assigned to a HW context. By default, BO is shared // among all HW contexts. - hw_ctx::slot_id m_owner_ctx_id = AMDXDNA_INVALID_CTX_HANDLE; + uint32_t m_owner_ctx_id = AMDXDNA_INVALID_CTX_HANDLE; void bind_at(size_t pos, const bo* bh, size_t offset, size_t size); diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/device.cpp b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/device.cpp index 20ec767ea..69676385c 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/device.cpp +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/device.cpp @@ -3,18 +3,15 @@ #include "device.h" -#include -#include - -#include #include #include "bo.h" #include "hwctx.h" +#include "pcidev.h" namespace shim_xdna { -device::device(const pdev& pdev, handle_type shim_handle) +device::device(const pdev& pdev, void* shim_handle) : m_pdev(pdev), m_handle(shim_handle) { shim_debug("Created KMQ device (%s) ...", get_pdev().m_sysfs_name.c_str()); } @@ -24,16 +21,16 @@ device::~device() { m_pdev.close(); } -std::unique_ptr device::create_hw_context( - const device& dev, const hw_ctx::qos_type& qos) const { - return std::make_unique(dev, qos); -} +// std::unique_ptr device::create_hw_context( +// const device& dev, const hw_ctx::qos_type& qos) const { +// return std::make_unique(dev, qos); +// } -std::unique_ptr device::alloc_bo(void* userptr, hw_ctx::slot_id ctx_id, +std::unique_ptr device::alloc_bo(void* userptr, uint32_t ctx_id, size_t size, uint64_t flags) { if (userptr) shim_not_supported_err("User ptr BO"); - auto b = bo(this->m_pdev, ctx_id, size, flags); + auto b = bo(*this, ctx_id, size, flags); return std::make_unique(*this, ctx_id, size, flags); } diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/device.h b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/device.h index 8db0f2227..ca4f295f8 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/device.h +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/device.h @@ -5,13 +5,13 @@ #define PCIE_DEVICE_LINUX_XDNA_H #include +#include -#include "pcidev.h" +#include "shared.h" #include "shim_debug.h" namespace shim_xdna { -typedef void* xclDeviceHandle; #define XRT_NULL_HANDLE NULL // cuidx_type - encode cuidx and domain @@ -36,21 +36,22 @@ struct cuidx_type { using domain_index_type = uint16_t; }; -struct device { - // device index type - using id_type = unsigned int; - using slot_id = uint32_t; - using handle_type = xclDeviceHandle; +struct hw_ctx; +struct pdev; +struct bo; - device(const pdev& pdev, handle_type shim_handle); +struct device { + device(const pdev& pdev, void* shim_handle); ~device(); - std::unique_ptr alloc_bo(void* userptr, hw_ctx::slot_id ctx_id, - size_t size, uint64_t flags); + using qos_type = std::map; + enum class access_mode : uint8_t { exclusive = 0, shared = 1 }; + std::unique_ptr alloc_bo(void* userptr, uint32_t ctx_id, size_t size, + uint64_t flags); - std::unique_ptr create_hw_context(const device& dev, - const hw_ctx::qos_type& qos) const; + // std::unique_ptr create_hw_context(const device& dev, + // const qos_type& qos) const; std::unique_ptr import_bo(shared_handle::export_handle ehdl) const; @@ -60,8 +61,8 @@ struct device { std::unique_ptr import_bo(pid_t, shared_handle::export_handle); - std::unique_ptr create_hw_context(const hw_ctx::qos_type& qos, - hw_ctx::access_mode mode) const; + std::unique_ptr create_hw_context(const qos_type& qos, + access_mode mode) const; std::vector read_aie_mem(uint16_t col, uint16_t row, uint32_t offset, uint32_t size); @@ -76,7 +77,7 @@ struct device { const pdev& m_pdev; // The pcidev that this device object is derived from std::map m_bo_map; - xclDeviceHandle m_handle = XRT_NULL_HANDLE; + void* m_handle = XRT_NULL_HANDLE; mutable std::mutex m_mutex; }; diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/ert.h b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/ert.h index ac5858db4..bce5d1623 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/ert.h +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/ert.h @@ -447,7 +447,7 @@ struct config_sk_image_uuid { uint32_t num_cus; uint32_t sk_name[5]; unsigned char sk_uuid[16]; - uint32_t slot_id; + uint32_t uint32_t; }; /** diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/fence.cpp b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/fence.cpp index 4af239eb1..850e4198a 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/fence.cpp +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/fence.cpp @@ -6,6 +6,9 @@ #include #include "amdxdna_accel.h" +#include "fence.h" +#include "hwctx.h" +#include "pcidev.h" namespace { @@ -138,15 +141,6 @@ fence::fence(const device& device, shared_handle::export_handle ehdl) shim_debug("Fence imported: %d@%ld", m_syncobj_hdl, m_state); } -fence::fence(const fence& f) - : m_pdev(f.m_pdev), - m_import(f.share()), - m_syncobj_hdl(import_syncobj(m_pdev, m_import->get_export_handle())), - m_state{f.m_state}, - m_signaled{f.m_signaled} { - shim_debug("Fence cloned: %d@%ld", m_syncobj_hdl, m_state); -} - fence::~fence() { shim_debug("Fence going away: %d@%ld", m_syncobj_hdl, m_state); destroy_syncobj(m_pdev, m_syncobj_hdl); @@ -161,10 +155,6 @@ std::unique_ptr fence::share() const { uint64_t fence::get_next_state() const { return m_state + 1; } -std::unique_ptr fence::clone() const { - return std::make_unique(*this); -} - uint64_t fence::wait_next_state() const { std::lock_guard guard(m_lock); diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/fence.h b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/fence.h index 2b9067c1c..a5acd4c1d 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/fence.h +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/fence.h @@ -6,13 +6,15 @@ #include -#include "device.h" -#include "hwctx.h" #include "shared.h" #include "shim_debug.h" namespace shim_xdna { +struct device; +struct hw_ctx; +struct pdev; + struct fence { using export_handle = shared_handle::export_handle; enum class access_mode : uint8_t { local, shared, process, hybrid }; @@ -23,8 +25,6 @@ struct fence { ~fence(); - std::unique_ptr clone() const; - std::unique_ptr share() const; void wait(uint32_t timeout_ms) const; diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwctx.cpp b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwctx.cpp index a10abf3ae..23dd3b728 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwctx.cpp +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwctx.cpp @@ -4,30 +4,16 @@ #include "hwctx.h" #include "bo.h" +#include "device.h" #include "hwq.h" +#include "pcidev.h" -namespace { - -std::vector get_pdi(const xrt_core::xclbin::aie_partition_obj& aie, - uint16_t kernel_id) { - for (auto& pdi : aie.pdis) { - for (auto& cdo : pdi.cdo_groups) { - for (auto kid : cdo.kernel_ids) { - if (kid == kernel_id) return pdi.pdi; - } - } - } - shim_err(ENOENT, "PDI for kernel ID 0x%x not found", kernel_id); -} - -} // namespace namespace shim_xdna { hw_ctx::hw_ctx(const device& dev, const qos_type& qos, std::unique_ptr q) : m_device(dev), m_q(std::move(q)), m_doorbell(0), m_log_buf(nullptr) { shim_debug("Creating HW context..."); init_qos_info(qos); - parse_xclbin(xclbin); } hw_ctx::~hw_ctx() { @@ -35,9 +21,9 @@ hw_ctx::~hw_ctx() { shim_debug("Destroyed HW context (%d)...", m_handle); } -hw_ctx::slot_id hw_ctx::get_slotidx() const { return m_handle; } +uint32_t hw_ctx::get_slotidx() const { return m_handle; } -void hw_ctx::set_slotidx(slot_id id) { m_handle = id; } +void hw_ctx::set_slotidx(uint32_t id) { m_handle = id; } cuidx_type hw_ctx::open_cu_context(const std::string& cu_name) { for (uint32_t i = 0; i < m_cu_info.size(); i++) { @@ -82,20 +68,6 @@ void hw_ctx::init_qos_info(const qos_type& qos) { } } -void hw_ctx::print_xclbin_info() { - if (m_cu_info.empty()) { - shim_debug("CU INFO is empty"); - return; - } - - for (int idx = 0; idx < m_cu_info.size(); idx++) { - auto& e = m_cu_info[idx]; - shim_debug("index=%d, name=%s, func=%d, pdi(p=%p, sz=%ld)", idx, - e.m_name.c_str(), e.m_func, e.m_pdi.data(), e.m_pdi.size()); - } - shim_debug("OPs/cycle: %d", m_ops_per_cycle); -} - const device& hw_ctx::get_device() { return m_device; } const std::vector& hw_ctx::get_cu_info() const { @@ -107,10 +79,10 @@ void hw_ctx::create_ctx_on_device() { arg.qos_p = reinterpret_cast(&m_qos); arg.umq_bo = m_q->get_queue_bo(); arg.max_opc = m_ops_per_cycle; - arg.num_tiles = - m_num_cols * - xrt_core::device_query(&m_device) - .core_rows; + // arg.num_tiles = + // m_num_cols * + // xrt_core::device_query(&m_device) + // .core_rows; arg.log_buf_bo = m_log_bo ? static_cast(m_log_bo.get())->get_drm_bo_handle() : AMDXDNA_INVALID_BO_HANDLE; diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwctx.h b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwctx.h index 446d64f58..9fec8cc6f 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwctx.h +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwctx.h @@ -7,18 +7,19 @@ #include #include "amdxdna_accel.h" -#include "device.h" +#include "shared.h" #include "shim_debug.h" namespace shim_xdna { struct hw_q; // forward declaration +struct device; +struct bo; +struct cuidx_type; struct hw_ctx { using qos_type = std::map; enum class access_mode : uint8_t { exclusive = 0, shared = 1 }; - using access_mode = access_mode; - using slot_id = uint32_t; hw_ctx(const device& dev, const qos_type& qos, std::unique_ptr q); @@ -29,7 +30,7 @@ struct hw_ctx { void update_access_mode(access_mode) { shim_not_supported_err(__func__); } - slot_id get_slotidx() const; + uint32_t get_slotidx() const; hw_q* get_hw_queue(); @@ -57,7 +58,7 @@ struct hw_ctx { const std::vector& get_cu_info() const; - void set_slotidx(slot_id id); + void set_slotidx(uint32_t id); void set_doorbell(uint32_t db); @@ -68,7 +69,7 @@ struct hw_ctx { void fini_log_buf(); const device& m_device; - slot_id m_handle = AMDXDNA_INVALID_CTX_HANDLE; + uint32_t m_handle = AMDXDNA_INVALID_CTX_HANDLE; amdxdna_qos_info m_qos = {}; std::vector m_cu_info; std::unique_ptr m_q; diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwq.cpp b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwq.cpp index d52c2a8ac..25bc89ad0 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwq.cpp +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwq.cpp @@ -6,6 +6,8 @@ #include "bo.h" #include "ert.h" #include "fence.h" +#include "hwctx.h" +#include "pcidev.h" #include "shim_debug.h" namespace { @@ -22,7 +24,7 @@ int wait_cmd(const shim_xdna::pdev &pdev, const shim_xdna::hw_ctx *ctx, auto boh = static_cast(cmd); auto id = boh->get_cmd_id(); - shim_debug("Waiting for cmd (%ld)...", id); + shim_xdna::shim_debug("Waiting for cmd (%ld)...", id); amdxdna_drm_wait_cmd wcmd = { .hwctx = ctx->get_slotidx(), diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwq.h b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwq.h index 30b3cfbf9..98442c49c 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwq.h +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwq.h @@ -3,12 +3,15 @@ #ifndef _HWQ_XDNA_H_ #define _HWQ_XDNA_H_ - -#include "fence.h" -#include "hwctx.h" -#include "shim_debug.h" +#include +#include namespace shim_xdna { +struct device; +struct bo; +struct hw_ctx; +struct pdev; +struct fence; struct hw_q { hw_q(const device &device); @@ -25,7 +28,7 @@ struct hw_q { void submit_signal(const fence *); - virtual void bind_hwctx(const hw_ctx *ctx) = 0; + void bind_hwctx(const hw_ctx *ctx); void unbind_hwctx(); diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/pcidev.cpp b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/pcidev.cpp index e057c61ac..f078d3e00 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/pcidev.cpp +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/pcidev.cpp @@ -427,9 +427,8 @@ void* pdev::mmap(void* addr, size_t len, int prot, int flags, void pdev::munmap(void* addr, size_t len) const { ::munmap(addr, len); } -std::shared_ptr pdev::create_device(device::handle_type handle, - device::id_type id) const { - auto dev = std::make_shared(*this, handle, id); +std::shared_ptr pdev::create_device(void* handle) const { + auto dev = std::make_shared(*this, handle); // Alloc device memory on first device creation. // No locking is needed since driver will ensure only one heap BO is // created. diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/pcidev.h b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/pcidev.h index a2cb858fe..a84fa646c 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/pcidev.h +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/pcidev.h @@ -39,8 +39,7 @@ struct pdev { std::string get_subdev_path(const std::string& subdev, uint32_t idx) const; - std::shared_ptr create_device(device::handle_type handle, - device::id_type id) const; + std::shared_ptr create_device(void* handle) const; void ioctl(unsigned long cmd, void* arg) const; diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/shim_debug.cpp b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/shim_debug.cpp index f57848458..d761a3995 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/shim_debug.cpp +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/shim_debug.cpp @@ -8,7 +8,7 @@ static std::recursive_mutex s_debug_mutex; -namespace { +namespace shim_xdna { struct debug_lock { std::lock_guard m_lk; debug_lock(); @@ -32,4 +32,4 @@ void debugf(const char* format, ...) { vprintf(format, args); va_end(args); } -} // namespace \ No newline at end of file +} // namespace shim_xdna \ No newline at end of file diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/shim_debug.h b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/shim_debug.h index fbc88dc09..e37dc2b55 100644 --- a/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/shim_debug.h +++ b/runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/shim_debug.h @@ -9,11 +9,10 @@ #include #include #include -#include #include "llvm/Support/Error.h" -namespace { +namespace shim_xdna { void debugf(const char* format, ...); @@ -52,6 +51,6 @@ void shim_info(const char* fmt, Args&&... args) { XRT_PRINTF(format.c_str(), getpid(), std::forward(args)...); } -} // namespace +} // namespace shim_xdna #endif // SHIM_DEBUG_H