Upstream the ONNX importer. (llvm#2636)

This is part 1 of 2; the second part will upstream the FX importer. I started with ONNX because it forces some project layout updates and is more self-contained and easier as a first step.

Deviating somewhat from the RFCs on project layout, I made the following decisions:

* Located `onnx_importer.py` in `torch_mlir.extras`, as Maks has already opened up that namespace and it seemed to fit. Better to have fewer things at that level.
* Set up the build so that the root project only contains MLIR Python and pure Python deps (like the importers), but this can be augmented with the `projects/` adding more depending on which features are enabled.
* The default build continues to build everything, whereas in `TORCH_MLIR_ENABLE_ONLY_MLIR_PYTHON_BINDINGS=1` mode (now `TORCH_MLIR_ENABLE_PYTORCH_EXTENSIONS=OFF`; see the rename below), it builds a `torch-mlir-core` wheel with the pure contents only.

`onnx_importer.py` and `importer_smoke_test.py` are almost verbatim copies from SHARK-Turbine. I made some minor local alterations to adapt to paths and generalize the way they interact with the outer project. I expect I can copy these back to Turbine verbatim from here. I also updated the license boilerplate (they have the same license but slightly different project norms for the headers) but retained the correct copyright.

Other updates:

* Added the ONNX importer unit test (which also can generate test data) in lit, conditioned on the availability of the Python `onnx` package. In a followup, once I know everything is stable, I'll add another env var that the CI can set to always enable this so we know conclusively if tests pass.
* Moved the ONNX conversion readme to `docs/`.
* Renamed CMake option `TORCH_MLIR_ENABLE_ONLY_MLIR_PYTHON_BINDINGS` -> `TORCH_MLIR_ENABLE_PYTORCH_EXTENSIONS` and inverted the sense. Made the JitIR importer and LTC options `cmake_dependent_options` for robustness.
Abhishek-TyRnT · Dec 13, 2023 · 74f7a0c · 74f7a0c
1 parent f67249d
commit 74f7a0c
Show file tree

Hide file tree

Showing 18 changed files with 1,208 additions and 149 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -25,6 +25,8 @@ project(torch-mlir LANGUAGES CXX C)
 set(CMAKE_C_STANDARD 11)
 set(CMAKE_CXX_STANDARD 17)
 
+include(CMakeDependentOption)
+
 #-------------------------------------------------------------------------------
 # Project options
 #-------------------------------------------------------------------------------
@@ -43,24 +45,11 @@ endif()
 
 option(TORCH_MLIR_OUT_OF_TREE_BUILD "Specifies an out of tree build" OFF)
 
-# PT1 options.
-option(TORCH_MLIR_ENABLE_PROJECT_PT1 "Enables the PyTorch1 project under projects/pt1" OFF)
-# TODO: Rename/scope these. They use historic names for now to ease migration
-# burden.
-option(TORCH_MLIR_ENABLE_JIT_IR_IMPORTER "Enables JIT IR Importer" ON)
-option(TORCH_MLIR_ENABLE_LTC "Enables LTC backend" OFF)
-option(TORCH_MLIR_ENABLE_ONLY_MLIR_PYTHON_BINDINGS "Build Torch dialect MLIR Python bindings but neither JIT IR Importer nor LTC backend" OFF)
-if(TORCH_MLIR_ENABLE_ONLY_MLIR_PYTHON_BINDINGS)
-  set(TORCH_MLIR_ENABLE_JIT_IR_IMPORTER OFF)
-  set(TORCH_MLIR_ENABLE_LTC OFF)
-endif()
-# Force enable the PT1 project if either the JIT_IR_IMPORTER or LTC is enabled.
-if(NOT TORCH_MLIR_ENABLE_PROJECT_PT1)
-  if(TORCH_MLIR_ENABLE_JIT_IR_IMPORTER OR TORCH_MLIR_ENABLE_LTC)
-    message(STATUS "Enabling projects/pt1 because features requiring it are enabled")
-    set(TORCH_MLIR_ENABLE_PROJECT_PT1 ON)
-  endif()
-endif()
+# PyTorch native extension gate. If OFF, then no features which depend on
+# native extensions will be built.
+option(TORCH_MLIR_ENABLE_PYTORCH_EXTENSIONS "Enables PyTorch native extension features" ON)
+cmake_dependent_option(TORCH_MLIR_ENABLE_JIT_IR_IMPORTER "Enables JIT IR Importer" ON TORCH_MLIR_ENABLE_PYTORCH_EXTENSIONS OFF)
+cmake_dependent_option(TORCH_MLIR_ENABLE_LTC "Enables LTC backend" OFF TORCH_MLIR_ENABLE_PYTORCH_EXTENSIONS OFF)
 
 #-------------------------------------------------------------------------------
 # Configure out-of-tree vs in-tree build
@@ -235,4 +224,16 @@ endif()
 # Sub-projects
 #-------------------------------------------------------------------------------
 
+# Sub-projects can bundle additional PyTorch extensions by adding them to this
+# source target. It is typically empty unless PyTorch extension features are enabled.
+if(MLIR_ENABLE_BINDINGS_PYTHON)
+  declare_mlir_python_sources(TorchMLIRPythonTorchExtensionsSources)
+endif()
+
+# Build projects first as it may populate additional Python deps.
 add_subdirectory(projects)
+
+# Finish with top-level Python bindings so it can handle additional deps.
+if(MLIR_ENABLE_BINDINGS_PYTHON)
+  add_subdirectory(python)
+endif()
diff --git a/build_tools/python_deploy/build_linux_packages.sh b/build_tools/python_deploy/build_linux_packages.sh
@@ -351,14 +351,14 @@ function setup_venv() {
       echo ":::: Using stable dependencies"
       python3 -m pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu
       python3 -m pip install --no-cache-dir -r /main_checkout/torch-mlir/build-requirements.txt
-      python3 -m pip install --no-cache-dir -r /main_checkout/torch-mlir/test-requirements.txt
       ;;
     *)
       echo "Unrecognized torch version '$torch_version'"
       exit 1
       ;;
   esac
 
+  python3 -m pip install --no-cache-dir -r /main_checkout/torch-mlir/test-requirements.txt
 }
 
 function build_out_of_tree() {

diff --git a/...lir/Conversion/TorchOnnxToTorch/README.md → docs/importers/onnx_importer.md b/...lir/Conversion/TorchOnnxToTorch/README.md → docs/importers/onnx_importer.md
@@ -3,11 +3,8 @@
 We enable the direct representation of many ONNX features directly in
 the `torch` dialect as `torch.operator` custom ops with names like
 `onnx.{OperatorName}`. The majority of ONNX operators are represented
-with a systematic transformation. See 
-[onnx_importer.py](https://github.com/nod-ai/SHARK-Turbine/blob/main/python/shark_turbine/importers/onnx_importer.py)
-for the reference importer which complies with the rules below
-(this is planned to be upstreamed to torch-mlir proper in the near
-future).
+with a systematic transformation. See `torch_mlir.extras.onnx_importer`
+for the reference importer, which complies with the rules below.
 
 ## Adding new ONNX operators
 
@@ -26,10 +23,11 @@ are relatively straight-forward to map, following this general procedure:
 * Open the corresponding implementation file `DefaultDomainXtoY.cpp`
   corresponding with the alphabetic sort of the op and add a conversion.
 * Generate successful test cases:
-  * Either run the Turbine importer to produce MLIR output for all
-    ops/models in the ONNX test suite or use a dump that someone has
-    generated:
-      * [2023-Nov-21](https://drive.google.com/file/d/1P6QaRXGnCeApjdjNmykLxWa-yqMmIO-d/view?usp=sharing)
+  * All `onnx_importer.py` tests dump their output to the test temp dir
+    (whether they succeed or fail). This is typically located under
+    `tools/torch-mlir/test/python/onnx_importer/Output`. The `.mlir` files
+    in that directory should provide good variants to drive lit test
+    coverage of conversion.
   * There are often many variants of tests for checking conformance of
     different historic ONNX encodings, but these are often not load bearing
     at the MLIR level.

diff --git a/projects/CMakeLists.txt b/projects/CMakeLists.txt
@@ -1,7 +1,31 @@
 include(AddMLIRPython)
 
+################################################################################
+# PyTorch
 # Configure PyTorch if we have any features enabled which require it.
+################################################################################
 if(TORCH_MLIR_ENABLE_JIT_IR_IMPORTER OR TORCH_MLIR_ENABLE_LTC)
+
+  if (NOT TORCH_MLIR_USE_INSTALLED_PYTORCH)
+    # Source builds
+    message(STATUS "Building libtorch from source (features depend on it and NOT TORCH_MLIR_USE_INSTALLED_PYTORCH)")
+    set(ENV{TORCH_MLIR_SRC_PYTORCH_REPO} ${TORCH_MLIR_SRC_PYTORCH_REPO})
+    set(ENV{TORCH_MLIR_SRC_PYTORCH_BRANCH} ${TORCH_MLIR_SRC_PYTORCH_BRANCH})
+    set(ENV{TM_PYTORCH_INSTALL_WITHOUT_REBUILD} ${TM_PYTORCH_INSTALL_WITHOUT_REBUILD})
+    set(ENV{MACOSX_DEPLOYMENT_TARGET} ${MACOSX_DEPLOYMENT_TARGET})
+    set(ENV{CMAKE_OSX_ARCHITECTURES} ${CMAKE_OSX_ARCHITECTURES})
+    set(ENV{CMAKE_C_COMPILER_LAUNCHER} ${CMAKE_C_COMPILER_LAUNCHER})
+    set(ENV{CMAKE_CXX_COMPILER_LAUNCHER} ${CMAKE_CXX_COMPILER_LAUNCHER})
+    execute_process(
+            COMMAND ${TORCH_MLIR_SOURCE_DIR}/build_tools/build_libtorch.sh
+            RESULT_VARIABLE _result
+    )
+    if(_result)
+      message(FATAL_ERROR "Failed to run `build_libtorch.sh`")
+    endif()
+    set(TORCH_INSTALL_PREFIX "libtorch")
+  endif()
+
   message(STATUS "Enabling PyTorch C++ dep (features depend on it)")
   include(TorchMLIRPyTorch)
 
@@ -48,6 +72,6 @@ if(TORCH_MLIR_ENABLE_LTC)
 endif()
 
 # Include overall PT1 project.
-if(TORCH_MLIR_ENABLE_PROJECT_PT1)
+if(TORCH_MLIR_ENABLE_PYTORCH_EXTENSIONS)
   add_subdirectory(pt1)
 endif()
diff --git a/projects/pt1/python/CMakeLists.txt b/projects/pt1/python/CMakeLists.txt
@@ -7,79 +7,22 @@ set(CMAKE_PLATFORM_NO_VERSIONED_SONAME ON)
 # argument.
 set(TORCH_MLIR_PYTHON_ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/torch_mlir")
 
-
 # We vendor our own MLIR instance in the `torch_mlir` namespace.
 add_compile_definitions("MLIR_PYTHON_PACKAGE_PREFIX=torch_mlir.")
 
-################################################################################
-# PyTorch
-################################################################################
-
-if (NOT TORCH_MLIR_USE_INSTALLED_PYTORCH)
-  # Source builds
-  set(ENV{TORCH_MLIR_SRC_PYTORCH_REPO} ${TORCH_MLIR_SRC_PYTORCH_REPO})
-  set(ENV{TORCH_MLIR_SRC_PYTORCH_BRANCH} ${TORCH_MLIR_SRC_PYTORCH_BRANCH})
-  set(ENV{TM_PYTORCH_INSTALL_WITHOUT_REBUILD} ${TM_PYTORCH_INSTALL_WITHOUT_REBUILD})
-  set(ENV{MACOSX_DEPLOYMENT_TARGET} ${MACOSX_DEPLOYMENT_TARGET})
-  set(ENV{CMAKE_OSX_ARCHITECTURES} ${CMAKE_OSX_ARCHITECTURES})
-  set(ENV{CMAKE_C_COMPILER_LAUNCHER} ${CMAKE_C_COMPILER_LAUNCHER})
-  set(ENV{CMAKE_CXX_COMPILER_LAUNCHER} ${CMAKE_CXX_COMPILER_LAUNCHER})
-  execute_process(
-          COMMAND ${TORCH_MLIR_SOURCE_DIR}/build_tools/build_libtorch.sh
-          RESULT_VARIABLE _result
-  )
-  if(_result)
-    message(FATAL_ERROR "Failed to run `build_libtorch.sh`")
-  endif()
-  set(TORCH_INSTALL_PREFIX "libtorch")
-endif()
-
-################################################################################
-# Sources
-################################################################################
-
-declare_mlir_python_sources(TorchMLIRPythonSources)
-declare_mlir_python_sources(TorchMLIRPythonExtensions)
-
-if (NOT TORCH_MLIR_ENABLE_ONLY_MLIR_PYTHON_BINDINGS)
-  declare_mlir_python_sources(TorchMLIRPythonSources.TopLevel
-    ROOT_DIR "${TORCH_MLIR_PYTHON_ROOT_DIR}"
-    ADD_TO_PARENT TorchMLIRPythonSources
-    SOURCES
-      __init__.py
-      _dynamo_fx_importer.py
-      compiler_utils.py
-      dynamo.py
-      _version.py
-  )
-endif()
-
-declare_mlir_python_sources(TorchMLIRPythonSources.Dialects
-  ROOT_DIR "${TORCH_MLIR_PYTHON_ROOT_DIR}"
-  ADD_TO_PARENT TorchMLIRPythonSources
-)
+################################################################################
+# Sources
+################################################################################
 
-declare_mlir_dialect_python_bindings(
-  ADD_TO_PARENT TorchMLIRPythonSources.Dialects
+declare_mlir_python_sources(TorchMLIRPythonSources.TopLevel
   ROOT_DIR "${TORCH_MLIR_PYTHON_ROOT_DIR}"
-  TD_FILE dialects/TorchBinding.td
-  SOURCES dialects/torch/__init__.py
-  DIALECT_NAME torch
-)
-
-################################################################################
-# Extensions
-################################################################################
-
-declare_mlir_python_extension(TorchMLIRPythonExtensions.Main
-  MODULE_NAME _torchMlir
-  ADD_TO_PARENT TorchMLIRPythonExtensions
+  ADD_TO_PARENT TorchMLIRPythonTorchExtensionsSources
   SOURCES
-    TorchMLIRModule.cpp
-  EMBED_CAPI_LINK_LIBS
-    TorchMLIRCAPI
-  PRIVATE_LINK_LIBS
-    LLVMSupport
+    __init__.py
+    _dynamo_fx_importer.py
+    compiler_utils.py
+    dynamo.py
+    _version.py
 )
 
 ################################################################################
@@ -110,56 +53,23 @@ endif()
 
 # add_subdirectory(torch_mlir/_torch_mlir_custom_op_example)
 
-################################################################################
-# Generate packages and shared library
-# Downstreams typically will not use these, but they are useful for local
-# testing.
-################################################################################
-
-set(_source_components
-  # TODO: Core is now implicitly building/registering all dialects, increasing
-  # build burden by ~5x. Make it stop.
-  # TODO: Reduce dependencies. We need ExecutionEngine and a bunch of passes
-  # for the reference backend, but logically they can be separate. But seemingly
-  # the only way to handle that is to create a separate mlir python package
-  # tree, which seems excessive.
-  MLIRPythonSources
-  MLIRPythonExtension.Core
-  MLIRPythonExtension.RegisterEverything
-  TorchMLIRPythonSources
-  TorchMLIRPythonExtensions
-)
-
-add_mlir_python_common_capi_library(TorchMLIRAggregateCAPI
-  INSTALL_COMPONENT TorchMLIRPythonModules
-  INSTALL_DESTINATION python_packages/torch_mlir/torch_mlir/_mlir_libs
-  OUTPUT_DIRECTORY "${TORCH_MLIR_PYTHON_PACKAGES_DIR}/torch_mlir/torch_mlir/_mlir_libs"
-  RELATIVE_INSTALL_ROOT "../../../.."
-  DECLARED_SOURCES ${_source_components}
-)
-
-add_mlir_python_modules(TorchMLIRPythonModules
-  ROOT_PREFIX "${TORCH_MLIR_PYTHON_PACKAGES_DIR}/torch_mlir/torch_mlir"
-  INSTALL_PREFIX "python_packages/torch_mlir/torch_mlir"
-  DECLARED_SOURCES ${_source_components}
-  COMMON_CAPI_LINK_LIBS
-    TorchMLIRAggregateCAPI
-  )
-
 # TODO: Find a cleaner way to do this.
 # Can we build the JIT IR importer with `declare_mlir_python_extension`?
 # Then it would "just work".
 if(TORCH_MLIR_ENABLE_JIT_IR_IMPORTER)
-  add_dependencies(TorchMLIRPythonModules TorchMLIRJITIRImporter)
-  add_dependencies(TorchMLIRPythonModules TorchMLIRJITIRImporterPybind)
-  # Build the E2E Tests (which depend on the JIT IR importer now).
-  add_dependencies(TorchMLIRPythonModules TorchMLIRE2ETestPythonModules)
+  add_dependencies(TorchMLIRPythonTorchExtensionsSources 
+    TorchMLIRJITIRImporter
+    TorchMLIRJITIRImporterPybind
+    TorchMLIRE2ETestPythonModules
+  )
 endif()
 
 if(TORCH_MLIR_ENABLE_LTC)
   # Add Torch-MLIR LTC backend as dependency
-  add_dependencies(TorchMLIRPythonModules torch_mlir_ltc_backend)
-  add_dependencies(TorchMLIRPythonModules reference_lazy_backend)
+  add_dependencies(TorchMLIRPythonTorchExtensionsSources 
+    torch_mlir_ltc_backend
+    reference_lazy_backend
+  )
 endif()
 
 add_subdirectory(test)
diff --git a/projects/pt1/python/torch_mlir/jit_ir_importer/CMakeLists.txt b/projects/pt1/python/torch_mlir/jit_ir_importer/CMakeLists.txt
@@ -4,9 +4,9 @@
 
 ## Declare the sources of the Python module.
 
-declare_mlir_python_sources(TorchMLIRPythonSources.JitIRImporter
+declare_mlir_python_sources(TorchMLIRPythonTorchExtensionsSources.JitIRImporter
   ROOT_DIR "${TORCH_MLIR_PYTHON_ROOT_DIR}"
-  ADD_TO_PARENT TorchMLIRPythonSources
+  ADD_TO_PARENT TorchMLIRPythonTorchExtensionsSources
   SOURCES_GLOB
     jit_ir_importer/*.py
 )