From 9b43a646fc70f3d4a2d033c73380e0bae75f4586 Mon Sep 17 00:00:00 2001
From: Andrew Reusch
Date: Mon, 29 Mar 2021 17:07:21 -0700
Subject: [PATCH] Rename GraphRuntime to GraphExecutor (#7653)

---
 CMakeLists.txt | 44 ++-
 .../Camera2BasicFragment.java | 28 +-
 .../app/src/main/jni/tvm_runtime.h | 2 +-
 apps/android_camera/models/prepare_model.py | 2 +-
 .../apache/tvm/android/demo/MainActivity.java | 28 +-
 .../app/src/main/jni/tvm_runtime.h | 2 +-
 .../app/src/main/jni/tvm_runtime.h | 4 +-
 apps/benchmark/arm_cpu_imagenet_bench.py | 2 +-
 apps/benchmark/gpu_imagenet_bench.py | 2 +-
 apps/benchmark/mobile_gpu_imagenet_bench.py | 2 +-
 apps/bundle_deploy/Makefile | 12 +-
 apps/bundle_deploy/README.md | 8 +-
 apps/bundle_deploy/bundle.c | 32 +-
 apps/bundle_deploy/bundle.cc | 2 +-
 apps/bundle_deploy/bundle_static.c | 34 +-
 apps/bundle_deploy/runtime.cc | 2 +-
 apps/howto_deploy/cpp_deploy.cc | 8 +-
 apps/howto_deploy/tvm_runtime_pack.cc | 6 +-
 apps/ios_rpc/tests/ios_rpc_mobilenet.py | 4 +-
 apps/ios_rpc/tvmrpc/TVMRuntime.mm | 4 +-
 .../reference-vm/zephyr/rebuild-tvm.sh | 2 +-
 cmake/config.cmake | 14 +-
 cmake/modules/CUDA.cmake | 10 +-
 cmake/modules/LibInfo.cmake | 6 +-
 cmake/modules/StandaloneCrt.cmake | 6 +-
 cmake/modules/contrib/ArmComputeLib.cmake | 22 +-
 cmake/modules/contrib/TensorRT.cmake | 2 +-
 conda/recipe/bld.bat | 2 +-
 conda/recipe/build.sh | 2 +-
 docker/Dockerfile.demo_android | 2 +-
 docker/install/install_tvm_cpu.sh | 2 +-
 .../{graph_runtime.rst => graph_executor.rst} | 4 +-
 docs/api/python/index.rst | 2 +-
 docs/api/python/relay/backend.rst | 2 +-
 docs/deploy/arm_compute_lib.rst | 12 +-
 docs/deploy/bnns.rst | 4 +-
 docs/deploy/tensorrt.rst | 2 +-
 docs/deploy/vitis_ai.rst | 14 +-
 docs/dev/debugger.rst | 14 +-
 docs/dev/index.rst | 2 +-
 docs/dev/microtvm_design.rst | 22 +-
 docs/dev/virtual_machine.rst | 10 +-
 docs/install/from_source.rst | 2 +-
 golang/sample/complex.go | 10 +-
 golang/src/function_test.go | 2 +-
 golang/src/tvm_runtime_pack.cc | 4 +-
 include/tvm/runtime/crt/error_codes.h | 10 +-
 .../crt/{graph_runtime.h => graph_executor.h} | 54 +--
 ...ntime_module.h => graph_executor_module.h} | 14 +-
 .../{GraphRuntime.java => GraphExecutor.java} | 6 +-
 .../org/apache/tvm/contrib/GraphModule.java | 4 +-
 ...untimeTest.java => GraphExecutorTest.java} | 8 +-
 ...raph_runtime.py => test_graph_executor.py} | 2 +-
 .../tvm/auto_scheduler/relay_integration.py | 6 +-
 python/tvm/autotvm/task/relay_integration.py | 8 +-
 ...raph_runtime.py => cuda_graph_executor.py} | 30 +-
 python/tvm/contrib/debugger/debug_executor.py | 239 +++++++++++++
 python/tvm/contrib/debugger/debug_runtime.py | 228 +------------
 python/tvm/contrib/graph_executor.py | 306 +++++++++++++++++
 python/tvm/contrib/graph_runtime.py | 295 +---------------
 python/tvm/driver/tvmc/compiler.py | 2 +-
 python/tvm/driver/tvmc/runner.py | 10 +-
 python/tvm/micro/__init__.py | 4 +-
 python/tvm/micro/model_library_format.py | 6 +-
 python/tvm/micro/session.py | 28 +-
 python/tvm/relay/analysis/analysis.py | 2 +-
 ...e_codegen.py => graph_executor_codegen.py} | 10 +-
 ...e_factory.py => graph_executor_factory.py} | 14 +-
 python/tvm/relay/build_module.py | 26 +-
 python/tvm/relay/frontend/common.py | 4 +-
 .../tvm/relay/op/contrib/arm_compute_lib.py | 2 +-
 python/tvm/relay/op/contrib/tensorrt.py | 2 +-
 python/tvm/relay/quantize/_calibrate.py | 4 +-
 rust/tvm-graph-rt/Cargo.toml | 2 +-
 rust/tvm-graph-rt/src/graph.rs | 2 +-
 rust/tvm/README.md | 2 +-
 rust/tvm/examples/resnet/src/build_resnet.py | 6 +-
 rust/tvm/src/lib.rs | 2 +-
 rust/tvm/src/runtime/graph_rt.rs | 20 +-
src/relay/analysis/get_calibration_data.cc | 4 +- src/relay/backend/build_module.cc | 6 +- src/relay/backend/compile_engine.cc | 2 +- .../contrib/arm_compute_lib/codegen.cc | 6 +- src/relay/backend/contrib/tensorrt/codegen.cc | 12 +- ...e_codegen.cc => graph_executor_codegen.cc} | 26 +- src/relay/backend/graph_plan_memory.cc | 2 +- src/relay/transforms/partition_graph.cc | 2 +- .../contrib/arm_compute_lib/acl_runtime.cc | 8 +- .../contrib/tensorrt/tensorrt_runtime.cc | 4 +- src/runtime/crt/Makefile | 4 +- src/runtime/crt/common/crt_runtime_api.c | 2 +- .../graph_executor.c} | 321 +++++++++--------- .../load_json.c | 2 +- .../graph_executor_module.c} | 122 +++---- src/runtime/crt/host/main.cc | 10 +- .../graph_executor.h} | 66 ++-- .../load_json.h | 14 +- .../cuda_graph/graph_runtime_cuda_graph.cc | 37 +- .../debug/graph_executor_debug.cc} | 40 +-- .../graph_executor.cc} | 73 ++-- .../graph_executor.h} | 22 +- .../graph_executor_factory.cc} | 91 ++--- .../graph_executor_factory.h} | 50 +-- src/runtime/metadata_module.cc | 2 +- ...raph_runtime.cc => utvm_graph_executor.cc} | 16 +- ..._graph_runtime.h => utvm_graph_executor.h} | 16 +- src/runtime/micro/standalone/utvm_runtime.cc | 14 +- src/support/libinfo.cc | 18 +- src/target/metadata_module.cc | 2 +- tests/azure-pipelines/main.yml | 6 +- tests/cpp/build_module_test.cc | 8 +- tests/cpp/relay_build_module_test.cc | 2 +- tests/micro/test_runtime_micro_on_arm.py | 16 +- tests/micro/zephyr/test_zephyr.py | 6 +- .../test_arm_compute_lib/infrastructure.py | 4 +- .../contrib/test_bnns/infrastructure.py | 4 +- .../contrib/test_bnns/test_onnx_topologies.py | 4 +- tests/python/contrib/test_coreml_codegen.py | 2 +- .../contrib/test_ethosn/infrastructure.py | 4 +- tests/python/contrib/test_tensorrt.py | 8 +- .../contrib/test_vitis_ai/infrastructure.py | 4 +- tests/python/frontend/caffe/test_forward.py | 4 +- tests/python/frontend/caffe2/test_forward.py | 4 +- tests/python/frontend/coreml/test_forward.py | 8 +- tests/python/frontend/darknet/test_forward.py | 4 +- tests/python/frontend/keras/test_forward.py | 4 +- tests/python/frontend/mxnet/test_forward.py | 8 +- .../frontend/mxnet/test_qnn_ops_utils.py | 6 +- tests/python/frontend/onnx/test_forward.py | 4 +- tests/python/frontend/pytorch/qnn_test.py | 2 +- tests/python/frontend/pytorch/test_forward.py | 6 +- .../frontend/tensorflow/test_bn_dynamic.py | 4 +- .../frontend/tensorflow/test_forward.py | 16 +- tests/python/frontend/tflite/test_forward.py | 8 +- .../test_quantization_accuracy.py | 2 +- .../python/relay/benchmarking/benchmark_vm.py | 12 +- ..._auto_scheduler_layout_rewrite_networks.py | 4 +- .../relay/test_auto_scheduler_tuning.py | 4 +- ...time.py => test_backend_graph_executor.py} | 8 +- tests/python/relay/test_cpp_build_module.py | 6 +- tests/python/relay/test_external_codegen.py | 10 +- tests/python/relay/test_json_runtime.py | 8 +- tests/python/relay/test_op_fast_math.py | 4 +- tests/python/relay/test_op_level2.py | 4 +- tests/python/relay/test_op_qnn_concatenate.py | 2 +- .../relay/test_op_qnn_conv2_transpose.py | 4 +- tests/python/relay/test_op_qnn_conv2d.py | 10 +- tests/python/relay/test_op_qnn_dense.py | 4 +- tests/python/relay/test_op_qnn_dequantize.py | 6 +- tests/python/relay/test_op_qnn_mul.py | 2 +- tests/python/relay/test_op_qnn_quantize.py | 6 +- tests/python/relay/test_op_qnn_requantize.py | 4 +- .../relay/test_op_qnn_simulated_dequantize.py | 4 +- .../relay/test_op_qnn_simulated_quantize.py | 4 +- tests/python/relay/test_param_dict.py | 8 +- 
.../python/relay/test_pass_annotate_target.py | 6 +- tests/python/relay/test_pass_annotation.py | 16 +- tests/python/relay/test_pass_legalize.py | 2 +- .../relay/test_pass_legalize_tensorcore.py | 2 +- .../python/relay/test_pass_partition_graph.py | 6 +- tests/python/relay/test_pass_qnn_legalize.py | 2 +- .../relay/test_simplify_fc_transpose.py | 4 +- .../python/relay/test_sparse_dense_convert.py | 4 +- tests/python/topi/python/test_topi_qnn.py | 6 +- tests/python/unittest/test_crt.py | 6 +- tests/python/unittest/test_link_params.py | 20 +- .../test_micro_model_library_format.py | 4 +- tests/python/unittest/test_runtime_graph.py | 14 +- .../unittest/test_runtime_graph_cuda_graph.py | 6 +- .../unittest/test_runtime_graph_debug.py | 6 +- .../unittest/test_runtime_heterogeneous.py | 8 +- .../test_runtime_module_based_interface.py | 66 ++-- .../unittest/test_target_codegen_blob.py | 6 +- .../unittest/test_tir_transform_hoist_if.py | 4 +- tests/scripts/task_config_build_gpu.sh | 2 +- tests/scripts/task_java_unittest.sh | 2 +- tests/scripts/task_rust.sh | 4 +- tutorials/auto_scheduler/tune_network_arm.py | 6 +- tutorials/auto_scheduler/tune_network_cuda.py | 6 +- tutorials/auto_scheduler/tune_network_mali.py | 6 +- tutorials/auto_scheduler/tune_network_x86.py | 6 +- tutorials/autotvm/tune_relay_arm.py | 2 +- tutorials/autotvm/tune_relay_cuda.py | 2 +- tutorials/autotvm/tune_relay_mobile_gpu.py | 2 +- tutorials/autotvm/tune_relay_x86.py | 2 +- tutorials/frontend/build_gcn.py | 6 +- tutorials/frontend/deploy_model_on_android.py | 4 +- tutorials/frontend/deploy_model_on_rasp.py | 2 +- tutorials/frontend/deploy_prequantized.py | 2 +- .../frontend/deploy_prequantized_tflite.py | 4 +- tutorials/frontend/deploy_sparse.py | 4 +- tutorials/frontend/deploy_ssd_gluoncv.py | 4 +- tutorials/frontend/from_caffe2.py | 4 +- tutorials/frontend/from_coreml.py | 4 +- tutorials/frontend/from_darknet.py | 4 +- tutorials/frontend/from_mxnet.py | 4 +- tutorials/frontend/from_pytorch.py | 4 +- tutorials/frontend/from_tensorflow.py | 4 +- tutorials/frontend/from_tflite.py | 2 +- tutorials/frontend/using_external_lib.py | 2 +- tutorials/get_started/relay_quick_start.py | 8 +- tutorials/micro/micro_tflite.py | 4 +- vta/scripts/tune_resnet.py | 8 +- vta/tutorials/autotvm/tune_relay_vta.py | 6 +- .../frontend/deploy_classification.py | 12 +- .../frontend/legacy/deploy_detection.py | 10 +- web/emcc/wasm_runtime.cc | 2 +- web/src/runtime.ts | 18 +- 208 files changed, 1727 insertions(+), 1641 deletions(-) rename docs/api/python/{graph_runtime.rst => graph_executor.rst} (92%) rename include/tvm/runtime/crt/{graph_runtime.h => graph_executor.h} (61%) rename include/tvm/runtime/crt/{graph_runtime_module.h => graph_executor_module.h} (71%) rename jvm/core/src/main/java/org/apache/tvm/contrib/{GraphRuntime.java => GraphExecutor.java} (95%) rename jvm/core/src/test/java/org/apache/tvm/contrib/{GraphRuntimeTest.java => GraphExecutorTest.java} (93%) rename jvm/core/src/test/scripts/{test_graph_runtime.py => test_graph_executor.py} (98%) rename python/tvm/contrib/cuda_graph/{cuda_graph_runtime.py => cuda_graph_executor.py} (78%) create mode 100644 python/tvm/contrib/debugger/debug_executor.py create mode 100644 python/tvm/contrib/graph_executor.py rename python/tvm/relay/backend/{graph_runtime_codegen.py => graph_executor_codegen.py} (91%) rename python/tvm/relay/backend/{graph_runtime_factory.py => graph_executor_factory.py} (88%) rename src/relay/backend/{graph_runtime_codegen.cc => graph_executor_codegen.cc} (96%) rename 
src/runtime/crt/{graph_runtime/graph_runtime.c => graph_executor/graph_executor.c} (76%) rename src/runtime/crt/{graph_runtime => graph_executor}/load_json.c (99%) rename src/runtime/crt/{graph_runtime_module/graph_runtime_module.c => graph_executor_module/graph_executor_module.c} (51%) rename src/runtime/crt/include/tvm/runtime/crt/internal/{graph_runtime/graph_runtime.h => graph_executor/graph_executor.h} (58%) rename src/runtime/crt/include/tvm/runtime/crt/internal/{graph_runtime => graph_executor}/load_json.h (90%) rename src/runtime/{graph => graph_executor}/cuda_graph/graph_runtime_cuda_graph.cc (76%) rename src/runtime/{graph/debug/graph_runtime_debug.cc => graph_executor/debug/graph_executor_debug.cc} (89%) rename src/runtime/{graph/graph_runtime.cc => graph_executor/graph_executor.cc} (88%) rename src/runtime/{graph/graph_runtime.h => graph_executor/graph_executor.h} (95%) rename src/runtime/{graph/graph_runtime_factory.cc => graph_executor/graph_executor_factory.cc} (65%) rename src/runtime/{graph/graph_runtime_factory.h => graph_executor/graph_executor_factory.h} (68%) rename src/runtime/micro/standalone/{utvm_graph_runtime.cc => utvm_graph_executor.cc} (96%) rename src/runtime/micro/standalone/{utvm_graph_runtime.h => utvm_graph_executor.h} (90%) rename tests/python/relay/{test_backend_graph_runtime.py => test_backend_graph_executor.py} (97%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6d37bd4e6e44..277fe4a9bfbc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,9 +34,9 @@ tvm_option(USE_RPC "Build with RPC" ON) tvm_option(USE_THREADS "Build with thread support" ON) tvm_option(USE_LLVM "Build with LLVM, can be set to specific llvm-config path" OFF) tvm_option(USE_STACKVM_RUNTIME "Include stackvm into the runtime" OFF) -tvm_option(USE_GRAPH_RUNTIME "Build with tiny graph runtime" ON) -tvm_option(USE_GRAPH_RUNTIME_CUDA_GRAPH "Build with tiny graph runtime with CUDA Graph for GPUs" OFF) -tvm_option(USE_PROFILER "Build profiler for the VM and graph runtime" ON) +tvm_option(USE_GRAPH_EXECUTOR "Build with tiny graph executor" ON) +tvm_option(USE_GRAPH_EXECUTOR_CUDA_GRAPH "Build with tiny graph executor with CUDA Graph for GPUs" OFF) +tvm_option(USE_PROFILER "Build profiler for the VM and graph executor" ON) tvm_option(USE_OPENMP "Build with OpenMP thread pool implementation" OFF) tvm_option(USE_RELAY_DEBUG "Building Relay in debug mode..." 
OFF) tvm_option(USE_RTTI "Build with RTTI" ON) @@ -79,7 +79,7 @@ tvm_option(USE_COREML "Build with coreml support" OFF) tvm_option(USE_BNNS "Build with BNNS support" OFF) tvm_option(USE_TARGET_ONNX "Build with ONNX Codegen support" OFF) tvm_option(USE_ARM_COMPUTE_LIB "Build with Arm Compute Library" OFF) -tvm_option(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME "Build with Arm Compute Library graph runtime" OFF) +tvm_option(USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR "Build with Arm Compute Library graph executor" OFF) tvm_option(USE_TENSORRT_CODEGEN "Build with TensorRT Codegen support" OFF) tvm_option(USE_TENSORRT_RUNTIME "Build with TensorRT runtime" OFF) tvm_option(USE_RUST_EXT "Build with Rust based compiler extensions, STATIC, DYNAMIC, or OFF" OFF) @@ -307,16 +307,30 @@ else() list(APPEND COMPILER_SRCS ${STACKVM_RUNTIME_SRCS}) endif(USE_STACKVM_RUNTIME) -if(USE_GRAPH_RUNTIME) - message(STATUS "Build with Graph runtime support...") - file(GLOB RUNTIME_GRAPH_SRCS src/runtime/graph/*.cc) - list(APPEND RUNTIME_SRCS ${RUNTIME_GRAPH_SRCS}) +# NOTE(areusch): USE_GRAPH_RUNTIME will be deleted in a future release +if(USE_GRAPH_RUNTIME AND NOT DEFINED USE_GRAPH_EXECUTOR) + message(WARNING "USE_GRAPH_RUNTIME renamed to USE_GRAPH_EXECUTOR. Please update your config.cmake") + set(USE_GRAPH_EXECUTOR ${USE_GRAPH_RUNTIME}) + unset(USE_GRAPH_RUNTIME CACHE) +endif(USE_GRAPH_RUNTIME AND NOT DEFINED USE_GRAPH_EXECUTOR) + +# NOTE(areusch): USE_GRAPH_RUNTIME_DEBUG will be deleted in a future release +if(USE_GRAPH_RUNTIME_DEBUG AND NOT DEFINED USE_GRAPH_EXECUTOR_DEBUG) + message(WARNING "USE_GRAPH_RUNTIME_DEBUG renamed to USE_GRAPH_EXECUTOR_DEBUG. Please update your config.cmake") + set(USE_GRAPH_EXECUTOR_DEBUG ${USE_GRAPH_RUNTIME_DEBUG}) + unset(USE_GRAPH_RUNTIME_DEBUG CACHE) +endif(USE_GRAPH_RUNTIME_DEBUG AND NOT DEFINED USE_GRAPH_EXECUTOR_DEBUG) + +if(USE_GRAPH_EXECUTOR) + message(STATUS "Build with Graph Executor support...") + file(GLOB RUNTIME_GRAPH_EXECUTOR_SRCS src/runtime/graph_executor/*.cc) + list(APPEND RUNTIME_SRCS ${RUNTIME_GRAPH_EXECUTOR_SRCS}) -endif(USE_GRAPH_RUNTIME) +endif(USE_GRAPH_EXECUTOR) # convert old options for profiler -if(USE_GRAPH_RUNTIME_DEBUG) - unset(USE_GRAPH_RUNTIME_DEBUG CACHE) +if(USE_GRAPH_EXECUTOR_DEBUG) + unset(USE_GRAPH_EXECUTOR_DEBUG CACHE) set(USE_PROFILER ON) endif() if(USE_VM_PROFILER) @@ -327,10 +341,10 @@ endif() if(USE_PROFILER) message(STATUS "Build with profiler...") - file(GLOB RUNTIME_GRAPH_DEBUG_SRCS src/runtime/graph/debug/*.cc) - list(APPEND RUNTIME_SRCS ${RUNTIME_GRAPH_DEBUG_SRCS}) - set_source_files_properties(${RUNTIME_GRAPH_SRCS} - PROPERTIES COMPILE_DEFINITIONS "TVM_GRAPH_RUNTIME_DEBUG") + file(GLOB RUNTIME_GRAPH_EXECUTOR_DEBUG_SRCS src/runtime/graph_executor/debug/*.cc) + list(APPEND RUNTIME_SRCS ${RUNTIME_GRAPH_EXECUTOR_DEBUG_SRCS}) + set_source_files_properties(${RUNTIME_GRAPH_EXECUTOR_SRCS} + PROPERTIES COMPILE_DEFINITIONS "TVM_GRAPH_EXECUTOR_DEBUG") file(GLOB RUNTIME_VM_PROFILER_SRCS src/runtime/vm/profiler/*.cc) list(APPEND RUNTIME_SRCS ${RUNTIME_VM_PROFILER_SRCS}) diff --git a/apps/android_camera/app/src/main/java/org/apache/tvm/android/androidcamerademo/Camera2BasicFragment.java b/apps/android_camera/app/src/main/java/org/apache/tvm/android/androidcamerademo/Camera2BasicFragment.java index 53913ef306dc..8a5f54a3e399 100644 --- a/apps/android_camera/app/src/main/java/org/apache/tvm/android/androidcamerademo/Camera2BasicFragment.java +++ b/apps/android_camera/app/src/main/java/org/apache/tvm/android/androidcamerademo/Camera2BasicFragment.java @@ -111,7 +111,7 @@ 
public class Camera2BasicFragment extends Fragment { private AppCompatTextView mInfoView; private ListView mModelView; private AssetManager assetManager; - private Module graphRuntimeModule; + private Module graphExecutorModule; private JSONObject labels; private ListenableFuture cameraProviderFuture; private PreviewView previewView; @@ -187,21 +187,21 @@ private String[] getModels() { private String[] inference(float[] chw) { NDArray inputNdArray = NDArray.empty(new long[]{1, IMG_CHANNEL, MODEL_INPUT_SIZE, MODEL_INPUT_SIZE}, new TVMType("float32")); inputNdArray.copyFrom(chw); - Function setInputFunc = graphRuntimeModule.getFunction("set_input"); + Function setInputFunc = graphExecutorModule.getFunction("set_input"); setInputFunc.pushArg(INPUT_NAME).pushArg(inputNdArray).invoke(); // release tvm local variables inputNdArray.release(); setInputFunc.release(); // get the function from the module(run it) - Function runFunc = graphRuntimeModule.getFunction("run"); + Function runFunc = graphExecutorModule.getFunction("run"); runFunc.invoke(); // release tvm local variables runFunc.release(); // get the function from the module(get output data) NDArray outputNdArray = NDArray.empty(new long[]{1, 1000}, new TVMType("float32")); - Function getOutputFunc = graphRuntimeModule.getFunction("get_output"); + Function getOutputFunc = graphExecutorModule.getFunction("get_output"); getOutputFunc.pushArg(OUTPUT_INDEX).pushArg(outputNdArray).invoke(); float[] output = outputNdArray.asFloatArray(); // release tvm local variables @@ -272,8 +272,8 @@ public void onActivityCreated(Bundle savedInstanceState) { @Override public void onDestroy() { // release tvm local variables - if (null != graphRuntimeModule) - graphRuntimeModule.release(); + if (null != graphExecutorModule) + graphExecutorModule.release(); super.onDestroy(); } @@ -516,7 +516,7 @@ private void setInputName(String modelName) { } /* - Load precompiled model on TVM graph runtime and init the system. + Load precompiled model on TVM graph executor and init the system. */ private class LoadModelAsyncTask extends AsyncTask { @@ -581,11 +581,11 @@ protected Integer doInBackground(Void... args) { Module modelLib = Module.load(libCacheFilePath); - // get global function module for graph runtime - Log.i(TAG, "getting graph runtime create handle..."); + // get global function module for graph executor + Log.i(TAG, "getting graph executor create handle..."); - Function runtimeCreFun = Function.getFunction("tvm.graph_runtime.create"); - Log.i(TAG, "creating graph runtime..."); + Function runtimeCreFun = Function.getFunction("tvm.graph_executor.create"); + Log.i(TAG, "creating graph executor..."); Log.i(TAG, "device type: " + tvmDev.deviceType); Log.i(TAG, "device id: " + tvmDev.deviceId); @@ -597,10 +597,10 @@ protected Integer doInBackground(Void... 
args) { .invoke(); Log.i(TAG, "as module..."); - graphRuntimeModule = runtimeCreFunRes.asModule(); - Log.i(TAG, "getting graph runtime load params handle..."); + graphExecutorModule = runtimeCreFunRes.asModule(); + Log.i(TAG, "getting graph executor load params handle..."); // get the function from the module(load parameters) - Function loadParamFunc = graphRuntimeModule.getFunction("load_params"); + Function loadParamFunc = graphExecutorModule.getFunction("load_params"); Log.i(TAG, "loading params..."); loadParamFunc.pushArg(modelParams).invoke(); // release tvm local variables diff --git a/apps/android_camera/app/src/main/jni/tvm_runtime.h b/apps/android_camera/app/src/main/jni/tvm_runtime.h index 406effa34aaa..f3c7efd08b5c 100644 --- a/apps/android_camera/app/src/main/jni/tvm_runtime.h +++ b/apps/android_camera/app/src/main/jni/tvm_runtime.h @@ -37,7 +37,7 @@ #include "../src/runtime/cpu_device_api.cc" #include "../src/runtime/dso_library.cc" #include "../src/runtime/file_utils.cc" -#include "../src/runtime/graph/graph_runtime.cc" +#include "../src/runtime/graph_executor/graph_executor.cc" #include "../src/runtime/library_module.cc" #include "../src/runtime/logging.cc" #include "../src/runtime/module.cc" diff --git a/apps/android_camera/models/prepare_model.py b/apps/android_camera/models/prepare_model.py index f155d46c31a4..d767b2ef88fc 100644 --- a/apps/android_camera/models/prepare_model.py +++ b/apps/android_camera/models/prepare_model.py @@ -25,7 +25,7 @@ import tvm import tvm.relay as relay -from tvm.contrib import utils, ndk, graph_runtime as runtime +from tvm.contrib import utils, ndk, graph_executor as runtime from tvm.contrib.download import download_testdata, download target = "llvm -mtriple=arm64-linux-android" diff --git a/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java b/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java index 38c135a1edc4..85cc7a277b4d 100644 --- a/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java +++ b/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java @@ -90,7 +90,7 @@ public class MainActivity extends AppCompatActivity { private ImageView mImageView; private TextView mResultView; private AssetManager assetManager; - private Module graphRuntimeModule; + private Module graphExecutorModule; private Vector labels = new Vector(); @Override @@ -119,7 +119,7 @@ public void onClick(View v) { } /* - Load precompiled model on TVM graph runtime and init the system. + Load precompiled model on TVM graph executor and init the system. */ private class LoadModleAsyncTask extends AsyncTask { ProgressDialog dialog = new ProgressDialog(MainActivity.this); @@ -183,17 +183,17 @@ protected Integer doInBackground(Void... 
args) { // tvm module for compiled functions Module modelLib = Module.load(libCacheFilePath); - // get global function module for graph runtime - Function runtimeCreFun = Function.getFunction("tvm.graph_runtime.create"); + // get global function module for graph executor + Function runtimeCreFun = Function.getFunction("tvm.graph_executor.create"); TVMValue runtimeCreFunRes = runtimeCreFun.pushArg(modelGraph) .pushArg(modelLib) .pushArg(tvmDev.deviceType) .pushArg(tvmDev.deviceId) .invoke(); - graphRuntimeModule = runtimeCreFunRes.asModule(); + graphExecutorModule = runtimeCreFunRes.asModule(); // get the function from the module(load parameters) - Function loadParamFunc = graphRuntimeModule.getFunction("load_params"); + Function loadParamFunc = graphExecutorModule.getFunction("load_params"); loadParamFunc.pushArg(modelParams).invoke(); // release tvm local variables @@ -224,14 +224,14 @@ protected void onPostExecute(Integer status) { } /* - Execute prediction for processed decode input bitmap image content on TVM graph runtime. + Execute prediction for processed decode input bitmap image content on TVM graph executor. */ private class ModelRunAsyncTask extends AsyncTask { ProgressDialog dialog = new ProgressDialog(MainActivity.this); @Override protected Integer doInBackground(Bitmap... bitmaps) { - if (null != graphRuntimeModule) { + if (null != graphExecutorModule) { int count = bitmaps.length; for (int i = 0 ; i < count ; i++) { long processingTimeMs = SystemClock.uptimeMillis(); @@ -283,7 +283,7 @@ protected Integer doInBackground(Bitmap... bitmaps) { Log.i(TAG, "set input data"); NDArray inputNdArray = NDArray.empty(new long[]{1, IMG_CHANNEL, MODEL_INPUT_SIZE, MODEL_INPUT_SIZE}, new TVMType("float32"));; inputNdArray.copyFrom(imgRgbTranValues); - Function setInputFunc = graphRuntimeModule.getFunction("set_input"); + Function setInputFunc = graphExecutorModule.getFunction("set_input"); setInputFunc.pushArg(INPUT_NAME).pushArg(inputNdArray).invoke(); // release tvm local variables inputNdArray.release(); @@ -291,7 +291,7 @@ protected Integer doInBackground(Bitmap... bitmaps) { // get the function from the module(run it) Log.i(TAG, "run function on target"); - Function runFunc = graphRuntimeModule.getFunction("run"); + Function runFunc = graphExecutorModule.getFunction("run"); runFunc.invoke(); // release tvm local variables runFunc.release(); @@ -299,7 +299,7 @@ protected Integer doInBackground(Bitmap... 
bitmaps) { // get the function from the module(get output data) Log.i(TAG, "get output data"); NDArray outputNdArray = NDArray.empty(new long[]{1, 1000}, new TVMType("float32")); - Function getOutputFunc = graphRuntimeModule.getFunction("get_output"); + Function getOutputFunc = graphExecutorModule.getFunction("get_output"); getOutputFunc.pushArg(OUTPUT_INDEX).pushArg(outputNdArray).invoke(); float[] output = outputNdArray.asFloatArray(); // release tvm local variables @@ -343,7 +343,7 @@ protected void onPostExecute(Integer status) { dialog.dismiss(); } if (status != 0) { - showDialog("Error", "Fail to predict image, GraphRuntime exception"); + showDialog("Error", "Fail to predict image, GraphExecutor exception"); } } } @@ -351,8 +351,8 @@ protected void onPostExecute(Integer status) { @Override protected void onDestroy() { // release tvm local variables - if (null != graphRuntimeModule) - graphRuntimeModule.release(); + if (null != graphExecutorModule) + graphExecutorModule.release(); super.onDestroy(); } diff --git a/apps/android_deploy/app/src/main/jni/tvm_runtime.h b/apps/android_deploy/app/src/main/jni/tvm_runtime.h index a89475440714..725b5e1d3b7a 100644 --- a/apps/android_deploy/app/src/main/jni/tvm_runtime.h +++ b/apps/android_deploy/app/src/main/jni/tvm_runtime.h @@ -32,7 +32,7 @@ #include "../src/runtime/cpu_device_api.cc" #include "../src/runtime/dso_library.cc" #include "../src/runtime/file_utils.cc" -#include "../src/runtime/graph/graph_runtime.cc" +#include "../src/runtime/graph_executor/graph_executor.cc" #include "../src/runtime/library_module.cc" #include "../src/runtime/logging.cc" #include "../src/runtime/module.cc" diff --git a/apps/android_rpc/app/src/main/jni/tvm_runtime.h b/apps/android_rpc/app/src/main/jni/tvm_runtime.h index 9503f02f39ef..5dcd823929ca 100644 --- a/apps/android_rpc/app/src/main/jni/tvm_runtime.h +++ b/apps/android_rpc/app/src/main/jni/tvm_runtime.h @@ -37,8 +37,8 @@ #include "../src/runtime/cpu_device_api.cc" #include "../src/runtime/dso_library.cc" #include "../src/runtime/file_utils.cc" -#include "../src/runtime/graph/graph_runtime.cc" -#include "../src/runtime/graph/graph_runtime_factory.cc" +#include "../src/runtime/graph_executor/graph_executor.cc" +#include "../src/runtime/graph_executor/graph_executor_factory.cc" #include "../src/runtime/library_module.cc" #include "../src/runtime/logging.cc" #include "../src/runtime/module.cc" diff --git a/apps/benchmark/arm_cpu_imagenet_bench.py b/apps/benchmark/arm_cpu_imagenet_bench.py index 915f2303b9ed..656735ec6c05 100644 --- a/apps/benchmark/arm_cpu_imagenet_bench.py +++ b/apps/benchmark/arm_cpu_imagenet_bench.py @@ -24,7 +24,7 @@ import tvm from tvm import te from tvm.contrib.utils import tempdir -import tvm.contrib.graph_runtime as runtime +import tvm.contrib.graph_executor as runtime from tvm import relay from util import get_network, print_progress diff --git a/apps/benchmark/gpu_imagenet_bench.py b/apps/benchmark/gpu_imagenet_bench.py index 6d91aff74fde..6407f766cb76 100644 --- a/apps/benchmark/gpu_imagenet_bench.py +++ b/apps/benchmark/gpu_imagenet_bench.py @@ -24,7 +24,7 @@ import tvm from tvm import te -import tvm.contrib.graph_runtime as runtime +import tvm.contrib.graph_executor as runtime from tvm import relay from util import get_network diff --git a/apps/benchmark/mobile_gpu_imagenet_bench.py b/apps/benchmark/mobile_gpu_imagenet_bench.py index 3144aee080dc..4eff259875ca 100644 --- a/apps/benchmark/mobile_gpu_imagenet_bench.py +++ b/apps/benchmark/mobile_gpu_imagenet_bench.py @@ -24,7 
+24,7 @@ import tvm from tvm import te from tvm.contrib.utils import tempdir -import tvm.contrib.graph_runtime as runtime +import tvm.contrib.graph_executor as runtime from tvm import relay from util import get_network, print_progress diff --git a/apps/bundle_deploy/Makefile b/apps/bundle_deploy/Makefile index 8e23a92afa93..b533030c4b82 100644 --- a/apps/bundle_deploy/Makefile +++ b/apps/bundle_deploy/Makefile @@ -84,8 +84,8 @@ test_static: $(build_dir)/test_static $(build_dir)/test_data_c.bin $(build_dir)/ $(build_dir)/crt/libcommon.a: $(CRT_SRCS) $(QUIET)cd $(CRT_ROOT) && make QUIET= BUILD_DIR=$(abspath $(build_dir))/crt CRT_CONFIG=$(abspath crt_config/crt_config.h) "EXTRA_CFLAGS=$(PKG_COMPILE_OPTS)" common -$(build_dir)/crt/libgraph_runtime.a: $(CRT_SRCS) - $(QUIET)cd $(CRT_ROOT) && make QUIET= BUILD_DIR=$(abspath $(build_dir))/crt CRT_CONFIG=$(abspath crt_config/crt_config.h) "EXTRA_CFLAGS=$(PKG_COMPILE_OPTS)" graph_runtime +$(build_dir)/crt/libgraph_executor.a: $(CRT_SRCS) + $(QUIET)cd $(CRT_ROOT) && make QUIET= BUILD_DIR=$(abspath $(build_dir))/crt CRT_CONFIG=$(abspath crt_config/crt_config.h) "EXTRA_CFLAGS=$(PKG_COMPILE_OPTS)" graph_executor $(build_dir)/crt/libmemory.a: $(CRT_SRCS) $(QUIET)cd $(CRT_ROOT) && make QUIET= BUILD_DIR=$(abspath $(build_dir))/crt CRT_CONFIG=$(abspath crt_config/crt_config.h) "EXTRA_CFLAGS=$(PKG_COMPILE_OPTS)" memory @@ -98,11 +98,11 @@ $(build_dir)/test_dynamic: test.cc ${build_dir}/test_graph_c.json ${build_dir}/t $(QUIET)mkdir -p $(@D) $(QUIET)g++ $(PKG_CXXFLAGS) -o $@ test.cc $(BACKTRACE_OBJS) $(BACKTRACE_LDFLAGS) -$(build_dir)/demo_static: demo_static.c ${build_dir}/bundle_static.o $(MODEL_OBJ) ${build_dir}/crt/libmemory.a ${build_dir}/crt/libgraph_runtime.a ${build_dir}/crt/libcommon.a ${build_dir}/graph_c.json.c ${build_dir}/params_c.bin.c $(BACKTRACE_OBJS) +$(build_dir)/demo_static: demo_static.c ${build_dir}/bundle_static.o $(MODEL_OBJ) ${build_dir}/crt/libmemory.a ${build_dir}/crt/libgraph_executor.a ${build_dir}/crt/libcommon.a ${build_dir}/graph_c.json.c ${build_dir}/params_c.bin.c $(BACKTRACE_OBJS) $(QUIET)mkdir -p $(@D) $(QUIET)gcc $(PKG_CFLAGS) -o $@ $^ $(PKG_LDFLAGS) $(BACKTRACE_LDFLAGS) $(BACKTRACE_CFLAGS) -$(build_dir)/test_static: test_static.c ${build_dir}/bundle_static.o $(TEST_MODEL_OBJ) ${build_dir}/crt/libmemory.a ${build_dir}/crt/libgraph_runtime.a ${build_dir}/crt/libcommon.a $(BACKTRACE_OBJS) +$(build_dir)/test_static: test_static.c ${build_dir}/bundle_static.o $(TEST_MODEL_OBJ) ${build_dir}/crt/libmemory.a ${build_dir}/crt/libgraph_executor.a ${build_dir}/crt/libcommon.a $(BACKTRACE_OBJS) $(QUIET)mkdir -p $(@D) $(QUIET)gcc $(PKG_CFLAGS) -o $@ $^ $(BACKTRACE_LDFLAGS) @@ -140,7 +140,7 @@ $(build_dir)/bundle.so: bundle.cc runtime.cc $(build_dir)/model_cpp.o $(QUIET)mkdir -p $(@D) $(QUIET)g++ -shared $(PKG_CXXFLAGS) -fvisibility=hidden -o $@ $^ $(PKG_LDFLAGS) -$(build_dir)/bundle_c.so: bundle.c $(MODEL_OBJ) ${build_dir}/crt/libmemory.a ${build_dir}/crt/libgraph_runtime.a ${build_dir}/crt/libcommon.a $(BACKTRACE_OBJS) +$(build_dir)/bundle_c.so: bundle.c $(MODEL_OBJ) ${build_dir}/crt/libmemory.a ${build_dir}/crt/libgraph_executor.a ${build_dir}/crt/libcommon.a $(BACKTRACE_OBJS) $(QUIET)mkdir -p $(@D) $(QUIET)gcc -shared $(PKG_CFLAGS) -fvisibility=hidden -o $@ $^ $(PKG_LDFLAGS) $(BACKTRACE_LDFLAGS) $(BACKTRACE_CFLAGS) @@ -148,7 +148,7 @@ $(build_dir)/test_bundle.so: bundle.cc runtime.cc $(build_dir)/test_model_cpp.o $(QUIET)mkdir -p $(@D) $(QUIET)g++ -shared $(PKG_CXXFLAGS) -fvisibility=hidden -o $@ $^ $(PKG_LDFLAGS) 
-$(build_dir)/test_bundle_c.so: bundle.c $(TEST_MODEL_OBJ) ${build_dir}/crt/libmemory.a ${build_dir}/crt/libgraph_runtime.a ${build_dir}/crt/libcommon.a $(BACKTRACE_OBJS) +$(build_dir)/test_bundle_c.so: bundle.c $(TEST_MODEL_OBJ) ${build_dir}/crt/libmemory.a ${build_dir}/crt/libgraph_executor.a ${build_dir}/crt/libcommon.a $(BACKTRACE_OBJS) $(QUIET)mkdir -p $(@D) $(QUIET)gcc -shared $(PKG_CFLAGS) -fvisibility=hidden -o $@ $^ $(PKG_LDFLAGS) $(BACKTRACE_LDFLAGS) $(BACKTRACE_CFLAGS) diff --git a/apps/bundle_deploy/README.md b/apps/bundle_deploy/README.md index a52d3a78f9c9..619a2d7d05cc 100644 --- a/apps/bundle_deploy/README.md +++ b/apps/bundle_deploy/README.md @@ -20,9 +20,9 @@ How to Bundle TVM Modules ========================= This folder contains an example on how to bundle a TVM module (with the required -interpreter runtime modules such as `runtime::GraphRuntime`, the graph JSON, and +interpreter runtime modules such as `runtime::GraphExecutor`, the graph JSON, and the params) into a single, self-contained shared object (`bundle.so`) which -exposes a C API wrapping the appropriate `runtime::GraphRuntime` instance. +exposes a C API wrapping the appropriate `runtime::GraphExecutor` instance. This is useful for cases where we'd like to avoid deploying the TVM runtime components to the target host in advance - instead, we simply deploy the bundled @@ -49,8 +49,8 @@ This will: - Build a `bundle.so` shared object containing the model specification and parameters - Build a `demo_dynamic` executable that `dlopen`'s `bundle.so` (or `bundle_c.so` in - terms of the MISRA-C runtime), instantiates the contained graph runtime, - and invokes the `GraphRuntime::Run` function on a cat image, then prints + terms of the MISRA-C runtime), instantiates the contained graph executor, + and invokes the `GraphExecutor::Run` function on a cat image, then prints the output results. Type the following command to run the sample code with static linking. 
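For comparison with the bundled flow this README describes, the non-bundled deployment path keeps the full TVM runtime on the host and drives the compiled module from Python through `tvm.contrib.graph_executor` (the module this patch renames). The sketch below is illustrative only: the library name `compiled_model.so`, the input name `data`, and the shapes are placeholders and are not part of this patch.

```python
# Minimal sketch of the non-bundled flow, assuming a model already compiled
# and exported to "compiled_model.so" with a float32 input named "data".
import numpy as np
import tvm
from tvm.contrib import graph_executor  # formerly tvm.contrib.graph_runtime

dev = tvm.cpu(0)
loaded_lib = tvm.runtime.load_module("compiled_model.so")   # placeholder path
gen_module = graph_executor.GraphModule(loaded_lib["default"](dev))

data = np.random.uniform(0, 1, (1, 3, 224, 224)).astype("float32")  # placeholder shape
gen_module.set_input("data", data)
gen_module.run()
out = gen_module.get_output(0).asnumpy()
```

The bundle approach in this directory performs the same create/set_input/run/get_output sequence, but through the C entry points exported by `bundle.c` (shown in the next file) instead of the Python API.
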
diff --git a/apps/bundle_deploy/bundle.c b/apps/bundle_deploy/bundle.c index 84740aa25130..9083f7b5f48b 100644 --- a/apps/bundle_deploy/bundle.c +++ b/apps/bundle_deploy/bundle.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include @@ -75,30 +75,30 @@ TVM_DLL void* tvm_runtime_create(const char* json_data, const char* params_data, TVMModuleHandle mod_syslib = TVMArgs_AsModuleHandle(&pf.ret_value, 0); // run modules - TVMGraphRuntime* graph_runtime = NULL; - TVM_CCALL(TVMGraphRuntime_Create(json_data, mod_syslib, &dev, &graph_runtime)); - TVM_CCALL(TVMGraphRuntime_LoadParams(graph_runtime, params.data, params.size)); + TVMGraphExecutor* graph_executor = NULL; + TVM_CCALL(TVMGraphExecutor_Create(json_data, mod_syslib, &dev, &graph_executor)); + TVM_CCALL(TVMGraphExecutor_LoadParams(graph_executor, params.data, params.size)); - return graph_runtime; + return graph_executor; } -TVM_DLL void tvm_runtime_destroy(void* runtime) { - TVMGraphRuntime_Release((TVMGraphRuntime**)&runtime); +TVM_DLL void tvm_runtime_destroy(void* executor) { + TVMGraphExecutor_Release((TVMGraphExecutor**)&executor); } -TVM_DLL void tvm_runtime_set_input(void* runtime, const char* name, DLTensor* tensor) { - TVMGraphRuntime* graph_runtime = (TVMGraphRuntime*)runtime; - TVMGraphRuntime_SetInput(graph_runtime, name, tensor); +TVM_DLL void tvm_runtime_set_input(void* executor, const char* name, DLTensor* tensor) { + TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)executor; + TVMGraphExecutor_SetInput(graph_executor, name, tensor); } -TVM_DLL void tvm_runtime_run(void* runtime) { - TVMGraphRuntime* graph_runtime = (TVMGraphRuntime*)runtime; - TVMGraphRuntime_Run(graph_runtime); +TVM_DLL void tvm_runtime_run(void* executor) { + TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)executor; + TVMGraphExecutor_Run(graph_executor); } -TVM_DLL void tvm_runtime_get_output(void* runtime, int32_t index, DLTensor* tensor) { - TVMGraphRuntime* graph_runtime = (TVMGraphRuntime*)runtime; - TVMGraphRuntime_GetOutput(graph_runtime, index, tensor); +TVM_DLL void tvm_runtime_get_output(void* executor, int32_t index, DLTensor* tensor) { + TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)executor; + TVMGraphExecutor_GetOutput(graph_executor, index, tensor); } void TVMLogf(const char* msg, ...) 
{ diff --git a/apps/bundle_deploy/bundle.cc b/apps/bundle_deploy/bundle.cc index e3cc7d1730ce..435d0e41f3db 100644 --- a/apps/bundle_deploy/bundle.cc +++ b/apps/bundle_deploy/bundle.cc @@ -35,7 +35,7 @@ TVM_BUNDLE_FUNCTION void* tvm_runtime_create(const char* build_graph_json, int device_type = kDLCPU; int device_id = 0; - tvm::runtime::Module mod = (*tvm::runtime::Registry::Get("tvm.graph_runtime.create"))( + tvm::runtime::Module mod = (*tvm::runtime::Registry::Get("tvm.graph_executor.create"))( json_data, mod_syslib, device_type, device_id); TVMByteArray params; params.data = reinterpret_cast(&build_params_bin[0]); diff --git a/apps/bundle_deploy/bundle_static.c b/apps/bundle_deploy/bundle_static.c index ca75b9e0b2e3..62e63d6b4fe2 100644 --- a/apps/bundle_deploy/bundle_static.c +++ b/apps/bundle_deploy/bundle_static.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include @@ -75,31 +75,31 @@ TVM_DLL void* tvm_runtime_create(const char* json_data, const char* params_data, TVMModuleHandle mod_syslib = TVMArgs_AsModuleHandle(&pf.ret_value, 0); // run modules - TVMGraphRuntime* graph_runtime = NULL; - TVM_CCALL(TVMGraphRuntime_Create(json_data, mod_syslib, &dev, &graph_runtime)); - TVM_CCALL(TVMGraphRuntime_LoadParams(graph_runtime, params.data, params.size)); + TVMGraphExecutor* graph_executor = NULL; + TVM_CCALL(TVMGraphExecutor_Create(json_data, mod_syslib, &dev, &graph_executor)); + TVM_CCALL(TVMGraphExecutor_LoadParams(graph_executor, params.data, params.size)); - return graph_runtime; + return graph_executor; } -TVM_DLL void tvm_runtime_destroy(void* runtime) { - TVMGraphRuntime* graph_runtime = (TVMGraphRuntime*)runtime; - TVMGraphRuntime_Release(&graph_runtime); +TVM_DLL void tvm_runtime_destroy(void* executor) { + TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)executor; + TVMGraphExecutor_Release(&graph_executor); } -TVM_DLL void tvm_runtime_set_input(void* runtime, const char* name, DLTensor* tensor) { - TVMGraphRuntime* graph_runtime = (TVMGraphRuntime*)runtime; - TVMGraphRuntime_SetInput(graph_runtime, name, tensor); +TVM_DLL void tvm_runtime_set_input(void* executor, const char* name, DLTensor* tensor) { + TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)executor; + TVMGraphExecutor_SetInput(graph_executor, name, tensor); } -TVM_DLL void tvm_runtime_run(void* runtime) { - TVMGraphRuntime* graph_runtime = (TVMGraphRuntime*)runtime; - TVMGraphRuntime_Run(graph_runtime); +TVM_DLL void tvm_runtime_run(void* executor) { + TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)executor; + TVMGraphExecutor_Run(graph_executor); } -TVM_DLL void tvm_runtime_get_output(void* runtime, int32_t index, DLTensor* tensor) { - TVMGraphRuntime* graph_runtime = (TVMGraphRuntime*)runtime; - TVMGraphRuntime_GetOutput(graph_runtime, index, tensor); +TVM_DLL void tvm_runtime_get_output(void* executor, int32_t index, DLTensor* tensor) { + TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)executor; + TVMGraphExecutor_GetOutput(graph_executor, index, tensor); } void TVMLogf(const char* msg, ...) 
{ diff --git a/apps/bundle_deploy/runtime.cc b/apps/bundle_deploy/runtime.cc index 2f7e3848b4bf..7a2573b643f5 100644 --- a/apps/bundle_deploy/runtime.cc +++ b/apps/bundle_deploy/runtime.cc @@ -26,7 +26,7 @@ #include "../../src/runtime/container.cc" #include "../../src/runtime/cpu_device_api.cc" #include "../../src/runtime/file_utils.cc" -#include "../../src/runtime/graph/graph_runtime.cc" +#include "../../src/runtime/graph_executor/graph_executor.cc" #include "../../src/runtime/library_module.cc" #include "../../src/runtime/module.cc" #include "../../src/runtime/ndarray.cc" diff --git a/apps/howto_deploy/cpp_deploy.cc b/apps/howto_deploy/cpp_deploy.cc index f58648c2fb7a..8500ddb5fabe 100644 --- a/apps/howto_deploy/cpp_deploy.cc +++ b/apps/howto_deploy/cpp_deploy.cc @@ -83,12 +83,12 @@ void DeploySingleOp() { Verify(mod_syslib, "addonesys"); } -void DeployGraphRuntime() { - LOG(INFO) << "Running graph runtime..."; +void DeployGraphExecutor() { + LOG(INFO) << "Running graph executor..."; // load in the library DLDevice dev{kDLCPU, 0}; tvm::runtime::Module mod_factory = tvm::runtime::Module::LoadFromFile("lib/test_relay_add.so"); - // create the graph runtime module + // create the graph executor module tvm::runtime::Module gmod = mod_factory.GetFunction("default")(dev); tvm::runtime::PackedFunc set_input = gmod.GetFunction("set_input"); tvm::runtime::PackedFunc get_output = gmod.GetFunction("get_output"); @@ -119,6 +119,6 @@ void DeployGraphRuntime() { int main(void) { DeploySingleOp(); - DeployGraphRuntime(); + DeployGraphExecutor(); return 0; } diff --git a/apps/howto_deploy/tvm_runtime_pack.cc b/apps/howto_deploy/tvm_runtime_pack.cc index d6dd5876a994..c8778a380233 100644 --- a/apps/howto_deploy/tvm_runtime_pack.cc +++ b/apps/howto_deploy/tvm_runtime_pack.cc @@ -58,9 +58,9 @@ #include "../../src/runtime/dso_library.cc" #include "../../src/runtime/system_library.cc" -// Graph runtime -#include "../../src/runtime/graph/graph_runtime.cc" -#include "../../src/runtime/graph/graph_runtime_factory.cc" +// Graph executor +#include "../../src/runtime/graph_executor/graph_executor.cc" +#include "../../src/runtime/graph_executor/graph_executor_factory.cc" // Uncomment the following lines to enable RPC // #include "../../src/runtime/rpc/rpc_session.cc" diff --git a/apps/ios_rpc/tests/ios_rpc_mobilenet.py b/apps/ios_rpc/tests/ios_rpc_mobilenet.py index 50468d301134..ee6ab5fd8363 100644 --- a/apps/ios_rpc/tests/ios_rpc_mobilenet.py +++ b/apps/ios_rpc/tests/ios_rpc_mobilenet.py @@ -22,7 +22,7 @@ from tvm.relay import transform from tvm.relay.op.annotation import compiler_begin, compiler_end from tvm.relay.quantize.quantize import prerequisite_optimize -from tvm.contrib import utils, xcode, graph_runtime, coreml_runtime +from tvm.contrib import utils, xcode, graph_executor, coreml_runtime from tvm.contrib.target import coreml as _coreml import os @@ -120,7 +120,7 @@ def run(mod, target): else: dev = remote.cpu(0) lib = remote.load_module("deploy.dylib") - m = graph_runtime.GraphModule(lib["default"](dev)) + m = graph_executor.GraphModule(lib["default"](dev)) m.set_input("data", tvm.nd.array(image, dev)) m.run() diff --git a/apps/ios_rpc/tvmrpc/TVMRuntime.mm b/apps/ios_rpc/tvmrpc/TVMRuntime.mm index 87cb6f9b4c69..7ab9a4d2d219 100644 --- a/apps/ios_rpc/tvmrpc/TVMRuntime.mm +++ b/apps/ios_rpc/tvmrpc/TVMRuntime.mm @@ -45,8 +45,8 @@ #include "../../../src/runtime/rpc/rpc_server_env.cc" #include "../../../src/runtime/rpc/rpc_session.cc" #include "../../../src/runtime/rpc/rpc_socket_impl.cc" -// Graph runtime 
-#include "../../../src/runtime/graph/graph_runtime.cc" +// Graph executor +#include "../../../src/runtime/graph_executor/graph_executor.cc" // Metal #include "../../../src/runtime/metal/metal_device_api.mm" #include "../../../src/runtime/metal/metal_module.mm" diff --git a/apps/microtvm/reference-vm/zephyr/rebuild-tvm.sh b/apps/microtvm/reference-vm/zephyr/rebuild-tvm.sh index 4672012e73f2..2eb55e385520 100755 --- a/apps/microtvm/reference-vm/zephyr/rebuild-tvm.sh +++ b/apps/microtvm/reference-vm/zephyr/rebuild-tvm.sh @@ -28,7 +28,7 @@ fi cp cmake/config.cmake "${BUILD_DIR}" cd "${BUILD_DIR}" sed -i 's/USE_MICRO OFF/USE_MICRO ON/' config.cmake -sed -i 's/USE_GRAPH_RUNTIME_DEBUG OFF/USE_GRAPH_RUNTIME_DEBUG ON/' config.cmake +sed -i 's/USE_GRAPH_EXECUTOR_DEBUG OFF/USE_GRAPH_EXECUTOR_DEBUG ON/' config.cmake sed -i 's/USE_LLVM OFF/USE_LLVM ON/' config.cmake cmake .. rm -rf standalone_crt host_standalone_crt # remove stale generated files diff --git a/cmake/config.cmake b/cmake/config.cmake index 98d1d9780103..7b29df648ac7 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -96,13 +96,13 @@ set(USE_CPP_RPC OFF) # Whether embed stackvm into the runtime set(USE_STACKVM_RUNTIME OFF) -# Whether enable tiny embedded graph runtime. -set(USE_GRAPH_RUNTIME ON) +# Whether enable tiny embedded graph executor. +set(USE_GRAPH_EXECUTOR ON) -# Whether enable tiny graph runtime with CUDA Graph -set(USE_GRAPH_RUNTIME_CUDA_GRAPH OFF) +# Whether enable tiny graph executor with CUDA Graph +set(USE_GRAPH_EXECUTOR_CUDA_GRAPH OFF) -# Whether to enable the profiler for the graph runtime and vm +# Whether to enable the profiler for the graph executor and vm set(USE_PROFILER ON) # Whether enable uTVM standalone runtime @@ -207,10 +207,10 @@ set(USE_DNNL_CODEGEN OFF) # # USE_ARM_COMPUTE_LIB - Support for compiling a relay graph offloading supported # operators to Arm Compute Library. OFF/ON -# USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME - Run Arm Compute Library annotated functions via the ACL +# USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR - Run Arm Compute Library annotated functions via the ACL # runtime. 
OFF/ON/"path/to/ACL" set(USE_ARM_COMPUTE_LIB OFF) -set(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME OFF) +set(USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR OFF) # Whether to build with Arm Ethos-N support # Possible values: diff --git a/cmake/modules/CUDA.cmake b/cmake/modules/CUDA.cmake index 262a4e6e7123..1bdc5036f857 100644 --- a/cmake/modules/CUDA.cmake +++ b/cmake/modules/CUDA.cmake @@ -65,15 +65,15 @@ if(USE_CUDA) list(APPEND RUNTIME_SRCS ${CONTRIB_THRUST_SRC}) endif(USE_THRUST) - if(USE_GRAPH_RUNTIME_CUDA_GRAPH) - if(NOT USE_GRAPH_RUNTIME) - message(FATAL_ERROR "CUDA Graph is only supported by graph runtime, please set USE_GRAPH_RUNTIME=ON") + if(USE_GRAPH_EXECUTOR_CUDA_GRAPH) + if(NOT USE_GRAPH_EXECUTOR) + message(FATAL_ERROR "CUDA Graph is only supported by graph executor, please set USE_GRAPH_EXECUTOR=ON") endif() if(CUDAToolkit_VERSION_MAJOR LESS "10") message(FATAL_ERROR "CUDA Graph requires CUDA 10 or above, got=" ${CUDAToolkit_VERSION}) endif() - message(STATUS "Build with Graph runtime with CUDA Graph support...") - file(GLOB RUNTIME_CUDA_GRAPH_SRCS src/runtime/graph/cuda_graph/*.cc) + message(STATUS "Build with Graph executor with CUDA Graph support...") + file(GLOB RUNTIME_CUDA_GRAPH_SRCS src/runtime/graph_executor/cuda_graph/*.cc) list(APPEND RUNTIME_SRCS ${RUNTIME_CUDA_GRAPH_SRCS}) endif() else(USE_CUDA) diff --git a/cmake/modules/LibInfo.cmake b/cmake/modules/LibInfo.cmake index 131dceeb345d..2a69d06970a8 100644 --- a/cmake/modules/LibInfo.cmake +++ b/cmake/modules/LibInfo.cmake @@ -42,8 +42,8 @@ function(add_lib_info src_file) TVM_INFO_USE_LLVM="${USE_LLVM}" TVM_INFO_LLVM_VERSION="${TVM_INFO_LLVM_VERSION}" TVM_INFO_USE_STACKVM_RUNTIME="${USE_STACKVM_RUNTIME}" - TVM_INFO_USE_GRAPH_RUNTIME="${USE_GRAPH_RUNTIME}" - TVM_INFO_USE_GRAPH_RUNTIME_DEBUG="${USE_GRAPH_RUNTIME_DEBUG}" + TVM_INFO_USE_GRAPH_EXECUTOR="${USE_GRAPH_EXECUTOR}" + TVM_INFO_USE_GRAPH_EXECUTOR_DEBUG="${USE_GRAPH_EXECUTOR_DEBUG}" TVM_INFO_USE_OPENMP="${USE_OPENMP}" TVM_INFO_USE_RELAY_DEBUG="${USE_RELAY_DEBUG}" TVM_INFO_USE_RTTI="${USE_RTTI}" @@ -73,7 +73,7 @@ function(add_lib_info src_file) TVM_INFO_USE_COREML="${USE_COREML}" TVM_INFO_USE_TARGET_ONNX="${USE_TARGET_ONNX}" TVM_INFO_USE_ARM_COMPUTE_LIB="${USE_ARM_COMPUTE_LIB}" - TVM_INFO_USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME="${USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME}" + TVM_INFO_USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR="${USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR}" TVM_INFO_INDEX_DEFAULT_I64="${INDEX_DEFAULT_I64}" TVM_CXX_COMPILER_PATH="${CMAKE_CXX_COMPILER}" ) diff --git a/cmake/modules/StandaloneCrt.cmake b/cmake/modules/StandaloneCrt.cmake index dc1b3b2665f2..fe6baf81c3e5 100644 --- a/cmake/modules/StandaloneCrt.cmake +++ b/cmake/modules/StandaloneCrt.cmake @@ -43,8 +43,8 @@ if(USE_MICRO) "src/runtime/crt Makefile -> ." "src/runtime/crt/include *.h -> include" "src/runtime/crt/common *.c -> src/runtime/crt/common" - "src/runtime/crt/graph_runtime *.c -> src/runtime/crt/graph_runtime" - "src/runtime/crt/graph_runtime_module *.c -> src/runtime/crt/graph_runtime_module" + "src/runtime/crt/graph_executor *.c -> src/runtime/crt/graph_executor" + "src/runtime/crt/graph_executor_module *.c -> src/runtime/crt/graph_executor_module" "src/runtime/crt/host crt_config.h -> template/host" "src/runtime/crt/host *.cc -> template/host" "src/runtime/crt/memory *.c -> src/runtime/crt/memory" @@ -97,7 +97,7 @@ if(USE_MICRO) set(make_quiet ) endif(${VERBOSE}) - list(APPEND crt_libraries memory graph_runtime utvm_rpc_server utvm_rpc_common common) # NOTE: listed in link order. 
+ list(APPEND crt_libraries memory graph_executor utvm_rpc_server utvm_rpc_common common) # NOTE: listed in link order. foreach(crt_lib_name IN LISTS crt_libraries) list(APPEND crt_library_paths "host_standalone_crt/lib${crt_lib_name}.a") endforeach() diff --git a/cmake/modules/contrib/ArmComputeLib.cmake b/cmake/modules/contrib/ArmComputeLib.cmake index ba082505125b..54ce917dfb50 100644 --- a/cmake/modules/contrib/ArmComputeLib.cmake +++ b/cmake/modules/contrib/ArmComputeLib.cmake @@ -23,17 +23,25 @@ if(USE_ARM_COMPUTE_LIB) file(GLOB ACL_RELAY_CONTRIB_SRC src/relay/backend/contrib/arm_compute_lib/*.cc) file(GLOB ACL_RUNTIME_MODULE src/runtime/contrib/arm_compute_lib/acl_runtime.cc) list(APPEND COMPILER_SRCS ${ACL_RELAY_CONTRIB_SRC}) - if(NOT USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME) + + if(NOT USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR) list(APPEND COMPILER_SRCS ${ACL_RUNTIME_MODULE}) endif() message(STATUS "Build with Arm Compute Library support...") endif() -if(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME) +if(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME AND NOT DEFINED USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR) + message(WARNING "USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME renamed to USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR. " + "Please update your config.cmake") + set(USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR ${USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME}) + unset(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME CACHE) +endif(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME AND NOT DEFINED USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR) + +if(USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR) set(ACL_PATH ${CMAKE_CURRENT_SOURCE_DIR}/acl) # Detect custom ACL path. - if (NOT USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME STREQUAL "ON") - set(ACL_PATH ${USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME}) + if (NOT USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR STREQUAL "ON") + set(ACL_PATH ${USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR}) endif() file(GLOB ACL_CONTRIB_SRC src/runtime/contrib/arm_compute_lib/*) @@ -60,11 +68,11 @@ if(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME) list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_ACL_COMPUTE_CORE_LIB}) list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_ACL_COMPUTE_GRAPH_LIB}) list(APPEND RUNTIME_SRCS ${ACL_CONTRIB_SRC}) - message(STATUS "Build with Arm Compute Library graph runtime support: " + message(STATUS "Build with Arm Compute Library graph executor support: " ${EXTERN_ACL_COMPUTE_LIB} ", \n" ${EXTERN_ACL_COMPUTE_CORE_LIB} ", \n" ${EXTERN_ACL_COMPUTE_GRAPH_LIB}) - # Set flag to detect ACL graph runtime support. - add_definitions(-DTVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB) + # Set flag to detect ACL graph executor support. 
+ add_definitions(-DTVM_GRAPH_EXECUTOR_ARM_COMPUTE_LIB) endif() diff --git a/cmake/modules/contrib/TensorRT.cmake b/cmake/modules/contrib/TensorRT.cmake index 0c7e43c0fcf8..218f0b2e20fe 100644 --- a/cmake/modules/contrib/TensorRT.cmake +++ b/cmake/modules/contrib/TensorRT.cmake @@ -55,5 +55,5 @@ if(USE_TENSORRT_RUNTIME) list(APPEND RUNTIME_SRCS ${RUNTIME_TENSORRT_SRCS}) # Set defines - add_definitions(-DTVM_GRAPH_RUNTIME_TENSORRT) + add_definitions(-DTVM_GRAPH_EXECUTOR_TENSORRT) endif() diff --git a/conda/recipe/bld.bat b/conda/recipe/bld.bat index 9fc0469febc6..e877b8fda1e1 100644 --- a/conda/recipe/bld.bat +++ b/conda/recipe/bld.bat @@ -28,7 +28,7 @@ cmake ^ -DUSE_CPP_RPC=ON ^ -DUSE_SORT=ON ^ -DUSE_RANDOM=ON ^ - -DUSE_GRAPH_RUNTIME_DEBUG=ON ^ + -DUSE_GRAPH_EXECUTOR_DEBUG=ON ^ -DINSTALL_DEV=ON ^ %SRC_DIR% diff --git a/conda/recipe/build.sh b/conda/recipe/build.sh index 828e3c39488a..a94b9df72440 100755 --- a/conda/recipe/build.sh +++ b/conda/recipe/build.sh @@ -49,7 +49,7 @@ cmake -DCMAKE_INSTALL_PREFIX="${PREFIX}" \ -DUSE_CPP_RPC=OFF \ -DUSE_SORT=ON \ -DUSE_RANDOM=ON \ - -DUSE_GRAPH_RUNTIME_DEBUG=ON \ + -DUSE_GRAPH_EXECUTOR_DEBUG=ON \ -DUSE_LLVM=ON \ -DINSTALL_DEV=ON \ -DUSE_LIBBACKTRACE=AUTO \ diff --git a/docker/Dockerfile.demo_android b/docker/Dockerfile.demo_android index 039439a937e9..f56f56728e70 100644 --- a/docker/Dockerfile.demo_android +++ b/docker/Dockerfile.demo_android @@ -61,7 +61,7 @@ RUN cd /usr && \ -DUSE_LLVM=llvm-config-8 \ -DUSE_RPC=ON \ -DUSE_SORT=ON \ - -DUSE_GRAPH_RUNTIME=ON \ + -DUSE_GRAPH_EXECUTOR=ON \ -DUSE_VULKAN=ON \ .. && \ make -j10 diff --git a/docker/install/install_tvm_cpu.sh b/docker/install/install_tvm_cpu.sh index c3a15fa26b6d..48e6df3597db 100755 --- a/docker/install/install_tvm_cpu.sh +++ b/docker/install/install_tvm_cpu.sh @@ -27,7 +27,7 @@ cd /usr/tvm git checkout 4b13bf668edc7099b38d463e5db94ebc96c80470 echo set\(USE_LLVM llvm-config-8\) >> config.cmake -echo set\(USE_GRAPH_RUNTIME ON\) >> config.cmake +echo set\(USE_GRAPH_EXECUTOR ON\) >> config.cmake echo set\(USE_BLAS openblas\) >> config.cmake mkdir -p build cd build diff --git a/docs/api/python/graph_runtime.rst b/docs/api/python/graph_executor.rst similarity index 92% rename from docs/api/python/graph_runtime.rst rename to docs/api/python/graph_executor.rst index d82c7ce00e2e..3f8811553ba4 100644 --- a/docs/api/python/graph_runtime.rst +++ b/docs/api/python/graph_executor.rst @@ -15,7 +15,7 @@ specific language governing permissions and limitations under the License. -tvm.contrib.graph_runtime +tvm.contrib.graph_executor ------------------------- -.. automodule:: tvm.contrib.graph_runtime +.. automodule:: tvm.contrib.graph_executor :members: diff --git a/docs/api/python/index.rst b/docs/api/python/index.rst index a6179684413d..76322a1acfe2 100644 --- a/docs/api/python/index.rst +++ b/docs/api/python/index.rst @@ -44,6 +44,6 @@ Python API rpc micro contrib - graph_runtime + graph_executor topi vta/index diff --git a/docs/api/python/relay/backend.rst b/docs/api/python/relay/backend.rst index c30f226e8437..ffe8a9a8ce79 100644 --- a/docs/api/python/relay/backend.rst +++ b/docs/api/python/relay/backend.rst @@ -26,7 +26,7 @@ tvm.relay.backend .. automodule:: tvm.relay.backend.compile_engine :members: -.. automodule:: tvm.relay.backend.graph_runtime_codegen +.. automodule:: tvm.relay.backend.graph_executor_codegen :members: .. 
automodule:: tvm.relay.backend.vm diff --git a/docs/deploy/arm_compute_lib.rst b/docs/deploy/arm_compute_lib.rst index 10a0d51e4f91..4e43682a240a 100644 --- a/docs/deploy/arm_compute_lib.rst +++ b/docs/deploy/arm_compute_lib.rst @@ -52,7 +52,7 @@ We recommend two different ways to build and install ACL: mv ./linux--neon/* . -In both cases you will need to set USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME to the path where the ACL package +In both cases you will need to set USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR to the path where the ACL package is located. Cmake will look in /path-to-acl/ along with /path-to-acl/lib and /path-to-acl/build for the required binaries. See the section below for more information on how to use these configuration options. @@ -64,15 +64,15 @@ because ACL cannot be used on an x86 machine. However, we still want to be able runtime module on an x86 machine. * USE_ARM_COMPUTE_LIB=ON/OFF - Enabling this flag will add support for compiling an ACL runtime module. -* USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME=ON/OFF/path-to-acl - Enabling this flag will allow the graph runtime to +* USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR=ON/OFF/path-to-acl - Enabling this flag will allow the graph executor to compute the ACL offloaded functions. These flags can be used in different scenarios depending on your setup. For example, if you want to compile an ACL module on an x86 machine and then run the module on a remote Arm device via RPC, you will -need to use USE_ARM_COMPUTE_LIB=ON on the x86 machine and USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME=ON on the remote +need to use USE_ARM_COMPUTE_LIB=ON on the x86 machine and USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR=ON on the remote AArch64 device. -By default both options are set to OFF. Using USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME=ON will mean that ACL +By default both options are set to OFF. Using USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR=ON will mean that ACL binaries are searched for by cmake in the default locations (see https://cmake.org/cmake/help/v3.4/command/find_library.html). In addition to this, /path-to-tvm-project/acl/ will also be searched. It is likely that you will need to set your own path to @@ -83,7 +83,7 @@ These flags should be set in your config.cmake file. For example: .. 
code:: cmake set(USE_ARM_COMPUTE_LIB ON) - set(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME /path/to/acl) + set(USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR /path/to/acl) Usage @@ -150,7 +150,7 @@ https://tvm.apache.org/docs/tutorials/get_started/cross_compilation_and_rpc.html dev = tvm.cpu(0) loaded_lib = tvm.runtime.load_module('lib_acl.so') - gen_module = tvm.contrib.graph_runtime.GraphModule(loaded_lib['default'](dev)) + gen_module = tvm.contrib.graph_executor.GraphModule(loaded_lib['default'](dev)) d_data = np.random.uniform(0, 1, data_shape).astype(data_type) map_inputs = {'data': d_data} gen_module.set_input(**map_inputs) diff --git a/docs/deploy/bnns.rst b/docs/deploy/bnns.rst index 6e20f3c2cdbb..7b62fb15a617 100644 --- a/docs/deploy/bnns.rst +++ b/docs/deploy/bnns.rst @@ -145,11 +145,11 @@ Load module and run inference on the target machine with TVM built with ``USE_B import tvm import numpy as np - from tvm.contrib import graph_runtime + from tvm.contrib import graph_executor dev = tvm.cpu(0) loaded_lib = tvm.runtime.load_module('compiled.dylib') - gen_module = tvm.contrib.graph_runtime.GraphModule(loaded_lib['default'](dev)) + gen_module = tvm.contrib.graph_executor.GraphModule(loaded_lib['default'](dev)) dtype = "float32" input_shape = (1, 3, 224, 224) diff --git a/docs/deploy/tensorrt.rst b/docs/deploy/tensorrt.rst index 0732a32c01bf..308db4933ae8 100644 --- a/docs/deploy/tensorrt.rst +++ b/docs/deploy/tensorrt.rst @@ -126,7 +126,7 @@ have to be built. dev = tvm.gpu(0) loaded_lib = tvm.runtime.load_module('compiled.so') - gen_module = tvm.contrib.graph_runtime.GraphModule(loaded_lib['default'](dev)) + gen_module = tvm.contrib.graph_executor.GraphModule(loaded_lib['default'](dev)) input_data = np.random.uniform(0, 1, input_shape).astype(dtype) gen_module.run(data=input_data) diff --git a/docs/deploy/vitis_ai.rst b/docs/deploy/vitis_ai.rst index fc887dae968c..1ce89ebed9c2 100755 --- a/docs/deploy/vitis_ai.rst +++ b/docs/deploy/vitis_ai.rst @@ -449,7 +449,7 @@ TVM. import tvm import tvm.relay as relay from tvm.contrib.target import vitis_ai - from tvm.contrib import utils, graph_runtime + from tvm.contrib import utils, graph_executor from tvm.relay.build_module import bind_params_by_name from tvm.relay.op.contrib.vitis_ai import annotation @@ -490,7 +490,7 @@ will take a substantial amount of time. .. code:: python - module = graph_runtime.GraphModule(lib["default"](tvm.cpu())) + module = graph_executor.GraphModule(lib["default"](tvm.cpu())) # First N (default = 128) inputs are used for quantization calibration and will # be executed on the CPU @@ -520,7 +520,7 @@ Load the module from compiled files and run inference # load the module into memory loaded_lib = tvm.runtime.load_module(lib_path) - module = graph_runtime.GraphModule(lib["default"](tvm.cpu())) + module = graph_executor.GraphModule(lib["default"](tvm.cpu())) module.set_input(name, data) module.run() @@ -551,7 +551,7 @@ TVM. import tvm import tvm.relay as relay from tvm.contrib.target import vitis_ai - from tvm.contrib import utils, graph_runtime + from tvm.contrib import utils, graph_executor from tvm.relay.build_module import bind_params_by_name from tvm.relay.op.contrib.vitis_ai import annotation @@ -631,7 +631,7 @@ quantization on the host machine. This makes use of TVM inference calls .. 
code:: python - module = graph_runtime.GraphModule(lib["default"](tvm.cpu())) + module = graph_executor.GraphModule(lib["default"](tvm.cpu())) # First N (default = 128) inputs are used for quantization calibration and will # be executed on the CPU @@ -694,7 +694,7 @@ as root (execute ``su`` in terminal to log into root). import pyxir import tvm - from tvm.contrib import graph_runtime + from tvm.contrib import graph_executor dev = tvm.cpu() @@ -704,6 +704,6 @@ as root (execute ``su`` in terminal to log into root). # load the module into memory lib = tvm.runtime.load_module("tvm_dpu_arm.so") - module = graph_runtime.GraphModule(lib["default"](dev)) + module = graph_executor.GraphModule(lib["default"](dev)) module.set_input(input_name, input_data) module.run() diff --git a/docs/dev/debugger.rst b/docs/dev/debugger.rst index 509cfd306a4a..f1bd004717b4 100644 --- a/docs/dev/debugger.rst +++ b/docs/dev/debugger.rst @@ -123,24 +123,24 @@ Example of loading the parameters How to use Debugger? *************************************** -1. In ``config.cmake`` set the ``USE_GRAPH_RUNTIME_DEBUG`` flag to ``ON`` +1. In ``config.cmake`` set the ``USE_GRAPH_EXECUTOR_DEBUG`` flag to ``ON`` :: # Whether enable additional graph debug functions - set(USE_GRAPH_RUNTIME_DEBUG ON) + set(USE_GRAPH_EXECUTOR_DEBUG ON) 2. Do 'make' tvm, so that it will make the ``libtvm_runtime.so`` 3. In frontend script file instead of - ``from tvm.contrib import graph_runtime`` import the - ``debug_runtime`` - ``from tvm.contrib.debugger import debug_runtime as graph_runtime`` + ``from tvm.contrib import graph_executor`` import the + ``debug_executor`` + ``from tvm.contrib.debugger import debug_executor as graph_executor`` :: - from tvm.contrib.debugger import debug_runtime as graph_runtime - m = graph_runtime.create(graph, lib, dev, dump_root="/tmp/tvmdbg") + from tvm.contrib.debugger import debug_executor as graph_executor + m = graph_executor.create(graph, lib, dev, dump_root="/tmp/tvmdbg") # set inputs m.set_input('data', tvm.nd.array(data.astype(dtype))) m.set_input(**params) diff --git a/docs/dev/index.rst b/docs/dev/index.rst index 7ceed646087f..c297d32923fe 100644 --- a/docs/dev/index.rst +++ b/docs/dev/index.rst @@ -94,7 +94,7 @@ This process helps us to divide the original problem into two sub-problems: We use the low-level tir phase to compile and optimize each sub-functions. For specific targets, we may also directly go to the target translation phase and use external code generators. -There are a few different ways(in relay/backend) to handle the calls into the overall execution problem. For simple models with known shapes and no control flow, we can lower to a graph runtime that stores the execution structure in a graph. We also support a virtual machine backend for dynamic executions. Finally, we plan to support ahead of time compilation that compiles the high-level execution structure into the executable and generated primitive functions. All of these execution modes are encapsulated by a unified **runtime.Module** interface, which we will discuss in the latter part of the guide. +There are a few different ways(in relay/backend) to handle the calls into the overall execution problem. For simple models with known shapes and no control flow, we can lower to a graph executor that stores the execution structure in a graph. We also support a virtual machine backend for dynamic executions. 
Finally, we plan to support ahead of time compilation that compiles the high-level execution structure into the executable and generated primitive functions. All of these execution modes are encapsulated by a unified **runtime.Module** interface, which we will discuss in the latter part of the guide. **tir/transform** contains transformation passes for TIR level functions. Many tir passes serve the purpose of lowering. For example, there are passes to flatten multi-dimensional access to one-dimensional pointer access, to expand the intrinsics into target-specific ones, and to decorate the function entry to meet the runtime calling convention. Of course, there are also optimizations passes, such as access index simplification and dead code elimination. diff --git a/docs/dev/microtvm_design.rst b/docs/dev/microtvm_design.rst index 2c3eeb2faea3..885ef2c8fc0d 100644 --- a/docs/dev/microtvm_design.rst +++ b/docs/dev/microtvm_design.rst @@ -68,7 +68,7 @@ The parts of this process are described below: #. **Deployment**. The project is built and the residual firmware binary is flashed onto the device. Model inference is driven either by TVM using an on-device RPC server, or on the device using the - on-device Graph Runtime. + on-device Graph Executor. Design Goals ============ @@ -189,14 +189,14 @@ The TVM compiler traditionally outputs three pieces: 2. A model execution graph, encoded as JSON; and 3. Simplified parameters. -To correctly execute the model, a Graph Runtime needs to reconstruct the graph in memory, load the +To correctly execute the model, a Graph Executor needs to reconstruct the graph in memory, load the parameters, and then invoke the operator implementations in the correct order. microTVM supports two ways to do this: -1. **Host-Driven**. The Graph Runtime can run on the host and carry out execution by issuing +1. **Host-Driven**. The Graph Executor can run on the host and carry out execution by issuing commands to the device using an RPC link with a UART-like transport. -2. **Standalone**. A C Graph Runtime is available to be compiled on-device, but it is not +2. **Standalone**. A C Graph Executor is available to be compiled on-device, but it is not particularly memory efficient. This way enables standalone execution without any attached host. Host-Driven is designed for experimenting with models on-device and, like AutoTVM, uses the RPC server to @@ -213,8 +213,8 @@ In Host-Driven execution, the firmware binary is the following: 4. The TVM RPC server. 5. (optional) Simplified Parameters. -This firmware image is flashed onto the device and a GraphRuntime instance is created on the host. -The GraphRuntime drives execution by sending RPC commands over a UART: +This firmware image is flashed onto the device and a GraphExecutor instance is created on the host. +The GraphExecutor drives execution by sending RPC commands over a UART: .. figure:: https://raw.githubusercontent.com/tvmai/web-data/main/images/dev/microtvm_host_driven.svg :align: center @@ -223,7 +223,7 @@ The GraphRuntime drives execution by sending RPC commands over a UART: Standalone Execution ^^^^^^^^^^^^^^^^^^^^ -In Standalone execution, the GraphRuntime is instantiated on device: +In Standalone execution, the GraphExecutor is instantiated on device: .. 
figure:: https://raw.githubusercontent.com/tvmai/web-data/main/images/dev/microtvm_standalone.svg :align: center @@ -248,7 +248,7 @@ When configuring for host-driven inference or AutoTVM, the remaining tasks are w When configuring for standalone deployment, the firmware needs to: 1. Instantiate the system library by calling the ``runtime.SystemLib`` PackedFunc. -2. Instantiate a GraphRuntime passing the system library module. +2. Instantiate a GraphExecutor passing the system library module. 3. Configure parameters and inputs as needed. 4. Run the model. @@ -267,7 +267,7 @@ For Host-driven model execution, firmware also needs: For Standalone model execution, firmware also needs: -4. The TVM C GraphRuntime library, supplied by TVM as a static library. +4. The TVM C GraphExecutor library, supplied by TVM as a static library. 5. The remaining compiler outputs (Simplified Parameters and Graph JSON). The Automated Build Flow @@ -323,11 +323,11 @@ Future Work Ahead-of-Time Runtime ---------------------- -A limitation of the Graph Runtime is the amount of memory overhead required in parsing the JSON. +A limitation of the Graph Executor is the amount of memory overhead required in parsing the JSON. The current implementation contributes significantly to the dynamic memory usage of microTVM, limiting its utility. An ahead-of-time runtime can avoid the need for any Graph JSON parsing and improve inference speed by generating C code to call the generated operator implementations directly -rather than relying on a data-driven approach with the Graph Runtime. +rather than relying on a data-driven approach with the Graph Executor. Memory Planning ---------------- diff --git a/docs/dev/virtual_machine.rst b/docs/dev/virtual_machine.rst index 9081d50b92ef..7826f68b71dd 100644 --- a/docs/dev/virtual_machine.rst +++ b/docs/dev/virtual_machine.rst @@ -32,9 +32,9 @@ There are further challenges in compiling dynamic code, such as dynamic scheduli fully dynamic tensor shapes, and control flow. The interpreter offers simple solutions for these, but none is sufficiently compelling or optimized. -The second execution mechanism is the existing graph runtime. In order to target Relay +The second execution mechanism is the existing graph executor. In order to target Relay programs to this, we compile a small subset of them to the old graph format and execute -them on the runtime. Graph runtime provides a fast execution experience but only for a very limited +them on the runtime. Graph executor provides a fast execution experience but only for a very limited subset of Relay programs. An alternative but not-standard approach is Relay's ahead-of-time compiler, @@ -64,7 +64,7 @@ micro-optimizations present in scalar VMs are dramatically less important. TVM has provided strong support for vision models, but we want to grow to support a wider variety of models. -The graph runtime is able to utilize the fully static nature of the input graphs to perform +The graph executor is able to utilize the fully static nature of the input graphs to perform aggressive optimization such as fully static allocation, and optimal memory reuse. When we introduce models which make use of control flow, recursion, dynamic shapes, and dynamic allocation, we must change how execution works. A virtual machine for Relay is a natural choice. @@ -354,7 +354,7 @@ Serialization Serializing and deserializing the executable generated by the Relay VM compiler is a must as we may want to save the model to the disk and perform inference later. 
Previously, Relay has produced -a serialized form in a json file for the graph runtime. However, the same format is not directly +a serialized form in a json file for the graph executor. However, the same format is not directly applicable to the VM as it emits bytecode instead of graph-style programs. Serialization of an executable essentially needs to handle both model specific (i.e. weights and kernels) and VM related (i.e. bytecode and global function names) data. @@ -376,7 +376,7 @@ components in a binary format that is organized with the following sections in o - Code section. The VM functions, including bytecode, are sitting in this section. The dispatching loop iterates through this section to fetch instructions for execution. -Hence, unlike the graph runtime artifact that contains weight (.params), graph json (.json), +Hence, unlike the graph executor artifact that contains weight (.params), graph json (.json), and compiled kernel library (.so), the serialized executable artifact is composed of the Relay object file (.ro) and the compiled kernel library (.so). diff --git a/docs/install/from_source.rst b/docs/install/from_source.rst index f6be4e31af90..f0ebad1d9edc 100644 --- a/docs/install/from_source.rst +++ b/docs/install/from_source.rst @@ -88,7 +88,7 @@ The configuration of TVM can be modified by `config.cmake`. - On macOS, for some versions of Xcode, you need to add ``-lc++abi`` in the LDFLAGS or you'll get link errors. - Change ``set(USE_CUDA OFF)`` to ``set(USE_CUDA ON)`` to enable CUDA backend. Do the same for other backends and libraries you want to build for (OpenCL, RCOM, METAL, VULKAN, ...). - - To help with debugging, ensure the embedded graph runtime and debugging functions are enabled with ``set(USE_GRAPH_RUNTIME ON)`` and ``set(USE_GRAPH_RUNTIME_DEBUG ON)`` + - To help with debugging, ensure the embedded graph executor and debugging functions are enabled with ``set(USE_GRAPH_EXECUTOR ON)`` and ``set(USE_PROFILER ON)`` - TVM requires LLVM for for CPU codegen. We highly recommend you to build with the LLVM support on. 
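The deployment and debugger documents patched above all converge on the same renamed usage pattern: load a compiled library, wrap its ``default`` factory in ``tvm.contrib.graph_executor.GraphModule``, then set inputs and run. Below is a minimal sketch of that pattern; the library filename ``lib_acl.so``, the input name ``data``, and the 1x3x224x224 float32 shape are placeholders borrowed from the Arm Compute Library example above.

.. code:: python

    import numpy as np
    import tvm
    from tvm.contrib import graph_executor

    dev = tvm.cpu(0)
    # Placeholder: any library produced by relay.build(...).export_library(...)
    loaded_lib = tvm.runtime.load_module("lib_acl.so")
    gen_module = graph_executor.GraphModule(loaded_lib["default"](dev))

    # Placeholder input name and shape, as in the ACL example above.
    d_data = np.random.uniform(0, 1, (1, 3, 224, 224)).astype("float32")
    gen_module.set_input("data", d_data)
    gen_module.run()
    out = gen_module.get_output(0)

The same wrapper is used whether the offloaded backend is ACL, BNNS, TensorRT, or Vitis AI; only the loaded library differs.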
diff --git a/golang/sample/complex.go b/golang/sample/complex.go index bbe74dc85e09..911d0a7a28c1 100644 --- a/golang/sample/complex.go +++ b/golang/sample/complex.go @@ -70,13 +70,13 @@ func main() { } jsonStr := string(bytes) - // Load module on tvm runtime - call tvm.graph_runtime.create - funp, err := gotvm.GetGlobalFunction("tvm.graph_runtime.create") + // Load module on tvm runtime - call tvm.graph_executor.create + funp, err := gotvm.GetGlobalFunction("tvm.graph_executor.create") if err != nil { fmt.Print(err) return } - fmt.Printf("Calling tvm.graph_runtime.create\n") + fmt.Printf("Calling tvm.graph_executor.create\n") // Call function graphrt, err := funp.Invoke(jsonStr, modp, (int64)(gotvm.KDLCPU), (int64)(0)) if err != nil { @@ -84,7 +84,7 @@ func main() { return } graphmod := graphrt.AsModule() - fmt.Printf("Graph runtime Created\n") + fmt.Printf("Graph executor Created\n") // Array allocation attributes tshapeIn := []int64{1, 224, 224, 3} @@ -105,7 +105,7 @@ func main() { } fmt.Printf("Input and Output Arrays allocated\n") - // Get module function from graph runtime : load_params + // Get module function from graph executor : load_params // Read params bytes, err = ioutil.ReadFile(modParams) if err != nil { diff --git a/golang/src/function_test.go b/golang/src/function_test.go index 17b1c9a6e1c0..0830d16419a2 100644 --- a/golang/src/function_test.go +++ b/golang/src/function_test.go @@ -46,7 +46,7 @@ func TestFunctionGlobals(t *testing.T) { // Check GetFunction API func TestFunctionGlobalGet(t *testing.T) { - funp, err := GetGlobalFunction("tvm.graph_runtime.create") + funp, err := GetGlobalFunction("tvm.graph_executor.create") if err != nil { t.Error(err.Error()) return diff --git a/golang/src/tvm_runtime_pack.cc b/golang/src/tvm_runtime_pack.cc index 7dd6dd5e94c5..430e046e39a8 100644 --- a/golang/src/tvm_runtime_pack.cc +++ b/golang/src/tvm_runtime_pack.cc @@ -42,8 +42,8 @@ #include "src/runtime/dso_library.cc" #include "src/runtime/system_library.cc" -// Graph runtime -#include "src/runtime/graph/graph_runtime.cc" +// Graph executor +#include "src/runtime/graph_executor/graph_executor.cc" // Uncomment the following lines to enable RPC // #include "../../src/runtime/rpc/rpc_session.cc" diff --git a/include/tvm/runtime/crt/error_codes.h b/include/tvm/runtime/crt/error_codes.h index 75e49e63e094..4cbfb0aab8e2 100644 --- a/include/tvm/runtime/crt/error_codes.h +++ b/include/tvm/runtime/crt/error_codes.h @@ -42,7 +42,7 @@ typedef enum { kTvmErrorCategorySession = 4, kTvmErrorCategoryPlatform = 5, kTvmErrorCategoryGenerated = 6, - kTvmErrorCategoryGraphRuntime = 7, + kTvmErrorCategoryGraphExecutor = 7, kTvmErrorCategoryFunctionCall = 8, kTvmErrorCategoryTimeEvaluator = 9, } tvm_crt_error_category_t; @@ -83,10 +83,10 @@ typedef enum { // Common error codes returned from generated functions. 
kTvmErrorGeneratedInvalidStorageId = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGenerated, 0), - // Graph runtime - kTvmErrorGraphModuleAlreadyCreated = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGraphRuntime, 0), - kTvmErrorGraphModuleBadContext = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGraphRuntime, 1), - kTvmErrorGraphModuleNoSuchInput = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGraphRuntime, 2), + // Graph executor + kTvmErrorGraphModuleAlreadyCreated = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGraphExecutor, 0), + kTvmErrorGraphModuleBadContext = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGraphExecutor, 1), + kTvmErrorGraphModuleNoSuchInput = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGraphExecutor, 2), // Function Calls - common problems encountered calling functions. kTvmErrorFunctionCallNumArguments = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFunctionCall, 0), diff --git a/include/tvm/runtime/crt/graph_runtime.h b/include/tvm/runtime/crt/graph_executor.h similarity index 61% rename from include/tvm/runtime/crt/graph_runtime.h rename to include/tvm/runtime/crt/graph_executor.h index 02c7421d00e8..eb68ff56d230 100644 --- a/include/tvm/runtime/crt/graph_runtime.h +++ b/include/tvm/runtime/crt/graph_executor.h @@ -18,11 +18,11 @@ */ /*! - * \file graph_runtime.h - * \brief Tiny graph runtime that can run graph containing only tvm PackedFunc. + * \file graph_executor.h + * \brief Tiny graph executor that can run graph containing only tvm PackedFunc. */ -#ifndef TVM_RUNTIME_CRT_GRAPH_RUNTIME_H_ -#define TVM_RUNTIME_CRT_GRAPH_RUNTIME_H_ +#ifndef TVM_RUNTIME_CRT_GRAPH_EXECUTOR_H_ +#define TVM_RUNTIME_CRT_GRAPH_EXECUTOR_H_ #ifdef __cplusplus extern "C" { @@ -43,7 +43,7 @@ typedef struct TVMOpParam { } TVMOpParam; // Graph attribute -typedef struct TVMGraphRuntimeGraphAttr { +typedef struct TVMGraphExecutorGraphAttr { uint32_t storage_num_not_alloctaed; uint32_t* storage_id; uint32_t* device_index; @@ -52,79 +52,79 @@ typedef struct TVMGraphRuntimeGraphAttr { int64_t* shape; uint32_t* ndim; uint32_t shape_count; -} TVMGraphRuntimeGraphAttr; +} TVMGraphExecutorGraphAttr; -typedef struct TVMGraphRuntime TVMGraphRuntime; +typedef struct TVMGraphExecutor TVMGraphExecutor; // public functions /*! - * \brief Allocate a new GraphRuntime with TVMPlatformMemoryAllocate and initialize it. + * \brief Allocate a new GraphExecutor with TVMPlatformMemoryAllocate and initialize it. * * \param sym_json JSON-encoded graph. * \param module_handle TVM Module that exposes the functions to call. * \param devices runtime execution device. - * \param runtime Pointer which receives a pointer to the newly-created instance. + * \param executor Pointer which receives a pointer to the newly-created instance. * \return 0 if successful. */ -int TVMGraphRuntime_Create(const char* sym_json, TVMModuleHandle module_handle, - const DLDevice* devices, TVMGraphRuntime** runtime); +int TVMGraphExecutor_Create(const char* sym_json, TVMModuleHandle module_handle, + const DLDevice* devices, TVMGraphExecutor** executor); -int TVMGraphRuntime_GetInputIndex(TVMGraphRuntime* runtime, const char* name); +int TVMGraphExecutor_GetInputIndex(TVMGraphExecutor* executor, const char* name); /*! * \brief get number of input tensors allocated. * \return integer number of tensors available to use. */ -int TVMGraphRuntime_GetNumInputs(); +int TVMGraphExecutor_GetNumInputs(); /*! * \brief set input to the graph based on name. - * \param runtime The graph runtime. + * \param executor The graph executor. * \param name The name of the input. * \param data_in The input data. 
*/ -void TVMGraphRuntime_SetInput(TVMGraphRuntime* runtime, const char* name, DLTensor* data_in); +void TVMGraphExecutor_SetInput(TVMGraphExecutor* executor, const char* name, DLTensor* data_in); /*! * \brief get number of output tensors allocated. * \return integer number of output tensors allocated. */ -int TVMGraphRuntime_GetNumOutputs(); +int TVMGraphExecutor_GetNumOutputs(); /*! * \brief Return NDArray for given output index. - * \param runtime The graph runtime. + * \param executor The graph executor. * \param index The output index. * \param out The DLTensor corresponding to given output node index. * \return The result of this function execution. */ -int TVMGraphRuntime_GetOutput(TVMGraphRuntime* runtime, const int32_t index, DLTensor* out); +int TVMGraphExecutor_GetOutput(TVMGraphExecutor* executor, const int32_t index, DLTensor* out); /*! * \brief Load parameters from parameter blob. - * \param runtime The graph runtime. + * \param executor The graph executor. * \param param_blob A binary blob of parameter. * \param param_size The parameter size. * \return The result of this function execution. */ -int TVMGraphRuntime_LoadParams(TVMGraphRuntime* runtime, const char* param_blob, - const uint32_t param_size); +int TVMGraphExecutor_LoadParams(TVMGraphExecutor* executor, const char* param_blob, + const uint32_t param_size); /*! * \brief Execute the graph. - * \param runtime The graph runtime. + * \param executor The graph executor. */ -void TVMGraphRuntime_Run(TVMGraphRuntime* runtime); +void TVMGraphExecutor_Run(TVMGraphExecutor* executor); /*! - * \brief Release memory associated with the graph runtime. - * \param runtime Pointer to graph runtime. + * \brief Release memory associated with the graph executor. + * \param executor Pointer to graph executor. * \return 0 if successful */ -int TVMGraphRuntime_Release(TVMGraphRuntime** runtime); +int TVMGraphExecutor_Release(TVMGraphExecutor** executor); #ifdef __cplusplus } // extern "C" #endif -#endif // TVM_RUNTIME_CRT_GRAPH_RUNTIME_H_ +#endif // TVM_RUNTIME_CRT_GRAPH_EXECUTOR_H_ diff --git a/include/tvm/runtime/crt/graph_runtime_module.h b/include/tvm/runtime/crt/graph_executor_module.h similarity index 71% rename from include/tvm/runtime/crt/graph_runtime_module.h rename to include/tvm/runtime/crt/graph_executor_module.h index 04e9184c8b8d..10a879e9ba30 100644 --- a/include/tvm/runtime/crt/graph_runtime_module.h +++ b/include/tvm/runtime/crt/graph_executor_module.h @@ -18,11 +18,11 @@ */ /*! - * \file graph_runtime.h - * \brief Tiny graph runtime that can run graph containing only tvm PackedFunc. + * \file graph_executor.h + * \brief Tiny graph executor that can run graph containing only tvm PackedFunc. */ -#ifndef TVM_RUNTIME_CRT_GRAPH_RUNTIME_MODULE_H_ -#define TVM_RUNTIME_CRT_GRAPH_RUNTIME_MODULE_H_ +#ifndef TVM_RUNTIME_CRT_GRAPH_EXECUTOR_MODULE_H_ +#define TVM_RUNTIME_CRT_GRAPH_EXECUTOR_MODULE_H_ #ifdef __cplusplus extern "C" { @@ -31,12 +31,12 @@ extern "C" { #include /*! - * \brief Register the "tvm.graph_runtime.create" constructor PackedFunc. + * \brief Register the "tvm.graph_executor.create" constructor PackedFunc. 
*/ -tvm_crt_error_t TVMGraphRuntimeModule_Register(); +tvm_crt_error_t TVMGraphExecutorModule_Register(); #ifdef __cplusplus } // extern "C" #endif -#endif // TVM_RUNTIME_CRT_GRAPH_RUNTIME_MODULE_H_ +#endif // TVM_RUNTIME_CRT_GRAPH_EXECUTOR_MODULE_H_ diff --git a/jvm/core/src/main/java/org/apache/tvm/contrib/GraphRuntime.java b/jvm/core/src/main/java/org/apache/tvm/contrib/GraphExecutor.java similarity index 95% rename from jvm/core/src/main/java/org/apache/tvm/contrib/GraphRuntime.java rename to jvm/core/src/main/java/org/apache/tvm/contrib/GraphExecutor.java index 5bbd2beb4644..30b2fb1acafb 100644 --- a/jvm/core/src/main/java/org/apache/tvm/contrib/GraphRuntime.java +++ b/jvm/core/src/main/java/org/apache/tvm/contrib/GraphExecutor.java @@ -29,7 +29,7 @@ import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; -public class GraphRuntime { +public class GraphExecutor { /** * Create a runtime executor module given a graph and module. * @param graphJson The graph deployed in json format output by compiler. @@ -38,9 +38,9 @@ public class GraphRuntime { * @return Runtime graph module that can be used to execute the graph. */ public static GraphModule create(String graphJson, Module libmod, Device dev) { - Function fcreate = Function.getFunction("tvm.graph_runtime.create"); + Function fcreate = Function.getFunction("tvm.graph_executor.create"); if (fcreate == null) { - throw new RuntimeException("Cannot find global function tvm.graph_runtime.create." + throw new RuntimeException("Cannot find global function tvm.graph_executor.create." + "Did you compile tvm_runtime with correct version?"); } Module graphModule = fcreate.pushArg(graphJson) diff --git a/jvm/core/src/main/java/org/apache/tvm/contrib/GraphModule.java b/jvm/core/src/main/java/org/apache/tvm/contrib/GraphModule.java index 0e2583553ed3..a7a03d52740e 100644 --- a/jvm/core/src/main/java/org/apache/tvm/contrib/GraphModule.java +++ b/jvm/core/src/main/java/org/apache/tvm/contrib/GraphModule.java @@ -147,7 +147,7 @@ public NDArray debugGetOutput(String node, NDArray out) { if (fdebugGetOutput != null) { fdebugGetOutput.pushArg(node).pushArg(out).invoke(); } else { - throw new RuntimeException("Please compile runtime with USE_GRAPH_RUNTIME_DEBUG = 0"); + throw new RuntimeException("Please compile runtime with USE_GRAPH_EXECUTOR_DEBUG = 0"); } return out; } @@ -162,7 +162,7 @@ public NDArray debugGetOutput(int node, NDArray out) { if (fdebugGetOutput != null) { fdebugGetOutput.pushArg(node).pushArg(out).invoke(); } else { - throw new RuntimeException("Please compile runtime with USE_GRAPH_RUNTIME_DEBUG = 0"); + throw new RuntimeException("Please compile runtime with USE_GRAPH_EXECUTOR_DEBUG = 0"); } return out; } diff --git a/jvm/core/src/test/java/org/apache/tvm/contrib/GraphRuntimeTest.java b/jvm/core/src/test/java/org/apache/tvm/contrib/GraphExecutorTest.java similarity index 93% rename from jvm/core/src/test/java/org/apache/tvm/contrib/GraphRuntimeTest.java rename to jvm/core/src/test/java/org/apache/tvm/contrib/GraphExecutorTest.java index d1760454a468..0a5fa9a67e3a 100644 --- a/jvm/core/src/test/java/org/apache/tvm/contrib/GraphRuntimeTest.java +++ b/jvm/core/src/test/java/org/apache/tvm/contrib/GraphExecutorTest.java @@ -35,8 +35,8 @@ import static org.junit.Assert.assertArrayEquals; -public class GraphRuntimeTest { - private final Logger logger = LoggerFactory.getLogger(GraphRuntime.class); +public class GraphExecutorTest { + private final Logger logger = LoggerFactory.getLogger(GraphExecutor.class); 
private static String loadingDir; @BeforeClass @@ -52,7 +52,7 @@ public void test_add_one_local() throws IOException { .useDelimiter("\\Z").next(); Device dev = Device.cpu(); - GraphModule graph = GraphRuntime.create(graphJson, libmod, dev); + GraphModule graph = GraphExecutor.create(graphJson, libmod, dev); long[] shape = new long[]{4}; NDArray arr = NDArray.empty(shape, dev); @@ -92,7 +92,7 @@ public void test_add_one_remote() throws IOException { remote.upload(new File(libPath)); Module mlib = remote.loadModule("graph_addone_lib.so"); - GraphModule graph = GraphRuntime.create(graphJson, mlib, dev); + GraphModule graph = GraphExecutor.create(graphJson, mlib, dev); long[] shape = new long[]{4}; NDArray arr = NDArray.empty(shape, dev); diff --git a/jvm/core/src/test/scripts/test_graph_runtime.py b/jvm/core/src/test/scripts/test_graph_executor.py similarity index 98% rename from jvm/core/src/test/scripts/test_graph_runtime.py rename to jvm/core/src/test/scripts/test_graph_executor.py index 07a19fe50c1b..676b008205ca 100644 --- a/jvm/core/src/test/scripts/test_graph_runtime.py +++ b/jvm/core/src/test/scripts/test_graph_executor.py @@ -19,7 +19,7 @@ import tvm from tvm import te import json -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor def dump_graph_lib(target_dir): diff --git a/python/tvm/auto_scheduler/relay_integration.py b/python/tvm/auto_scheduler/relay_integration.py index 366d3d021d9e..72e3e06f2047 100644 --- a/python/tvm/auto_scheduler/relay_integration.py +++ b/python/tvm/auto_scheduler/relay_integration.py @@ -47,7 +47,7 @@ def call_all_topi_funcs(mod, params, target): """Call all TOPI compute to extract auto_scheduler tasks in a Relay program""" # pylint: disable=import-outside-toplevel from tvm import relay - from tvm.relay.backend import graph_runtime_codegen + from tvm.relay.backend import graph_executor_codegen # Turn off AutoTVM config not found warnings old_autotvm_silent = autotvm.GLOBAL_SCOPE.silent @@ -63,11 +63,11 @@ def call_all_topi_funcs(mod, params, target): ): try: opt_mod, _ = relay.optimize(mod, target, params) - grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target) + grc = graph_executor_codegen.GraphExecutorCodegen(None, target) grc.codegen(opt_mod["main"]) except tvm.TVMError: print( - "Get errors with GraphRuntimeCodegen for task extraction. " + "Get errors with GraphExecutorCodegen for task extraction. " "Fallback to VMCompiler." ) compiler = relay.vm.VMCompiler() diff --git a/python/tvm/autotvm/task/relay_integration.py b/python/tvm/autotvm/task/relay_integration.py index fe88d1741d60..12e057e01da6 100644 --- a/python/tvm/autotvm/task/relay_integration.py +++ b/python/tvm/autotvm/task/relay_integration.py @@ -36,14 +36,14 @@ def _lower(mod, target, params): """Helper to lower VTA properly.""" # pylint: disable=import-outside-toplevel from tvm import relay - from tvm.relay.backend import graph_runtime_codegen + from tvm.relay.backend import graph_executor_codegen if hasattr(target, "device_name") and target.device_name == "vta": import vta with vta.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}): mod, _ = relay.optimize(mod, target, params) - grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target) + grc = graph_executor_codegen.GraphExecutorCodegen(None, target) grc.codegen(mod["main"]) return @@ -53,11 +53,11 @@ def _lower(mod, target, params): # TODO: Currently VM compiler is likely to stack overflow for large models. 
try: opt_mod, _ = relay.optimize(mod, target, params) - grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target) + grc = graph_executor_codegen.GraphExecutorCodegen(None, target) grc.codegen(opt_mod["main"]) except tvm.TVMError as e: print( - "Get errors with GraphRuntimeCodegen for task extraction. " + "Get errors with GraphExecutorCodegen for task extraction. " "Fallback to VMCompiler. Error details:\n%s" % str(e) ) compiler = relay.vm.VMCompiler() diff --git a/python/tvm/contrib/cuda_graph/cuda_graph_runtime.py b/python/tvm/contrib/cuda_graph/cuda_graph_executor.py similarity index 78% rename from python/tvm/contrib/cuda_graph/cuda_graph_runtime.py rename to python/tvm/contrib/cuda_graph/cuda_graph_executor.py index c2f4455d881b..d047316eb564 100644 --- a/python/tvm/contrib/cuda_graph/cuda_graph_runtime.py +++ b/python/tvm/contrib/cuda_graph/cuda_graph_executor.py @@ -14,11 +14,11 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""Graph runtime with CUDA Graph""" +"""Graph executor with CUDA Graph""" import tvm._ffi from tvm._ffi.base import string_types -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor def create(graph_json_str, libmod, device): @@ -40,34 +40,34 @@ def create(graph_json_str, libmod, device): Returns ------- graph_module : GraphModuleCudaGraph - CUDA graph runtime module that can be used to execute the graph. + CUDA graph executor module that can be used to execute the graph. Note ---- - See also :py:class:`tvm.contrib.cuda_graph.cuda_graph_runtime.GraphModuleCudaGraph` + See also :py:class:`tvm.contrib.cuda_graph.cuda_graph_executor.GraphModuleCudaGraph` for examples to directly construct a GraphModuleCudaGraph from an exported relay compiled library. """ assert isinstance(graph_json_str, string_types) try: - dev, num_rpc_dev, device_type_id = graph_runtime.get_device(libmod, device) + dev, num_rpc_dev, device_type_id = graph_executor.get_device(libmod, device) if num_rpc_dev == len(dev): - fcreate = dev[0]._rpc_sess.get_function("tvm.graph_runtime_cuda_graph.create") + fcreate = dev[0]._rpc_sess.get_function("tvm.graph_executor_cuda_graph.create") else: - fcreate = tvm._ffi.get_global_func("tvm.graph_runtime_cuda_graph.create") + fcreate = tvm._ffi.get_global_func("tvm.graph_executor_cuda_graph.create") except ValueError: raise ValueError( "To enable CUDA graph support (experimental), please set " - "'(USE_GRAPH_RUNTIME_CUGRAPH ON)' in config.cmake and rebuild TVM" + "'(USE_GRAPH_EXECUTOR_CUGRAPH ON)' in config.cmake and rebuild TVM" ) return GraphModuleCudaGraph(fcreate(graph_json_str, libmod, *device_type_id)) -class GraphModuleCudaGraph(graph_runtime.GraphModule): - """CUDA graph runtime module. +class GraphModuleCudaGraph(graph_executor.GraphModule): + """CUDA graph executor module. - This is a CUDA graph runtime wrapper over the TVM runtime. + This is a CUDA graph executor wrapper over the TVM runtime. Runtime interfaces are wrapped with CUDA graph functionalities. 
Parameters @@ -81,7 +81,7 @@ def __init__(self, module): self._end_capture = module["end_capture"] self._run_cuda_graph = module["run_cuda_graph"] self._cuda_graph_captured = False - graph_runtime.GraphModule.__init__(self, module) + graph_executor.GraphModule.__init__(self, module) def capture_cuda_graph(self): """Capture a CUDA graph for tvm_op graph @@ -99,13 +99,13 @@ def run_cuda_graph(self): """Run the CUDA graph for tvm_op graph Run the captured CUDA graph instance instead of the - for-loop kernel launch of default graph runtime + for-loop kernel launch of default graph executor """ self._run_cuda_graph() def run(self, **input_dict): """A run wrapper for graph capture / launch, user can just - change default graph runtime to cuda graph runtime, and + change default graph executor to cuda graph executor, and the first call will capture a cuda graph for future launch Parameters @@ -131,4 +131,4 @@ def debug_get_output(self, node, out): out : NDArray The output array container """ - raise NotImplementedError("Please use debugger.debug_runtime as graph_runtime instead.") + raise NotImplementedError("Please use debugger.debug_executor as graph_executor instead.") diff --git a/python/tvm/contrib/debugger/debug_executor.py b/python/tvm/contrib/debugger/debug_executor.py new file mode 100644 index 000000000000..b27ae6533e38 --- /dev/null +++ b/python/tvm/contrib/debugger/debug_executor.py @@ -0,0 +1,239 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Graph debug runtime executes TVM debug packed functions.""" + +import os +import tempfile +import shutil +import tvm._ffi + +from tvm._ffi.base import string_types +from tvm.contrib import graph_executor +from tvm.runtime.ndarray import array +from . import debug_result + +_DUMP_ROOT_PREFIX = "tvmdbg_" +_DUMP_PATH_PREFIX = "_tvmdbg_" + + +def create(graph_json_str, libmod, device, dump_root=None): + """Create a runtime executor module given a graph and module. + + Parameters + ---------- + graph_json_str : str + The graph to be deployed in json format output by graph compiler. + The graph can contain operator(tvm_op) that points to the name + of PackedFunc in the libmod. + + libmod : tvm.Module + The module of the corresponding function. + + device : Device + The device to deploy the module, can be local or remote. + + dump_root : str + To select which folder the outputs should be kept. + None will make a temp folder in /tmp/tvmdbg and does the dumping + Returns + ------- + graph_module : GraphModuleDebug + Debug Runtime graph module that can be used to execute the graph. 
+ """ + assert isinstance(graph_json_str, string_types) + + try: + dev, num_rpc_dev, device_type_id = graph_executor.get_device(libmod, device) + if num_rpc_dev == len(dev): + fcreate = dev[0]._rpc_sess.get_function("tvm.graph_executor_debug.create") + else: + fcreate = tvm._ffi.get_global_func("tvm.graph_executor_debug.create") + except ValueError: + raise ValueError( + "Please set '(USE_GRAPH_EXECUTOR_DEBUG ON)' in " + "config.cmake and rebuild TVM to enable debug mode" + ) + func_obj = fcreate(graph_json_str, libmod, *device_type_id) + return GraphModuleDebug(func_obj, dev, graph_json_str, dump_root) + + +class GraphModuleDebug(graph_executor.GraphModule): + """Graph debug runtime module. + + This is a debug wrapper over the TVM runtime. + Runtime interfaces are wrapped with debug functionalities. + Manage the debug framework to format the debug data and + trigger the user interfaces. + + Parameters + ---------- + module : Module + The internal tvm module that holds the actual graph functions. + + device : Device + The device that this module is under. + + graph_json_str : str or graph class + Content of graph json file in string format + + dump_root : str + To select which folder the outputs should be kept. + None will make a temp folder in /tmp/tvmdbg and does the dumping + """ + + def __init__(self, module, device, graph_json_str, dump_root): + self._dump_root = dump_root + self._dump_path = None + self._get_output_by_layer = module["get_output_by_layer"] + self._run_individual = module["run_individual"] + graph_executor.GraphModule.__init__(self, module) + self._create_debug_env(graph_json_str, device) + + def _format_device(self, device): + return str(device[0]).upper().replace("(", ":").replace(")", "") + + def _ensure_dir(self, directory): + """Create a directory if not exists + + Parameters + ---------- + + directory : str + File path to create + """ + if not os.path.exists(directory): + os.makedirs(directory, 0o700) + + def _get_dump_path(self, device): + """Make the graph and tensor dump folder and return the path. + + Parameters + ---------- + device : Device + The device that this module is under. + + Returns + ------- + path : str + Directory path where the graph and node outputs will be stored. + """ + # save to file + folder_name = _DUMP_PATH_PREFIX + "device_" + folder_name = folder_name + device.replace(":", "_") + path = os.path.join(self._dump_root, folder_name) + self._ensure_dir(path) + return path + + def _remove_dump_root(self): + if os.path.isdir(self._dump_root): + shutil.rmtree(self._dump_root) + + def _create_debug_env(self, graph_json, device): + """Create UI wrapper framework to handle multiple UI frontends for tvmdbg + + Parameters + ---------- + graph_json : json format + json formatted NNVM graph contain list of each node's name, shape and type. + + nodes_list : list + List of all the nodes presented in the graph + + device : Device + The device that this module is under. + """ + # make the dump folder if not given + if not self._dump_root: + self._dump_root = tempfile.mkdtemp(prefix=_DUMP_ROOT_PREFIX) + + # format the device + device = self._format_device(device) + + # updates the dumping directories + self._dump_path = self._get_dump_path(device) + + # init the debug dumping environment + self.debug_datum = debug_result.DebugResult(graph_json, self._dump_path) + + def _run_debug(self): + """Execute the node specified with index will be executed. 
+ Each debug output will be copied to the buffer + Time consumed for each execution will be set as debug output. + + """ + self.debug_datum._time_list = [[float(t)] for t in self.run_individual(10, 1, 1)] + for i, node in enumerate(self.debug_datum.get_graph_nodes()): + num_outputs = self.debug_datum.get_graph_node_output_num(node) + for j in range(num_outputs): + out_tensor = self._get_output_by_layer(i, j) + out_tensor = array(out_tensor) + self.debug_datum._output_tensor_list.append(out_tensor) + + def debug_get_output(self, node, out=None): + """Run graph up to node and get the output to out + + Parameters + ---------- + node : int / str + The node index or name + + out : NDArray + The output array container + """ + if isinstance(node, str): + output_tensors = self.debug_datum.get_output_tensors() + try: + out = output_tensors[node] + except KeyError: + node_list = output_tensors.keys() + raise RuntimeError( + "Node " + node + " not found, available nodes are: " + str(node_list) + "." + ) + elif isinstance(node, int): + output_tensors = self.debug_datum._output_tensor_list + out = output_tensors[node] + else: + raise RuntimeError("Require node index or name only.") + return out + + def run(self, **input_dict): + """Run forward execution of the graph with debug + + Parameters + ---------- + input_dict : dict of str to NDArray + List of input values to be feed to + """ + if input_dict: + self.set_input(**input_dict) + + # Step 1. Execute the graph + self._run_debug() + # Step 2. Dump the output tensors to the dump folder + self.debug_datum.dump_output_tensor() + # Step 3. Dump the Chrome trace to the dump folder + self.debug_datum.dump_chrome_trace() + # Step 4. Display the collected information + self.debug_datum.display_debug_result() + + def run_individual(self, number, repeat=1, min_repeat_ms=0): + ret = self._run_individual(number, repeat, min_repeat_ms) + return ret.strip(",").split(",") if ret else [] + + def exit(self): + """Exits the dump folder and all its contents""" + self._remove_dump_root() diff --git a/python/tvm/contrib/debugger/debug_runtime.py b/python/tvm/contrib/debugger/debug_runtime.py index f0e299728fbc..ebd903b47570 100644 --- a/python/tvm/contrib/debugger/debug_runtime.py +++ b/python/tvm/contrib/debugger/debug_runtime.py @@ -14,226 +14,16 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""Graph debug runtime executes TVM debug packed functions.""" +"""Deprecated Python API for DebugExecutor.""" -import os -import tempfile -import shutil -import tvm._ffi +import warnings -from tvm._ffi.base import string_types -from tvm.contrib import graph_runtime -from tvm.runtime.ndarray import array -from . import debug_result +from . import debug_executor -_DUMP_ROOT_PREFIX = "tvmdbg_" -_DUMP_PATH_PREFIX = "_tvmdbg_" - -def create(graph_json_str, libmod, device, dump_root=None): - """Create a runtime executor module given a graph and module. - - Parameters - ---------- - graph_json_str : str - The graph to be deployed in json format output by graph compiler. - The graph can contain operator(tvm_op) that points to the name - of PackedFunc in the libmod. - - libmod : tvm.Module - The module of the corresponding function. - - device : Device - The device to deploy the module, can be local or remote. - - dump_root : str - To select which folder the outputs should be kept. 
- None will make a temp folder in /tmp/tvmdbg and does the dumping - Returns - ------- - graph_module : GraphModuleDebug - Debug Runtime graph module that can be used to execute the graph. - """ - assert isinstance(graph_json_str, string_types) - - try: - dev, num_rpc_dev, device_type_id = graph_runtime.get_device(libmod, device) - if num_rpc_dev == len(dev): - fcreate = dev[0]._rpc_sess.get_function("tvm.graph_runtime_debug.create") - else: - fcreate = tvm._ffi.get_global_func("tvm.graph_runtime_debug.create") - except ValueError: - raise ValueError( - "Please set '(USE_GRAPH_RUNTIME_DEBUG ON)' in " - "config.cmake and rebuild TVM to enable debug mode" - ) - func_obj = fcreate(graph_json_str, libmod, *device_type_id) - return GraphModuleDebug(func_obj, dev, graph_json_str, dump_root) - - -class GraphModuleDebug(graph_runtime.GraphModule): - """Graph debug runtime module. - - This is a debug wrapper over the TVM runtime. - Runtime interfaces are wrapped with debug functionalities. - Manage the debug framework to format the debug data and - trigger the user interfaces. - - Parameters - ---------- - module : Module - The internal tvm module that holds the actual graph functions. - - device : Device - The device that this module is under. - - graph_json_str : str or graph class - Content of graph json file in string format - - dump_root : str - To select which folder the outputs should be kept. - None will make a temp folder in /tmp/tvmdbg and does the dumping - """ - - def __init__(self, module, device, graph_json_str, dump_root): - self._dump_root = dump_root - self._dump_path = None - self._get_output_by_layer = module["get_output_by_layer"] - self._run_individual = module["run_individual"] - graph_runtime.GraphModule.__init__(self, module) - self._create_debug_env(graph_json_str, device) - - def _format_device(self, device): - return str(device[0]).upper().replace("(", ":").replace(")", "") - - def _ensure_dir(self, directory): - """Create a directory if not exists - - Parameters - ---------- - - directory : str - File path to create - """ - if not os.path.exists(directory): - os.makedirs(directory, 0o700) - - def _get_dump_path(self, device): - """Make the graph and tensor dump folder and return the path. - - Parameters - ---------- - device : Device - The device that this module is under. - - Returns - ------- - path : str - Directory path where the graph and node outputs will be stored. - """ - # save to file - folder_name = _DUMP_PATH_PREFIX + "device_" - folder_name = folder_name + device.replace(":", "_") - path = os.path.join(self._dump_root, folder_name) - self._ensure_dir(path) - return path - - def _remove_dump_root(self): - if os.path.isdir(self._dump_root): - shutil.rmtree(self._dump_root) - - def _create_debug_env(self, graph_json, device): - """Create UI wrapper framework to handle multiple UI frontends for tvmdbg - - Parameters - ---------- - graph_json : json format - json formatted NNVM graph contain list of each node's name, shape and type. - - nodes_list : list - List of all the nodes presented in the graph - - device : Device - The device that this module is under. 
- """ - # make the dump folder if not given - if not self._dump_root: - self._dump_root = tempfile.mkdtemp(prefix=_DUMP_ROOT_PREFIX) - - # format the device - device = self._format_device(device) - - # updates the dumping directories - self._dump_path = self._get_dump_path(device) - - # init the debug dumping environment - self.debug_datum = debug_result.DebugResult(graph_json, self._dump_path) - - def _run_debug(self): - """Execute the node specified with index will be executed. - Each debug output will be copied to the buffer - Time consumed for each execution will be set as debug output. - - """ - self.debug_datum._time_list = [[float(t)] for t in self.run_individual(10, 1, 1)] - for i, node in enumerate(self.debug_datum.get_graph_nodes()): - num_outputs = self.debug_datum.get_graph_node_output_num(node) - for j in range(num_outputs): - out_tensor = self._get_output_by_layer(i, j) - out_tensor = array(out_tensor) - self.debug_datum._output_tensor_list.append(out_tensor) - - def debug_get_output(self, node, out=None): - """Run graph up to node and get the output to out - - Parameters - ---------- - node : int / str - The node index or name - - out : NDArray - The output array container - """ - if isinstance(node, str): - output_tensors = self.debug_datum.get_output_tensors() - try: - out = output_tensors[node] - except KeyError: - node_list = output_tensors.keys() - raise RuntimeError( - "Node " + node + " not found, available nodes are: " + str(node_list) + "." - ) - elif isinstance(node, int): - output_tensors = self.debug_datum._output_tensor_list - out = output_tensors[node] - else: - raise RuntimeError("Require node index or name only.") - return out - - def run(self, **input_dict): - """Run forward execution of the graph with debug - - Parameters - ---------- - input_dict : dict of str to NDArray - List of input values to be feed to - """ - if input_dict: - self.set_input(**input_dict) - - # Step 1. Execute the graph - self._run_debug() - # Step 2. Dump the output tensors to the dump folder - self.debug_datum.dump_output_tensor() - # Step 3. Dump the Chrome trace to the dump folder - self.debug_datum.dump_chrome_trace() - # Step 4. Display the collected information - self.debug_datum.display_debug_result() - - def run_individual(self, number, repeat=1, min_repeat_ms=0): - ret = self._run_individual(number, repeat, min_repeat_ms) - return ret.strip(",").split(",") if ret else [] - - def exit(self): - """Exits the dump folder and all its contents""" - self._remove_dump_root() +def create(*args, **kwargs): + warnings.warn( + "This function has been moved to tvm.contrib.graph_executor and will be removed " + "in the next TVM release" + ) + return debug_executor.create(*args, **kwargs) diff --git a/python/tvm/contrib/graph_executor.py b/python/tvm/contrib/graph_executor.py new file mode 100644 index 000000000000..a4bc85905f5e --- /dev/null +++ b/python/tvm/contrib/graph_executor.py @@ -0,0 +1,306 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Minimum graph executor that executes graph containing TVM PackedFunc.""" +import numpy as np +import tvm._ffi + +from tvm.rpc import _ffi_api as _rpc_ffi_api +from tvm.rpc import base as rpc_base +from tvm._ffi.base import string_types +from tvm._ffi.runtime_ctypes import Device + + +def create(graph_json_str, libmod, device): + """Create a runtime executor module given a graph and module. + + Parameters + ---------- + graph_json_str : str + The graph to be deployed in json format output by json graph. + The graph can contain operator(tvm_op) that points to the name + of PackedFunc in the libmod. + + libmod : tvm.runtime.Module + The module of the corresponding function + + device : Device or list of Device + The device to deploy the module. It can be local or remote when there + is only one Device. Otherwise, the first device in the list will + be used as this purpose. All device should be given for heterogeneous + execution. + + Returns + ------- + graph_module : GraphModule + Runtime graph module that can be used to execute the graph. + + Note + ---- + See also :py:class:`tvm.contrib.graph_executor.GraphModule` + for examples to directly construct a GraphModule from an exported + relay compiled library. + """ + assert isinstance(graph_json_str, string_types) + + dev, num_rpc_dev, device_type_id = get_device(libmod, device) + + if num_rpc_dev == len(dev): + fcreate = dev[0]._rpc_sess.get_function("tvm.graph_executor.create") + else: + fcreate = tvm._ffi.get_global_func("tvm.graph_executor.create") + + return GraphModule(fcreate(graph_json_str, libmod, *device_type_id)) + + +def get_device(libmod, device): + """Parse and validate all the device(s). + + Parameters + ---------- + libmod : tvm.runtime.Module + The module of the corresponding function + + device : Device or list of Device + + Returns + ------- + device : list of Device + num_rpc_dev : Number of rpc devices + device_type_id : List of device type and device id + """ + + if isinstance(device, Device): + device = [device] + elif not isinstance(device, (list, tuple)): + raise ValueError("dev has to be the type of Device or a list of Device") + for cur_dev in device: + if not isinstance(cur_dev, Device): + raise ValueError("dev has to be the type of Device or a list of Device") + + # device_type_id[0], device_type_id[1] are used as the primary/fallback + # device type and id. All other ones are used as device for + # heterogeneous execution. + num_rpc_dev = 0 + device_type_id = [] + for cur_dev in device: + device_type = cur_dev.device_type + if device_type >= rpc_base.RPC_SESS_MASK: + assert libmod.type_key == "rpc" + assert _rpc_ffi_api.SessTableIndex(libmod) == cur_dev._rpc_sess._tbl_index + num_rpc_dev += 1 + device_type = cur_dev.device_type % rpc_base.RPC_SESS_MASK + device_type_id.append(device_type) + device_type_id.append(cur_dev.device_id) + + if 0 < num_rpc_dev < len(device): + raise ValueError("Either all or none of the devices should be rpc.") + return device, num_rpc_dev, device_type_id + + +class GraphModule(object): + """Wrapper runtime module. + + This is a thin wrapper of the underlying TVM module. 
+ you can also directly call set_input, run, and get_output + of underlying module functions + + Parameters + ---------- + module : tvm.runtime.Module + The internal tvm module that holds the actual graph functions. + + Attributes + ---------- + module : tvm.runtime.Module + The internal tvm module that holds the actual graph functions. + + Examples + -------- + + .. code-block:: python + + import tvm + from tvm import relay + from tvm.contrib import graph_executor + + # build the library using graph executor + lib = relay.build(...) + lib.export_library("compiled_lib.so") + # load it back as a runtime + lib: tvm.runtime.Module = tvm.runtime.load_module("compiled_lib.so") + # Call the library factory function for default and create + # a new runtime.Module, wrap with graph module. + gmod = graph_executor.GraphModule(lib["default"](dev)) + # use the graph module. + gmod.set_input("x", data) + gmod.run() + """ + + def __init__(self, module): + self.module = module + self._set_input = module["set_input"] + self._run = module["run"] + self._get_output = module["get_output"] + self._get_input = module["get_input"] + self._get_num_outputs = module["get_num_outputs"] + self._get_num_inputs = module["get_num_inputs"] + self._load_params = module["load_params"] + self._share_params = module["share_params"] + + def set_input(self, key=None, value=None, **params): + """Set inputs to the module via kwargs + + Parameters + ---------- + key : int or str + The input key + + value : the input value. + The input key + + params : dict of str to NDArray + Additional arguments + """ + if key is not None: + v = self._get_input(key) + if v is None: + raise RuntimeError("Could not find '%s' in graph's inputs" % key) + v.copyfrom(value) + + if params: + # upload big arrays first to avoid memory issue in rpc mode + keys = list(params.keys()) + keys.sort(key=lambda x: -np.prod(params[x].shape)) + for k in keys: + # TODO(zhiics) Skip the weights for submodule in a better way. + # We should use MetadataModule for initialization and remove + # params from set_input + val = self._get_input(k) + if val: + self._get_input(k).copyfrom(params[k]) + + def run(self, **input_dict): + """Run forward execution of the graph + + Parameters + ---------- + input_dict: dict of str to NDArray + List of input values to be feed to + """ + if input_dict: + self.set_input(**input_dict) + self._run() + + def get_num_outputs(self): + """Get the number of outputs from the graph + + Returns + ------- + count : int + The number of outputs. + """ + return self._get_num_outputs() + + def get_num_inputs(self): + """Get the number of inputs to the graph + + Returns + ------- + count : int + The number of inputs. 
+ """ + return self._get_num_inputs() + + def get_input(self, index, out=None): + """Get index-th input to out + + Parameters + ---------- + index : int + The input index + + out : NDArray + The output array container + """ + if out: + self._get_input(index).copyto(out) + return out + + return self._get_input(index) + + def get_output(self, index, out=None): + """Get index-th output to out + + Parameters + ---------- + index : int + The output index + + out : NDArray + The output array container + """ + if out: + self._get_output(index, out) + return out + + return self._get_output(index) + + def debug_get_output(self, node, out): + """Run graph up to node and get the output to out + + Parameters + ---------- + node : int / str + The node index or name + + out : NDArray + The output array container + """ + raise NotImplementedError("Please use debugger.debug_executor as graph_executor instead.") + + def load_params(self, params_bytes): + """Load parameters from serialized byte array of parameter dict. + + Parameters + ---------- + params_bytes : bytearray + The serialized parameter dict. + """ + self._load_params(bytearray(params_bytes)) + + def share_params(self, other, params_bytes): + """Share parameters from pre-existing GraphExecutor instance. + + Parameters + ---------- + other: GraphExecutor + The parent GraphExecutor from which this instance should share + it's parameters. + params_bytes : bytearray + The serialized parameter dict (used only for the parameter names). + """ + self._share_params(other.module, bytearray(params_bytes)) + + def __getitem__(self, key): + """Get internal module function + + Parameters + ---------- + key : str + The key to the module. + """ + return self.module[key] diff --git a/python/tvm/contrib/graph_runtime.py b/python/tvm/contrib/graph_runtime.py index 2eea188be977..f8ecfdd70a5b 100644 --- a/python/tvm/contrib/graph_runtime.py +++ b/python/tvm/contrib/graph_runtime.py @@ -14,293 +14,16 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""Minimum graph runtime that executes graph containing TVM PackedFunc.""" -import numpy as np -import tvm._ffi +"""Deprecated Python API for GraphExecutor.""" -from tvm.rpc import _ffi_api as _rpc_ffi_api -from tvm.rpc import base as rpc_base -from tvm._ffi.base import string_types -from tvm._ffi.runtime_ctypes import Device +import warnings +from . import graph_executor -def create(graph_json_str, libmod, device): - """Create a runtime executor module given a graph and module. - Parameters - ---------- - graph_json_str : str - The graph to be deployed in json format output by json graph. - The graph can contain operator(tvm_op) that points to the name - of PackedFunc in the libmod. - - libmod : tvm.runtime.Module - The module of the corresponding function - - device : Device or list of Device - The device to deploy the module. It can be local or remote when there - is only one Device. Otherwise, the first device in the list will - be used as this purpose. All device should be given for heterogeneous - execution. - - Returns - ------- - graph_module : GraphModule - Runtime graph module that can be used to execute the graph. - - Note - ---- - See also :py:class:`tvm.contrib.graph_runtime.GraphModule` - for examples to directly construct a GraphModule from an exported - relay compiled library. 
- """ - assert isinstance(graph_json_str, string_types) - - dev, num_rpc_dev, device_type_id = get_device(libmod, device) - - if num_rpc_dev == len(dev): - fcreate = dev[0]._rpc_sess.get_function("tvm.graph_runtime.create") - else: - fcreate = tvm._ffi.get_global_func("tvm.graph_runtime.create") - - return GraphModule(fcreate(graph_json_str, libmod, *device_type_id)) - - -def get_device(libmod, device): - """Parse and validate all the device(s). - - Parameters - ---------- - libmod : tvm.runtime.Module - The module of the corresponding function - - device : Device or list of Device - - Returns - ------- - device : list of Device - num_rpc_dev : Number of rpc devices - device_type_id : List of device type and device id - """ - - if isinstance(device, Device): - device = [device] - elif not isinstance(device, (list, tuple)): - raise ValueError("dev has to be the type of Device or a list of Device") - for cur_dev in device: - if not isinstance(cur_dev, Device): - raise ValueError("dev has to be the type of Device or a list of Device") - - # device_type_id[0], device_type_id[1] are used as the primary/fallback - # device type and id. All other ones are used as device for - # heterogeneous execution. - num_rpc_dev = 0 - device_type_id = [] - for cur_dev in device: - device_type = cur_dev.device_type - if device_type >= rpc_base.RPC_SESS_MASK: - assert libmod.type_key == "rpc" - assert _rpc_ffi_api.SessTableIndex(libmod) == cur_dev._rpc_sess._tbl_index - num_rpc_dev += 1 - device_type = cur_dev.device_type % rpc_base.RPC_SESS_MASK - device_type_id.append(device_type) - device_type_id.append(cur_dev.device_id) - - if 0 < num_rpc_dev < len(device): - raise ValueError("Either all or none of the devices should be rpc.") - return device, num_rpc_dev, device_type_id - - -class GraphModule(object): - """Wrapper runtime module. - - This is a thin wrapper of the underlying TVM module. - you can also directly call set_input, run, and get_output - of underlying module functions - - Parameters - ---------- - module : tvm.runtime.Module - The internal tvm module that holds the actual graph functions. - - Attributes - ---------- - module : tvm.runtime.Module - The internal tvm module that holds the actual graph functions. - - Examples - -------- - - .. code-block:: python - - import tvm - from tvm import relay - from tvm.contrib import graph_runtime - - # build the library using graph runtime - lib = relay.build(...) - lib.export_library("compiled_lib.so") - # load it back as a runtime - lib: tvm.runtime.Module = tvm.runtime.load_module("compiled_lib.so") - # Call the library factory function for default and create - # a new runtime.Module, wrap with graph module. - gmod = graph_runtime.GraphModule(lib["default"](dev)) - # use the graph module. - gmod.set_input("x", data) - gmod.run() - """ - - def __init__(self, module): - self.module = module - self._set_input = module["set_input"] - self._run = module["run"] - self._get_output = module["get_output"] - self._get_input = module["get_input"] - self._get_num_outputs = module["get_num_outputs"] - self._get_num_inputs = module["get_num_inputs"] - self._load_params = module["load_params"] - self._share_params = module["share_params"] - - def set_input(self, key=None, value=None, **params): - """Set inputs to the module via kwargs - - Parameters - ---------- - key : int or str - The input key - - value : the input value. 
- The input key - - params : dict of str to NDArray - Additional arguments - """ - if key is not None: - v = self._get_input(key) - if v is None: - raise RuntimeError("Could not find '%s' in graph's inputs" % key) - v.copyfrom(value) - - if params: - # upload big arrays first to avoid memory issue in rpc mode - keys = list(params.keys()) - keys.sort(key=lambda x: -np.prod(params[x].shape)) - for k in keys: - # TODO(zhiics) Skip the weights for submodule in a better way. - # We should use MetadataModule for initialization and remove - # params from set_input - val = self._get_input(k) - if val: - self._get_input(k).copyfrom(params[k]) - - def run(self, **input_dict): - """Run forward execution of the graph - - Parameters - ---------- - input_dict: dict of str to NDArray - List of input values to be feed to - """ - if input_dict: - self.set_input(**input_dict) - self._run() - - def get_num_outputs(self): - """Get the number of outputs from the graph - - Returns - ------- - count : int - The number of outputs. - """ - return self._get_num_outputs() - - def get_num_inputs(self): - """Get the number of inputs to the graph - - Returns - ------- - count : int - The number of inputs. - """ - return self._get_num_inputs() - - def get_input(self, index, out=None): - """Get index-th input to out - - Parameters - ---------- - index : int - The input index - - out : NDArray - The output array container - """ - if out: - self._get_input(index).copyto(out) - return out - - return self._get_input(index) - - def get_output(self, index, out=None): - """Get index-th output to out - - Parameters - ---------- - index : int - The output index - - out : NDArray - The output array container - """ - if out: - self._get_output(index, out) - return out - - return self._get_output(index) - - def debug_get_output(self, node, out): - """Run graph up to node and get the output to out - - Parameters - ---------- - node : int / str - The node index or name - - out : NDArray - The output array container - """ - raise NotImplementedError("Please use debugger.debug_runtime as graph_runtime instead.") - - def load_params(self, params_bytes): - """Load parameters from serialized byte array of parameter dict. - - Parameters - ---------- - params_bytes : bytearray - The serialized parameter dict. - """ - self._load_params(bytearray(params_bytes)) - - def share_params(self, other, params_bytes): - """Share parameters from pre-existing GraphRuntime instance. - - Parameters - ---------- - other: GraphRuntime - The parent GraphRuntime from which this instance should share - it's parameters. - params_bytes : bytearray - The serialized parameter dict (used only for the parameter names). - """ - self._share_params(other.module, bytearray(params_bytes)) - - def __getitem__(self, key): - """Get internal module function - - Parameters - ---------- - key : str - The key to the module. - """ - return self.module[key] +def create(*args, **kwargs): + warnings.warn( + "This function has been moved to tvm.contrib.graph_executor and will be removed " + "in the next TVM release" + ) + return graph_executor.create(*args, **kwargs) diff --git a/python/tvm/driver/tvmc/compiler.py b/python/tvm/driver/tvmc/compiler.py index 83791e50f6d5..5bdb578f2c16 100644 --- a/python/tvm/driver/tvmc/compiler.py +++ b/python/tvm/driver/tvmc/compiler.py @@ -143,7 +143,7 @@ def compile_model( This function takes a union of the arguments of both frontends.load_model and compiler.compile_relay. The resulting TVM module can be executed using - the graph runtime. 
+ the graph executor. Parameters ---------- diff --git a/python/tvm/driver/tvmc/runner.py b/python/tvm/driver/tvmc/runner.py index 252647557ab5..b4c4e75aa37a 100644 --- a/python/tvm/driver/tvmc/runner.py +++ b/python/tvm/driver/tvmc/runner.py @@ -26,8 +26,8 @@ import numpy as np from tvm import rpc from tvm.autotvm.measure import request_remote -from tvm.contrib import graph_runtime as runtime -from tvm.contrib.debugger import debug_runtime +from tvm.contrib import graph_executor as runtime +from tvm.contrib.debugger import debug_executor from tvm.relay import load_param_dict from . import common @@ -77,7 +77,7 @@ def add_run_parser(subparsers): "--profile", action="store_true", help="generate profiling data from the runtime execution. " - "Using --profile requires the Graph Runtime Debug enabled on TVM. " + "Using --profile requires the Graph Executor Debug enabled on TVM. " "Profiling may also have an impact on inference time, " "making it take longer to be generated.", ) @@ -296,7 +296,7 @@ def run_module( repeat=1, profile=False, ): - """Run a compiled graph runtime module locally or remotely with + """Run a compiled graph executor module locally or remotely with optional input values. If input tensors are not specified explicitly, they can be filled @@ -370,7 +370,7 @@ def run_module( if profile: logger.debug("creating runtime with profiling enabled") - module = debug_runtime.create(graph, lib, dev, dump_root="./prof") + module = debug_executor.create(graph, lib, dev, dump_root="./prof") else: logger.debug("creating runtime with profiling disabled") module = runtime.create(graph, lib, dev) diff --git a/python/tvm/micro/__init__.py b/python/tvm/micro/__init__.py index ade63f2da9e4..a70cb96d9b13 100644 --- a/python/tvm/micro/__init__.py +++ b/python/tvm/micro/__init__.py @@ -25,8 +25,8 @@ from .micro_binary import MicroBinary from .model_library_format import export_model_library_format, UnsupportedInModelLibraryFormatError from .session import ( - create_local_graph_runtime, - create_local_debug_runtime, + create_local_graph_executor, + create_local_debug_executor, Session, SessionTerminatedError, ) diff --git a/python/tvm/micro/model_library_format.py b/python/tvm/micro/model_library_format.py index 4ce80be647c1..6768e03f4473 100644 --- a/python/tvm/micro/model_library_format.py +++ b/python/tvm/micro/model_library_format.py @@ -24,7 +24,7 @@ import tarfile from ..contrib import utils -from ..relay.backend import graph_runtime_factory +from ..relay.backend import graph_executor_factory from ..relay import param_dict @@ -117,7 +117,7 @@ def _build_memory_map(graph_json): return memory_map -def export_model_library_format(mod: graph_runtime_factory.GraphRuntimeFactoryModule, file_name): +def export_model_library_format(mod: graph_executor_factory.GraphExecutorFactoryModule, file_name): """Export the build artifact in Model Library Format. This function creates a .tar archive containing the build artifacts in a standardized @@ -126,7 +126,7 @@ def export_model_library_format(mod: graph_runtime_factory.GraphRuntimeFactoryMo Parameters ---------- - mod : tvm.relay.backend.graph_runtime_factory.GraphRuntimeFactoryModule + mod : tvm.relay.backend.graph_executor_factory.GraphExecutorFactoryModule The return value of tvm.relay.build, which will be exported into Model Library Format. file_name : str Path to the .tar archive to generate. 
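A minimal usage sketch of the renamed export path may help orient readers of this hunk; `mod`, `params`, the "c" target, and the output file name below are illustrative placeholders rather than part of this patch:

.. code-block:: python

    import tvm
    from tvm import relay
    from tvm.micro import export_model_library_format

    # `mod` and `params` stand in for a Relay module and its parameters.
    with tvm.transform.PassContext(opt_level=3):
        factory = relay.build(mod, target="c", params=params)

    # After this rename, `factory` is a GraphExecutorFactoryModule; the export
    # writes the standardized .tar archive described in the docstring above.
    export_model_library_format(factory, "module.tar")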
diff --git a/python/tvm/micro/session.py b/python/tvm/micro/session.py index 8987883abafb..78bf03379939 100644 --- a/python/tvm/micro/session.py +++ b/python/tvm/micro/session.py @@ -22,8 +22,8 @@ from ..error import register_error from .._ffi import get_global_func -from ..contrib import graph_runtime -from ..contrib.debugger import debug_runtime +from ..contrib import graph_executor +from ..contrib.debugger import debug_executor from ..rpc import RPCSession from .transport import IoTimeoutError from .transport import TransportLogger @@ -92,7 +92,7 @@ def __init__( self.timeout_override = timeout_override self._rpc = None - self._graph_runtime = None + self._graph_executor = None def get_system_lib(self): return self._rpc.get_function("runtime.SystemLib")() @@ -192,8 +192,8 @@ def lookup_remote_linked_param(mod, storage_id, template_tensor, device): ) -def create_local_graph_runtime(graph_json_str, mod, device): - """Create a local graph runtime driving execution on the remote CPU device given. +def create_local_graph_executor(graph_json_str, mod, device): + """Create a local graph executor driving execution on the remote CPU device given. Parameters ---------- @@ -208,17 +208,17 @@ def create_local_graph_runtime(graph_json_str, mod, device): Returns ------- - tvm.contrib.GraphRuntime : - A local graph runtime instance that executes on the remote device. + tvm.contrib.GraphExecutor : + A local graph executor instance that executes on the remote device. """ device_type_id = [device.device_type, device.device_id] - fcreate = get_global_func("tvm.graph_runtime.create") - return graph_runtime.GraphModule( + fcreate = get_global_func("tvm.graph_executor.create") + return graph_executor.GraphModule( fcreate(graph_json_str, mod, lookup_remote_linked_param, *device_type_id) ) -def create_local_debug_runtime(graph_json_str, mod, device, dump_root=None): +def create_local_debug_executor(graph_json_str, mod, device, dump_root=None): """Create a local debug runtime driving execution on the remote CPU device given. Parameters @@ -237,12 +237,12 @@ def create_local_debug_runtime(graph_json_str, mod, device, dump_root=None): Returns ------- - tvm.contrib.GraphRuntime : - A local graph runtime instance that executes on the remote device. + tvm.contrib.GraphExecutor : + A local graph executor instance that executes on the remote device. """ device_type_id = [device.device_type, device.device_id] - fcreate = get_global_func("tvm.graph_runtime_debug.create") - return debug_runtime.GraphModuleDebug( + fcreate = get_global_func("tvm.graph_executor_debug.create") + return debug_executor.GraphModuleDebug( fcreate(graph_json_str, mod, lookup_remote_linked_param, *device_type_id), [device], graph_json_str, diff --git a/python/tvm/relay/analysis/analysis.py b/python/tvm/relay/analysis/analysis.py index 3928f77b2607..661d7523ad77 100644 --- a/python/tvm/relay/analysis/analysis.py +++ b/python/tvm/relay/analysis/analysis.py @@ -405,7 +405,7 @@ def search_fc_transpose(expr): def get_calibration_data(mod, data): """Get the calibration data of a given relay graph - This pass uses the graph runtime to get the calibration data of a module, which + This pass uses the graph executor to get the calibration data of a module, which includes the input and output values of each function. The returned data uses the GlobalVar of each function as a key. Users can further access the inputs and outputs by using `inputs` or `outputs` as the key. 
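A hedged sketch of how the calibration data described above is typically consumed; the `mod` and `data` bindings are placeholders, and the record layout follows the docstring rather than anything introduced by this patch:

.. code-block:: python

    from tvm.relay.analysis import get_calibration_data

    # `mod` is a partitioned Relay module; `data` maps graph input names to NDArrays.
    calib = get_calibration_data(mod, data)

    # Each record is keyed by a function's GlobalVar and exposes the observed
    # boundary values under "inputs" and "outputs", as the docstring notes.
    for gvar, record in calib.items():
        print(gvar, len(record["inputs"]), len(record["outputs"]))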
diff --git a/python/tvm/relay/backend/graph_runtime_codegen.py b/python/tvm/relay/backend/graph_executor_codegen.py similarity index 91% rename from python/tvm/relay/backend/graph_runtime_codegen.py rename to python/tvm/relay/backend/graph_executor_codegen.py index ec679aee894c..f24bf2c2b55b 100644 --- a/python/tvm/relay/backend/graph_runtime_codegen.py +++ b/python/tvm/relay/backend/graph_executor_codegen.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. """ -A compiler from a Relay expression to TVM's graph runtime. +A compiler from a Relay expression to TVM's graph executor. The compiler is built from a few pieces. @@ -29,9 +29,9 @@ graph langauge is composed of Node, NodeRef, InputNode, OpNode. This "little language" represents programs in TVM's graph format. -To connect to the graph runtime, we use a printer that converts our graph format +To connect to the graph executor, we use a printer that converts our graph format into TVM's JSON format. The resulting string can be loaded by -contrib.graph_runtime or any other TVM runtime compatible systems. +contrib.graph_executor or any other TVM runtime compatible systems. """ from tvm.runtime.ndarray import empty from tvm.relay import _build_module @@ -39,11 +39,11 @@ from tvm.tir import expr as _expr -class GraphRuntimeCodegen(object): +class GraphExecutorCodegen(object): """The compiler from Relay to the TVM runtime system.""" def __init__(self, mod, target): - self._mod = _build_module._GraphRuntimeCodegen() + self._mod = _build_module._GraphExecutorCodegen() self._init = self._mod["init"] self._codegen = self._mod["codegen"] self._get_graph_json = self._mod["get_graph_json"] diff --git a/python/tvm/relay/backend/graph_runtime_factory.py b/python/tvm/relay/backend/graph_executor_factory.py similarity index 88% rename from python/tvm/relay/backend/graph_runtime_factory.py rename to python/tvm/relay/backend/graph_executor_factory.py index e92ae710ca0b..d6959d22e5c8 100644 --- a/python/tvm/relay/backend/graph_runtime_factory.py +++ b/python/tvm/relay/backend/graph_executor_factory.py @@ -14,16 +14,16 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""Graph runtime factory.""" +"""Graph executor factory.""" import warnings from ..._ffi.base import string_types from ..._ffi.registry import get_global_func from ...runtime import ndarray -class GraphRuntimeFactoryModule: - """Graph runtime factory module. - This is a module of graph runtime factory +class GraphExecutorFactoryModule: + """Graph executor factory module. + This is a module of graph executor factory Parameters ---------- @@ -43,7 +43,7 @@ class GraphRuntimeFactoryModule: def __init__(self, ir_mod, target, graph_json_str, libmod, libmod_name, params): assert isinstance(graph_json_str, string_types) - fcreate = get_global_func("tvm.graph_runtime_factory.create") + fcreate = get_global_func("tvm.graph_executor_factory.create") args = [] for k, v in params.items(): args.append(k) @@ -77,9 +77,9 @@ def __getitem__(self, item): def __iter__(self): warnings.warn( - "legacy graph runtime behavior of producing json / lib / params will be " + "legacy graph executor behavior of producing json / lib / params will be " "removed in the next release." 
- " Please see documents of tvm.contrib.graph_runtime.GraphModule for the " + " Please see documents of tvm.contrib.graph_executor.GraphModule for the " " new recommended usage.", DeprecationWarning, 2, diff --git a/python/tvm/relay/build_module.py b/python/tvm/relay/build_module.py index efe495e816a2..4795a2d38685 100644 --- a/python/tvm/relay/build_module.py +++ b/python/tvm/relay/build_module.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. """ -Construct the necessary state for the TVM graph runtime +Construct the necessary state for the TVM graph executor from a Relay expression. """ import warnings @@ -27,13 +27,13 @@ from tvm.tir import expr as tvm_expr from .. import nd as _nd, autotvm, register_func from ..target import Target -from ..contrib import graph_runtime as _graph_rt +from ..contrib import graph_executor as _graph_rt from . import _build_module from . import ty as _ty from . import expr as _expr from . import function as _function from .transform import InferType -from .backend import graph_runtime_factory as _graph_runtime_factory +from .backend import graph_executor_factory as _graph_executor_factory from .backend import interpreter as _interpreter from .backend.vm import VMExecutor @@ -70,7 +70,7 @@ def _convert_param_map(params): class BuildModule(object): - """Build an IR module to run on TVM graph runtime. This class is used + """Build an IR module to run on TVM graph executor. This class is used to expose the `RelayBuildModule` APIs implemented in C++. """ @@ -110,8 +110,8 @@ def build(self, mod, target=None, target_host=None, params=None): Returns ------- - factory_module : tvm.relay.backend.graph_runtime_factory.GraphRuntimeFactoryModule - The runtime factory for the TVM graph runtime. + factory_module : tvm.relay.backend.graph_executor_factory.GraphExecutorFactoryModule + The runtime factory for the TVM graph executor. """ target = _update_target(target) @@ -211,7 +211,7 @@ def _build_module_no_factory(mod, target=None, target_host=None, params=None, mo def build(ir_mod, target=None, target_host=None, params=None, mod_name="default"): # fmt: off # pylint: disable=line-too-long - """Helper function that builds a Relay function to run on TVM graph runtime. + """Helper function that builds a Relay function to run on TVM graph executor. Parameters ---------- @@ -241,7 +241,7 @@ def build(ir_mod, target=None, target_host=None, params=None, mod_name="default" Returns ------- graph_json : str - The json string that can be accepted by graph runtime. + The json string that can be accepted by graph executor. mod : tvm.Module The module containing necessary libraries. 
@@ -281,10 +281,10 @@ def build(ir_mod, target=None, target_host=None, params=None, mod_name="default" with tophub_context: bld_mod = BuildModule() graph_json, runtime_mod, params = bld_mod.build(ir_mod, target, target_host, params) - runtime_mod = _graph_runtime_factory.GraphRuntimeFactoryModule( + executor_factory = _graph_executor_factory.GraphExecutorFactoryModule( ir_mod, target, graph_json, runtime_mod, mod_name, params ) - return runtime_mod + return executor_factory def optimize(mod, target=None, params=None): @@ -392,7 +392,9 @@ def _make_executor(self, expr=None): self.mod = InferType()(self.mod) ret_type = self.mod["main"].checked_type.ret_type if _ty.is_dynamic(ret_type): - raise ValueError("Graph Runtime only supports static graphs, got output type", ret_type) + raise ValueError( + "Graph Executor only supports static graphs, got output type", ret_type + ) mod = build(self.mod, target=self.target) gmodule = _graph_rt.GraphModule(mod["default"](self.device)) @@ -444,7 +446,7 @@ def create_executor(kind="debug", mod=None, device=None, target="llvm"): ---------- kind : str The type of executor. Avaliable options are `debug` for the - interpreter, `graph` for the graph runtime, and `vm` for the virtual + interpreter, `graph` for the graph executor, and `vm` for the virtual machine. mod : :py:class:`~tvm.IRModule` diff --git a/python/tvm/relay/frontend/common.py b/python/tvm/relay/frontend/common.py index 7bb70cec5ee5..c2546205c571 100644 --- a/python/tvm/relay/frontend/common.py +++ b/python/tvm/relay/frontend/common.py @@ -530,13 +530,13 @@ def infer_value(input_val, params, mod=None): try: # TODO(kevinthesun): Use VM for all cases. # pylint: disable=import-outside-toplevel - from tvm.contrib import graph_runtime + from tvm.contrib import graph_executor func = _function.Function(analysis.free_vars(input_val), input_val) with tvm.transform.PassContext(opt_level=0): lib = tvm.relay.build(func, target="llvm", params=params) dev = tvm.cpu(0) - m = graph_runtime.GraphModule(lib["default"](dev)) + m = graph_executor.GraphModule(lib["default"](dev)) m.run() return m.get_output(0) except Exception: diff --git a/python/tvm/relay/op/contrib/arm_compute_lib.py b/python/tvm/relay/op/contrib/arm_compute_lib.py index fabb639845b6..17fdbf941e08 100644 --- a/python/tvm/relay/op/contrib/arm_compute_lib.py +++ b/python/tvm/relay/op/contrib/arm_compute_lib.py @@ -30,7 +30,7 @@ def is_arm_compute_runtime_enabled(): - """Check if the ACL graph runtime is present. + """Check if the ACL graph executor is present. Returns ------- diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py index afdea9712342..a36b66c8f0dd 100644 --- a/python/tvm/relay/op/contrib/tensorrt.py +++ b/python/tvm/relay/op/contrib/tensorrt.py @@ -29,7 +29,7 @@ def is_tensorrt_runtime_enabled(): - """Check if the TensorRT graph runtime is present. + """Check if the TensorRT graph executor is present. Returns ------- ret: bool diff --git a/python/tvm/relay/quantize/_calibrate.py b/python/tvm/relay/quantize/_calibrate.py index 20afa1909ac9..a906a98dccd4 100644 --- a/python/tvm/relay/quantize/_calibrate.py +++ b/python/tvm/relay/quantize/_calibrate.py @@ -29,7 +29,7 @@ from .. import expr as _expr from .. import analysis as _analysis from .. 
import build_module as _build_module -from ...contrib import graph_runtime +from ...contrib import graph_executor from .kl_divergence import _find_scale_by_kl @@ -46,7 +46,7 @@ def _get_profile_runtime(mod): with tvm.transform.PassContext(opt_level=3): lib = _build_module.build(func, target=target) - runtime = graph_runtime.GraphModule(lib["default"](dev)) + runtime = graph_executor.GraphModule(lib["default"](dev)) return runtime diff --git a/rust/tvm-graph-rt/Cargo.toml b/rust/tvm-graph-rt/Cargo.toml index 13837f62695d..5c492393a75e 100644 --- a/rust/tvm-graph-rt/Cargo.toml +++ b/rust/tvm-graph-rt/Cargo.toml @@ -19,7 +19,7 @@ name = "tvm-graph-rt" version = "0.1.0" license = "Apache-2.0" -description = "A static graph runtime for TVM." +description = "A static graph executor for TVM." repository = "https://github.com/apache/tvm" readme = "README.md" keywords = ["tvm"] diff --git a/rust/tvm-graph-rt/src/graph.rs b/rust/tvm-graph-rt/src/graph.rs index 0174b0cacf3e..de2e7dddff5c 100644 --- a/rust/tvm-graph-rt/src/graph.rs +++ b/rust/tvm-graph-rt/src/graph.rs @@ -39,7 +39,7 @@ use crate::{errors::*, Module, Storage, Tensor}; // @see `kTVMNDArrayMagic` in `ndarray.h` const _NDARRAY_MAGIC: u64 = 0xDD5E_40F0_96B4_A13F; -// @see `kTVMNDArrayListMagic` in `graph_runtime.h` +// @see `kTVMNDArrayListMagic` in `graph_executor.h` const _NDARRAY_LIST_MAGIC: u64 = 0xF7E5_8D4F_0504_9CB7; /// A TVM computation graph. diff --git a/rust/tvm/README.md b/rust/tvm/README.md index 75fabe7d9a1b..b518f93195b7 100644 --- a/rust/tvm/README.md +++ b/rust/tvm/README.md @@ -37,7 +37,7 @@ The Rust bindings are composed of a few crates: - The [tvm_rt](https://tvm.apache.org/docs/api/rust/tvm_rt/index.html) crate which exposes Rust bindings to the TVM runtime APIs. - The [tvm_sys] crate which provides raw bindings and linkage to the TVM C++ library. -- The [tvm_graph_rt] crate which implements a version of the TVM graph runtime in Rust vs. C++. +- The [tvm_graph_rt] crate which implements a version of the TVM graph executor in Rust vs. C++. 
These crates have been recently refactored and reflect a much different philosophy than previous bindings, as well as much increased support for more of the TVM API including diff --git a/rust/tvm/examples/resnet/src/build_resnet.py b/rust/tvm/examples/resnet/src/build_resnet.py index 6c99dee22bf5..13c499b54deb 100644 --- a/rust/tvm/examples/resnet/src/build_resnet.py +++ b/rust/tvm/examples/resnet/src/build_resnet.py @@ -29,7 +29,7 @@ from tvm import te from tvm import relay, runtime from tvm.relay import testing -from tvm.contrib import graph_runtime, cc +from tvm.contrib import graph_executor, cc from PIL import Image from tvm.contrib.download import download_testdata from mxnet.gluon.model_zoo.vision import get_model @@ -141,7 +141,7 @@ def test_build(build_dir): params = bytearray(open(osp.join(build_dir, "deploy_param.params"), "rb").read()) input_data = get_cat_image() dev = tvm.cpu() - module = graph_runtime.create(graph, lib, dev) + module = graph_executor.create(graph, lib, dev) module.load_params(params) module.run(data=input_data) out = module.get_output(0).asnumpy() @@ -151,7 +151,7 @@ def test_build(build_dir): if __name__ == "__main__": - logger.info("Compiling the model to graph runtime.") + logger.info("Compiling the model to graph executor.") build(build_dir) logger.info("Testing the model's predication on test data.") test_build(build_dir) diff --git a/rust/tvm/src/lib.rs b/rust/tvm/src/lib.rs index 047b2cf220e0..81abe338bd1b 100644 --- a/rust/tvm/src/lib.rs +++ b/rust/tvm/src/lib.rs @@ -24,7 +24,7 @@ //! One particular use case is that given optimized deep learning model artifacts, //! (compiled with TVM) which include a shared library //! `lib.so`, `graph.json` and a byte-array `param.params`, one can load them -//! in Rust idiomatically to create a TVM Graph Runtime and +//! in Rust idiomatically to create a TVM Graph Executor and //! run the model for some inputs and get the //! desired predictions *all in Rust*. //! diff --git a/rust/tvm/src/runtime/graph_rt.rs b/rust/tvm/src/runtime/graph_rt.rs index cba2875c9952..421a00386cf5 100644 --- a/rust/tvm/src/runtime/graph_rt.rs +++ b/rust/tvm/src/runtime/graph_rt.rs @@ -22,19 +22,19 @@ use std::convert::TryInto; use crate::runtime::Function; use crate::{runtime::function::Result, runtime::ByteArray, Device, Module, NDArray}; -/// An instance of the C++ graph runtime. +/// An instance of the C++ graph executor. /// /// An efficient and light weight runtime for static deep learning models. pub struct GraphRt { - /// The backing graph runtime module which exposes a set of packed functions + /// The backing graph executor module which exposes a set of packed functions /// which can be invoked by a client. /// - /// In the graph runtime module, it exposes create, load_params, set_input, get_output, and run. + /// In the graph executor module, it exposes create, load_params, set_input, get_output, and run. module: Module, } impl GraphRt { - /// Create a graph runtime directly from a runtime module. + /// Create a graph executor directly from a runtime module. pub fn from_module(module: Module, dev: Device) -> Result { let default: Box Result> = module.get_function("default", false)?.into(); @@ -44,9 +44,9 @@ impl GraphRt { }) } - /// Create a graph runtime from the deprecated graph, lib, dev triple. + /// Create a graph executor from the deprecated graph, lib, dev triple. 
pub fn create_from_parts(graph: &str, lib: Module, dev: Device) -> Result { - let runtime_create_fn = Function::get("tvm.graph_runtime.create").unwrap(); + let runtime_create_fn = Function::get("tvm.graph_executor.create").unwrap(); let runtime_create_fn_ret = runtime_create_fn.invoke(vec![ graph.into(), @@ -55,9 +55,9 @@ impl GraphRt { // NOTE you must pass the device id in as i32 because that's what TVM expects (dev.device_id as i32).into(), ]); - let graph_runtime_module: Module = runtime_create_fn_ret?.try_into()?; + let graph_executor_module: Module = runtime_create_fn_ret?.try_into()?; Ok(Self { - module: graph_runtime_module, + module: graph_executor_module, }) } @@ -92,13 +92,13 @@ impl GraphRt { Ok(()) } - /// Extract the ith output from the graph runtime and returns it. + /// Extract the ith output from the graph executor and returns it. pub fn get_output(&mut self, i: i64) -> Result { let get_output_fn = self.module.get_function("get_output", false)?; get_output_fn.invoke(vec![i.into()])?.try_into() } - /// Extract the ith output from the graph runtime and write the results into output. + /// Extract the ith output from the graph executor and write the results into output. pub fn get_output_into(&mut self, i: i64, output: NDArray) -> Result<()> { let get_output_fn = self.module.get_function("get_output", false)?; get_output_fn.invoke(vec![i.into(), output.into()])?; diff --git a/src/relay/analysis/get_calibration_data.cc b/src/relay/analysis/get_calibration_data.cc index 70fe2a68f21e..12bab1e38ddd 100644 --- a/src/relay/analysis/get_calibration_data.cc +++ b/src/relay/analysis/get_calibration_data.cc @@ -36,7 +36,7 @@ namespace relay { /*! * \brief This function returns a module that will be used by - * the relay graph runtime for collecting the calibration data. + * the relay graph executor for collecting the calibration data. * To do that, we first make all inputs and outputs of each * function into the final output (i.e., the final output is a * tuple of tensors). Then, we change the compiler attribute of @@ -106,7 +106,7 @@ IRModule GetCalibrateModule(IRModule module) { } } } - // reset the attribute of functions for running graph runtime + // reset the attribute of functions for running graph executor for (const auto& pair : glob_funcs) { if (auto* fn = pair.second.as()) { auto func = GetRef(fn); diff --git a/src/relay/backend/build_module.cc b/src/relay/backend/build_module.cc index 08846925bede..3995d5ab3568 100644 --- a/src/relay/backend/build_module.cc +++ b/src/relay/backend/build_module.cc @@ -19,7 +19,7 @@ /*! * \file relay/backend/build_module.cc - * \brief Code generation for TVM's graph runtime. + * \brief Code generation for TVM's graph executor. */ #include #include @@ -60,7 +60,7 @@ struct BuildOutput { struct GraphCodegen { public: GraphCodegen() { - auto pf = GetPackedFunc("relay.build_module._GraphRuntimeCodegen"); + auto pf = GetPackedFunc("relay.build_module._GraphExecutorCodegen"); mod = (*pf)(); } ~GraphCodegen() {} @@ -228,7 +228,7 @@ class RelayBuildModule : public runtime::ModuleNode { const char* type_key() const final { return "RelayBuildModule"; } /*! 
- * \brief Build relay IRModule for graph runtime + * \brief Build relay IRModule for graph executor * * \param mod Relay IRModule * \param target Target device diff --git a/src/relay/backend/compile_engine.cc b/src/relay/backend/compile_engine.cc index f492b70565ac..0777b19ec557 100644 --- a/src/relay/backend/compile_engine.cc +++ b/src/relay/backend/compile_engine.cc @@ -262,7 +262,7 @@ class ScheduleGetter : public backend::MemoizedExprTranslator> ICHECK(tuple_type) << "Expect output to be a tuple type"; ICHECK_EQ(tuple_type->fields.size(), outputs.size()); } - // Set the name to `__copy`. It will be detected in graph runtime to perform + // Set the name to `__copy`. It will be detected in graph executor to perform // data copy across devices. if (op == device_copy_op_) { readable_name_stream_.str(std::string()); diff --git a/src/relay/backend/contrib/arm_compute_lib/codegen.cc b/src/relay/backend/contrib/arm_compute_lib/codegen.cc index e0669ae64bdb..8098c8d51274 100644 --- a/src/relay/backend/contrib/arm_compute_lib/codegen.cc +++ b/src/relay/backend/contrib/arm_compute_lib/codegen.cc @@ -376,12 +376,12 @@ runtime::Module ACLCompiler(const ObjectRef& ref) { TVM_REGISTER_GLOBAL("relay.ext.arm_compute_lib").set_body_typed(ACLCompiler); /*! - * \brief Check whether ACL graph runtime is used. + * \brief Check whether ACL graph executor is used. * - * \return True if ACL graph runtime is enabled, False if not. + * \return True if ACL graph executor is enabled, False if not. */ inline constexpr bool IsACLRuntimeEnabled() { -#if TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB +#if TVM_GRAPH_EXECUTOR_ARM_COMPUTE_LIB return true; #else return false; diff --git a/src/relay/backend/contrib/tensorrt/codegen.cc b/src/relay/backend/contrib/tensorrt/codegen.cc index 059dbc192a04..e121b6010ad8 100644 --- a/src/relay/backend/contrib/tensorrt/codegen.cc +++ b/src/relay/backend/contrib/tensorrt/codegen.cc @@ -32,7 +32,7 @@ #include "../../utils.h" #include "../codegen_json/codegen_json.h" -#if TVM_GRAPH_RUNTIME_TENSORRT +#if TVM_GRAPH_EXECUTOR_TENSORRT #include "NvInfer.h" #endif @@ -217,15 +217,15 @@ runtime::Module TensorRTCompiler(const ObjectRef& ref) { TVM_REGISTER_GLOBAL("relay.ext.tensorrt").set_body_typed(TensorRTCompiler); /*! - * \brief Check whether TensorRT graph runtime is enabled. + * \brief Check whether TensorRT graph executor is enabled. * \return True if enabled, False if not. */ inline constexpr bool IsTensorRTRuntimeEnabled() { -#if TVM_GRAPH_RUNTIME_TENSORRT +#if TVM_GRAPH_EXECUTOR_TENSORRT return true; #else return false; -#endif // TVM_GRAPH_RUNTIME_TENSORRT +#endif // TVM_GRAPH_EXECUTOR_TENSORRT } /*! @@ -234,11 +234,11 @@ inline constexpr bool IsTensorRTRuntimeEnabled() { * runtime is not enabled. */ Array GetTensorRTVersion() { -#if TVM_GRAPH_RUNTIME_TENSORRT +#if TVM_GRAPH_EXECUTOR_TENSORRT return {Integer(NV_TENSORRT_MAJOR), Integer(NV_TENSORRT_MINOR), Integer(NV_TENSORRT_PATCH)}; #else return {}; -#endif // TVM_GRAPH_RUNTIME_TENSORRT +#endif // TVM_GRAPH_EXECUTOR_TENSORRT } TVM_REGISTER_GLOBAL("relay.op.is_tensorrt_runtime_enabled") diff --git a/src/relay/backend/graph_runtime_codegen.cc b/src/relay/backend/graph_executor_codegen.cc similarity index 96% rename from src/relay/backend/graph_runtime_codegen.cc rename to src/relay/backend/graph_executor_codegen.cc index 7ed150495104..72989b5ba46a 100644 --- a/src/relay/backend/graph_runtime_codegen.cc +++ b/src/relay/backend/graph_executor_codegen.cc @@ -19,7 +19,7 @@ /*! 
* \file relay/backend/graph_codegen.cc - * \brief Graph runtime codegen + * \brief Graph executor codegen */ #include @@ -181,10 +181,10 @@ class GraphOpNode : public GraphNode { const std::string op_type_name_{"tvm_op"}; }; -/*! \brief Code generator for graph runtime */ -class GraphRuntimeCodegen : public backend::MemoizedExprTranslator> { +/*! \brief Code generator for graph executor */ +class GraphExecutorCodegen : public backend::MemoizedExprTranslator> { public: - GraphRuntimeCodegen(runtime::Module* mod, const TargetsMap& targets) : mod_(mod) { + GraphExecutorCodegen(runtime::Module* mod, const TargetsMap& targets) : mod_(mod) { compile_engine_ = CompileEngine::Global(); targets_ = targets; } @@ -541,7 +541,7 @@ class GraphRuntimeCodegen : public backend::MemoizedExprTranslator& sptr_to_self) { if (name == "init") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { @@ -573,8 +573,8 @@ class GraphRuntimeCodegenModule : public runtime::ModuleNode { ICHECK(dev_type); targets[dev_type->value] = it.second; } - codegen_ = - std::make_shared(reinterpret_cast(mod), targets); + codegen_ = std::make_shared(reinterpret_cast(mod), + targets); }); } else if (name == "codegen") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { @@ -619,19 +619,19 @@ class GraphRuntimeCodegenModule : public runtime::ModuleNode { } } - const char* type_key() const final { return "RelayGraphRuntimeCodegenModule"; } + const char* type_key() const final { return "RelayGraphExecutorCodegenModule"; } private: - std::shared_ptr codegen_; + std::shared_ptr codegen_; LoweredOutput output_; }; runtime::Module CreateGraphCodegenMod() { - auto ptr = make_object(); + auto ptr = make_object(); return runtime::Module(ptr); } -TVM_REGISTER_GLOBAL("relay.build_module._GraphRuntimeCodegen") +TVM_REGISTER_GLOBAL("relay.build_module._GraphExecutorCodegen") .set_body([](TVMArgs args, TVMRetValue* rv) { *rv = CreateGraphCodegenMod(); }); } // namespace backend diff --git a/src/relay/backend/graph_plan_memory.cc b/src/relay/backend/graph_plan_memory.cc index 26bc77aa6ec5..4260f052d2c0 100644 --- a/src/relay/backend/graph_plan_memory.cc +++ b/src/relay/backend/graph_plan_memory.cc @@ -20,7 +20,7 @@ /*! * \file relay/backend/graph_plan_memory.cc * \brief Memory index assignment pass for executing - * the program in the graph runtime. + * the program in the graph executor. */ #include #include diff --git a/src/relay/transforms/partition_graph.cc b/src/relay/transforms/partition_graph.cc index 404c7efb10b0..94891c3c98ea 100644 --- a/src/relay/transforms/partition_graph.cc +++ b/src/relay/transforms/partition_graph.cc @@ -428,7 +428,7 @@ IRModule RemoveDefaultAnnotations(IRModule module) { * could be a tuple output. Such tuple outputs needs to be flattened * otherwise the function would create tuples of tuples. Moreover, tuple * of tuples are valid relay, however they are not currently supported by - * graph runtime or relay VM. + * graph executor or relay VM. 
*/ // New annotations would be required to be added for each flattened output diff --git a/src/runtime/contrib/arm_compute_lib/acl_runtime.cc b/src/runtime/contrib/arm_compute_lib/acl_runtime.cc index ed8f6adbd083..6562d1bfc62d 100644 --- a/src/runtime/contrib/arm_compute_lib/acl_runtime.cc +++ b/src/runtime/contrib/arm_compute_lib/acl_runtime.cc @@ -28,7 +28,7 @@ #include "../json/json_node.h" #include "../json/json_runtime.h" -#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB +#ifdef TVM_GRAPH_EXECUTOR_ARM_COMPUTE_LIB #include #include #include @@ -82,7 +82,7 @@ class ACLRuntime : public JSONRuntimeBase { BuildEngine(); } -#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB +#ifdef TVM_GRAPH_EXECUTOR_ARM_COMPUTE_LIB /*! * \brief Unpack inputs and outputs and run inference on a given layer. * @@ -518,12 +518,12 @@ class ACLRuntime : public JSONRuntimeBase { #else void Run() override { LOG(FATAL) << "Cannot call run on Arm Compute Library module without runtime enabled. " - << "Please build with USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME."; + << "Please build with USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR."; } void BuildEngine() { LOG(WARNING) << "Arm Compute Library engine is not initialized. " - << "Please build with USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME."; + << "Please build with USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR."; } #endif }; diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index e6eb28c10af6..21031c67863f 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -32,7 +32,7 @@ #include "../json/json_node.h" #include "../json/json_runtime.h" -#ifdef TVM_GRAPH_RUNTIME_TENSORRT +#ifdef TVM_GRAPH_EXECUTOR_TENSORRT #include "NvInfer.h" #include "tensorrt_builder.h" #endif @@ -108,7 +108,7 @@ class TensorRTRuntime : public JSONRuntimeBase { } } -#ifdef TVM_GRAPH_RUNTIME_TENSORRT +#ifdef TVM_GRAPH_EXECUTOR_TENSORRT /*! \brief Destroy engines and contexts. */ ~TensorRTRuntime() { for (auto& it : trt_engine_cache_) { diff --git a/src/runtime/crt/Makefile b/src/runtime/crt/Makefile index d707d0c63b81..8d3acab1858b 100644 --- a/src/runtime/crt/Makefile +++ b/src/runtime/crt/Makefile @@ -67,8 +67,8 @@ endef LIBS = \ src/runtime/crt/common \ - src/runtime/crt/graph_runtime \ - src/runtime/crt/graph_runtime_module \ + src/runtime/crt/graph_executor \ + src/runtime/crt/graph_executor_module \ src/runtime/crt/memory \ src/runtime/crt/utvm_rpc_common \ src/runtime/crt/utvm_rpc_server diff --git a/src/runtime/crt/common/crt_runtime_api.c b/src/runtime/crt/common/crt_runtime_api.c index 6634cfa7db89..e7fa7bcb5d5e 100644 --- a/src/runtime/crt/common/crt_runtime_api.c +++ b/src/runtime/crt/common/crt_runtime_api.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/runtime/crt/graph_runtime/graph_runtime.c b/src/runtime/crt/graph_executor/graph_executor.c similarity index 76% rename from src/runtime/crt/graph_runtime/graph_runtime.c rename to src/runtime/crt/graph_executor/graph_executor.c index f0a1eb2da870..2fe9e73aeddc 100644 --- a/src/runtime/crt/graph_runtime/graph_runtime.c +++ b/src/runtime/crt/graph_executor/graph_executor.c @@ -20,12 +20,12 @@ // LINT_C_FILE /*! 
- * \file graph_runtime.c - * \brief implement graph runtime in pure C + * \file graph_executor.c + * \brief implement graph executor in pure C */ #include -#include +#include #include #include #include @@ -49,7 +49,7 @@ uint32_t Shape_Accumulate(int64_t* shape, uint32_t ndim) { return accum; } -int NodeEntry_Load(TVMGraphRuntimeNodeEntry* entry, JSONReader* reader) { +int NodeEntry_Load(TVMGraphExecutorNodeEntry* entry, JSONReader* reader) { int status = 0; reader->BeginArray(reader); if (!(reader->NextArrayItem(reader))) { @@ -74,8 +74,8 @@ int NodeEntry_Load(TVMGraphRuntimeNodeEntry* entry, JSONReader* reader) { return status; } -void TVMGraphRuntimeNode_LoadAttrs(TVMGraphRuntimeNode* node, JSONReader* reader, - TVMOpParam* param) { +void TVMGraphExecutorNode_LoadAttrs(TVMGraphExecutorNode* node, JSONReader* reader, + TVMOpParam* param) { int bitmask = 0; char key[20], value[120]; memset(param, 0, sizeof(TVMOpParam)); @@ -109,7 +109,7 @@ void TVMGraphRuntimeNode_LoadAttrs(TVMGraphRuntimeNode* node, JSONReader* reader } } -int TVMGraphRuntimeNode_Load(TVMGraphRuntimeNode* node, JSONReader* reader) { +int TVMGraphExecutorNode_Load(TVMGraphExecutorNode* node, JSONReader* reader) { int status = 0; reader->BeginObject(reader); int bitmask = 0; @@ -138,8 +138,8 @@ int TVMGraphRuntimeNode_Load(TVMGraphRuntimeNode* node, JSONReader* reader) { break; } DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphRuntimeNodeEntry) * num_inputs, - dev, (void**)&node->inputs); + tvm_crt_error_t err = TVMPlatformMemoryAllocate( + sizeof(TVMGraphExecutorNodeEntry) * num_inputs, dev, (void**)&node->inputs); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); return -1; @@ -150,7 +150,7 @@ int TVMGraphRuntimeNode_Load(TVMGraphRuntimeNode* node, JSONReader* reader) { return -1; } - TVMGraphRuntimeNodeEntry* inputs = node->inputs + count; + TVMGraphExecutorNodeEntry* inputs = node->inputs + count; reader->BeginArray(reader); if (!reader->NextArrayItem(reader)) { fprintf(stderr, "invalid json format\n"); @@ -181,7 +181,7 @@ int TVMGraphRuntimeNode_Load(TVMGraphRuntimeNode* node, JSONReader* reader) { } else if (!strcmp(key, "attr") || !strcmp(key, "attrs")) { TVMOpParam param; - TVMGraphRuntimeNode_LoadAttrs(node, reader, ¶m); + TVMGraphExecutorNode_LoadAttrs(node, reader, ¶m); memcpy(&node->param, ¶m, sizeof(param)); } else if (!strcmp(key, "control_deps")) { fprintf(stderr, "do not support key %s", key); @@ -201,15 +201,15 @@ int TVMGraphRuntimeNode_Load(TVMGraphRuntimeNode* node, JSONReader* reader) { return status; } -TVMGraphRuntimeNode TVMGraphRuntimeNodeCreate() { - TVMGraphRuntimeNode node; - memset(&node, 0, sizeof(TVMGraphRuntimeNode)); - node.LoadAttrs = TVMGraphRuntimeNode_LoadAttrs; - node.Load = TVMGraphRuntimeNode_Load; +TVMGraphExecutorNode TVMGraphExecutorNodeCreate() { + TVMGraphExecutorNode node; + memset(&node, 0, sizeof(TVMGraphExecutorNode)); + node.LoadAttrs = TVMGraphExecutorNode_LoadAttrs; + node.Load = TVMGraphExecutorNode_Load; return node; } -int TVMGraphRuntimeNodeRelease(TVMGraphRuntimeNode* node) { +int TVMGraphExecutorNodeRelease(TVMGraphExecutorNode* node) { if (!node) { return 0; } @@ -225,7 +225,7 @@ int TVMGraphRuntimeNodeRelease(TVMGraphRuntimeNode* node) { return 0; } -int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr* attr, JSONReader* reader) { +int TVMGraphExecutorGraphAttr_Load(TVMGraphExecutorGraphAttr* attr, JSONReader* reader) { int status = 0; int bitmask = 0; char key[16], type[16]; @@ 
-520,7 +520,7 @@ int TVMGraphRuntimeGraphAttr_Load(TVMGraphRuntimeGraphAttr* attr, JSONReader* re return status; } -int TVMGraphRuntimeGraphAttr_Release(TVMGraphRuntimeGraphAttr* attr) { +int TVMGraphExecutorGraphAttr_Release(TVMGraphExecutorGraphAttr* attr) { if (!attr) { return 0; } @@ -568,7 +568,7 @@ int TVMGraphRuntimeGraphAttr_Release(TVMGraphRuntimeGraphAttr* attr) { return 0; } -int TVMGraphRuntime_Load(TVMGraphRuntime* runtime, JSONReader* reader) { +int TVMGraphExecutor_Load(TVMGraphExecutor* executor, JSONReader* reader) { int status = 0; reader->BeginObject(reader); int bitmask = 0; @@ -583,30 +583,30 @@ int TVMGraphRuntime_Load(TVMGraphRuntime* runtime, JSONReader* reader) { break; } DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphRuntimeNode) * num_items, dev, - (void**)&runtime->nodes); + tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutorNode) * num_items, dev, + (void**)&executor->nodes); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); status = -1; break; } while (reader->NextArrayItem(reader)) { - if (runtime->nodes_count == num_items) { + if (executor->nodes_count == num_items) { fprintf(stderr, "array too big\n"); status = -1; return status; } - TVMGraphRuntimeNode* node = runtime->nodes + runtime->nodes_count; - status = TVMGraphRuntimeNode_Load(node, reader); + TVMGraphExecutorNode* node = executor->nodes + executor->nodes_count; + status = TVMGraphExecutorNode_Load(node, reader); if (status != 0) { - fprintf(stderr, "failed to load an element in `nodes` field in graph runtime node.\n"); + fprintf(stderr, "failed to load an element in `nodes` field in graph executor node.\n"); break; #if TVM_CRT_DEBUG } else { - printf("loading: node (%u) %s loaded.\n", runtime->nodes_count, node->name); + printf("loading: node (%u) %s loaded.\n", executor->nodes_count, node->name); #endif // TVM_CRT_DEBUG } - runtime->nodes_count++; + executor->nodes_count++; } bitmask |= 1; } else if (!strcmp(key, "arg_nodes")) { @@ -619,21 +619,22 @@ int TVMGraphRuntime_Load(TVMGraphRuntime* runtime, JSONReader* reader) { } DLDevice dev = {kDLCPU, 0}; tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(uint32_t) * num_items, dev, - (void**)&runtime->input_nodes); + (void**)&executor->input_nodes); + if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); status = -1; break; } while (reader->NextArrayItem(reader)) { - if (runtime->input_nodes_count == num_items) { + if (executor->input_nodes_count == num_items) { fprintf(stderr, "array too big\n"); status = -1; return status; } - uint32_t* node = runtime->input_nodes + runtime->input_nodes_count; + uint32_t* node = executor->input_nodes + executor->input_nodes_count; reader->ReadUnsignedInteger(reader, node); - runtime->input_nodes_count++; + executor->input_nodes_count++; } bitmask |= 2; } else if (!strcmp(key, "node_row_ptr")) { @@ -646,22 +647,22 @@ int TVMGraphRuntime_Load(TVMGraphRuntime* runtime, JSONReader* reader) { } DLDevice dev = {kDLCPU, 0}; tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(uint32_t) * num_items, dev, - (void**)&runtime->node_row_ptr); + (void**)&executor->node_row_ptr); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); status = -1; break; } while (reader->NextArrayItem(reader)) { - if (runtime->node_row_ptr_count == num_items) { + if (executor->node_row_ptr_count == num_items) { fprintf(stderr, "array too big\n"); status = -1; return status; } - 
uint32_t count = runtime->node_row_ptr_count; - uint32_t* node = runtime->node_row_ptr + count; + uint32_t count = executor->node_row_ptr_count; + uint32_t* node = executor->node_row_ptr + count; reader->ReadUnsignedInteger(reader, node); - runtime->node_row_ptr_count++; + executor->node_row_ptr_count++; } bitmask |= 4; } else if (!strcmp(key, "heads")) { @@ -673,32 +674,32 @@ int TVMGraphRuntime_Load(TVMGraphRuntime* runtime, JSONReader* reader) { break; } DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphRuntimeNodeEntry) * num_items, - dev, (void**)&runtime->outputs); + tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutorNodeEntry) * num_items, + dev, (void**)&executor->outputs); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); status = -1; break; } while (reader->NextArrayItem(reader)) { - if (runtime->outputs_count == num_items) { + if (executor->outputs_count == num_items) { fprintf(stderr, "array too big\n"); status = -1; return status; } - TVMGraphRuntimeNodeEntry* entry = runtime->outputs + runtime->outputs_count; + TVMGraphExecutorNodeEntry* entry = executor->outputs + executor->outputs_count; status = NodeEntry_Load(entry, reader); if (status != 0) { - fprintf(stderr, "Fail to load an element in `heads` field in graph runtime node.\n"); + fprintf(stderr, "Fail to load an element in `heads` field in graph executor node.\n"); break; } - runtime->outputs_count++; + executor->outputs_count++; } bitmask |= 8; } else if (!strcmp(key, "attrs")) { - status = TVMGraphRuntimeGraphAttr_Load(&(runtime->attrs), reader); + status = TVMGraphExecutorGraphAttr_Load(&(executor->attrs), reader); if (status != 0) { - fprintf(stderr, "Fail to load an element in `heads` field in graph runtime node.\n"); + fprintf(stderr, "Fail to load an element in `heads` field in graph executor node.\n"); break; } bitmask |= 16; @@ -719,29 +720,31 @@ int TVMGraphRuntime_Load(TVMGraphRuntime* runtime, JSONReader* reader) { return status; } -uint32_t TVMGraphRuntime_GetEntryId(TVMGraphRuntime* runtime, uint32_t nid, uint32_t index) { - return runtime->node_row_ptr[nid] + index; +uint32_t TVMGraphExecutor_GetEntryId(TVMGraphExecutor* executor, uint32_t nid, uint32_t index) { + return executor->node_row_ptr[nid] + index; } /*! * \brief Get the number of input tensors allocated. - * \param runtime The graph runtime. + * \param executor The graph executor. * \return the number of input tensors allocated. */ -int TVMGraphRuntime_GetNumInputs(TVMGraphRuntime* runtime) { return runtime->input_nodes_count; } +int TVMGraphExecutor_GetNumInputs(TVMGraphExecutor* executor) { + return executor->input_nodes_count; +} /*! * \brief Get the input index given the name of input. - * \param runtime The graph runtime. + * \param executor The graph executor. * \param name The name of the input. * \return The index of input. */ -int TVMGraphRuntime_GetInputIndex(TVMGraphRuntime* runtime, const char* name) { +int TVMGraphExecutor_GetInputIndex(TVMGraphExecutor* executor, const char* name) { uint32_t i; int32_t rv = -1; - for (i = 0; i < runtime->input_nodes_count; ++i) { - uint32_t nid = runtime->input_nodes[i]; - if (!strcmp(runtime->nodes[nid].name, name)) { + for (i = 0; i < executor->input_nodes_count; ++i) { + uint32_t nid = executor->input_nodes[i]; + if (!strcmp(executor->nodes[nid].name, name)) { rv = i; break; } @@ -752,28 +755,28 @@ int TVMGraphRuntime_GetInputIndex(TVMGraphRuntime* runtime, const char* name) { /*! 
* \brief set input to the graph based on name. - * \param runtime The graph runtime. + * \param executor The graph executor. * \param name The name of the input. * \param data_in The input data. */ -void TVMGraphRuntime_SetInput(TVMGraphRuntime* runtime, const char* name, DLTensor* data_in) { - uint32_t index = TVMGraphRuntime_GetInputIndex(runtime, name); - if (index >= runtime->input_nodes_count) { +void TVMGraphExecutor_SetInput(TVMGraphExecutor* executor, const char* name, DLTensor* data_in) { + uint32_t index = TVMGraphExecutor_GetInputIndex(executor, name); + if (index >= executor->input_nodes_count) { fprintf(stderr, "given index is greater than num of input nodes.\n"); } - uint32_t eid = TVMGraphRuntime_GetEntryId(runtime, runtime->input_nodes[index], 0); - runtime->data_entry[eid].dl_tensor.data = data_in->data; + uint32_t eid = TVMGraphExecutor_GetEntryId(executor, executor->input_nodes[index], 0); + executor->data_entry[eid].dl_tensor.data = data_in->data; } /*! * \brief Load parameters from parameter blob. - * \param runtime The graph runtime. + * \param executor The graph executor. * \param param_blob A binary blob of parameter. * \param param_size The parameter size. * \return The result of this function execution. */ -int TVMGraphRuntime_LoadParams(TVMGraphRuntime* runtime, const char* param_blob, - const uint32_t param_size) { +int TVMGraphExecutor_LoadParams(TVMGraphExecutor* executor, const char* param_blob, + const uint32_t param_size) { int status = 0; const char* bptr = param_blob; uint64_t header, reserved; @@ -790,13 +793,13 @@ int TVMGraphRuntime_LoadParams(TVMGraphRuntime* runtime, const char* param_blob, char* names = NULL; DLDevice dev = {kDLCPU, 0}; tvm_crt_error_t err = - TVMPlatformMemoryAllocate(TVM_CRT_STRLEN_NAME * runtime->nodes_count, dev, (void**)&names); + TVMPlatformMemoryAllocate(TVM_CRT_STRLEN_NAME * executor->nodes_count, dev, (void**)&names); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); status = -1; return status; } - memset(names, 0, TVM_CRT_STRLEN_NAME * runtime->nodes_count); + memset(names, 0, TVM_CRT_STRLEN_NAME * executor->nodes_count); uint64_t names_count; int idx; memcpy(&names_count, bptr, sizeof(names_count)); @@ -824,33 +827,33 @@ int TVMGraphRuntime_LoadParams(TVMGraphRuntime* runtime, const char* param_blob, } for (idx = 0; idx < size; idx++) { - int32_t in_idx = TVMGraphRuntime_GetInputIndex(runtime, names + TVM_CRT_STRLEN_NAME * idx); + int32_t in_idx = TVMGraphExecutor_GetInputIndex(executor, names + TVM_CRT_STRLEN_NAME * idx); CHECK_GT(in_idx, 0, "Found param for non-existent input: %s\n", names + TVM_CRT_STRLEN_NAME * idx); - uint32_t eid = TVMGraphRuntime_GetEntryId(runtime, runtime->input_nodes[in_idx], 0); - if (!(eid < runtime->data_entry_count)) { + uint32_t eid = TVMGraphExecutor_GetEntryId(executor, executor->input_nodes[in_idx], 0); + if (!(eid < executor->data_entry_count)) { fprintf(stderr, "`entry_id`=%d is greater than expected(%d).\n", eid, - runtime->data_entry_count); + executor->data_entry_count); status = -1; } - if (runtime->data_entry[eid].dl_tensor.shape) { - err = TVMPlatformMemoryFree(runtime->data_entry[eid].dl_tensor.shape, dev); + if (executor->data_entry[eid].dl_tensor.shape) { + err = TVMPlatformMemoryFree(executor->data_entry[eid].dl_tensor.shape, dev); if (err != kTvmErrorNoError) { status = -1; } - runtime->data_entry[eid].dl_tensor.shape = 0; + executor->data_entry[eid].dl_tensor.shape = 0; } - if (runtime->data_entry[eid].dl_tensor.data) { - err = 
TVMPlatformMemoryFree(runtime->data_entry[eid].dl_tensor.data, dev); + if (executor->data_entry[eid].dl_tensor.data) { + err = TVMPlatformMemoryFree(executor->data_entry[eid].dl_tensor.data, dev); if (err != kTvmErrorNoError) { status = -1; } - runtime->data_entry[eid].dl_tensor.data = 0; + executor->data_entry[eid].dl_tensor.data = 0; } - status |= TVMNDArray_Load(&(runtime->data_entry[eid]), &bptr); + status |= TVMNDArray_Load(&(executor->data_entry[eid]), &bptr); #if TVM_CRT_DEBUG - TVMNDArray* entry = &(runtime->data_entry[eid]); + TVMNDArray* entry = &(executor->data_entry[eid]); printf("loading: param %s loaded, in_idx=%d, eid=%d, ndim=%d, data[0]=%f\n", names + TVM_CRT_STRLEN_NAME * idx, in_idx, eid, entry->dl_tensor.ndim, ((float*)entry->dl_tensor.data)[0]); // NOLINT(*) @@ -869,38 +872,38 @@ int TVMGraphRuntime_LoadParams(TVMGraphRuntime* runtime, const char* param_blob, /*! * \brief Run all the operations one by one. - * \param runtime The graph runtime. + * \param executor The graph executor. */ -void TVMGraphRuntime_Run(TVMGraphRuntime* runtime) { +void TVMGraphExecutor_Run(TVMGraphExecutor* executor) { // setup the array and requirements. uint32_t idx; - for (idx = 0; idx < runtime->op_execs_count; ++idx) { - if (runtime->op_execs[idx].fexec) { + for (idx = 0; idx < executor->op_execs_count; ++idx) { + if (executor->op_execs[idx].fexec) { #if TVM_CRT_DEBUG - printf("calling: %s (%d)\n", runtime->op_execs[idx].name, idx); + printf("calling: %s (%d)\n", executor->op_execs[idx].name, idx); #endif // TVM_CRT_DEBUG - runtime->op_execs[idx].Call(&(runtime->op_execs[idx])); + executor->op_execs[idx].Call(&(executor->op_execs[idx])); } } } /*! * \brief Get the number of output tensors allocated. - * \param runtime The graph runtime. + * \param executor The graph executor. * \return the number of output tensors allocated. 
*/ -int TVMGraphRuntime_GetNumOutputs(TVMGraphRuntime* runtime) { return runtime->outputs_count; } +int TVMGraphExecutor_GetNumOutputs(TVMGraphExecutor* executor) { return executor->outputs_count; } -int TVMGraphRuntime_GetOutput(TVMGraphRuntime* runtime, const int32_t idx, DLTensor* out) { +int TVMGraphExecutor_GetOutput(TVMGraphExecutor* executor, const int32_t idx, DLTensor* out) { int status = 0; - uint32_t nid = runtime->outputs[idx].node_id; - uint32_t index = runtime->outputs[idx].index; - uint32_t eid = TVMGraphRuntime_GetEntryId(runtime, nid, index); + uint32_t nid = executor->outputs[idx].node_id; + uint32_t index = executor->outputs[idx].index; + uint32_t eid = TVMGraphExecutor_GetEntryId(executor, nid, index); // copy data section to allocated output tensor int32_t elem_bytes = out->dtype.bits / 8; int64_t size = Shape_Accumulate(out->shape, out->ndim); - DLTensor* tensor = &(runtime->data_entry[eid].dl_tensor); + DLTensor* tensor = &(executor->data_entry[eid].dl_tensor); CHECK(out->ndim == tensor->ndim); CHECK(out->dtype.bits == tensor->dtype.bits); CHECK(Shape_Accumulate(out->shape, out->ndim) == Shape_Accumulate(tensor->shape, tensor->ndim)); @@ -908,7 +911,7 @@ int TVMGraphRuntime_GetOutput(TVMGraphRuntime* runtime, const int32_t idx, DLTen return status; } -int TVMGraphRuntime_SetupStorage(TVMGraphRuntime* runtime) { +int TVMGraphExecutor_SetupStorage(TVMGraphExecutor* executor) { TVMPackedFunc lookup_linked_param; int lookup_linked_param_valid; uint32_t idx; @@ -919,12 +922,12 @@ int TVMGraphRuntime_SetupStorage(TVMGraphRuntime* runtime) { temp_args.tcodes[0] = kTVMArgInt; temp_args.values_count = 1; lookup_linked_param_valid = - (TVMPackedFunc_InitModuleFunc(&lookup_linked_param, runtime->module_handle, + (TVMPackedFunc_InitModuleFunc(&lookup_linked_param, executor->module_handle, "_lookup_linked_param", &temp_args) == 0); } // Grab saved optimization plan from graph. - TVMGraphRuntimeGraphAttr* attrs = &(runtime->attrs); + TVMGraphExecutorGraphAttr* attrs = &(executor->attrs); DLDataType* vtype = NULL; DLDevice alloc_dev = {kDLCPU, 0}; tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(DLDataType) * attrs->dltype_count, @@ -938,20 +941,20 @@ int TVMGraphRuntime_SetupStorage(TVMGraphRuntime* runtime) { } // Size and device type of each storage pool entry. - TVMGraphRuntimePoolEntry* pool_entry = NULL; - err = TVMPlatformMemoryAllocate(sizeof(TVMGraphRuntimePoolEntry) * runtime->nodes_count, + TVMGraphExecutorPoolEntry* pool_entry = NULL; + err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutorPoolEntry) * executor->nodes_count, alloc_dev, (void**)&pool_entry); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); return -1; } - memset(pool_entry, 0, sizeof(TVMGraphRuntimePoolEntry) * runtime->nodes_count); + memset(pool_entry, 0, sizeof(TVMGraphExecutorPoolEntry) * executor->nodes_count); uint32_t pool_entry_count = 0; // Find the maximum space size. for (idx = 0; idx < attrs->shape_count; idx++) { int storage_id = attrs->storage_id[idx]; // Use the fallback device if no device index is available. - int device_type = runtime->devices[0].device_type; + int device_type = executor->devices[0].device_type; uint32_t size = Shape_Accumulate(attrs->shape + idx * TVM_CRT_MAX_NDIM, attrs->ndim[idx]); DLDataType t = vtype[idx]; uint32_t bits = t.bits * t.lanes; @@ -967,15 +970,15 @@ int TVMGraphRuntime_SetupStorage(TVMGraphRuntime* runtime) { } // Allocate the space. 
- err = TVMPlatformMemoryAllocate(sizeof(TVMGraphRuntimeStorageEntry) * pool_entry_count, alloc_dev, - (void**)&runtime->storage_pool); + err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutorStorageEntry) * pool_entry_count, + alloc_dev, (void**)&executor->storage_pool); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); return -1; } for (idx = 0; idx < pool_entry_count; idx++) { - TVMGraphRuntimePoolEntry pit = pool_entry[idx]; - DLDevice dev = runtime->devices[0]; + TVMGraphExecutorPoolEntry pit = pool_entry[idx]; + DLDevice dev = executor->devices[0]; uint8_t did_find_linked_param = 0; if (lookup_linked_param_valid) { lookup_linked_param.args.values[0].v_int64 = idx; @@ -983,8 +986,8 @@ int TVMGraphRuntime_SetupStorage(TVMGraphRuntime* runtime) { void* linked_param_data = lookup_linked_param.ret_value.values[0].v_handle; if (linked_param_data != NULL) { - runtime->storage_pool[runtime->storage_pool_count].is_linked_param = 1; - DLTensor* tensor = &runtime->storage_pool[runtime->storage_pool_count].array.dl_tensor; + executor->storage_pool[executor->storage_pool_count].is_linked_param = 1; + DLTensor* tensor = &executor->storage_pool[executor->storage_pool_count].array.dl_tensor; tensor->data = linked_param_data; tensor->device = dev; tensor->ndim = attrs->ndim[pit.entry_id]; @@ -1001,28 +1004,28 @@ int TVMGraphRuntime_SetupStorage(TVMGraphRuntime* runtime) { }; shape[0] = (pit.size + 3) / 4; int status = TVMNDArray_Empty(1, shape, dtype, dev, - &runtime->storage_pool[runtime->storage_pool_count].array); + &executor->storage_pool[executor->storage_pool_count].array); CHECK_EQ(status, 0, "fail to create storage_pool with idx=%d\n", idx); } - runtime->storage_pool_count++; + executor->storage_pool_count++; } // Assign the pooled entries. A unified memory pool is used to simplifiy // memory assignment for each node entry. The allocated memory on each device // is mapped to this pool. 
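/* Illustrative sketch (not part of the change set): the loop just below creates
 * each data entry as a TVMNDArray view into the pool slot named by its
 * storage_id, so entries that share an id alias one allocation. The part of this
 * hunk elided above sizes every slot to the largest entry mapped to it, and the
 * slot is then allocated as a 1-D int32 array of (size + 3) / 4 elements. A
 * self-contained toy reproduction of that sizing rule with made-up entries: */
#include <stdint.h>
#include <stdio.h>

int main(void) {
  /* three hypothetical entries: float32[1,3,8], float32[24] and int8[4,4] */
  const uint32_t elems[3] = {24, 24, 16};
  const uint32_t bits[3] = {32, 32, 8};
  const uint32_t storage_id[3] = {0, 0, 1}; /* entries 0 and 1 alias slot 0 */

  uint32_t pool_bytes[2] = {0, 0};
  for (int i = 0; i < 3; ++i) {
    uint32_t bytes = elems[i] * ((bits[i] + 7) / 8);   /* byte size of this entry */
    if (bytes > pool_bytes[storage_id[i]]) pool_bytes[storage_id[i]] = bytes;
  }
  for (int sid = 0; sid < 2; ++sid) {
    /* prints: slot 0: 96 bytes -> 24 int32 words, slot 1: 16 bytes -> 4 int32 words */
    printf("slot %d: %u bytes -> %u int32 words\n", sid, pool_bytes[sid],
           (pool_bytes[sid] + 3) / 4);
  }
  return 0;
}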
- runtime->data_entry_count = runtime->node_row_ptr[runtime->node_row_ptr_count - 1]; - err = TVMPlatformMemoryAllocate(sizeof(TVMNDArray) * runtime->data_entry_count, alloc_dev, - (void**)&runtime->data_entry); + executor->data_entry_count = executor->node_row_ptr[executor->node_row_ptr_count - 1]; + err = TVMPlatformMemoryAllocate(sizeof(TVMNDArray) * executor->data_entry_count, alloc_dev, + (void**)&executor->data_entry); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); return -1; } - for (idx = 0; idx < runtime->data_entry_count; ++idx) { + for (idx = 0; idx < executor->data_entry_count; ++idx) { uint32_t storage_id = attrs->storage_id[idx]; - CHECK(storage_id < runtime->storage_pool_count); - int status = TVMNDArray_CreateView(&(runtime->storage_pool[storage_id].array), + CHECK(storage_id < executor->storage_pool_count); + int status = TVMNDArray_CreateView(&(executor->storage_pool[storage_id].array), attrs->shape + idx * TVM_CRT_MAX_NDIM, attrs->ndim[idx], - vtype[idx], &runtime->data_entry[idx]); + vtype[idx], &executor->data_entry[idx]); CHECK_EQ(status, 0, "fail to create for node with idx=%d, storage_id=%u\n", idx, storage_id); } @@ -1042,32 +1045,32 @@ int TVMGraphRuntime_SetupStorage(TVMGraphRuntime* runtime) { return 0; } -int TVMGraphRuntime_SetupOpExecs(TVMGraphRuntime* runtime) { +int TVMGraphExecutor_SetupOpExecs(TVMGraphExecutor* executor) { int status = 0; uint32_t nid, idx; - runtime->op_execs_count = runtime->nodes_count; + executor->op_execs_count = executor->nodes_count; DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMPackedFunc) * runtime->op_execs_count, - dev, (void**)&runtime->op_execs); + tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMPackedFunc) * executor->op_execs_count, + dev, (void**)&executor->op_execs); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); status = -1; return status; } - for (nid = 0; nid < runtime->nodes_count; nid++) { - const TVMGraphRuntimeNode* inode = runtime->nodes + nid; + for (nid = 0; nid < executor->nodes_count; nid++) { + const TVMGraphExecutorNode* inode = executor->nodes + nid; if (strcmp(inode->op_type, "null")) { DLTensorPtr args[TVM_CRT_MAX_ARGS]; uint32_t args_count = 0; for (idx = 0; idx < inode->inputs_count; idx++) { - const TVMGraphRuntimeNodeEntry* entry = inode->inputs + idx; - uint32_t eid = TVMGraphRuntime_GetEntryId(runtime, entry->node_id, entry->index); - args[idx] = &(runtime->data_entry[eid].dl_tensor); + const TVMGraphExecutorNodeEntry* entry = inode->inputs + idx; + uint32_t eid = TVMGraphExecutor_GetEntryId(executor, entry->node_id, entry->index); + args[idx] = &(executor->data_entry[eid].dl_tensor); args_count++; } for (idx = 0; idx < inode->param.num_outputs; idx++) { - uint32_t eid = TVMGraphRuntime_GetEntryId(runtime, nid, idx); - args[args_count] = &(runtime->data_entry[eid].dl_tensor); + uint32_t eid = TVMGraphExecutor_GetEntryId(executor, nid, idx); + args[args_count] = &(executor->data_entry[eid].dl_tensor); args_count++; } if (strcmp(inode->op_type, "tvm_op")) { @@ -1085,9 +1088,9 @@ int TVMGraphRuntime_SetupOpExecs(TVMGraphRuntime* runtime) { printf("tvm_op: creating %s with node_id=%d\n", inode->param.func_name, nid); #endif // TVM_CRT_DEBUG TVMPackedFunc pf; - TVMGraphRuntime_CreateTVMOp(runtime, &(inode->param), args, args_count, inode->inputs_count, - &pf); - runtime->op_execs[nid] = pf; + TVMGraphExecutor_CreateTVMOp(executor, &(inode->param), args, args_count, 
inode->inputs_count, + &pf); + executor->op_execs[nid] = pf; } } return status; @@ -1104,9 +1107,9 @@ typedef struct TVMOpArgs { uint32_t shape_data_count; } TVMOpArgs; -int32_t TVMGraphRuntime_CreateTVMOp(TVMGraphRuntime* runtime, const TVMOpParam* param, - DLTensorPtr* args, const uint32_t args_count, - uint32_t num_inputs, TVMPackedFunc* pf) { +int32_t TVMGraphExecutor_CreateTVMOp(TVMGraphExecutor* executor, const TVMOpParam* param, + DLTensorPtr* args, const uint32_t args_count, + uint32_t num_inputs, TVMPackedFunc* pf) { int status = 0; uint32_t idx; TVMOpArgs arg_ptr; @@ -1137,7 +1140,7 @@ int32_t TVMGraphRuntime_CreateTVMOp(TVMGraphRuntime* runtime, const TVMOpParam* } TVMArgs targs = TVMArgs_Create(arg_ptr.arg_values, arg_ptr.arg_tcodes, arg_ptr.arg_values_count); - status = TVMPackedFunc_InitModuleFunc(pf, runtime->module_handle, param->func_name, &targs); + status = TVMPackedFunc_InitModuleFunc(pf, executor->module_handle, param->func_name, &targs); return status; } @@ -1151,28 +1154,28 @@ int32_t TVMGraphRuntime_CreateTVMOp(TVMGraphRuntime* runtime, const TVMOpParam* * executed on. * \return 0 on success. */ -int TVMGraphRuntime_Init(TVMGraphRuntime* runtime, const char* graph_json, - TVMModuleHandle module_handle, const DLDevice* devs) { +int TVMGraphExecutor_Init(TVMGraphExecutor* executor, const char* graph_json, + TVMModuleHandle module_handle, const DLDevice* devs) { JSONReader reader; tvm_crt_error_t err = JSONReader_Create(graph_json, &reader); if (err != kTvmErrorNoError) { return -1; } - TVMGraphRuntime_Load(runtime, &reader); + TVMGraphExecutor_Load(executor, &reader); err = JSONReader_Release(&reader); if (err != kTvmErrorNoError) { return -1; } - runtime->module_handle = module_handle; - runtime->devices[0] = devs[0]; + executor->module_handle = module_handle; + executor->devices[0] = devs[0]; int status; - status = TVMGraphRuntime_SetupStorage(runtime); + status = TVMGraphExecutor_SetupStorage(executor); if (status != 0) { return status; } - status = TVMGraphRuntime_SetupOpExecs(runtime); + status = TVMGraphExecutor_SetupOpExecs(executor); if (status != 0) { if (status != 0) { return status; @@ -1184,74 +1187,74 @@ int TVMGraphRuntime_Init(TVMGraphRuntime* runtime, const char* graph_json, return status; } -int TVMGraphRuntime_Create(const char* sym_json, TVMModuleHandle module_handle, - const DLDevice* devs, TVMGraphRuntime** runtime) { +int TVMGraphExecutor_Create(const char* sym_json, TVMModuleHandle module_handle, + const DLDevice* devs, TVMGraphExecutor** executor) { DLDevice dev = {kDLCPU, 0}; - tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphRuntime), dev, (void**)runtime); + tvm_crt_error_t err = TVMPlatformMemoryAllocate(sizeof(TVMGraphExecutor), dev, (void**)executor); if (err != kTvmErrorNoError) { fprintf(stderr, "memory allocate error: %08x", err); return -1; } - memset(*runtime, 0, sizeof(TVMGraphRuntime)); + memset(*executor, 0, sizeof(TVMGraphExecutor)); // init - return TVMGraphRuntime_Init(*runtime, sym_json, module_handle, devs); + return TVMGraphExecutor_Init(*executor, sym_json, module_handle, devs); } -int TVMGraphRuntime_Release(TVMGraphRuntime** pptr) { +int TVMGraphExecutor_Release(TVMGraphExecutor** pptr) { int status = 0; int32_t idx; - TVMGraphRuntime* runtime = (TVMGraphRuntime*)(*pptr); - for (idx = 0; idx < runtime->nodes_count; ++idx) { - status = TVMGraphRuntimeNodeRelease(&(runtime->nodes[idx])); + TVMGraphExecutor* executor = (TVMGraphExecutor*)(*pptr); + for (idx = 0; idx < executor->nodes_count; ++idx) { + status = 
TVMGraphExecutorNodeRelease(&(executor->nodes[idx])); if (status != 0) { return status; } } DLDevice dev = {kDLCPU, 0}; - status = TVMPlatformMemoryFree(runtime->nodes, dev); + status = TVMPlatformMemoryFree(executor->nodes, dev); if (status != 0) { return status; } - status = TVMGraphRuntimeGraphAttr_Release(&(runtime->attrs)); + status = TVMGraphExecutorGraphAttr_Release(&(executor->attrs)); if (status != 0) { return status; } - for (idx = 0; idx < runtime->storage_pool_count; ++idx) { - if (runtime->storage_pool[idx].is_linked_param == 0) { - status = TVMNDArray_Release(&(runtime->storage_pool[idx]).array); + for (idx = 0; idx < executor->storage_pool_count; ++idx) { + if (executor->storage_pool[idx].is_linked_param == 0) { + status = TVMNDArray_Release(&(executor->storage_pool[idx]).array); if (status != 0) { return status; } } } - for (idx = 0; idx < runtime->data_entry_count; ++idx) { - status = TVMPlatformMemoryFree(runtime->data_entry[idx].dl_tensor.shape, dev); + for (idx = 0; idx < executor->data_entry_count; ++idx) { + status = TVMPlatformMemoryFree(executor->data_entry[idx].dl_tensor.shape, dev); if (status != 0) { return status; } } - status = TVMPlatformMemoryFree(runtime->input_nodes, dev); + status = TVMPlatformMemoryFree(executor->input_nodes, dev); if (status != 0) { return status; } - status = TVMPlatformMemoryFree(runtime->node_row_ptr, dev); + status = TVMPlatformMemoryFree(executor->node_row_ptr, dev); if (status != 0) { return status; } - status = TVMPlatformMemoryFree(runtime->outputs, dev); + status = TVMPlatformMemoryFree(executor->outputs, dev); if (status != 0) { return status; } - status = TVMPlatformMemoryFree(runtime->storage_pool, dev); + status = TVMPlatformMemoryFree(executor->storage_pool, dev); if (status != 0) { return status; } - status = TVMPlatformMemoryFree(runtime->data_entry, dev); + status = TVMPlatformMemoryFree(executor->data_entry, dev); if (status != 0) { return status; } - status = TVMPlatformMemoryFree(runtime->op_execs, dev); + status = TVMPlatformMemoryFree(executor->op_execs, dev); if (status != 0) { return status; } diff --git a/src/runtime/crt/graph_runtime/load_json.c b/src/runtime/crt/graph_executor/load_json.c similarity index 99% rename from src/runtime/crt/graph_runtime/load_json.c rename to src/runtime/crt/graph_executor/load_json.c index 65dcb3eccb44..dd2faecdc538 100644 --- a/src/runtime/crt/graph_runtime/load_json.c +++ b/src/runtime/crt/graph_executor/load_json.c @@ -25,7 +25,7 @@ */ #include #include -#include +#include #include #include diff --git a/src/runtime/crt/graph_runtime_module/graph_runtime_module.c b/src/runtime/crt/graph_executor_module/graph_executor_module.c similarity index 51% rename from src/runtime/crt/graph_runtime_module/graph_runtime_module.c rename to src/runtime/crt/graph_executor_module/graph_executor_module.c index 4a61b89528ad..7b2a25040d08 100644 --- a/src/runtime/crt/graph_runtime_module/graph_runtime_module.c +++ b/src/runtime/crt/graph_executor_module/graph_executor_module.c @@ -20,27 +20,27 @@ // LINT_C_FILE /*! - * \file graph_runtime_module.c - * \brief wrap graph_runtime into a TVMModule for use with RPC. + * \file graph_executor_module.c + * \brief wrap graph_executor into a TVMModule for use with RPC. 
*/ #include -#include -#include +#include +#include #include -#include "tvm/runtime/crt/internal/graph_runtime/graph_runtime.h" +#include "tvm/runtime/crt/internal/graph_executor/graph_executor.h" typedef struct { TVMModule mod; - TVMGraphRuntime* runtime; -} GraphRuntimeModule; + TVMGraphExecutor* executor; +} GraphExecutorModule; -static GraphRuntimeModule graph_runtime; +static GraphExecutorModule graph_executor; -int32_t TVMGraphRuntimeModule_Create(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, - int* ret_tcodes, void* resource_handle) { - if (graph_runtime.runtime != NULL) { +int32_t TVMGraphExecutorModule_Create(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, + int* ret_tcodes, void* resource_handle) { + if (graph_executor.executor != NULL) { return kTvmErrorGraphModuleAlreadyCreated; } @@ -59,16 +59,16 @@ int32_t TVMGraphRuntimeModule_Create(TVMValue* args, int* tcodes, int nargs, TVM DLDevice dev = {(DLDeviceType)args[2].v_int64, (int)args[3].v_int64}; int ret_value = - TVMGraphRuntime_Create(args[0].v_str, args[1].v_handle, &dev, &graph_runtime.runtime); + TVMGraphExecutor_Create(args[0].v_str, args[1].v_handle, &dev, &graph_executor.executor); if (ret_value != 0) { return ret_value; } TVMModuleHandle out; - ret_value = TVMModCreateFromCModule(&graph_runtime.mod, &out); + ret_value = TVMModCreateFromCModule(&graph_executor.mod, &out); if (ret_value != 0) { ret_tcodes[0] = kTVMNullptr; - TVMGraphRuntime_Release(&graph_runtime.runtime); + TVMGraphExecutor_Release(&graph_executor.executor); return ret_value; } @@ -77,8 +77,9 @@ int32_t TVMGraphRuntimeModule_Create(TVMValue* args, int* tcodes, int nargs, TVM return kTvmErrorNoError; } -int32_t TVMGraphRuntimeModule_GetInput(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, - int* ret_tcodes, void* resource_handle) { +int32_t TVMGraphExecutorModule_GetInput(TVMValue* args, int* tcodes, int nargs, + TVMValue* ret_values, int* ret_tcodes, + void* resource_handle) { if (nargs != 1) { return kTvmErrorFunctionCallNumArguments; } @@ -87,45 +88,45 @@ int32_t TVMGraphRuntimeModule_GetInput(TVMValue* args, int* tcodes, int nargs, T return kTvmErrorFunctionCallWrongArgType; } - int index = TVMGraphRuntime_GetInputIndex(graph_runtime.runtime, args[0].v_str); + int index = TVMGraphExecutor_GetInputIndex(graph_executor.executor, args[0].v_str); if (index < 0) { return kTvmErrorGraphModuleNoSuchInput; } - uint32_t eid = TVMGraphRuntime_GetEntryId(graph_runtime.runtime, - graph_runtime.runtime->input_nodes[index], 0); - ret_values[0].v_handle = (void*)&graph_runtime.runtime->data_entry[eid].dl_tensor; + uint32_t eid = TVMGraphExecutor_GetEntryId(graph_executor.executor, + graph_executor.executor->input_nodes[index], 0); + ret_values[0].v_handle = (void*)&graph_executor.executor->data_entry[eid].dl_tensor; ret_tcodes[0] = kTVMNDArrayHandle; return 0; } -int32_t TVMGraphRuntimeModule_GetNumInputs(TVMValue* args, int* tcodes, int nargs, - TVMValue* ret_values, int* ret_tcodes, - void* resource_handle) { +int32_t TVMGraphExecutorModule_GetNumInputs(TVMValue* args, int* tcodes, int nargs, + TVMValue* ret_values, int* ret_tcodes, + void* resource_handle) { if (nargs != 0) { return kTvmErrorFunctionCallNumArguments; } - ret_values[0].v_int64 = TVMGraphRuntime_GetNumInputs(); + ret_values[0].v_int64 = TVMGraphExecutor_GetNumInputs(); ret_tcodes[0] = kTVMArgInt; return 0; } -int32_t TVMGraphRuntimeModule_GetNumOutputs(TVMValue* args, int* tcodes, int nargs, - TVMValue* ret_values, int* ret_tcodes, - void* 
resource_handle) { +int32_t TVMGraphExecutorModule_GetNumOutputs(TVMValue* args, int* tcodes, int nargs, + TVMValue* ret_values, int* ret_tcodes, + void* resource_handle) { if (nargs != 0) { return kTvmErrorFunctionCallNumArguments; } - ret_values[0].v_int64 = TVMGraphRuntime_GetNumOutputs(graph_runtime.runtime); + ret_values[0].v_int64 = TVMGraphExecutor_GetNumOutputs(graph_executor.executor); ret_tcodes[0] = kTVMArgInt; return 0; } -int32_t TVMGraphRuntimeModule_GetOutput(TVMValue* args, int* tcodes, int nargs, - TVMValue* ret_values, int* ret_tcodes, - void* resource_handle) { +int32_t TVMGraphExecutorModule_GetOutput(TVMValue* args, int* tcodes, int nargs, + TVMValue* ret_values, int* ret_tcodes, + void* resource_handle) { if (nargs != 1) { return kTvmErrorFunctionCallNumArguments; } @@ -135,22 +136,22 @@ int32_t TVMGraphRuntimeModule_GetOutput(TVMValue* args, int* tcodes, int nargs, } int output_index = args[0].v_int64; - if (output_index < 0 || output_index > TVMGraphRuntime_GetNumOutputs(graph_runtime.runtime)) { + if (output_index < 0 || output_index > TVMGraphExecutor_GetNumOutputs(graph_executor.executor)) { return kTvmErrorGraphModuleNoSuchInput; } - uint32_t nid = graph_runtime.runtime->outputs[output_index].node_id; - uint32_t index = graph_runtime.runtime->outputs[output_index].index; - uint32_t eid = TVMGraphRuntime_GetEntryId(graph_runtime.runtime, nid, index); + uint32_t nid = graph_executor.executor->outputs[output_index].node_id; + uint32_t index = graph_executor.executor->outputs[output_index].index; + uint32_t eid = TVMGraphExecutor_GetEntryId(graph_executor.executor, nid, index); - ret_values[0].v_handle = (void*)&(graph_runtime.runtime->data_entry[eid].dl_tensor); + ret_values[0].v_handle = (void*)&(graph_executor.executor->data_entry[eid].dl_tensor); ret_tcodes[0] = kTVMNDArrayHandle; return 0; } -int32_t TVMGraphRuntimeModule_LoadParams(TVMValue* args, int* tcodes, int nargs, - TVMValue* ret_values, int* ret_tcodes, - void* resource_handle) { +int32_t TVMGraphExecutorModule_LoadParams(TVMValue* args, int* tcodes, int nargs, + TVMValue* ret_values, int* ret_tcodes, + void* resource_handle) { if (nargs != 1) { return kTvmErrorFunctionCallNumArguments; } @@ -162,23 +163,24 @@ int32_t TVMGraphRuntimeModule_LoadParams(TVMValue* args, int* tcodes, int nargs, ret_tcodes[0] = kTVMNullptr; TVMByteArray* arr = (TVMByteArray*)args[0].v_handle; - return TVMGraphRuntime_LoadParams(graph_runtime.runtime, arr->data, arr->size); + return TVMGraphExecutor_LoadParams(graph_executor.executor, arr->data, arr->size); } -int32_t TVMGraphRuntimeModule_Run(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, - int* ret_tcodes, void* resource_handle) { +int32_t TVMGraphExecutorModule_Run(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, + int* ret_tcodes, void* resource_handle) { if (nargs != 0) { return kTvmErrorFunctionCallNumArguments; } - TVMGraphRuntime_Run(graph_runtime.runtime); + TVMGraphExecutor_Run(graph_executor.executor); ret_tcodes[0] = kTVMNullptr; return 0; } -int32_t TVMGraphRuntimeModule_SetInput(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, - int* ret_tcodes, void* resource_handle) { +int32_t TVMGraphExecutorModule_SetInput(TVMValue* args, int* tcodes, int nargs, + TVMValue* ret_values, int* ret_tcodes, + void* resource_handle) { if (nargs != 2) { return kTvmErrorFunctionCallNumArguments; } @@ -187,26 +189,26 @@ int32_t TVMGraphRuntimeModule_SetInput(TVMValue* args, int* tcodes, int nargs, T return kTvmErrorFunctionCallWrongArgType; } 
- TVMGraphRuntime_SetInput(graph_runtime.runtime, args[0].v_str, (DLTensor*)args[1].v_handle); + TVMGraphExecutor_SetInput(graph_executor.executor, args[0].v_str, (DLTensor*)args[1].v_handle); ret_tcodes[0] = kTVMNullptr; return 0; } -int32_t TVMGraphRuntimeModule_NotImplemented(TVMValue* args, int* tcodes, int nargs, - TVMValue* ret_values, int* ret_tcodes, - void* resource_handle) { +int32_t TVMGraphExecutorModule_NotImplemented(TVMValue* args, int* tcodes, int nargs, + TVMValue* ret_values, int* ret_tcodes, + void* resource_handle) { return kTvmErrorFunctionCallNotImplemented; } -static const TVMBackendPackedCFunc graph_runtime_registry_funcs[] = { - &TVMGraphRuntimeModule_GetInput, &TVMGraphRuntimeModule_GetNumInputs, - &TVMGraphRuntimeModule_GetNumOutputs, &TVMGraphRuntimeModule_GetOutput, - &TVMGraphRuntimeModule_LoadParams, &TVMGraphRuntimeModule_Run, - &TVMGraphRuntimeModule_SetInput, &TVMGraphRuntimeModule_NotImplemented, +static const TVMBackendPackedCFunc graph_executor_registry_funcs[] = { + &TVMGraphExecutorModule_GetInput, &TVMGraphExecutorModule_GetNumInputs, + &TVMGraphExecutorModule_GetNumOutputs, &TVMGraphExecutorModule_GetOutput, + &TVMGraphExecutorModule_LoadParams, &TVMGraphExecutorModule_Run, + &TVMGraphExecutorModule_SetInput, &TVMGraphExecutorModule_NotImplemented, }; -static const TVMFuncRegistry graph_runtime_registry = { +static const TVMFuncRegistry graph_executor_registry = { "\x08get_input\0" "get_num_inputs\0" "get_num_outputs\0" @@ -215,11 +217,11 @@ static const TVMFuncRegistry graph_runtime_registry = { "run\0" "set_input\0" "share_params\0", - graph_runtime_registry_funcs}; + graph_executor_registry_funcs}; -tvm_crt_error_t TVMGraphRuntimeModule_Register() { - graph_runtime.mod.registry = &graph_runtime_registry; - graph_runtime.runtime = NULL; +tvm_crt_error_t TVMGraphExecutorModule_Register() { + graph_executor.mod.registry = &graph_executor_registry; + graph_executor.executor = NULL; - return TVMFuncRegisterGlobal("tvm.graph_runtime.create", &TVMGraphRuntimeModule_Create, 0); + return TVMFuncRegisterGlobal("tvm.graph_executor.create", &TVMGraphExecutorModule_Create, 0); } diff --git a/src/runtime/crt/host/main.cc b/src/runtime/crt/host/main.cc index 15e696b59f46..e64455417928 100644 --- a/src/runtime/crt/host/main.cc +++ b/src/runtime/crt/host/main.cc @@ -34,8 +34,8 @@ #include "crt_config.h" -#ifdef TVM_HOST_USE_GRAPH_RUNTIME_MODULE -#include +#ifdef TVM_HOST_USE_GRAPH_EXECUTOR_MODULE +#include #endif using namespace std::chrono; @@ -131,9 +131,9 @@ int main(int argc, char** argv) { utvm_rpc_server_t rpc_server = UTvmRpcServerInit(&UTvmWriteFunc, nullptr); -#ifdef TVM_HOST_USE_GRAPH_RUNTIME_MODULE - CHECK_EQ(TVMGraphRuntimeModule_Register(), kTvmErrorNoError, - "failed to register GraphRuntime TVMModule"); +#ifdef TVM_HOST_USE_GRAPH_EXECUTOR_MODULE + CHECK_EQ(TVMGraphExecutorModule_Register(), kTvmErrorNoError, + "failed to register GraphExecutor TVMModule"); #endif if (TVMFuncRegisterGlobal("tvm.testing.reset_server", (TVMFunctionHandle)&testonly_reset_server, diff --git a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/graph_runtime.h b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h similarity index 58% rename from src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/graph_runtime.h rename to src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h index a64076ab61e0..47ef474778e0 100644 --- 
a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/graph_runtime.h +++ b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h @@ -18,41 +18,41 @@ */ /*! - * \file src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/graph_runtime.h - * \brief Tiny graph runtime that can run graph containing only tvm PackedFunc. + * \file src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/graph_executor.h + * \brief Tiny graph executor that can run graph containing only tvm PackedFunc. */ -#ifndef TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_RUNTIME_GRAPH_RUNTIME_H_ -#define TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_RUNTIME_GRAPH_RUNTIME_H_ +#ifndef TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_EXECUTOR_GRAPH_EXECUTOR_H_ +#define TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_EXECUTOR_GRAPH_EXECUTOR_H_ -#include +#include #include -#include +#include #include // Memory pool entry. -typedef struct TVMGraphRuntimePoolEntry { +typedef struct TVMGraphExecutorPoolEntry { size_t size; int device_type; int entry_id; -} TVMGraphRuntimePoolEntry; +} TVMGraphExecutorPoolEntry; // Node entry -typedef struct TVMGraphRuntimeNodeEntry { +typedef struct TVMGraphExecutorNodeEntry { uint32_t node_id; uint32_t index; uint32_t version; // JSON Loader void (*Load)(JSONReader* reader); -} TVMGraphRuntimeNodeEntry; +} TVMGraphExecutorNodeEntry; // Storage entry. -typedef struct TVMGraphRuntimeStorageEntry { +typedef struct TVMGraphExecutorStorageEntry { uint8_t is_linked_param; TVMNDArray array; -} TVMGraphRuntimeStorageEntry; +} TVMGraphExecutorStorageEntry; // Node -typedef struct TVMGraphRuntimeNode { +typedef struct TVMGraphExecutorNode { // operator type in string char op_type[16]; // name of the op @@ -60,20 +60,20 @@ typedef struct TVMGraphRuntimeNode { // parameters TVMOpParam param; // inputs - TVMGraphRuntimeNodeEntry* inputs; + TVMGraphExecutorNodeEntry* inputs; // number of inputs size_t inputs_count; // control deps uint32_t control_deps[20]; // JSON Loader - void (*LoadAttrs)(struct TVMGraphRuntimeNode* node, JSONReader* reader, TVMOpParam* param); + void (*LoadAttrs)(struct TVMGraphExecutorNode* node, JSONReader* reader, TVMOpParam* param); // JSON Loader - int (*Load)(struct TVMGraphRuntimeNode* node, JSONReader* reader); -} TVMGraphRuntimeNode; + int (*Load)(struct TVMGraphExecutorNode* node, JSONReader* reader); +} TVMGraphExecutorNode; -typedef struct TVMGraphRuntime { +typedef struct TVMGraphExecutor { /*! \brief The graph nodes. */ - TVMGraphRuntimeNode* nodes; + TVMGraphExecutorNode* nodes; /*! \brief The graph nodes counter. */ uint32_t nodes_count; /*! \brief The argument nodes. */ @@ -83,18 +83,18 @@ typedef struct TVMGraphRuntime { uint32_t* node_row_ptr; uint32_t node_row_ptr_count; /*! \brief Output entries. */ - TVMGraphRuntimeNodeEntry* outputs; + TVMGraphExecutorNodeEntry* outputs; /*! \brief Output entries counter. */ uint32_t outputs_count; /*! \brief Additional graph attributes. */ - TVMGraphRuntimeGraphAttr attrs; + TVMGraphExecutorGraphAttr attrs; /*! \brief The code module that contains both host and device code. */ TVMModuleHandle module_handle; /*! \brief Execution context of all devices including the host. */ DLDevice devices[1]; uint32_t devices_count; /*! \brief Common storage pool for all devices. */ - TVMGraphRuntimeStorageEntry* storage_pool; + TVMGraphExecutorStorageEntry* storage_pool; uint32_t storage_pool_count; /*! \brief Data entry of each node. 
*/ TVMNDArray* data_entry; @@ -102,20 +102,20 @@ typedef struct TVMGraphRuntime { /*! \brief Operator on each node. */ TVMPackedFunc* op_execs; uint32_t op_execs_count; -} TVMGraphRuntime; +} TVMGraphExecutor; typedef DLTensor* DLTensorPtr; // private functions -uint32_t TVMGraphRuntime_GetEntryId(TVMGraphRuntime* runtime, uint32_t nid, uint32_t index); -void TVMGraphRuntime_SetInput(TVMGraphRuntime* runtime, const char* name, DLTensor* data_in); -int TVMGraphRuntime_LoadParams(TVMGraphRuntime* runtime, const char* param_blob, - const uint32_t param_size); -void TVMGraphRuntime_Run(TVMGraphRuntime* runtime); -int TVMGraphRuntime_GetOutput(TVMGraphRuntime* runtime, const int32_t idx, DLTensor* out); +uint32_t TVMGraphExecutor_GetEntryId(TVMGraphExecutor* executor, uint32_t nid, uint32_t index); +void TVMGraphExecutor_SetInput(TVMGraphExecutor* executor, const char* name, DLTensor* data_in); +int TVMGraphExecutor_LoadParams(TVMGraphExecutor* executor, const char* param_blob, + const uint32_t param_size); +void TVMGraphExecutor_Run(TVMGraphExecutor* executor); +int TVMGraphExecutor_GetOutput(TVMGraphExecutor* executor, const int32_t idx, DLTensor* out); -int32_t TVMGraphRuntime_CreateTVMOp(TVMGraphRuntime* runtime, const TVMOpParam* param, - DLTensorPtr* args, const uint32_t args_count, - uint32_t num_inputs, TVMPackedFunc* pf); +int32_t TVMGraphExecutor_CreateTVMOp(TVMGraphExecutor* executor, const TVMOpParam* param, + DLTensorPtr* args, const uint32_t args_count, + uint32_t num_inputs, TVMPackedFunc* pf); -#endif // TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_RUNTIME_GRAPH_RUNTIME_H_ +#endif // TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_EXECUTOR_GRAPH_EXECUTOR_H_ diff --git a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/load_json.h b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/load_json.h similarity index 90% rename from src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/load_json.h rename to src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/load_json.h index af69506b0634..ac5adc842b62 100644 --- a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/load_json.h +++ b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/load_json.h @@ -18,11 +18,11 @@ */ /*! - * \file src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/load_json.h + * \file src/runtime/crt/include/tvm/runtime/crt/internal/graph_executor/load_json.h * \brief Lightweight JSON Reader that read save into C++ data structs. 
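/* Illustrative sketch (not part of the change set): the renamed CRT entry points
 * declared above (TVMGraphExecutor_Create / _SetInput / _LoadParams / _Run /
 * _GetOutput / _Release) compose as follows. graph_json, mod, params, the
 * tensors and the input name "data" are caller-supplied placeholders, the
 * include path assumes the renamed public header, and error handling is
 * abbreviated: */
#include <stdint.h>
#include <dlpack/dlpack.h>
#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/crt/graph_executor.h> /* assumed renamed public header */

int run_graph_once(const char* graph_json, TVMModuleHandle mod, const char* params,
                   uint32_t params_len, DLTensor* input, DLTensor* output) {
  DLDevice dev = {kDLCPU, 0};
  TVMGraphExecutor* exec = NULL;

  if (TVMGraphExecutor_Create(graph_json, mod, &dev, &exec) != 0) return -1;
  if (TVMGraphExecutor_LoadParams(exec, params, params_len) != 0) {
    TVMGraphExecutor_Release(&exec);
    return -1;
  }

  TVMGraphExecutor_SetInput(exec, "data", input); /* "data" is a placeholder input name */
  TVMGraphExecutor_Run(exec);

  int status = -1;
  if (TVMGraphExecutor_GetNumOutputs(exec) > 0) {
    /* output must be prepared to match the entry, as sketched earlier */
    status = TVMGraphExecutor_GetOutput(exec, 0, output);
  }
  TVMGraphExecutor_Release(&exec);
  return status;
}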
*/ -#ifndef TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_RUNTIME_LOAD_JSON_H_ -#define TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_RUNTIME_LOAD_JSON_H_ +#ifndef TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_EXECUTOR_LOAD_JSON_H_ +#define TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_EXECUTOR_LOAD_JSON_H_ #include #include @@ -38,9 +38,9 @@ enum { JSON_READ_TYPE_S32 = 6, JSON_READ_TYPE_F32 = 7, JSON_READ_TYPE_F64 = 8, - JSON_READ_TYPE_GRAPH_RUNTIME_NODE = 9, - JSON_READ_TYPE_GRAPH_RUNTIME_NODE_ENTRY = 10, - JSON_READ_TYPE_GRAPH_RUNTIME_GRAPH_ATTR = 11 + JSON_READ_TYPE_GRAPH_EXECUTOR_NODE = 9, + JSON_READ_TYPE_GRAPH_EXECUTOR_NODE_ENTRY = 10, + JSON_READ_TYPE_GRAPH_EXECUTOR_GRAPH_ATTR = 11 }; typedef struct Seq { @@ -100,4 +100,4 @@ tvm_crt_error_t JSONReader_Create(const char* is, JSONReader* reader); */ tvm_crt_error_t JSONReader_Release(JSONReader* reader); -#endif // TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_RUNTIME_LOAD_JSON_H_ +#endif // TVM_RUNTIME_CRT_INCLUDE_TVM_RUNTIME_CRT_INTERNAL_GRAPH_EXECUTOR_LOAD_JSON_H_ diff --git a/src/runtime/graph/cuda_graph/graph_runtime_cuda_graph.cc b/src/runtime/graph_executor/cuda_graph/graph_runtime_cuda_graph.cc similarity index 76% rename from src/runtime/graph/cuda_graph/graph_runtime_cuda_graph.cc rename to src/runtime/graph_executor/cuda_graph/graph_runtime_cuda_graph.cc index 3ad8453c4903..53f225403be6 100644 --- a/src/runtime/graph/cuda_graph/graph_runtime_cuda_graph.cc +++ b/src/runtime/graph_executor/cuda_graph/graph_runtime_cuda_graph.cc @@ -18,28 +18,28 @@ */ /*! - * \file graph_runtime_cuda_graph.cc + * \file graph_executor_cuda_graph.cc */ #include #include "../../cuda/cuda_common.h" -#include "../graph_runtime.h" +#include "../graph_executor.h" namespace tvm { namespace runtime { /*! - * \brief Graph runtime with CUDA Graph Support. + * \brief Graph executor with CUDA Graph Support. * - * This is the extension of GraphRuntime class used for CUDA graph launch + * This is the extension of GraphExecutor class used for CUDA graph launch * instead of CUDA kernel launch. CUDA graph launch requires CUDA 10.0 or * above, currently there are two ways of constructing CUDA graphs: * (1) Using CUDA stream capture API to capture a series of operations on * CUDA stream, and automatically generates a graph (2) Building a graph * using CUDA graph API manually. This implementation uses stream capture. */ -class GraphRuntimeCudaGraph : public GraphRuntime { +class GraphExecutorCudaGraph : public GraphExecutor { public: /*! * \brief Begin CUDA graph capture on stream, the stream enters capture mode. 
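/* Illustrative sketch (not part of the change set): after the rename the CUDA
 * Graph variant is reached through the same PackedFunc surface. "run_cuda_graph"
 * and "end_capture" appear in the dispatch below; the capture-begin counterpart
 * is elided by this hunk and is assumed here to be named "start_capture". Given
 * a module handle exec_mod obtained from "tvm.graph_executor_cuda_graph.create",
 * one capture-then-replay pass could look like this (error handling omitted): */
#include <tvm/runtime/c_runtime_api.h>

static void capture_and_replay(TVMModuleHandle exec_mod, int replays) {
  TVMFunctionHandle start = NULL, run = NULL, end = NULL, replay = NULL;
  TVMModGetFunction(exec_mod, "start_capture", 0, &start); /* assumed name */
  TVMModGetFunction(exec_mod, "run", 0, &run);             /* inherited from GraphExecutor */
  TVMModGetFunction(exec_mod, "end_capture", 0, &end);
  TVMModGetFunction(exec_mod, "run_cuda_graph", 0, &replay);

  TVMValue ret;
  int ret_code;
  TVMFuncCall(start, NULL, NULL, 0, &ret, &ret_code);  /* stream enters capture mode */
  TVMFuncCall(run, NULL, NULL, 0, &ret, &ret_code);    /* record one normal run */
  TVMFuncCall(end, NULL, NULL, 0, &ret, &ret_code);    /* instantiate the captured graph */
  for (int i = 0; i < replays; ++i) {
    TVMFuncCall(replay, NULL, NULL, 0, &ret, &ret_code); /* replay without relaunching kernels one by one */
  }
}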
@@ -93,8 +93,8 @@ class GraphRuntimeCudaGraph : public GraphRuntime { cudaGraphExec_t cuda_graph_exec_; }; -PackedFunc GraphRuntimeCudaGraph::GetFunction(const std::string& name, - const ObjectPtr& sptr_to_self) { +PackedFunc GraphExecutorCudaGraph::GetFunction(const std::string& name, + const ObjectPtr& sptr_to_self) { if (name == "run_cuda_graph") { return PackedFunc( [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { this->RunCudaGraph(); }); @@ -104,23 +104,24 @@ PackedFunc GraphRuntimeCudaGraph::GetFunction(const std::string& name, } else if (name == "end_capture") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { this->EndCapture(); }); } else { - return GraphRuntime::GetFunction(name, sptr_to_self); + return GraphExecutor::GetFunction(name, sptr_to_self); } } -Module GraphRuntimeCudaGraphCreate(const std::string& sym_json, const tvm::runtime::Module& m, - const std::vector& devs, - PackedFunc lookup_linked_param_func) { - auto exec = make_object(); +Module GraphExecutorCudaGraphCreate(const std::string& sym_json, const tvm::runtime::Module& m, + const std::vector& devs, + PackedFunc lookup_linked_param_func) { + auto exec = make_object(); exec->Init(sym_json, m, devs, lookup_linked_param_func); return Module(exec); } -TVM_REGISTER_GLOBAL("tvm.graph_runtime_cuda_graph.create") +TVM_REGISTER_GLOBAL("tvm.graph_executor_cuda_graph.create") .set_body([](TVMArgs args, TVMRetValue* rv) { - ICHECK_GE(args.num_args, 4) << "The expected number of arguments for graph_runtime.create is " - "at least 4, but it has " - << args.num_args; + ICHECK_GE(args.num_args, 4) + << "The expected number of arguments for graph_executor.create is " + "at least 4, but it has " + << args.num_args; PackedFunc lookup_linked_param_func; int dev_start_arg = 2; if (args[2].type_code() == kTVMPackedFuncHandle) { @@ -128,8 +129,8 @@ TVM_REGISTER_GLOBAL("tvm.graph_runtime_cuda_graph.create") dev_start_arg++; } - *rv = GraphRuntimeCudaGraphCreate(args[0], args[1], GetAllDevice(args, dev_start_arg), - lookup_linked_param_func); + *rv = GraphExecutorCudaGraphCreate(args[0], args[1], GetAllDevice(args, dev_start_arg), + lookup_linked_param_func); }); } // namespace runtime } // namespace tvm diff --git a/src/runtime/graph/debug/graph_runtime_debug.cc b/src/runtime/graph_executor/debug/graph_executor_debug.cc similarity index 89% rename from src/runtime/graph/debug/graph_runtime_debug.cc rename to src/runtime/graph_executor/debug/graph_executor_debug.cc index fedaf4f890bc..7c1e6960f9f5 100644 --- a/src/runtime/graph/debug/graph_runtime_debug.cc +++ b/src/runtime/graph_executor/debug/graph_executor_debug.cc @@ -18,7 +18,7 @@ */ /*! - * \file graph_runtime_debug.cc + * \file graph_executor_debug.cc */ #include #include @@ -29,18 +29,18 @@ #include #include -#include "../graph_runtime.h" +#include "../graph_executor.h" namespace tvm { namespace runtime { /*! - * \brief Graph runtime with debug . + * \brief Graph executor with debug . * - * This is the extension of GraphRuntime class used for debugging + * This is the extension of GraphExecutor class used for debugging * TVM runtime PackedFunc API. */ -class GraphRuntimeDebug : public GraphRuntime { +class GraphExecutorDebug : public GraphExecutor { public: /*! * \brief Run each operation in the graph and get the time per op for all ops. 
@@ -58,7 +58,7 @@ class GraphRuntimeDebug : public GraphRuntime { */ std::string RunIndividual(int number, int repeat, int min_repeat_ms) { // warmup run - GraphRuntime::Run(); + GraphExecutor::Run(); std::string tkey = module_->type_key(); std::vector time_sec_per_op(op_execs_.size(), 0); if (tkey == "rpc") { @@ -128,8 +128,8 @@ class GraphRuntimeDebug : public GraphRuntime { << "Don't know how to run op type " << nodes_[index].op_type << " remotely over RPC right now"; - // NOTE: GraphRuntimeDebug expects graph nodes to have an "op" attribute of "tvm_op" or "null" - // and "null" is a placeholder node for a parameter or input. + // NOTE: GraphExecutorDebug expects graph nodes to have an "op" attribute of "tvm_op" or + // "null" and "null" is a placeholder node for a parameter or input. return 0; } @@ -235,8 +235,8 @@ class GraphRuntimeDebug : public GraphRuntime { * \param name The function which needs to be invoked. * \param sptr_to_self Packed function pointer. */ -PackedFunc GraphRuntimeDebug::GetFunction(const std::string& name, - const ObjectPtr& sptr_to_self) { +PackedFunc GraphExecutorDebug::GetFunction(const std::string& name, + const ObjectPtr& sptr_to_self) { // return member functions during query. if (name == "get_output_by_layer") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { @@ -261,26 +261,26 @@ PackedFunc GraphRuntimeDebug::GetFunction(const std::string& name, *rv = this->RunIndividual(number, repeat, min_repeat_ms); }); } else { - return GraphRuntime::GetFunction(name, sptr_to_self); + return GraphExecutor::GetFunction(name, sptr_to_self); } } /*! - * \brief GraphRuntimeDebugCreate Get the function based on input. + * \brief GraphExecutorDebugCreate Get the function based on input. * \param sym_json The graph symbol in json format. * \param m Compiled module which will be loaded. * \param devs All devices. 
*/ -Module GraphRuntimeDebugCreate(const std::string& sym_json, const tvm::runtime::Module& m, - const std::vector& devs, - PackedFunc lookup_linked_param_func) { - auto exec = make_object(); +Module GraphExecutorDebugCreate(const std::string& sym_json, const tvm::runtime::Module& m, + const std::vector& devs, + PackedFunc lookup_linked_param_func) { + auto exec = make_object(); exec->Init(sym_json, m, devs, lookup_linked_param_func); return Module(exec); } -TVM_REGISTER_GLOBAL("tvm.graph_runtime_debug.create").set_body([](TVMArgs args, TVMRetValue* rv) { - ICHECK_GE(args.num_args, 4) << "The expected number of arguments for graph_runtime.create is " +TVM_REGISTER_GLOBAL("tvm.graph_executor_debug.create").set_body([](TVMArgs args, TVMRetValue* rv) { + ICHECK_GE(args.num_args, 4) << "The expected number of arguments for graph_executor.create is " "at least 4, but it has " << args.num_args; PackedFunc lookup_linked_param_func; @@ -290,8 +290,8 @@ TVM_REGISTER_GLOBAL("tvm.graph_runtime_debug.create").set_body([](TVMArgs args, dev_start_arg++; } - *rv = GraphRuntimeDebugCreate(args[0], args[1], GetAllDevice(args, dev_start_arg), - lookup_linked_param_func); + *rv = GraphExecutorDebugCreate(args[0], args[1], GetAllDevice(args, dev_start_arg), + lookup_linked_param_func); }); } // namespace runtime } // namespace tvm diff --git a/src/runtime/graph/graph_runtime.cc b/src/runtime/graph_executor/graph_executor.cc similarity index 88% rename from src/runtime/graph/graph_runtime.cc rename to src/runtime/graph_executor/graph_executor.cc index a4320ee3b9c0..c4d984fe9633 100644 --- a/src/runtime/graph/graph_runtime.cc +++ b/src/runtime/graph_executor/graph_executor.cc @@ -18,9 +18,9 @@ */ /*! - * \file graph_runtime.cc + * \file graph_executor.cc */ -#include "graph_runtime.h" +#include "graph_executor.h" #include #include @@ -53,7 +53,7 @@ inline size_t GetDataAlignment(const DLTensor& arr) { /*! * \brief Run all the operations one by one. */ -void GraphRuntime::Run() { +void GraphExecutor::Run() { // setup the array and requirements. for (size_t i = 0; i < op_execs_.size(); ++i) { if (op_execs_[i]) op_execs_[i](); @@ -68,9 +68,9 @@ void GraphRuntime::Run() { * executed on. * \param lookup_linked_param_func Linked parameter lookup function. Default is nullptr. */ -void GraphRuntime::Init(const std::string& graph_json, tvm::runtime::Module module, - const std::vector& devs, - const PackedFunc lookup_linked_param_func) { +void GraphExecutor::Init(const std::string& graph_json, tvm::runtime::Module module, + const std::vector& devs, + const PackedFunc lookup_linked_param_func) { std::istringstream is(graph_json); dmlc::JSONReader reader(&is); this->Load(&reader); @@ -94,7 +94,7 @@ void GraphRuntime::Init(const std::string& graph_json, tvm::runtime::Module modu * \param name The name of the input. * \return The index of input. */ -int GraphRuntime::GetInputIndex(const std::string& name) { +int GraphExecutor::GetInputIndex(const std::string& name) { auto it = input_map_.find(name); if (it != input_map_.end()) { return it->second; @@ -106,7 +106,7 @@ int GraphRuntime::GetInputIndex(const std::string& name) { * \param index The input index. * \param data_in The input data. 
*/ -void GraphRuntime::SetInput(int index, DLTensor* data_in) { +void GraphExecutor::SetInput(int index, DLTensor* data_in) { ICHECK_LT(static_cast(index), input_nodes_.size()); uint32_t eid = this->entry_id(input_nodes_[index], 0); data_entry_[eid].CopyFrom(data_in); @@ -116,7 +116,7 @@ void GraphRuntime::SetInput(int index, DLTensor* data_in) { * \param index The input index. * \param data_ref The input data that is referred. */ -void GraphRuntime::SetInputZeroCopy(int index, DLTensor* data_ref) { +void GraphExecutor::SetInputZeroCopy(int index, DLTensor* data_ref) { ICHECK_LT(static_cast(index), input_nodes_.size()); uint32_t eid = this->entry_id(input_nodes_[index], 0); const DLTensor* old_t = data_entry_[eid].operator->(); @@ -141,20 +141,20 @@ void GraphRuntime::SetInputZeroCopy(int index, DLTensor* data_ref) { * * \return The number of outputs from graph. */ -int GraphRuntime::NumOutputs() const { return outputs_.size(); } +int GraphExecutor::NumOutputs() const { return outputs_.size(); } /*! * \brief Get the number of inputs * * \return The number of inputs to the graph. */ -int GraphRuntime::NumInputs() const { return input_nodes_.size(); } +int GraphExecutor::NumInputs() const { return input_nodes_.size(); } /*! * \brief Return NDArray for given input index. * \param index The input index. * * \return NDArray corresponding to given input node index. */ -NDArray GraphRuntime::GetInput(int index) const { +NDArray GraphExecutor::GetInput(int index) const { ICHECK_LT(static_cast(index), input_nodes_.size()); uint32_t eid = this->entry_id(input_nodes_[index], 0); return data_entry_[eid]; @@ -165,7 +165,7 @@ NDArray GraphRuntime::GetInput(int index) const { * * \return NDArray corresponding to given output node index. */ -NDArray GraphRuntime::GetOutput(int index) const { +NDArray GraphExecutor::GetOutput(int index) const { ICHECK_LT(static_cast(index), outputs_.size()); uint32_t eid = this->entry_id(outputs_[index]); return data_entry_[eid]; @@ -175,7 +175,7 @@ NDArray GraphRuntime::GetOutput(int index) const { * \param index The output index. * \param data_out the output data. */ -void GraphRuntime::CopyOutputTo(int index, DLTensor* data_out) { +void GraphExecutor::CopyOutputTo(int index, DLTensor* data_out) { ICHECK_LT(static_cast(index), outputs_.size()); uint32_t eid = this->entry_id(outputs_[index]); @@ -193,12 +193,12 @@ void GraphRuntime::CopyOutputTo(int index, DLTensor* data_out) { * \brief Load parameters from parameter blob. * \param param_blob A binary blob of parameter. 
*/ -void GraphRuntime::LoadParams(const std::string& param_blob) { +void GraphExecutor::LoadParams(const std::string& param_blob) { dmlc::MemoryStringStream strm(const_cast(¶m_blob)); this->LoadParams(&strm); } -void GraphRuntime::LoadParams(dmlc::Stream* strm) { +void GraphExecutor::LoadParams(dmlc::Stream* strm) { Map params = ::tvm::runtime::LoadParams(strm); for (auto& p : params) { int in_idx = GetInputIndex(p.first); @@ -208,7 +208,7 @@ void GraphRuntime::LoadParams(dmlc::Stream* strm) { } } -void GraphRuntime::ShareParams(const GraphRuntime& other, dmlc::Stream* strm) { +void GraphExecutor::ShareParams(const GraphExecutor& other, dmlc::Stream* strm) { uint64_t header, reserved; ICHECK(strm->Read(&header)) << "Invalid parameters file format"; ICHECK(header == kTVMNDArrayListMagic) << "Invalid parameters file format"; @@ -233,13 +233,13 @@ void GraphRuntime::ShareParams(const GraphRuntime& other, dmlc::Stream* strm) { this->SetupOpExecs(); } -void GraphRuntime::LinkedNDArrayDeleter(Object* container) { +void GraphExecutor::LinkedNDArrayDeleter(Object* container) { // container is the NDArray::Container which needs to get deleted. // The data member points to global const memory, so it does not need deleting. delete static_cast(container); } -void GraphRuntime::DefaultLookupLinkedParam(TVMArgs args, TVMRetValue* rv) { +void GraphExecutor::DefaultLookupLinkedParam(TVMArgs args, TVMRetValue* rv) { Module mod = args[0]; int64_t storage_id = args[1]; DLTensor* template_tensor = args[2]; @@ -266,11 +266,11 @@ void GraphRuntime::DefaultLookupLinkedParam(TVMArgs args, TVMRetValue* rv) { std::unique_ptr container{new NDArray::Container( static_cast(opaque_handle), shape_vec, template_tensor->dtype, dev)}; - container->SetDeleter(GraphRuntime::LinkedNDArrayDeleter); + container->SetDeleter(GraphExecutor::LinkedNDArrayDeleter); *rv = NDArray(GetObjectPtr(container.release())); } -void GraphRuntime::SetupStorage() { +void GraphExecutor::SetupStorage() { // Grab saved optimization plan from graph. std::vector vtype; for (const std::string& s_type : attrs_.dltype) { @@ -352,7 +352,7 @@ void GraphRuntime::SetupStorage() { } } -void GraphRuntime::SetupOpExecs() { +void GraphExecutor::SetupOpExecs() { op_execs_.resize(this->GetNumOfNodes()); input_dltensors_.resize(num_node_entries()); std::unordered_set input_node_eids; @@ -389,9 +389,10 @@ void GraphRuntime::SetupOpExecs() { } } -std::pair, std::shared_ptr > GraphRuntime::CreateTVMOp( - const TVMOpParam& param, const std::vector& args, size_t num_inputs) { - std::shared_ptr arg_ptr = std::make_shared(); +std::pair, std::shared_ptr > +GraphExecutor::CreateTVMOp(const TVMOpParam& param, const std::vector& args, + size_t num_inputs) { + std::shared_ptr arg_ptr = std::make_shared(); // setup address. arg_ptr->args = args; if (param.flatten_data) { @@ -438,8 +439,8 @@ std::pair, std::shared_ptr > GraphRu return {fexec, arg_ptr}; } -PackedFunc GraphRuntime::GetFunction(const std::string& name, - const ObjectPtr& sptr_to_self) { +PackedFunc GraphExecutor::GetFunction(const std::string& name, + const ObjectPtr& sptr_to_self) { // Return member functions during query. 
if (name == "set_input") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { @@ -494,20 +495,20 @@ PackedFunc GraphRuntime::GetFunction(const std::string& name, } else if (name == "share_params") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { const auto& module = args[0].operator Module(); - ICHECK_EQ(module.operator->()->type_key(), std::string("GraphRuntime")); + ICHECK_EQ(module.operator->()->type_key(), std::string("GraphExecutor")); const auto& param_blob = args[1].operator std::string(); dmlc::MemoryStringStream strm(const_cast(¶m_blob)); - this->ShareParams(dynamic_cast(*module.operator->()), &strm); + this->ShareParams(dynamic_cast(*module.operator->()), &strm); }); } else { return PackedFunc(); } } -Module GraphRuntimeCreate(const std::string& sym_json, const tvm::runtime::Module& m, - const std::vector& devs, - const PackedFunc lookup_linked_param_func) { - auto exec = make_object(); +Module GraphExecutorCreate(const std::string& sym_json, const tvm::runtime::Module& m, + const std::vector& devs, + const PackedFunc lookup_linked_param_func) { + auto exec = make_object(); exec->Init(sym_json, m, devs, lookup_linked_param_func); return Module(exec); } @@ -531,8 +532,8 @@ std::vector GetAllDevice(const TVMArgs& args, int dev_start_arg) { // execution support yet. For heterogenenous execution, at least 5 arguments will // be passed in. The third one is the number of devices. // Eventually, we will only probably pass Device for all the languages. -TVM_REGISTER_GLOBAL("tvm.graph_runtime.create").set_body([](TVMArgs args, TVMRetValue* rv) { - ICHECK_GE(args.num_args, 4) << "The expected number of arguments for graph_runtime.create is " +TVM_REGISTER_GLOBAL("tvm.graph_executor.create").set_body([](TVMArgs args, TVMRetValue* rv) { + ICHECK_GE(args.num_args, 4) << "The expected number of arguments for graph_executor.create is " "at least 4, but it has " << args.num_args; PackedFunc lookup_linked_param_func; @@ -542,7 +543,7 @@ TVM_REGISTER_GLOBAL("tvm.graph_runtime.create").set_body([](TVMArgs args, TVMRet dev_start_arg++; } const auto& devices = GetAllDevice(args, dev_start_arg); - *rv = GraphRuntimeCreate(args[0], args[1], devices, lookup_linked_param_func); + *rv = GraphExecutorCreate(args[0], args[1], devices, lookup_linked_param_func); }); } // namespace runtime } // namespace tvm diff --git a/src/runtime/graph/graph_runtime.h b/src/runtime/graph_executor/graph_executor.h similarity index 95% rename from src/runtime/graph/graph_runtime.h rename to src/runtime/graph_executor/graph_executor.h index aeaee9e3483e..37a47f6971e6 100644 --- a/src/runtime/graph/graph_runtime.h +++ b/src/runtime/graph_executor/graph_executor.h @@ -18,12 +18,12 @@ */ /*! - * \brief Tiny graph runtime that can run graph + * \brief Tiny graph executor that can run graph * containing only tvm PackedFunc. - * \file graph_runtime.h + * \file graph_executor.h */ -#ifndef TVM_RUNTIME_GRAPH_GRAPH_RUNTIME_H_ -#define TVM_RUNTIME_GRAPH_GRAPH_RUNTIME_H_ +#ifndef TVM_RUNTIME_GRAPH_EXECUTOR_GRAPH_EXECUTOR_H_ +#define TVM_RUNTIME_GRAPH_EXECUTOR_GRAPH_EXECUTOR_H_ #include #include @@ -56,12 +56,12 @@ struct TVMOpParam { }; /*! - * \brief Tiny graph runtime. + * \brief Tiny graph executor. * * This runtime can be acccesibly in various language via * TVM runtime PackedFunc API. 
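/* Illustrative sketch (not part of the change set): because the executor is a
 * ModuleNode exposed through PackedFuncs, any language binding reaches it via
 * the global registered above as "tvm.graph_executor.create". A minimal C
 * caller, assuming graph_json and a loaded mod (e.g. from TVMModLoadFromFile)
 * are supplied and error handling is omitted: */
#include <tvm/runtime/c_runtime_api.h>

static TVMModuleHandle create_graph_executor(const char* graph_json, TVMModuleHandle mod) {
  TVMFunctionHandle create = NULL;
  TVMFuncGetGlobal("tvm.graph_executor.create", &create);

  /* argument order per the registration above: graph_json, module, device_type, device_id */
  TVMValue args[4];
  int type_codes[4];
  args[0].v_str = graph_json;  type_codes[0] = kTVMStr;
  args[1].v_handle = mod;      type_codes[1] = kTVMModuleHandle;
  args[2].v_int64 = kDLCPU;    type_codes[2] = kTVMArgInt;
  args[3].v_int64 = 0;         type_codes[3] = kTVMArgInt;

  TVMValue ret;
  int ret_type_code;
  TVMFuncCall(create, args, type_codes, 4, &ret, &ret_type_code);

  /* the returned module exposes set_input / run / get_output etc., fetched with
     TVMModGetFunction, exactly as the GetFunction dispatch above shows */
  return (TVMModuleHandle)ret.v_handle;
}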
*/ -class TVM_DLL GraphRuntime : public ModuleNode { +class TVM_DLL GraphExecutor : public ModuleNode { struct OpArgs { std::vector args; std::vector arg_values; @@ -81,7 +81,7 @@ class TVM_DLL GraphRuntime : public ModuleNode { /*! * \return The type key of the executor. */ - const char* type_key() const final { return "GraphRuntime"; } + const char* type_key() const final { return "GraphExecutor"; } void Run(); /*! @@ -162,12 +162,12 @@ class TVM_DLL GraphRuntime : public ModuleNode { void LoadParams(const std::string& param_blob); /*! - * \brief Share parameters from pre-existing GraphRuntime instance. - * \param other A GraphRuntime instance, previously with |LoadParams| called with the + * \brief Share parameters from pre-existing GraphExecutor instance. + * \param other A GraphExecutor instance, previously with |LoadParams| called with the * identical input |param_blob|. * \param strm The input stream. */ - void ShareParams(const GraphRuntime& other, dmlc::Stream* strm); + void ShareParams(const GraphExecutor& other, dmlc::Stream* strm); /*! * \brief Get total number of nodes. @@ -430,4 +430,4 @@ std::vector GetAllDevice(const TVMArgs& args, int dev_start_arg); } // namespace runtime } // namespace tvm -#endif // TVM_RUNTIME_GRAPH_GRAPH_RUNTIME_H_ +#endif // TVM_RUNTIME_GRAPH_EXECUTOR_GRAPH_EXECUTOR_H_ diff --git a/src/runtime/graph/graph_runtime_factory.cc b/src/runtime/graph_executor/graph_executor_factory.cc similarity index 65% rename from src/runtime/graph/graph_runtime_factory.cc rename to src/runtime/graph_executor/graph_executor_factory.cc index a6f87bac8a6e..a6cef931421b 100644 --- a/src/runtime/graph/graph_runtime_factory.cc +++ b/src/runtime/graph_executor/graph_executor_factory.cc @@ -18,11 +18,11 @@ */ /*! - * \file graph_runtime_factory.cc - * \brief Graph runtime factory implementations + * \file graph_executor_factory.cc + * \brief Graph executor factory implementations */ -#include "./graph_runtime_factory.h" +#include "./graph_executor_factory.h" #include #include @@ -34,7 +34,7 @@ namespace tvm { namespace runtime { -GraphRuntimeFactory::GraphRuntimeFactory( +GraphExecutorFactory::GraphExecutorFactory( const std::string& graph_json, const std::unordered_map& params, const std::string& module_name) { @@ -43,7 +43,7 @@ GraphRuntimeFactory::GraphRuntimeFactory( module_name_ = module_name; } -PackedFunc GraphRuntimeFactory::GetFunction( +PackedFunc GraphExecutorFactory::GetFunction( const std::string& name, const tvm::runtime::ObjectPtr& sptr_to_self) { if (name == module_name_) { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { @@ -51,7 +51,7 @@ PackedFunc GraphRuntimeFactory::GetFunction( for (int i = 0; i < args.num_args; ++i) { devices.emplace_back(args[i].operator Device()); } - *rv = this->RuntimeCreate(devices); + *rv = this->ExecutorCreate(devices); }); } else if (name == "debug_create") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { @@ -62,13 +62,13 @@ PackedFunc GraphRuntimeFactory::GetFunction( for (int i = 1; i < args.num_args; ++i) { devices.emplace_back(args[i].operator Device()); } - *rv = this->DebugRuntimeCreate(devices); + *rv = this->DebugExecutorCreate(devices); }); } else if (name == "remove_params") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { std::unordered_map empty_params{}; auto exec = - make_object(this->graph_json_, empty_params, this->module_name_); + make_object(this->graph_json_, empty_params, this->module_name_); exec->Import(this->imports_[0]); *rv = 
Module(exec); }); @@ -78,14 +78,14 @@ PackedFunc GraphRuntimeFactory::GetFunction( for (int i = 0; i < args.num_args; ++i) { devices.emplace_back(args[i].operator Device()); } - *rv = this->CudaGraphRuntimeCreate(devices); + *rv = this->CudaGraphExecutorCreate(devices); }); } else { return PackedFunc(); } } -void GraphRuntimeFactory::SaveToBinary(dmlc::Stream* stream) { +void GraphExecutorFactory::SaveToBinary(dmlc::Stream* stream) { stream->Write(graph_json_); std::vector names; std::vector arrays; @@ -103,19 +103,19 @@ void GraphRuntimeFactory::SaveToBinary(dmlc::Stream* stream) { stream->Write(module_name_); } -Module GraphRuntimeFactory::RuntimeCreate(const std::vector& devs) { - auto exec = make_object(); +Module GraphExecutorFactory::ExecutorCreate(const std::vector& devs) { + auto exec = make_object(); exec->Init(this->graph_json_, this->imports_[0], devs, PackedFunc()); // set params SetParams(exec.get(), this->params_); return Module(exec); } -Module GraphRuntimeFactory::DebugRuntimeCreate(const std::vector& devs) { - const PackedFunc* pf = tvm::runtime::Registry::Get("tvm.graph_runtime_debug.create"); - ICHECK(pf != nullptr) << "Cannot find function tvm.graph_runtime_debug.create in registry. " - "Do you enable debug graph runtime build?"; - // Debug runtime create packed function will call GetAllContexs, so we unpack the devs. +Module GraphExecutorFactory::DebugExecutorCreate(const std::vector& devs) { + const PackedFunc* pf = tvm::runtime::Registry::Get("tvm.graph_executor_debug.create"); + ICHECK(pf != nullptr) << "Cannot find function tvm.graph_executor_debug.create in registry. " + "Do you enable debug graph executor build?"; + // Debug executor create packed function will call GetAllContexs, so we unpack the devs. std::vector unpacked_devs; for (const auto& dev : devs) { unpacked_devs.emplace_back(dev.device_type); @@ -133,15 +133,15 @@ Module GraphRuntimeFactory::DebugRuntimeCreate(const std::vector& devs) TVMRetValue rv; pf->CallPacked(TVMArgs(values.data(), codes.data(), args_size), &rv); Module mod = rv.operator Module(); - // debug graph runtime is one child class of graph runtime. - SetParams(const_cast(mod.as()), this->params_); + // debug graph executor is one child class of graph executor. + SetParams(const_cast(mod.as()), this->params_); return mod; } -Module GraphRuntimeFactory::CudaGraphRuntimeCreate(const std::vector& devs) { - const PackedFunc* pf = tvm::runtime::Registry::Get("tvm.graph_runtime_cuda_graph.create"); - ICHECK(pf != nullptr) << "Cannot find function tvm.graph_runtime_cuda_graph.create in registry. " - "Did you set(USE_GRAPH_RUNTIME_CUGRAPH=ON)?"; +Module GraphExecutorFactory::CudaGraphExecutorCreate(const std::vector& devs) { + const PackedFunc* pf = tvm::runtime::Registry::Get("tvm.graph_executor_cuda_graph.create"); + ICHECK(pf != nullptr) << "Cannot find function tvm.graph_executor_cuda_graph.create in registry. 
" + "Did you set(USE_GRAPH_EXECUTOR_CUGRAPH=ON)?"; std::vector unpacked_devs; for (const auto& dev : devs) { unpacked_devs.emplace_back(dev.device_type); @@ -159,11 +159,11 @@ Module GraphRuntimeFactory::CudaGraphRuntimeCreate(const std::vector& de TVMRetValue rv; pf->CallPacked(TVMArgs(values.data(), codes.data(), args_size), &rv); Module mod = rv.operator Module(); - SetParams(const_cast(mod.as()), this->params_); + SetParams(const_cast(mod.as()), this->params_); return mod; } -Module GraphRuntimeFactoryModuleLoadBinary(void* strm) { +Module GraphExecutorFactoryModuleLoadBinary(void* strm) { dmlc::Stream* stream = static_cast(strm); std::string graph_json; std::unordered_map params; @@ -180,30 +180,31 @@ Module GraphRuntimeFactoryModuleLoadBinary(void* strm) { params[names[i]] = temp; } ICHECK(stream->Read(&module_name)); - auto exec = make_object(graph_json, params, module_name); + auto exec = make_object(graph_json, params, module_name); return Module(exec); } -TVM_REGISTER_GLOBAL("tvm.graph_runtime_factory.create").set_body([](TVMArgs args, TVMRetValue* rv) { - ICHECK_GE(args.num_args, 3) << "The expected number of arguments for " - "graph_runtime_factory.create needs at least 3, " - "but it has " - << args.num_args; - // The argument order is graph_json, module, module_name, param0_name, param0_tensor, - // [param1_name, param1_tensor], ... - ICHECK_EQ((args.size() - 3) % 2, 0); - std::unordered_map params; - for (size_t i = 3; i < static_cast(args.size()); i += 2) { - std::string name = args[i].operator String(); - params[name] = args[i + 1].operator tvm::runtime::NDArray(); - } - auto exec = make_object(args[0], params, args[2]); - exec->Import(args[1]); - *rv = Module(exec); -}); +TVM_REGISTER_GLOBAL("tvm.graph_executor_factory.create") + .set_body([](TVMArgs args, TVMRetValue* rv) { + ICHECK_GE(args.num_args, 3) << "The expected number of arguments for " + "graph_executor_factory.create needs at least 3, " + "but it has " + << args.num_args; + // The argument order is graph_json, module, module_name, param0_name, param0_tensor, + // [param1_name, param1_tensor], ... + ICHECK_EQ((args.size() - 3) % 2, 0); + std::unordered_map params; + for (size_t i = 3; i < static_cast(args.size()); i += 2) { + std::string name = args[i].operator String(); + params[name] = args[i + 1].operator tvm::runtime::NDArray(); + } + auto exec = make_object(args[0], params, args[2]); + exec->Import(args[1]); + *rv = Module(exec); + }); -TVM_REGISTER_GLOBAL("runtime.module.loadbinary_GraphRuntimeFactory") - .set_body_typed(GraphRuntimeFactoryModuleLoadBinary); +TVM_REGISTER_GLOBAL("runtime.module.loadbinary_GraphExecutorFactory") + .set_body_typed(GraphExecutorFactoryModuleLoadBinary); } // namespace runtime } // namespace tvm diff --git a/src/runtime/graph/graph_runtime_factory.h b/src/runtime/graph_executor/graph_executor_factory.h similarity index 68% rename from src/runtime/graph/graph_runtime_factory.h rename to src/runtime/graph_executor/graph_executor_factory.h index 86958218a0f7..46346cbea002 100644 --- a/src/runtime/graph/graph_runtime_factory.h +++ b/src/runtime/graph_executor/graph_executor_factory.h @@ -18,12 +18,12 @@ */ /*! - * \file tvm/runtime/graph_runtime_factory.h - * \brief Graph runtime factory creating graph runtime. + * \file tvm/runtime/graph_executor/graph_executor_factory.h + * \brief Graph executor factory creating graph executor. 
*/ -#ifndef TVM_RUNTIME_GRAPH_GRAPH_RUNTIME_FACTORY_H_ -#define TVM_RUNTIME_GRAPH_GRAPH_RUNTIME_FACTORY_H_ +#ifndef TVM_RUNTIME_GRAPH_EXECUTOR_GRAPH_EXECUTOR_FACTORY_H_ +#define TVM_RUNTIME_GRAPH_EXECUTOR_GRAPH_EXECUTOR_FACTORY_H_ #include #include @@ -37,22 +37,22 @@ #include #include -#include "./graph_runtime.h" +#include "./graph_executor.h" namespace tvm { namespace runtime { -class TVM_DLL GraphRuntimeFactory : public runtime::ModuleNode { +class TVM_DLL GraphExecutorFactory : public runtime::ModuleNode { public: /*! - * \brief Construct the GraphRuntimeFactory. + * \brief Construct the GraphExecutorFactory. * \param graph_json The execution graph. * \param params The params of graph. * \param module_name The module name of graph. */ - GraphRuntimeFactory(const std::string& graph_json, - const std::unordered_map& params, - const std::string& module_name = "default"); + GraphExecutorFactory(const std::string& graph_json, + const std::unordered_map& params, + const std::string& module_name = "default"); /*! * \brief Get member function to front-end @@ -65,7 +65,7 @@ class TVM_DLL GraphRuntimeFactory : public runtime::ModuleNode { /*! * \return The type key of the executor. */ - const char* type_key() const override { return "GraphRuntimeFactory"; } + const char* type_key() const override { return "GraphExecutorFactory"; } /*! * \brief Save the module to binary stream. @@ -74,35 +74,35 @@ class TVM_DLL GraphRuntimeFactory : public runtime::ModuleNode { void SaveToBinary(dmlc::Stream* stream) override; /*! - * \brief Create a specific runtime module + * \brief Create a specific executor module * \param devs The device of the host and devices where graph nodes will be * executed on. - * \return created runtime module + * \return created executor module */ - Module RuntimeCreate(const std::vector& devs); + Module ExecutorCreate(const std::vector& devs); /*! - * \brief Create a specific debug runtime module + * \brief Create a specific debug executor module * \param devs The device of the host and devices where graph nodes will be * executed on. - * \return created debug runtime module + * \return created debug executor module */ - Module DebugRuntimeCreate(const std::vector& devs); + Module DebugExecutorCreate(const std::vector& devs); /*! - * \brief Create a specific cuda graph runtime module + * \brief Create a specific cuda graph executor module * \param devs The device of the host and devices where graph nodes will be * executed on. - * \return created cuda graph runtime module + * \return created cuda graph executor module */ - Module CudaGraphRuntimeCreate(const std::vector& devs); + Module CudaGraphExecutorCreate(const std::vector& devs); /*! * \brief Set params. - * \param graph_runtime The graph runtime we want to set the params into. + * \param graph_executor The graph executor we want to set the params into. * \param params The graph params value we want to set. 
*/ - void SetParams(GraphRuntime* graph_runtime, + void SetParams(GraphExecutor* graph_executor, const std::unordered_map& params) const { std::unordered_map value = params; // upload big arrays first to avoid memory issue in rpc mode @@ -117,9 +117,9 @@ class TVM_DLL GraphRuntimeFactory : public runtime::ModuleNode { return lhs_size > rhs_size; }); for (const auto& key : keys) { - int in_idx = graph_runtime->GetInputIndex(key); + int in_idx = graph_executor->GetInputIndex(key); if (in_idx >= 0) { - graph_runtime->SetInput(in_idx, const_cast(value[key].operator->())); + graph_executor->SetInput(in_idx, const_cast(value[key].operator->())); } } } @@ -136,4 +136,4 @@ class TVM_DLL GraphRuntimeFactory : public runtime::ModuleNode { } // namespace runtime } // namespace tvm -#endif // TVM_RUNTIME_GRAPH_GRAPH_RUNTIME_FACTORY_H_ +#endif // TVM_RUNTIME_GRAPH_EXECUTOR_GRAPH_EXECUTOR_FACTORY_H_ diff --git a/src/runtime/metadata_module.cc b/src/runtime/metadata_module.cc index 665c72cc5e0d..4a1d89ce1a1f 100644 --- a/src/runtime/metadata_module.cc +++ b/src/runtime/metadata_module.cc @@ -21,7 +21,7 @@ * \file src/runtime/metadata_module.cc * \brief A wrapper for initializing imported modules using metadata. This * module is intended to be used by various runtime in the TVM stack, i.e. - * graph runtime, relay VM, AOT runtime, and various user defined runtimes. It + * graph executor, relay VM, AOT runtime, and various user defined runtimes. It * paves the way to separate the code and metedata, which makes compilation * and/or interpretation more convenient. In addition, the clear separation of * code and metadata significantly reduces the efforts for handling external diff --git a/src/runtime/micro/standalone/utvm_graph_runtime.cc b/src/runtime/micro/standalone/utvm_graph_executor.cc similarity index 96% rename from src/runtime/micro/standalone/utvm_graph_runtime.cc rename to src/runtime/micro/standalone/utvm_graph_executor.cc index 897a4f4b5c0c..920faa134cf5 100644 --- a/src/runtime/micro/standalone/utvm_graph_runtime.cc +++ b/src/runtime/micro/standalone/utvm_graph_executor.cc @@ -17,7 +17,7 @@ * under the License. 
*/ -#include "utvm_graph_runtime.h" +#include "utvm_graph_executor.h" #include @@ -226,7 +226,7 @@ void* DSOModule::GetSymbol(const char* name) const { return f; } -MicroGraphRuntime::MicroGraphRuntime(const std::string& graph_json, DSOModule* module) { +MicroGraphExecutor::MicroGraphExecutor(const std::string& graph_json, DSOModule* module) { assert(module); module_ = module; picojson::value v; @@ -240,28 +240,28 @@ MicroGraphRuntime::MicroGraphRuntime(const std::string& graph_json, DSOModule* m SetupOpExecs(); } -MicroGraphRuntime::~MicroGraphRuntime() {} +MicroGraphExecutor::~MicroGraphExecutor() {} -void MicroGraphRuntime::Run() { +void MicroGraphExecutor::Run() { for (size_t i = 0; i < op_execs_.size(); ++i) { if (op_execs_[i]) op_execs_[i](); } } -void MicroGraphRuntime::SetInput(int index, DLTensor* data_in) { +void MicroGraphExecutor::SetInput(int index, DLTensor* data_in) { assert(static_cast(index) < input_nodes_.size()); uint32_t eid = this->entry_id(input_nodes_[index], 0); data_entry_[eid].CopyFrom(data_in); } -void MicroGraphRuntime::CopyOutputTo(int index, DLTensor* data_out) { +void MicroGraphExecutor::CopyOutputTo(int index, DLTensor* data_out) { assert(static_cast(index) < outputs_.size()); uint32_t eid = this->entry_id(outputs_[index]); const NDArray& data = data_entry_[eid]; data.CopyTo(data_out); } -void MicroGraphRuntime::SetupStorage() { +void MicroGraphExecutor::SetupStorage() { // Grab saved optimization plan from graph. DynArray vtype(attrs_.dltype.size()); for (size_t i = 0; i < attrs_.dltype.size(); ++i) { @@ -373,7 +373,7 @@ std::function CreateTVMOp(const DSOModule& module, const TVMOpParam& par return fexec; } -void MicroGraphRuntime::SetupOpExecs() { +void MicroGraphExecutor::SetupOpExecs() { op_execs_.resize(nodes_.size()); // setup the array and requirements. for (uint32_t nid = 0; nid < nodes_.size(); ++nid) { diff --git a/src/runtime/micro/standalone/utvm_graph_runtime.h b/src/runtime/micro/standalone/utvm_graph_executor.h similarity index 90% rename from src/runtime/micro/standalone/utvm_graph_runtime.h rename to src/runtime/micro/standalone/utvm_graph_executor.h index 3a2519c98fd4..afede6a7b30a 100644 --- a/src/runtime/micro/standalone/utvm_graph_runtime.h +++ b/src/runtime/micro/standalone/utvm_graph_executor.h @@ -17,8 +17,8 @@ * under the License. */ -#ifndef TVM_RUNTIME_MICRO_STANDALONE_UTVM_GRAPH_RUNTIME_H_ -#define TVM_RUNTIME_MICRO_STANDALONE_UTVM_GRAPH_RUNTIME_H_ +#ifndef TVM_RUNTIME_MICRO_STANDALONE_UTVM_GRAPH_EXECUTOR_H_ +#define TVM_RUNTIME_MICRO_STANDALONE_UTVM_GRAPH_EXECUTOR_H_ #include @@ -116,12 +116,12 @@ class NDArray { DLDevice device_; }; -// Minimal GraphRuntime implementation -class MicroGraphRuntime { +// Minimal GraphExecutor implementation +class MicroGraphExecutor { public: - // Construct a GraphRuntime with the given graph and DSOModule. - MicroGraphRuntime(const std::string& graph_json, DSOModule* module); - ~MicroGraphRuntime(); + // Construct a GraphExecutor with the given graph and DSOModule. 
+ MicroGraphExecutor(const std::string& graph_json, DSOModule* module); + ~MicroGraphExecutor(); // Run the graph void Run(); // Set the input at `index` to a copy of the tensor `data_in` @@ -164,4 +164,4 @@ class MicroGraphRuntime { } // namespace micro } // namespace tvm -#endif // TVM_RUNTIME_MICRO_STANDALONE_UTVM_GRAPH_RUNTIME_H_ +#endif // TVM_RUNTIME_MICRO_STANDALONE_UTVM_GRAPH_EXECUTOR_H_ diff --git a/src/runtime/micro/standalone/utvm_runtime.cc b/src/runtime/micro/standalone/utvm_runtime.cc index 73d616b6d482..585da9300128 100644 --- a/src/runtime/micro/standalone/utvm_runtime.cc +++ b/src/runtime/micro/standalone/utvm_runtime.cc @@ -20,28 +20,28 @@ #include -#include "utvm_graph_runtime.h" +#include "utvm_graph_executor.h" void* UTVMRuntimeCreate(const char* json, size_t json_len, void* module) { - return new tvm::micro::MicroGraphRuntime(std::string(json, json + json_len), - reinterpret_cast(module)); + return new tvm::micro::MicroGraphExecutor(std::string(json, json + json_len), + reinterpret_cast(module)); } void UTVMRuntimeDestroy(void* handle) { - delete reinterpret_cast(handle); + delete reinterpret_cast(handle); } void UTVMRuntimeSetInput(void* handle, int index, void* tensor) { - reinterpret_cast(handle)->SetInput( + reinterpret_cast(handle)->SetInput( index, reinterpret_cast(tensor)); } void UTVMRuntimeRun(void* handle) { - reinterpret_cast(handle)->Run(); + reinterpret_cast(handle)->Run(); } void UTVMRuntimeGetOutput(void* handle, int index, void* tensor) { - reinterpret_cast(handle)->CopyOutputTo( + reinterpret_cast(handle)->CopyOutputTo( index, reinterpret_cast(tensor)); } void* UTVMRuntimeDSOModuleCreate(const char* so, size_t so_len) { diff --git a/src/support/libinfo.cc b/src/support/libinfo.cc index d6c8f1799596..ea3a22e8ab01 100644 --- a/src/support/libinfo.cc +++ b/src/support/libinfo.cc @@ -76,12 +76,12 @@ #define TVM_INFO_USE_STACKVM_RUNTIME "NOT-FOUND" #endif -#ifndef TVM_INFO_USE_GRAPH_RUNTIME -#define TVM_INFO_USE_GRAPH_RUNTIME "NOT-FOUND" +#ifndef TVM_INFO_USE_GRAPH_EXECUTOR +#define TVM_INFO_USE_GRAPH_EXECUTOR "NOT-FOUND" #endif -#ifndef TVM_INFO_USE_GRAPH_RUNTIME_DEBUG -#define TVM_INFO_USE_GRAPH_RUNTIME_DEBUG "NOT-FOUND" +#ifndef TVM_INFO_USE_GRAPH_EXECUTOR_DEBUG +#define TVM_INFO_USE_GRAPH_EXECUTOR_DEBUG "NOT-FOUND" #endif #ifndef TVM_INFO_USE_OPENMP @@ -200,8 +200,8 @@ #define TVM_INFO_USE_ARM_COMPUTE_LIB "NOT-FOUND" #endif -#ifndef TVM_INFO_USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME -#define TVM_INFO_USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME "NOT-FOUND" +#ifndef TVM_INFO_USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR +#define TVM_INFO_USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR "NOT-FOUND" #endif #ifndef TVM_INFO_INDEX_DEFAULT_I64 @@ -234,8 +234,8 @@ TVM_DLL Map GetLibInfo() { {"USE_LLVM", TVM_INFO_USE_LLVM}, {"LLVM_VERSION", TVM_INFO_LLVM_VERSION}, {"USE_STACKVM_RUNTIME", TVM_INFO_USE_STACKVM_RUNTIME}, - {"USE_GRAPH_RUNTIME", TVM_INFO_USE_GRAPH_RUNTIME}, - {"USE_GRAPH_RUNTIME_DEBUG", TVM_INFO_USE_GRAPH_RUNTIME_DEBUG}, + {"USE_GRAPH_EXECUTOR", TVM_INFO_USE_GRAPH_EXECUTOR}, + {"USE_GRAPH_EXECUTOR_DEBUG", TVM_INFO_USE_GRAPH_EXECUTOR_DEBUG}, {"USE_OPENMP", TVM_INFO_USE_OPENMP}, {"USE_RELAY_DEBUG", TVM_INFO_USE_RELAY_DEBUG}, {"USE_RTTI", TVM_INFO_USE_RTTI}, @@ -265,7 +265,7 @@ TVM_DLL Map GetLibInfo() { {"USE_COREML", TVM_INFO_USE_COREML}, {"USE_TARGET_ONNX", TVM_INFO_USE_TARGET_ONNX}, {"USE_ARM_COMPUTE_LIB", TVM_INFO_USE_ARM_COMPUTE_LIB}, - {"USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME", TVM_INFO_USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME}, + {"USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR", 
TVM_INFO_USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR}, {"INDEX_DEFAULT_I64", TVM_INFO_INDEX_DEFAULT_I64}, {"TVM_CXX_COMPILER_PATH", TVM_CXX_COMPILER_PATH}}; return result; diff --git a/src/target/metadata_module.cc b/src/target/metadata_module.cc index 0b30d42c876c..8184e9189c4b 100644 --- a/src/target/metadata_module.cc +++ b/src/target/metadata_module.cc @@ -35,7 +35,7 @@ namespace codegen { /*! * \brief Create a metadata module wrapper. The helper is used by different - * codegens, such as graph runtime codegen and the vm compiler. + * codegens, such as graph executor codegen and the vm compiler. * * \param params The metadata for initialization of all modules. * \param target_module the internal module that is compiled by tvm. diff --git a/tests/azure-pipelines/main.yml b/tests/azure-pipelines/main.yml index 094c1df12739..49d488aba5fd 100644 --- a/tests/azure-pipelines/main.yml +++ b/tests/azure-pipelines/main.yml @@ -35,7 +35,7 @@ jobs: cmakeArgs: > -DUSE_SORT=ON -DUSE_RPC=ON - -DUSE_GRAPH_RUNTIME=ON + -DUSE_GRAPH_EXECUTOR=ON .. - task: MSBuild@1 inputs: @@ -56,7 +56,7 @@ jobs: cmakeArgs: > -DUSE_SORT=ON -DUSE_RPC=ON - -DUSE_GRAPH_RUNTIME=ON + -DUSE_GRAPH_EXECUTOR=ON .. - task: MSBuild@1 inputs: @@ -75,7 +75,7 @@ jobs: cmakeArgs: > -DUSE_SORT=ON -DUSE_RPC=ON - -DUSE_GRAPH_RUNTIME=ON + -DUSE_GRAPH_EXECUTOR=ON .. - script: cd build.common && make -j`sysctl -n hw.ncpu` displayName: Build the project diff --git a/tests/cpp/build_module_test.cc b/tests/cpp/build_module_test.cc index ed50e3c86e85..e9373936e0d4 100644 --- a/tests/cpp/build_module_test.cc +++ b/tests/cpp/build_module_test.cc @@ -163,16 +163,16 @@ TEST(BuildModule, Heterogeneous) { pc[i] = i - 1.0; } - // Initialize graph runtime. + // Initialize graph executor. int cpu_dev_ty = static_cast(kDLCPU); int cpu_dev_id = 0; int gpu_dev_ty = static_cast(kDLGPU); int gpu_dev_id = 0; - const runtime::PackedFunc* graph_runtime = - tvm::runtime::Registry::Get("tvm.graph_runtime.create"); + const runtime::PackedFunc* graph_executor = + tvm::runtime::Registry::Get("tvm.graph_executor.create"); runtime::Module mod = - (*graph_runtime)(json, module, cpu_dev_ty, cpu_dev_id, gpu_dev_ty, gpu_dev_id); + (*graph_executor)(json, module, cpu_dev_ty, cpu_dev_id, gpu_dev_ty, gpu_dev_id); // test FFI for module. 
auto test_ffi = PackedFunc([](TVMArgs args, TVMRetValue* rv) { diff --git a/tests/cpp/relay_build_module_test.cc b/tests/cpp/relay_build_module_test.cc index 74e5289c5093..b7b5abfd697d 100644 --- a/tests/cpp/relay_build_module_test.cc +++ b/tests/cpp/relay_build_module_test.cc @@ -124,7 +124,7 @@ TEST(Relay, BuildModule) { tvm::runtime::Module mod = mod_f(); // run auto dev = A->device; - auto pfr = tvm::runtime::Registry::Get("tvm.graph_runtime.create"); + auto pfr = tvm::runtime::Registry::Get("tvm.graph_executor.create"); ICHECK(mod.defined()) << "Module must be defined"; tvm::runtime::Module run_mod = (*pfr)(json, mod, (int)dev.device_type, (int)dev.device_id); auto set_input_f = run_mod.GetFunction("set_input_zero_copy", false); diff --git a/tests/micro/test_runtime_micro_on_arm.py b/tests/micro/test_runtime_micro_on_arm.py index 45ca8e74323c..7d19d9510062 100644 --- a/tests/micro/test_runtime_micro_on_arm.py +++ b/tests/micro/test_runtime_micro_on_arm.py @@ -19,7 +19,7 @@ import numpy as np import tvm from tvm import te -from tvm.contrib import graph_runtime, utils +from tvm.contrib import graph_executor, utils from tvm import relay import tvm.micro as micro from tvm.micro import create_micro_mod @@ -36,7 +36,7 @@ def relay_micro_build(func, dev_config, params=None): - """Create a graph runtime module with a micro device context from a Relay function. + """Create a graph executor module with a micro device context from a Relay function. Parameters ---------- @@ -52,7 +52,7 @@ def relay_micro_build(func, dev_config, params=None): Return ------ mod : tvm.runtime.Module - graph runtime module for the target device + graph executor module for the target device """ with tvm.transform.PassContext( disabled_pass={"FuseOps"}, config={"tir.disable_vectorize": True} @@ -60,7 +60,7 @@ def relay_micro_build(func, dev_config, params=None): graph, c_mod, params = relay.build(func, target=TARGET, params=params) micro_mod = micro.create_micro_mod(c_mod, dev_config) ctx = tvm.micro_dev(0) - mod = graph_runtime.create(graph, micro_mod, ctx) + mod = graph_executor.create(graph, micro_mod, ctx) mod.set_input(**params) return mod @@ -171,8 +171,8 @@ def test_workspace_add(): tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + 2.0) -def test_graph_runtime(): - """Test a program which uses the graph runtime.""" +def test_graph_executor(): + """Test a program which uses the graph executor.""" if not tvm.runtime.enabled("micro_dev"): return shape = (1024,) @@ -347,9 +347,9 @@ def test_inactive_session_use(): print() print("finished workspace add test") input("[press enter to continue]") - test_graph_runtime() + test_graph_executor() print() - print("finished graph runtime test") + print("finished graph executor test") input("[press enter to continue]") test_conv2d() print() diff --git a/tests/micro/zephyr/test_zephyr.py b/tests/micro/zephyr/test_zephyr.py index 003cd54bba90..b4731f16d99f 100644 --- a/tests/micro/zephyr/test_zephyr.py +++ b/tests/micro/zephyr/test_zephyr.py @@ -198,7 +198,7 @@ def test_relay(platform, west_cmd): graph, mod, params = tvm.relay.build(func, target=target) with _make_session(model, target, zephyr_board, west_cmd, mod) as session: - graph_mod = tvm.micro.create_local_graph_runtime( + graph_mod = tvm.micro.create_local_graph_executor( graph, session.get_system_lib(), session.device ) graph_mod.set_input(**params) @@ -239,7 +239,7 @@ def test_onnx(platform, west_cmd): graph = lowered.get_json() with _make_session(model, target, zephyr_board, west_cmd, lowered.lib) as session: - 
graph_mod = tvm.micro.create_local_graph_runtime( + graph_mod = tvm.micro.create_local_graph_executor( graph, session.get_system_lib(), session.device ) @@ -318,7 +318,7 @@ def check_result(relay_mod, model, zephyr_board, west_cmd, map_inputs, out_shape graph, mod, params = tvm.relay.build(relay_mod, target=target) with _make_session(model, target, zephyr_board, west_cmd, mod) as session: - rt_mod = tvm.micro.create_local_graph_runtime( + rt_mod = tvm.micro.create_local_graph_executor( graph, session.get_system_lib(), session.device ) rt_mod.set_input(**params) diff --git a/tests/python/contrib/test_arm_compute_lib/infrastructure.py b/tests/python/contrib/test_arm_compute_lib/infrastructure.py index 9a9bf69958f5..35f345cea78a 100644 --- a/tests/python/contrib/test_arm_compute_lib/infrastructure.py +++ b/tests/python/contrib/test_arm_compute_lib/infrastructure.py @@ -24,7 +24,7 @@ import tvm from tvm import relay from tvm import rpc -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.relay.op.contrib import arm_compute_lib from tvm.contrib import utils from tvm.autotvm.measure import request_remote @@ -214,7 +214,7 @@ def build_and_run( raise Exception(err_msg) lib = update_lib(lib, device.device, device.cross_compile) - gen_module = graph_runtime.GraphModule(lib["default"](device.device.cpu(0))) + gen_module = graph_executor.GraphModule(lib["default"](device.device.cpu(0))) gen_module.set_input(**inputs) out = [] for _ in range(no_runs): diff --git a/tests/python/contrib/test_bnns/infrastructure.py b/tests/python/contrib/test_bnns/infrastructure.py index 0107de54a04f..d046ee9ad0dd 100644 --- a/tests/python/contrib/test_bnns/infrastructure.py +++ b/tests/python/contrib/test_bnns/infrastructure.py @@ -25,7 +25,7 @@ import tvm from tvm import relay from tvm import rpc -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.relay.op.contrib.bnns import partition_for_bnns from tvm.contrib import utils from tvm.autotvm.measure import request_remote @@ -171,7 +171,7 @@ def build_and_run( raise Exception(err_msg) lib = update_lib(lib, device.device, device.cross_compile) - gen_module = graph_runtime.GraphModule(lib["default"](device.device.cpu(0))) + gen_module = graph_executor.GraphModule(lib["default"](device.device.cpu(0))) gen_module.set_input(**inputs) out = [] for _ in range(no_runs): diff --git a/tests/python/contrib/test_bnns/test_onnx_topologies.py b/tests/python/contrib/test_bnns/test_onnx_topologies.py index 7c8dfa6b5e9a..25c4bc483333 100644 --- a/tests/python/contrib/test_bnns/test_onnx_topologies.py +++ b/tests/python/contrib/test_bnns/test_onnx_topologies.py @@ -21,7 +21,7 @@ import tvm from tvm import relay from tvm.relay import transform -from tvm.contrib import utils, graph_runtime +from tvm.contrib import utils, graph_executor from tvm.contrib.download import download_testdata from tvm.relay.op.contrib.bnns import partition_for_bnns @@ -118,7 +118,7 @@ def run(mod, target, simplify=True, with_bnns=False): dev = tvm.cpu(0) loaded_lib = tvm.runtime.load_module(path_dso) - module = graph_runtime.GraphModule(loaded_lib["default"](dev)) + module = graph_executor.GraphModule(loaded_lib["default"](dev)) module.run() return module.get_output(0).asnumpy() diff --git a/tests/python/contrib/test_coreml_codegen.py b/tests/python/contrib/test_coreml_codegen.py index f8baf9f3530d..b93c489fdac6 100644 --- a/tests/python/contrib/test_coreml_codegen.py +++ b/tests/python/contrib/test_coreml_codegen.py @@ -105,7 +105,7 @@ def 
test_compile_and_run(): with relay.build_config(opt_level=3): lib = relay.build(_create_graph_annotated(), target=target) - m = tvm.contrib.graph_runtime.GraphModule(lib["default"](dev)) + m = tvm.contrib.graph_executor.GraphModule(lib["default"](dev)) shape = (10, 10) x_data = np.random.rand(*shape).astype("float32") diff --git a/tests/python/contrib/test_ethosn/infrastructure.py b/tests/python/contrib/test_ethosn/infrastructure.py index 791ef4a2a105..59021cf86211 100644 --- a/tests/python/contrib/test_ethosn/infrastructure.py +++ b/tests/python/contrib/test_ethosn/infrastructure.py @@ -20,7 +20,7 @@ from __future__ import absolute_import, print_function import tvm from tvm import relay -from tvm.contrib import utils, graph_runtime, download +from tvm.contrib import utils, graph_executor, download from hashlib import md5 from itertools import zip_longest, combinations import numpy as np @@ -211,7 +211,7 @@ def run(lib, inputs, outputs, npu=True): lib_path = temp.relpath(lib_name) lib.export_library(lib_path) lib = tvm.runtime.load_module(lib_path) - module = graph_runtime.GraphModule(lib["default"](tvm.cpu())) + module = graph_executor.GraphModule(lib["default"](tvm.cpu())) module.set_input(**inputs) module.run() out = [module.get_output(i) for i in range(outputs)] diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py index 0e25ca24b2a6..2bef7be65938 100644 --- a/tests/python/contrib/test_tensorrt.py +++ b/tests/python/contrib/test_tensorrt.py @@ -24,7 +24,7 @@ from tvm import relay, runtime from tvm.relay.op.contrib import tensorrt -from tvm.contrib import graph_runtime, utils +from tvm.contrib import graph_executor, utils from tvm.runtime.vm import VirtualMachine from tvm.relay import Any, GlobalVar, transform from tvm.relay.expr_functor import ExprVisitor @@ -252,7 +252,7 @@ def test_tensorrt_not_compatible(): results = exec.evaluate()(x_data) -def test_tensorrt_serialize_graph_runtime(): +def test_tensorrt_serialize_graph_executor(): if skip_codegen_test(): return import mxnet as mx @@ -273,7 +273,7 @@ def compile_graph(mod, params): return graph, lib, params def run_graph(graph, lib, params): - mod_ = graph_runtime.create(graph, lib, device=tvm.gpu(0)) + mod_ = graph_executor.create(graph, lib, device=tvm.gpu(0)) mod_.load_params(params) mod_.run(data=i_data) res = mod_.get_output(0) @@ -296,7 +296,7 @@ def load_graph(): lib = tvm.runtime.load_module(tmpdir.relpath("compiled.so")) return graph, lib, params - # Test serialization with graph runtime + # Test serialization with graph executor graph, lib, graph_params = compile_graph(mod, params) save_graph(graph, lib, graph_params) loaded_graph, loaded_lib, loaded_params = load_graph() diff --git a/tests/python/contrib/test_vitis_ai/infrastructure.py b/tests/python/contrib/test_vitis_ai/infrastructure.py index acc538822c3c..501ee255c143 100644 --- a/tests/python/contrib/test_vitis_ai/infrastructure.py +++ b/tests/python/contrib/test_vitis_ai/infrastructure.py @@ -34,7 +34,7 @@ from tvm.relay.op.contrib.vitis_ai import annotation from tvm.relay.build_module import bind_params_by_name from tvm.contrib.target import vitis_ai -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.contrib import utils @@ -154,7 +154,7 @@ def verify_result( lib = build_module(mod, target, params=params, dpu_target=dpu_target, tvm_ops=tvm_ops) lib = update_lib(lib) - rt_mod = graph_runtime.GraphModule(lib["default"](tvm.cpu())) + rt_mod = graph_executor.GraphModule(lib["default"](tvm.cpu())) 
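# Editorial sketch, not part of this patch: the renamed user-facing Python flow in one
# self-contained example. The trivial Relay function below is illustrative; the point is
# that relay.build() yields the graph executor factory module, which is indexed by its
# module name ("default"), called with a device, and wrapped in graph_executor.GraphModule,
# exactly as the rewritten test hunks above do.
import numpy as np
import tvm
from tvm import relay
from tvm.contrib import graph_executor  # was: from tvm.contrib import graph_runtime

x = relay.var("x", shape=(1, 4), dtype="float32")
func = relay.Function([x], relay.add(x, relay.const(1.0, "float32")))
lib = relay.build(tvm.IRModule.from_expr(func), target="llvm")

dev = tvm.cpu(0)
m = graph_executor.GraphModule(lib["default"](dev))  # was: graph_runtime.GraphModule
m.set_input("x", tvm.nd.array(np.ones((1, 4), dtype="float32")))
m.run()
print(m.get_output(0).asnumpy())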
for name, data in map_inputs.items(): rt_mod.set_input(name, data) diff --git a/tests/python/frontend/caffe/test_forward.py b/tests/python/frontend/caffe/test_forward.py index 2e8807564239..d0f87fcc21c7 100644 --- a/tests/python/frontend/caffe/test_forward.py +++ b/tests/python/frontend/caffe/test_forward.py @@ -36,7 +36,7 @@ import tvm from tvm import relay -from tvm.contrib import utils, graph_runtime +from tvm.contrib import utils, graph_executor from tvm.contrib.download import download_testdata CURRENT_DIR = os.path.join(os.path.expanduser("~"), ".tvm_test_data", "caffe_test") @@ -205,7 +205,7 @@ def _run_tvm(data, proto_file, blob_file): with tvm.transform.PassContext(opt_level=3): lib = relay.build(mod, target=target, target_host=target_host, params=params) dtype = "float32" - m = graph_runtime.GraphModule(lib["default"](dev)) + m = graph_executor.GraphModule(lib["default"](dev)) if isinstance(data, (tuple, list)): for idx, d in enumerate(data): m.set_input("data" + str(idx), tvm.nd.array(d.astype(dtype))) diff --git a/tests/python/frontend/caffe2/test_forward.py b/tests/python/frontend/caffe2/test_forward.py index 879613f6bd08..1081b087c468 100644 --- a/tests/python/frontend/caffe2/test_forward.py +++ b/tests/python/frontend/caffe2/test_forward.py @@ -17,7 +17,7 @@ import numpy as np import tvm from tvm import te -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm import relay from model_zoo import c2_squeezenet, c2_resnet50, c2_vgg19 from caffe2.python import workspace, core @@ -42,7 +42,7 @@ def get_tvm_output(model, input_data, target, device, output_shape, output_dtype with tvm.transform.PassContext(opt_level=3): lib = relay.build(mod, target, params=params) - m = graph_runtime.GraphModule(lib["default"](device)) + m = graph_executor.GraphModule(lib["default"](device)) # set inputs m.set_input(input_names, tvm.nd.array(input_data.astype(input_data.dtype))) diff --git a/tests/python/frontend/coreml/test_forward.py b/tests/python/frontend/coreml/test_forward.py index 2c1295b28381..c227c3955c5b 100644 --- a/tests/python/frontend/coreml/test_forward.py +++ b/tests/python/frontend/coreml/test_forward.py @@ -21,7 +21,7 @@ import tvm from tvm import te -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm import topi import tvm.topi.testing from tvm import relay @@ -37,7 +37,7 @@ def get_tvm_output( ): with tvm.transform.PassContext(opt_level=3): lib = relay.build(func, target, params=params) - m = graph_runtime.GraphModule(lib["default"](device)) + m = graph_executor.GraphModule(lib["default"](device)) # set inputs m.set_input(input_name, tvm.nd.array(x.astype(dtype))) m.run() @@ -88,9 +88,9 @@ def run_tvm_graph( with tvm.transform.PassContext(opt_level=3): lib = relay.build(mod, target, params=params) - from tvm.contrib import graph_runtime + from tvm.contrib import graph_executor - m = graph_runtime.GraphModule(lib["default"](device)) + m = graph_executor.GraphModule(lib["default"](device)) # set inputs if isinstance(input_data, list): for i, e in enumerate(input_name): diff --git a/tests/python/frontend/darknet/test_forward.py b/tests/python/frontend/darknet/test_forward.py index 72457bf5cd46..3bb8e93d3d22 100644 --- a/tests/python/frontend/darknet/test_forward.py +++ b/tests/python/frontend/darknet/test_forward.py @@ -24,7 +24,7 @@ import numpy as np import tvm from tvm import te -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.contrib.download import download_testdata 
download_testdata.__test__ = False @@ -80,7 +80,7 @@ def _get_tvm_output(net, data, build_dtype="float32", states=None): # Execute on TVM dev = tvm.cpu(0) - m = graph_runtime.GraphModule(lib["default"](dev)) + m = graph_executor.GraphModule(lib["default"](dev)) # set inputs m.set_input("data", tvm.nd.array(data.astype(dtype))) if states: diff --git a/tests/python/frontend/keras/test_forward.py b/tests/python/frontend/keras/test_forward.py index bdd7d7b73d53..c7f734b891dd 100644 --- a/tests/python/frontend/keras/test_forward.py +++ b/tests/python/frontend/keras/test_forward.py @@ -18,7 +18,7 @@ import tvm from tvm import te from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor import keras import tvm.testing @@ -89,7 +89,7 @@ def get_tvm_output(xs, target, dev, dtype="float32"): mod, params = relay.frontend.from_keras(keras_model, shape_dict, layout=layout) with tvm.transform.PassContext(opt_level=2): lib = relay.build(mod, target, params=params) - m = graph_runtime.GraphModule(lib["default"](dev)) + m = graph_executor.GraphModule(lib["default"](dev)) for name, x in zip(keras_model.input_names, xs): m.set_input(name, tvm.nd.array(x.astype(dtype))) m.run() diff --git a/tests/python/frontend/mxnet/test_forward.py b/tests/python/frontend/mxnet/test_forward.py index 19b31d6fd3ed..c4e8e804b15a 100644 --- a/tests/python/frontend/mxnet/test_forward.py +++ b/tests/python/frontend/mxnet/test_forward.py @@ -19,7 +19,7 @@ import tvm from tvm import te -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm import relay import mxnet as mx @@ -78,7 +78,7 @@ def get_tvm_output(symbol, x, args, auxs, target, dev, dtype="float32"): ) with tvm.transform.PassContext(opt_level=3): lib = relay.build(mod, target, params=params) - m = graph_runtime.GraphModule(lib["default"](dev)) + m = graph_executor.GraphModule(lib["default"](dev)) # set inputs m.set_input("data", tvm.nd.array(x.astype(dtype))) m.run() @@ -802,7 +802,7 @@ def verify(val, shape, dtype): mx_sym = mx.sym.full(shape, val, dtype=dtype) mod, _ = relay.frontend.from_mxnet(mx_sym, {}) for target, dev in tvm.testing.enabled_targets(): - # Skip testing graph runtime because this op will be optimized out + # Skip testing graph executor because this op will be optimized out # by constant folding. 
for kind in ["debug"]: intrp = relay.create_executor(kind, mod=mod, device=dev, target=target) @@ -994,7 +994,7 @@ def verify( mod, params = relay.frontend.from_mxnet(mx_sym, shape=shape_dict, arg_params=mx_params) for target, dev in tvm.testing.enabled_targets(): - # only test graph runtime because debug runtime is too slow + # only test graph executor because debug runtime is too slow for kind in ["graph"]: intrp = relay.create_executor(kind, mod=mod, device=dev, target=target) op_res = intrp.evaluate()(**inputs, **params) diff --git a/tests/python/frontend/mxnet/test_qnn_ops_utils.py b/tests/python/frontend/mxnet/test_qnn_ops_utils.py index fa5c5d9715cc..a200e06ed2d0 100644 --- a/tests/python/frontend/mxnet/test_qnn_ops_utils.py +++ b/tests/python/frontend/mxnet/test_qnn_ops_utils.py @@ -18,7 +18,7 @@ import numpy as np import tvm from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.relay.frontend.mxnet_qnn_op_utils import ( dequantize_mxnet_min_max, quantize_mxnet_min_max, @@ -41,7 +41,7 @@ def dequantize_test_driver(in_dtype, quant_args, in_data, verify_output_data): mod = tvm.IRModule.from_expr(mod) with tvm.transform.PassContext(opt_level=3): graph, lib, params = relay.build(mod, "llvm", params=None) - rt_mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) rt_mod.set_input(input_data=in_data) rt_mod.set_input(**params) rt_mod.run() @@ -120,7 +120,7 @@ def quantize_test_driver(out_dtype, quant_args, in_data, verify_output_data): mod = tvm.IRModule.from_expr(mod) with tvm.transform.PassContext(opt_level=3): graph, lib, params = relay.build(mod, "llvm", params=None) - rt_mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) rt_mod.set_input(input_data=in_data) rt_mod.set_input(**params) rt_mod.run() diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py index 772d834bbe27..04b6c94a5f53 100644 --- a/tests/python/frontend/onnx/test_forward.py +++ b/tests/python/frontend/onnx/test_forward.py @@ -23,7 +23,7 @@ import tvm.topi.testing import tvm from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor import scipy import tvm.testing @@ -78,7 +78,7 @@ def get_tvm_output( with tvm.transform.PassContext(opt_level=1): graph, lib, params = relay.build(mod, target, params=params) - m = graph_runtime.create(graph, lib, device) + m = graph_executor.create(graph, lib, device) # set inputs if isinstance(input_data, list): for i, e in enumerate(input_names): diff --git a/tests/python/frontend/pytorch/qnn_test.py b/tests/python/frontend/pytorch/qnn_test.py index 29c69abba542..5b0b65f7b128 100644 --- a/tests/python/frontend/pytorch/qnn_test.py +++ b/tests/python/frontend/pytorch/qnn_test.py @@ -49,7 +49,7 @@ def get_tvm_runtime(script_module, input_name, ishape): # also not to make CI too slow lib = relay.build(mod, target="llvm", params=params) - runtime = tvm.contrib.graph_runtime.GraphModule(lib["default"](tvm.cpu(0))) + runtime = tvm.contrib.graph_executor.GraphModule(lib["default"](tvm.cpu(0))) return runtime diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py index 6416043068da..9ec52987c354 100644 --- a/tests/python/frontend/pytorch/test_forward.py +++ b/tests/python/frontend/pytorch/test_forward.py @@ -27,7 +27,7 @@ from torch.nn import functional as F import 
tvm from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.contrib.nvcc import have_fp16 import tvm.testing from packaging import version as package_version @@ -208,7 +208,7 @@ def verify_model(model_name, input_data=[], custom_convert_map={}, rtol=1e-5, at with tvm.transform.PassContext(opt_level=3): for target, dev in tvm.testing.enabled_targets(): relay_graph, relay_lib, relay_params = relay.build(mod, target=target, params=params) - relay_model = graph_runtime.create(relay_graph, relay_lib, dev) + relay_model = graph_executor.create(relay_graph, relay_lib, dev) relay_model.set_input(**relay_params) for name, inp in compiled_input.items(): relay_model.set_input(name, inp) @@ -3590,7 +3590,7 @@ def test_forward_pretrained_bert_base_uncased(): # -------------- dev = tvm.device(target, 0) - relay_model = graph_runtime.create(relay_graph, relay_lib, dev) + relay_model = graph_executor.create(relay_graph, relay_lib, dev) relay_model.set_input(**relay_params) relay_model.set_input(input_1, tokens_tensor) relay_model.set_input(input_2, segments_tensors) diff --git a/tests/python/frontend/tensorflow/test_bn_dynamic.py b/tests/python/frontend/tensorflow/test_bn_dynamic.py index 81a2db4704e6..4eb0d01ef102 100644 --- a/tests/python/frontend/tensorflow/test_bn_dynamic.py +++ b/tests/python/frontend/tensorflow/test_bn_dynamic.py @@ -66,9 +66,9 @@ def verify_fused_batch_norm(shape): mod, params = relay.frontend.from_tensorflow(constant_graph, outputs=["output"]) with tvm.transform.PassContext(opt_level=3): graph, lib, params = relay.build(mod, target=device, params=params) - from tvm.contrib import graph_runtime + from tvm.contrib import graph_executor - m = graph_runtime.create(graph, lib, dev) + m = graph_executor.create(graph, lib, dev) m.set_input(**params) m.set_input("input", data) m.run() diff --git a/tests/python/frontend/tensorflow/test_forward.py b/tests/python/frontend/tensorflow/test_forward.py index 4accd16a2567..53f424b922e2 100644 --- a/tests/python/frontend/tensorflow/test_forward.py +++ b/tests/python/frontend/tensorflow/test_forward.py @@ -110,7 +110,7 @@ def run_tvm_graph( target="llvm", out_names=None, opt_level=3, - mode="graph_runtime", + mode="graph_executor", cuda_layout="NCHW", layout=None, disabled_pass=None, @@ -165,9 +165,9 @@ def run_tvm_graph( else: with tvm.transform.PassContext(opt_level=opt_level, disabled_pass=disabled_pass): graph, lib, params = relay.build(mod, target, target_host, params) - from tvm.contrib import graph_runtime + from tvm.contrib import graph_executor - m = graph_runtime.create(graph, lib, dev) + m = graph_executor.create(graph, lib, dev) # set inputs for e, i in zip(input_node, input_data): if e != "": @@ -207,7 +207,7 @@ def compare_tf_with_tvm( init_global_variables=False, no_gpu=False, opt_level=3, - mode="graph_runtime", + mode="graph_executor", cuda_layout="NCHW", add_shapes_to_graph_def=True, targets=None, @@ -3856,10 +3856,10 @@ def _get_tvm_graph_module(graph_def): target = "llvm" with tvm.transform.PassContext(opt_level=0): graph, lib, params = relay.build(mod, target, params=params) - from tvm.contrib import graph_runtime + from tvm.contrib import graph_executor dev = tvm.cpu(0) - return params, graph_runtime.create(graph, lib, dev) + return params, graph_executor.create(graph, lib, dev) def _do_tvm_sample(model, data, in_states, params, num_samples): """Sampled from the model""" @@ -4073,7 +4073,7 @@ def test_forward_floor(): def test_forward_relu(): ishape = (1, 3, 10, 10) 
inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32) - for mode in ["graph_runtime", "vm"]: + for mode in ["graph_executor", "vm"]: with tf.Graph().as_default(): in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype) tf.nn.relu(in1) @@ -4083,7 +4083,7 @@ def test_forward_relu(): def test_forward_leaky_relu(): ishape = (1, 3, 10, 10) inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32) - for mode in ["graph_runtime", "vm"]: + for mode in ["graph_executor", "vm"]: with tf.Graph().as_default(): in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype) tf.nn.leaky_relu(in1, alpha=0.4) diff --git a/tests/python/frontend/tflite/test_forward.py b/tests/python/frontend/tflite/test_forward.py index 05e0f076e079..b02d246d1ae5 100644 --- a/tests/python/frontend/tflite/test_forward.py +++ b/tests/python/frontend/tflite/test_forward.py @@ -160,7 +160,7 @@ def run_tvm_graph( num_output=1, target="llvm", out_names=None, - mode="graph_runtime", + mode="graph_executor", ): """ Generic function to compile on relay and execute on tvm """ # TFLite.Model.Model has changed to TFLite.Model from 1.14 to 2.1 @@ -208,9 +208,9 @@ def run_tvm_graph( lib = relay.build(mod, target, params=params) dev = tvm.device(target, 0) - from tvm.contrib import graph_runtime + from tvm.contrib import graph_executor - m = graph_runtime.GraphModule(lib["default"](dev)) + m = graph_executor.GraphModule(lib["default"](dev)) # set inputs for i, e in enumerate(input_node): m.set_input(e, tvm.nd.array(input_data[i].astype(input_data[i].dtype))) @@ -264,7 +264,7 @@ def compare_tflite_with_tvm( out_names=None, quantized=False, input_range=None, - mode="graph_runtime", + mode="graph_executor", experimental_new_converter=False, ): """Generic function to generate and compare TFLite and TVM output""" diff --git a/tests/python/nightly/quantization/test_quantization_accuracy.py b/tests/python/nightly/quantization/test_quantization_accuracy.py index 4ecd5dce4649..57fa49e93a04 100644 --- a/tests/python/nightly/quantization/test_quantization_accuracy.py +++ b/tests/python/nightly/quantization/test_quantization_accuracy.py @@ -98,7 +98,7 @@ def eval_acc( with tvm.transform.PassContext(opt_level=3): graph, lib, params = relay.build(model, target) # create runtime module - m = tvm.contrib.graph_runtime.create(graph, lib, device) + m = tvm.contrib.graph_executor.create(graph, lib, device) m.set_input(**params) # setup evaluaiton metric diff --git a/tests/python/relay/benchmarking/benchmark_vm.py b/tests/python/relay/benchmarking/benchmark_vm.py index c4695f971e97..44ce9be766d2 100644 --- a/tests/python/relay/benchmarking/benchmark_vm.py +++ b/tests/python/relay/benchmarking/benchmark_vm.py @@ -19,7 +19,7 @@ import tvm from tvm import te -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm import relay from tvm.runtime import container from tvm.runtime import vm as vm_rt @@ -36,25 +36,25 @@ def benchmark_execution( dtype="float32", model="unknown", ): - def get_graph_runtime_output( + def get_graph_executor_output( mod, data, params, target, dev, dtype="float32", number=2, repeat=20 ): with tvm.transform.PassContext(opt_level=3): lib = relay.build(mod, target, params=params) - m = graph_runtime.GraphModule(lib["default"](dev)) + m = graph_executor.GraphModule(lib["default"](dev)) # set inputs m.set_input("data", data) m.run() out = m.get_output(0, tvm.nd.empty(out_shape, dtype)) if measure: - print("Evaluate graph runtime inference cost of {} on " "{}".format(model, 
repr(dev))) + print("Evaluate graph executor inference cost of {} on " "{}".format(model, repr(dev))) ftimer = m.module.time_evaluator("run", dev, number=1, repeat=20) # Measure in millisecond. prof_res = np.array(ftimer().results) * 1000 print( - "Mean graph runtime inference time (std dev): %.2f ms (%.2f ms)" + "Mean graph executor inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res)) ) @@ -82,7 +82,7 @@ def get_vm_output(mod, data, params, target, dev, dtype="float32", number=2, rep data = np.random.uniform(size=data_shape).astype(dtype) for target, dev in testing.enabled_targets(): - tvm_out = get_graph_runtime_output( + tvm_out = get_graph_executor_output( mod, tvm.nd.array(data.astype(dtype)), params, target, dev, dtype ) vm_out = get_vm_output(mod, tvm.nd.array(data.astype(dtype)), params, target, dev, dtype) diff --git a/tests/python/relay/test_auto_scheduler_layout_rewrite_networks.py b/tests/python/relay/test_auto_scheduler_layout_rewrite_networks.py index 95f1177da024..8466fc1700b0 100644 --- a/tests/python/relay/test_auto_scheduler_layout_rewrite_networks.py +++ b/tests/python/relay/test_auto_scheduler_layout_rewrite_networks.py @@ -21,7 +21,7 @@ import tvm from tvm import relay, auto_scheduler -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor import tvm.testing @@ -169,7 +169,7 @@ def tune_and_check(mod, data, weight): def get_output(data, lib): dev = tvm.cpu() - module = graph_runtime.GraphModule(lib["default"](dev)) + module = graph_executor.GraphModule(lib["default"](dev)) module.set_input("data", data) module.run() diff --git a/tests/python/relay/test_auto_scheduler_tuning.py b/tests/python/relay/test_auto_scheduler_tuning.py index af930e7f7f19..1250543a13ae 100644 --- a/tests/python/relay/test_auto_scheduler_tuning.py +++ b/tests/python/relay/test_auto_scheduler_tuning.py @@ -20,7 +20,7 @@ import numpy as np from tvm import auto_scheduler, relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor import tvm.testing from test_auto_scheduler_task_extraction import get_network @@ -70,7 +70,7 @@ def tune_network(network, target): # Check the correctness def get_output(data, lib): dev = tvm.gpu() - module = graph_runtime.GraphModule(lib["default"](dev)) + module = graph_executor.GraphModule(lib["default"](dev)) module.set_input("data", data) module.run() return module.get_output(0).asnumpy() diff --git a/tests/python/relay/test_backend_graph_runtime.py b/tests/python/relay/test_backend_graph_executor.py similarity index 97% rename from tests/python/relay/test_backend_graph_runtime.py rename to tests/python/relay/test_backend_graph_executor.py index 6f75c43579a7..b9553d79c3b6 100644 --- a/tests/python/relay/test_backend_graph_runtime.py +++ b/tests/python/relay/test_backend_graph_executor.py @@ -18,7 +18,7 @@ import tvm from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.relay.op import add import tvm.testing @@ -102,7 +102,7 @@ def test_with_params(): y_data = np.random.rand(1, 5).astype("float32") params = {"y": y_data} graph, lib, params = relay.build(tvm.IRModule.from_expr(func), "llvm", params=params) - mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) mod.set_input(**params) mod.set_input(x=x_data) mod.run() @@ -174,7 +174,7 @@ def unit_numpy(X, W): for target, dev in tvm.testing.enabled_targets(): with tvm.transform.PassContext(opt_level=2): graph, lib, params 
= relay.build(tvm.IRModule.from_expr(z), target) - m = graph_runtime.create(graph, lib, dev) + m = graph_executor.create(graph, lib, dev) m.set_input("X", tvm.nd.array(x.astype(dtype))) m.set_input("y", tvm.nd.array(y.astype(dtype))) m.set_input(**params) @@ -194,7 +194,7 @@ def test_compile_nested_tuples(): func = relay.Function([x], out) graph, lib, _ = relay.build(tvm.IRModule.from_expr(func), "llvm") - mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) x_data = np.random.uniform(size=(10,)).astype(np.float32) mod.set_input(x=x_data) diff --git a/tests/python/relay/test_cpp_build_module.py b/tests/python/relay/test_cpp_build_module.py index 6f13533f9318..7d2209a34835 100644 --- a/tests/python/relay/test_cpp_build_module.py +++ b/tests/python/relay/test_cpp_build_module.py @@ -48,7 +48,7 @@ def test_basic_build(): assert mod["main"] == func_in_mod, "relay.build changed module in-place" # test - rt = tvm.contrib.graph_runtime.GraphModule(lib["default"](dev)) + rt = tvm.contrib.graph_executor.GraphModule(lib["default"](dev)) rt.set_input("a", A) rt.run() out = rt.get_output(0) @@ -85,7 +85,7 @@ def test_fp16_build(): g_json, mmod, params = relay.build(func, "cuda", params=params) # test - rt = tvm.contrib.graph_runtime.create(g_json, mmod, dev) + rt = tvm.contrib.graph_executor.create(g_json, mmod, dev) rt.load_params(runtime.save_param_dict(params)) rt.run() out = rt.get_output(0) @@ -114,7 +114,7 @@ def test_fp16_conversion(target, dev): g_json, mmod, params = relay.build(tvm.IRModule.from_expr(func), target) # test - rt = tvm.contrib.graph_runtime.create(g_json, mmod, dev) + rt = tvm.contrib.graph_executor.create(g_json, mmod, dev) rt.set_input("x", X) rt.run() out = rt.get_output(0) diff --git a/tests/python/relay/test_external_codegen.py b/tests/python/relay/test_external_codegen.py index 9241f8e82745..9f6d88e47f0b 100644 --- a/tests/python/relay/test_external_codegen.py +++ b/tests/python/relay/test_external_codegen.py @@ -63,11 +63,11 @@ def check_vm_result(): out = vm.run(**map_inputs) tvm.testing.assert_allclose(out.asnumpy(), result, rtol=tol, atol=tol) - def check_graph_runtime_result(): + def check_graph_executor_result(): with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]): json, lib, _ = relay.build(mod, target=target) lib = update_lib(lib) - rt_mod = tvm.contrib.graph_runtime.create(json, lib, device) + rt_mod = tvm.contrib.graph_executor.create(json, lib, device) for name, data in map_inputs.items(): rt_mod.set_input(name, data) @@ -78,7 +78,7 @@ def check_graph_runtime_result(): tvm.testing.assert_allclose(out.asnumpy(), result, rtol=tol, atol=tol) check_vm_result() - check_graph_runtime_result() + check_graph_executor_result() def set_external_func_attr(func, compiler, ext_symbol): @@ -336,7 +336,7 @@ def test_extern_dnnl_const(): def test_load_params_with_constants_in_ext_codegen(): # After binding params and partitioning graph_module.get_params() - # might contain parameters that are not an graph runtime input but + # might contain parameters that are not an graph executor input but # for example constants in external function. 
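# Editorial sketch, not part of this patch: the older three-value form of relay.build()
# with the renamed executor, plus a parameter round trip through save_param_dict() and
# load_params() as in the external-codegen test above. The small add function and the
# "y" parameter are illustrative only.
import numpy as np
import tvm
from tvm import relay, runtime
from tvm.contrib import graph_executor

x = relay.var("x", shape=(1, 5), dtype="float32")
y = relay.var("y", shape=(1, 5), dtype="float32")
func = relay.Function([x, y], relay.add(x, y))
y_data = np.random.rand(1, 5).astype("float32")
graph, lib, params = relay.build(tvm.IRModule.from_expr(func), "llvm", params={"y": y_data})

gmod = graph_executor.create(graph, lib, device=tvm.cpu(0))  # was: graph_runtime.create
gmod.load_params(runtime.save_param_dict(params))            # serialize, then reload, the params
gmod.set_input(x=np.zeros((1, 5), dtype="float32"))
gmod.run()
out = gmod.get_output(0).asnumpy()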
y_in = np.ones((1,)).astype("float32") params = {"y": y_in} @@ -353,7 +353,7 @@ def test_load_params_with_constants_in_ext_codegen(): graph_module = relay.build(mod, target="llvm", params=params) lib = update_lib(graph_module.get_lib()) - rt_mod = tvm.contrib.graph_runtime.create(graph_module.get_json(), lib, tvm.cpu(0)) + rt_mod = tvm.contrib.graph_executor.create(graph_module.get_json(), lib, tvm.cpu(0)) rt_mod.load_params(runtime.save_param_dict(graph_module.get_params())) diff --git a/tests/python/relay/test_json_runtime.py b/tests/python/relay/test_json_runtime.py index 79d1f710cf41..bf5676d096f1 100644 --- a/tests/python/relay/test_json_runtime.py +++ b/tests/python/relay/test_json_runtime.py @@ -50,7 +50,7 @@ def check_result( compile_engine.get().clear() with tvm.transform.PassContext(opt_level=3): json, lib, param = relay.build(ref_mod, target=target, params=params) - rt_mod = tvm.contrib.graph_runtime.create(json, lib, device) + rt_mod = tvm.contrib.graph_executor.create(json, lib, device) for name, data in map_inputs.items(): rt_mod.set_input(name, data) @@ -70,11 +70,11 @@ def check_vm_result(): out = vm.run(**map_inputs) tvm.testing.assert_allclose(out.asnumpy(), ref_result, rtol=tol, atol=tol) - def check_graph_runtime_result(): + def check_graph_executor_result(): compile_engine.get().clear() with relay.build_config(opt_level=3): json, lib, param = relay.build(mod, target=target, params=params) - rt_mod = tvm.contrib.graph_runtime.create(json, lib, device) + rt_mod = tvm.contrib.graph_executor.create(json, lib, device) for name, data in map_inputs.items(): rt_mod.set_input(name, data) @@ -85,7 +85,7 @@ def check_graph_runtime_result(): tvm.testing.assert_allclose(out.asnumpy(), ref_result, rtol=tol, atol=tol) check_vm_result() - check_graph_runtime_result() + check_graph_executor_result() def test_conv2d(): diff --git a/tests/python/relay/test_op_fast_math.py b/tests/python/relay/test_op_fast_math.py index a82efb7ad2ad..7bcbc6839c4f 100644 --- a/tests/python/relay/test_op_fast_math.py +++ b/tests/python/relay/test_op_fast_math.py @@ -22,7 +22,7 @@ import tvm.relay as relay from tvm import topi from tvm import te -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor def test_fastmath(): @@ -43,7 +43,7 @@ def test_apply(relay_op, name, f_numpy, low, high, step, dtype="float32"): assert lib.get_function(func_name) dev = tvm.cpu(0) - m = graph_runtime.create(graph, lib, dev) + m = graph_executor.create(graph, lib, dev) # Set inputs m.set_input("x", tvm.nd.array(a_np, dev)) m.set_input(**params) diff --git a/tests/python/relay/test_op_level2.py b/tests/python/relay/test_op_level2.py index b9e7b075a998..c5843758c3d2 100644 --- a/tests/python/relay/test_op_level2.py +++ b/tests/python/relay/test_op_level2.py @@ -405,7 +405,7 @@ def run_test_conv2d_cuda( dev = tvm.device(target, 0) params = {"w": tvm.nd.array(kernel)} graph, lib, params = relay.build_module.build(mod, target=target, params=params) - module = tvm.contrib.graph_runtime.create(graph, lib, dev) + module = tvm.contrib.graph_executor.create(graph, lib, dev) module.set_input("x", tvm.nd.array(data)) module.set_input(**params) module.run() @@ -668,7 +668,7 @@ def run_test_conv3d_cuda( dev = tvm.device(target, 0) params = {"w": tvm.nd.array(kernel)} graph, lib, params = relay.build_module.build(mod, target=target, params=params) - module = tvm.contrib.graph_runtime.create(graph, lib, dev) + module = tvm.contrib.graph_executor.create(graph, lib, dev) module.set_input("x", tvm.nd.array(data)) 
module.set_input(**params) module.run() diff --git a/tests/python/relay/test_op_qnn_concatenate.py b/tests/python/relay/test_op_qnn_concatenate.py index a9207f67fc68..453875301af9 100644 --- a/tests/python/relay/test_op_qnn_concatenate.py +++ b/tests/python/relay/test_op_qnn_concatenate.py @@ -19,7 +19,7 @@ from tvm import te import numpy as np from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor import tvm.topi.testing diff --git a/tests/python/relay/test_op_qnn_conv2_transpose.py b/tests/python/relay/test_op_qnn_conv2_transpose.py index 93776d265a49..e4e02279efd6 100644 --- a/tests/python/relay/test_op_qnn_conv2_transpose.py +++ b/tests/python/relay/test_op_qnn_conv2_transpose.py @@ -21,7 +21,7 @@ from tvm import relay from tvm.relay import transform from tvm.relay.testing import run_infer_type -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.relay.testing.temp_op_attr import TempOpAttr @@ -191,7 +191,7 @@ def get_output(func, golden_inputs): golden_data, golden_weight = golden_inputs params = {"kernel": golden_weight} graph, lib, params = relay.build(func, "llvm", params=params) - mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) mod.set_input("data", golden_data) mod.set_input(**params) mod.run() diff --git a/tests/python/relay/test_op_qnn_conv2d.py b/tests/python/relay/test_op_qnn_conv2d.py index 87b1f732e2bd..928450312147 100644 --- a/tests/python/relay/test_op_qnn_conv2d.py +++ b/tests/python/relay/test_op_qnn_conv2d.py @@ -21,7 +21,7 @@ from tvm import relay from tvm.relay import transform from tvm.relay.testing import run_infer_type -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.relay.testing.temp_op_attr import TempOpAttr # We use llvm target for testing functionality. 
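# Editorial sketch, not part of this patch: measuring inference time of the renamed
# executor with time_evaluator, mirroring the benchmark_vm.py hunk earlier in this patch.
# The tiny multiply network is illustrative only.
import numpy as np
import tvm
from tvm import relay
from tvm.contrib import graph_executor

x = relay.var("x", shape=(1, 16), dtype="float32")
func = relay.Function([x], relay.multiply(x, relay.const(3.0, "float32")))
lib = relay.build(tvm.IRModule.from_expr(func), target="llvm")
dev = tvm.cpu(0)
m = graph_executor.GraphModule(lib["default"](dev))
m.set_input("x", np.ones((1, 16), dtype="float32"))

ftimer = m.module.time_evaluator("run", dev, number=1, repeat=20)
prof_res = np.array(ftimer().results) * 1000  # seconds -> milliseconds
print("Mean graph executor inference time (std dev): %.2f ms (%.2f ms)"
      % (np.mean(prof_res), np.std(prof_res)))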
`llvm` points to an older Intel @@ -198,7 +198,7 @@ def get_output(func, golden_inputs): golden_data, golden_weight = golden_inputs params = {"kernel": golden_weight} graph, lib, params = relay.build(func, "llvm", params=params) - mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) mod.set_input("data", golden_data) mod.set_input(**params) mod.run() @@ -722,7 +722,7 @@ def test_tflite_large_irregular(): with tvm.transform.PassContext(opt_level=2): params = {"kernel": golden_weight} graph, lib, params = relay.build(qnn_func, "llvm", params=params) - mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) mod.set_input("data", golden_data) mod.set_input(**params) mod.run() @@ -767,7 +767,7 @@ def test_tflite_output_multiplier_greater_than_one(): with tvm.transform.PassContext(opt_level=2): params = {"kernel": golden_weight} graph, lib, params = relay.build(qnn_func, "llvm", params=params) - mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) mod.set_input("data", golden_data) mod.set_input(**params) mod.run() @@ -830,7 +830,7 @@ def test_tflite_anistropic_strides(): with tvm.transform.PassContext(opt_level=2): params = {"kernel": golden_weight} graph, lib, params = relay.build(qnn_func, "llvm", params=params) - mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) mod.set_input("data", golden_data) mod.set_input(**params) mod.run() diff --git a/tests/python/relay/test_op_qnn_dense.py b/tests/python/relay/test_op_qnn_dense.py index ef74ce3edcf6..c47ac6b35ec7 100644 --- a/tests/python/relay/test_op_qnn_dense.py +++ b/tests/python/relay/test_op_qnn_dense.py @@ -19,7 +19,7 @@ from tvm import te import numpy as np from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.relay.testing.temp_op_attr import TempOpAttr @@ -211,7 +211,7 @@ def qnn_dense_driver(test_configuration): mod = relay.qnn.transform.CanonicalizeOps()(mod) with tvm.transform.PassContext(opt_level=2): graph, lib, params = relay.build(mod, "llvm", params=None) - mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) mod.set_input(quantized_data_name, test_configuration[quantized_data_name]) mod.set_input(quantized_kernel_name, test_configuration[quantized_kernel_name]) if test_configuration[bias_name] is not None: diff --git a/tests/python/relay/test_op_qnn_dequantize.py b/tests/python/relay/test_op_qnn_dequantize.py index 85f97a9c073c..ab398bbc1316 100644 --- a/tests/python/relay/test_op_qnn_dequantize.py +++ b/tests/python/relay/test_op_qnn_dequantize.py @@ -19,7 +19,7 @@ from tvm import te import numpy as np from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.relay.testing import run_infer_type @@ -35,7 +35,7 @@ def dequantize_test_driver(in_dtype, quant_args, in_data, verify_output_data, ax mod = tvm.IRModule.from_expr(mod) with tvm.transform.PassContext(opt_level=3): graph, lib, params = relay.build(mod, "llvm", params=None) - rt_mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) rt_mod.set_input(input_data=in_data) rt_mod.set_input(**params) rt_mod.run() @@ -140,7 +140,7 @@ def test_dynamic_dequantize(): with 
relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]): lib = relay.build(mod, target=target) - module = graph_runtime.GraphModule(lib["default"](dev)) + module = graph_executor.GraphModule(lib["default"](dev)) module.set_input(**{"x": data, "scale": scale, "zp": zp}) module.run() diff --git a/tests/python/relay/test_op_qnn_mul.py b/tests/python/relay/test_op_qnn_mul.py index 10721b573234..8ff3ab5c3df2 100644 --- a/tests/python/relay/test_op_qnn_mul.py +++ b/tests/python/relay/test_op_qnn_mul.py @@ -19,7 +19,7 @@ from tvm import te import numpy as np from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor import tvm.topi.testing # "unquantize" a quantized tensor diff --git a/tests/python/relay/test_op_qnn_quantize.py b/tests/python/relay/test_op_qnn_quantize.py index e92344f7dcfa..2ae688ef4784 100644 --- a/tests/python/relay/test_op_qnn_quantize.py +++ b/tests/python/relay/test_op_qnn_quantize.py @@ -19,7 +19,7 @@ from tvm import te import numpy as np from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.relay.testing import run_infer_type @@ -39,7 +39,7 @@ def quantize_test_driver(in_dtype, quant_args, axis, out_dtype, in_data, verify_ mod = tvm.IRModule.from_expr(mod) with tvm.transform.PassContext(opt_level=3): graph, lib, params = relay.build(mod, "llvm", params=None) - rt_mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) rt_mod.set_input(input_data=in_data) rt_mod.set_input(**params) rt_mod.run() @@ -155,7 +155,7 @@ def test_dynamic_quantize(): with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]): lib = relay.build(mod, target=target) - module = graph_runtime.GraphModule(lib["default"](dev)) + module = graph_executor.GraphModule(lib["default"](dev)) module.set_input(**{"x": data, "scale": scale, "zp": zp}) module.run() diff --git a/tests/python/relay/test_op_qnn_requantize.py b/tests/python/relay/test_op_qnn_requantize.py index 5aa3f7f7fdc8..5e61fad7676d 100644 --- a/tests/python/relay/test_op_qnn_requantize.py +++ b/tests/python/relay/test_op_qnn_requantize.py @@ -19,7 +19,7 @@ from tvm import te import numpy as np from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor roundings = ["UPWARD", "TONEAREST"] @@ -28,7 +28,7 @@ def verify(mod, goldens): with tvm.transform.PassContext(opt_level=3): graph, lib, params = relay.build(mod, "llvm", params=None) golden_data, golden_output = goldens - rt_mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) rt_mod.set_input("quantized_data", golden_data) rt_mod.set_input(**params) rt_mod.run() diff --git a/tests/python/relay/test_op_qnn_simulated_dequantize.py b/tests/python/relay/test_op_qnn_simulated_dequantize.py index 266e9d73b3cb..3aecd935b62b 100644 --- a/tests/python/relay/test_op_qnn_simulated_dequantize.py +++ b/tests/python/relay/test_op_qnn_simulated_dequantize.py @@ -19,7 +19,7 @@ from tvm import te import numpy as np from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.runtime.vm import VirtualMachine from tvm.topi.nn.qnn import SQNN_DTYPE_TO_CODE @@ -39,7 +39,7 @@ def dequantize_test_driver(in_dtype, quant_args, axis, in_data): mod = tvm.IRModule.from_expr(mod) with tvm.transform.PassContext(opt_level=3): graph, lib, params = relay.build(mod, "llvm", params=None) - 
rt_mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) rt_mod.set_input(input_data=in_data) rt_mod.set_input(**params) rt_mod.run() diff --git a/tests/python/relay/test_op_qnn_simulated_quantize.py b/tests/python/relay/test_op_qnn_simulated_quantize.py index 8a15a037d8ba..fd9d13168e01 100644 --- a/tests/python/relay/test_op_qnn_simulated_quantize.py +++ b/tests/python/relay/test_op_qnn_simulated_quantize.py @@ -19,7 +19,7 @@ from tvm import te import numpy as np from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.runtime.vm import VirtualMachine from tvm.topi.nn.qnn import SQNN_DTYPE_TO_CODE @@ -47,7 +47,7 @@ def quantize_test_driver(in_dtype, quant_args, axis, out_dtype, in_data): mod = tvm.IRModule.from_expr(mod) with tvm.transform.PassContext(opt_level=3): graph, lib, params = relay.build(mod, "llvm", params=None) - rt_mod = graph_runtime.create(graph, lib, device=tvm.cpu(0)) + rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0)) rt_mod.set_input(input_data=in_data) rt_mod.set_input(**params) rt_mod.run() diff --git a/tests/python/relay/test_param_dict.py b/tests/python/relay/test_param_dict.py index 7bc4ab422a64..2272883fc39c 100644 --- a/tests/python/relay/test_param_dict.py +++ b/tests/python/relay/test_param_dict.py @@ -24,7 +24,7 @@ from tvm.relay.op import add from tvm import relay from tvm import rpc -from tvm.contrib import utils, graph_runtime +from tvm.contrib import utils, graph_executor def test_save_load(): @@ -60,7 +60,7 @@ def test_bigendian_rpc_param(): if host is None: return - def verify_graph_runtime(remote, target, shape, dtype): + def verify_graph_executor(remote, target, shape, dtype): x = relay.var("x") y = relay.const(1) z = relay.add(x, y) @@ -76,7 +76,7 @@ def verify_graph_runtime(remote, target, shape, dtype): remote.upload(path_dso) lib = remote.load_module("dev_lib.o") dev = remote.cpu(0) - mod = graph_runtime.create(graph, lib, dev) + mod = graph_executor.create(graph, lib, dev) mod.load_params(runtime.save_param_dict(params)) mod.run() out = mod.get_output(0, tvm.nd.empty(shape, dtype=dtype, device=dev)) @@ -86,7 +86,7 @@ def verify_graph_runtime(remote, target, shape, dtype): remote = rpc.connect(host, port) target = "llvm -mtriple=powerpc-linux-gnu" for dtype in ["float32", "float64", "int32", "int8"]: - verify_graph_runtime(remote, target, (10,), dtype) + verify_graph_executor(remote, target, (10,), dtype) if __name__ == "__main__": diff --git a/tests/python/relay/test_pass_annotate_target.py b/tests/python/relay/test_pass_annotate_target.py index 52deb4e89ef9..c756d74ff0be 100644 --- a/tests/python/relay/test_pass_annotate_target.py +++ b/tests/python/relay/test_pass_annotate_target.py @@ -60,11 +60,11 @@ def check_vm_result(): out = vm.run(**map_inputs) tvm.testing.assert_allclose(out.asnumpy(), result, rtol=tol, atol=tol) - def check_graph_runtime_result(): + def check_graph_executor_result(): with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]): json, lib, param = relay.build(mod, target=target, params=params) lib = update_lib(lib) - rt_mod = tvm.contrib.graph_runtime.create(json, lib, device) + rt_mod = tvm.contrib.graph_executor.create(json, lib, device) for name, data in map_inputs.items(): rt_mod.set_input(name, data) @@ -76,7 +76,7 @@ def check_graph_runtime_result(): tvm.testing.assert_allclose(out.asnumpy(), result, rtol=tol, atol=tol) check_vm_result() - check_graph_runtime_result() 
+ check_graph_executor_result() def test_extern_dnnl(): diff --git a/tests/python/relay/test_pass_annotation.py b/tests/python/relay/test_pass_annotation.py index 70f1a0aa52e5..a9c31f5ccedd 100644 --- a/tests/python/relay/test_pass_annotation.py +++ b/tests/python/relay/test_pass_annotation.py @@ -20,7 +20,7 @@ import tvm from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.relay.expr_functor import ExprMutator from tvm.relay import transform import tvm.testing @@ -31,7 +31,7 @@ def _trace(module, metadata, _): pass # import pdb; pdb.set_trace() -def check_graph_runtime( +def check_graph_executor( target, ref_res, device, func, params, config, opt_level, expected_index=None ): with tvm.transform.PassContext(opt_level=opt_level, config=config): @@ -41,7 +41,7 @@ def check_graph_runtime( if "device_index" in graph_json["attrs"]: device_index = graph_json["attrs"]["device_index"][1] assert device_index == expected_index - mod = graph_runtime.create(graph, lib, contexts) + mod = graph_executor.create(graph, lib, contexts) mod.set_input(**new_params) mod.run() res = mod.get_output(0).asnumpy() @@ -429,7 +429,7 @@ def expected(): check_annotated_graph(annotated_func, expected_func) opt_level = 1 config = {"relay.fallback_device_type": fallback_device.device_type} - check_graph_runtime( + check_graph_executor( target, ref_res, device, annotated_func, params, config, opt_level, expected_index ) opt_level = 2 @@ -465,7 +465,7 @@ def annotated(): check_annotated_graph(annotated_func, expected_func) opt_level = 1 config = {"relay.fallback_device_type": fallback_device.device_type} - check_graph_runtime(target, ref_res, device, annotated_func, params, config, opt_level) + check_graph_executor(target, ref_res, device, annotated_func, params, config, opt_level) opt_level = 2 check_vm_runtime(target, ref_res, device, annotated_func, params, config, opt_level) @@ -506,7 +506,7 @@ def expected(): opt_level = 1 config = {"relay.fallback_device_type": fallback_device.device_type} check_annotated_graph(annotated_func, expected_func) - check_graph_runtime( + check_graph_executor( target, ref_res, device, annotated_func, params, config, opt_level, expected_index ) opt_level = 2 @@ -520,7 +520,7 @@ def test_fallback_all_operators(device, tgt): expected_func = get_func() check_annotated_graph(annotated_func, expected_func) opt_level = 2 - check_graph_runtime(target, ref_res, device, annotated_func, params, {}, opt_level) + check_graph_executor(target, ref_res, device, annotated_func, params, {}, opt_level) check_vm_runtime(target, ref_res, device, annotated_func, params, {}, opt_level) test_fuse_log_add(dev, tgt) @@ -582,7 +582,7 @@ def expected(): opt_level = 0 config = {"relay.fallback_device_type": fallback_device.device_type} - check_graph_runtime( + check_graph_executor( target, ref_res, dev, annotated_func, params, config, opt_level, expected_index ) diff --git a/tests/python/relay/test_pass_legalize.py b/tests/python/relay/test_pass_legalize.py index 0d14f6611db9..8a37da33a10f 100644 --- a/tests/python/relay/test_pass_legalize.py +++ b/tests/python/relay/test_pass_legalize.py @@ -20,7 +20,7 @@ from tvm import te from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.relay import transform, analysis from tvm.relay.testing.temp_op_attr import TempOpAttr diff --git a/tests/python/relay/test_pass_legalize_tensorcore.py b/tests/python/relay/test_pass_legalize_tensorcore.py index 
5ecda4ba07a8..f45e39047238 100644 --- a/tests/python/relay/test_pass_legalize_tensorcore.py +++ b/tests/python/relay/test_pass_legalize_tensorcore.py @@ -20,7 +20,7 @@ from tvm import te from tvm import topi from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.relay import transform, analysis from tvm.relay.testing.temp_op_attr import TempOpAttr diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index a5bc3ff2cead..01a1e48f832a 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -208,12 +208,12 @@ def check_vm_result(): for out, ref in zip(outs, results): tvm.testing.assert_allclose(out.asnumpy(), ref, rtol=tol, atol=tol) - def check_graph_runtime_result(): + def check_graph_executor_result(): compile_engine.get().clear() with tvm.transform.PassContext(opt_level=3): json, lib, param = relay.build(mod, target=target, params=params) lib = update_lib(lib) - rt_mod = tvm.contrib.graph_runtime.create(json, lib, device) + rt_mod = tvm.contrib.graph_executor.create(json, lib, device) for name, data in map_inputs.items(): rt_mod.set_input(name, data) @@ -229,7 +229,7 @@ def check_graph_runtime_result(): tvm.testing.assert_allclose(out.asnumpy(), results[idx], rtol=tol, atol=tol) check_vm_result() - check_graph_runtime_result() + check_graph_executor_result() def test_multi_node_compiler(): diff --git a/tests/python/relay/test_pass_qnn_legalize.py b/tests/python/relay/test_pass_qnn_legalize.py index 6a5c8f7cd647..a30cd1e73e3f 100644 --- a/tests/python/relay/test_pass_qnn_legalize.py +++ b/tests/python/relay/test_pass_qnn_legalize.py @@ -20,7 +20,7 @@ from tvm import te from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.relay import transform, analysis from tvm.relay.testing.temp_op_attr import TempOpAttr diff --git a/tests/python/relay/test_simplify_fc_transpose.py b/tests/python/relay/test_simplify_fc_transpose.py index ce93a68c7321..fa5f332e6cd5 100644 --- a/tests/python/relay/test_simplify_fc_transpose.py +++ b/tests/python/relay/test_simplify_fc_transpose.py @@ -31,11 +31,11 @@ def run_func(func, params, x): with tvm.transform.PassContext(opt_level=3): lib = relay.build(func, "llvm", params=params) - from tvm.contrib import graph_runtime + from tvm.contrib import graph_executor dev = tvm.cpu(0) dtype = "float32" - m = graph_runtime.GraphModule(lib["default"](dev)) + m = graph_executor.GraphModule(lib["default"](dev)) # set inputs m.set_input("data", tvm.nd.array(x.astype(dtype))) # execute diff --git a/tests/python/relay/test_sparse_dense_convert.py b/tests/python/relay/test_sparse_dense_convert.py index 4eaaf769ff11..1efa813ebfb0 100644 --- a/tests/python/relay/test_sparse_dense_convert.py +++ b/tests/python/relay/test_sparse_dense_convert.py @@ -52,11 +52,11 @@ def run_func(func, params, x): with tvm.transform.PassContext(opt_level=3): graph, lib, new_params = relay.build(func, "llvm", params=params) - from tvm.contrib import graph_runtime + from tvm.contrib import graph_executor dev = tvm.cpu(0) dtype = "float32" - m = graph_runtime.create(graph, lib, dev) + m = graph_executor.create(graph, lib, dev) # set inputs m.set_input("data", tvm.nd.array(x.astype(dtype))) m.set_input(**new_params) diff --git a/tests/python/topi/python/test_topi_qnn.py b/tests/python/topi/python/test_topi_qnn.py index 1f49a68aa519..995cfd2df666 100644 --- 
a/tests/python/topi/python/test_topi_qnn.py +++ b/tests/python/topi/python/test_topi_qnn.py @@ -18,7 +18,7 @@ import numpy as np import tvm from tvm import topi, relay, te -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor import tvm.topi.testing @@ -59,7 +59,7 @@ def check_target(target, dev): lib = relay.build(tvm.IRModule.from_expr(real_q_op), target=target) # Get real qnn quantize output. - m = graph_runtime.GraphModule(lib["default"](dev)) + m = graph_executor.GraphModule(lib["default"](dev)) m.set_input("a", a_np) m.run() @@ -126,7 +126,7 @@ def check_target(target, dev): lib = relay.build(tvm.IRModule.from_expr(real_dq_op), target=target) # Get real qnn quantize output. - m = graph_runtime.GraphModule(lib["default"](dev)) + m = graph_executor.GraphModule(lib["default"](dev)) m.set_input("a", a_np) m.run() diff --git a/tests/python/unittest/test_crt.py b/tests/python/unittest/test_crt.py index c9dfef4654ed..a0524353f3b3 100644 --- a/tests/python/unittest/test_crt.py +++ b/tests/python/unittest/test_crt.py @@ -138,8 +138,8 @@ def test_reset(): @tvm.testing.requires_micro -def test_graph_runtime(): - """Test use of the graph runtime with microTVM.""" +def test_graph_executor(): + """Test use of the graph executor with microTVM.""" import tvm.micro workspace = tvm.micro.Workspace(debug=True) @@ -156,7 +156,7 @@ def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), uint8]) { factory = tvm.relay.build(relay_mod, target=TARGET) with _make_session(workspace, factory.get_lib()) as sess: - graph_mod = tvm.micro.create_local_graph_runtime( + graph_mod = tvm.micro.create_local_graph_executor( factory.get_json(), sess.get_system_lib(), sess.device ) A_data = tvm.nd.array(np.array([2, 3], dtype="uint8"), device=sess.device) diff --git a/tests/python/unittest/test_link_params.py b/tests/python/unittest/test_link_params.py index 9fde832d08ce..3ad515604d0b 100644 --- a/tests/python/unittest/test_link_params.py +++ b/tests/python/unittest/test_link_params.py @@ -124,7 +124,7 @@ def _verify_linked_param(dtype, lib, mod, graph, name): """Directly read memory from the linked library to verify the linked parameter is correct.""" sid = _lookup_sid(graph, name) # NOTE: query_imports=True because when loading a module from disk (i.e. for C backend), - # a GraphRuntimeFactory module is created instead of the module itself. + # a GraphExecutorFactory module is created instead of the module itself. param_ptr = mod.get_function("_lookup_linked_param", True)(sid) gen_param = lib.params[name] arr_data = (_get_ctypes_dtype(dtype) * np.prod(gen_param.shape)).from_address(param_ptr.value) @@ -154,7 +154,7 @@ def _add_decl(name, dtype): f"def @main(%rand_input : Tensor[{INPUT_SHAPE}, {dtype}], { ', '.join(param_decls.values()) } ) {{", # This program ensures that GraphPlanMemory alternates between the same two storage IDs for a # while. In doing this, it ensures that param %{dtype}_b will be placed into the graph at an - # index unequal to its storage_id. This ensures that GraphRuntimeCodegen encodes the storage_id + # index unequal to its storage_id. This ensures that GraphExecutorCodegen encodes the storage_id # and not the parameter index into the graph. ( f' %0 = nn.conv2d(%rand_input, %{dtype}_a, data_layout="NCHW", kernel_layout="OIHW", ' @@ -206,7 +206,7 @@ def test_llvm_link_params(): # Wrap in function to explicitly deallocate the runtime. 
def _run_linked(lib, mod): graph_json, _, _ = lib - graph_rt = tvm.contrib.graph_runtime.create(graph_json, mod, tvm.cpu(0)) + graph_rt = tvm.contrib.graph_executor.create(graph_json, mod, tvm.cpu(0)) graph_rt.set_input("rand_input", rand_input) # NOTE: params not required. graph_rt.run() return graph_rt.get_output(0) @@ -218,7 +218,7 @@ def _run_linked(lib, mod): def _run_unlinked(lib): graph_json, mod, lowered_params = lib - graph_rt = tvm.contrib.graph_runtime.create(graph_json, mod, tvm.cpu(0)) + graph_rt = tvm.contrib.graph_executor.create(graph_json, mod, tvm.cpu(0)) graph_rt.set_input("rand_input", rand_input, **lowered_params) graph_rt.run() return graph_rt.get_output(0) @@ -316,7 +316,7 @@ def test_c_link_params(): # Wrap in function to explicitly deallocate the runtime. def _run_linked(lib_mod): - graph_rt = tvm.contrib.graph_runtime.GraphModule(lib_mod["default"](tvm.cpu(0))) + graph_rt = tvm.contrib.graph_executor.GraphModule(lib_mod["default"](tvm.cpu(0))) graph_rt.set_input("rand_input", rand_input) # NOTE: params not required. graph_rt.run() @@ -334,7 +334,7 @@ def _run_linked(lib_mod): lib_mod = tvm.runtime.load_module(lib_path) def _run_unlinked(lib_mod): - graph_rt = tvm.contrib.graph_runtime.GraphModule(lib_mod["default"](tvm.cpu(0))) + graph_rt = tvm.contrib.graph_executor.GraphModule(lib_mod["default"](tvm.cpu(0))) graph_rt.set_input("rand_input", rand_input, **params) graph_rt.run() return graph_rt.get_output(0) @@ -365,7 +365,7 @@ def test_crt_link_params(): opts = tvm.micro.default_options( os.path.join(tvm.micro.get_standalone_crt_dir(), "template", "host") ) - opts["bin_opts"]["ldflags"].append("-DTVM_HOST_USE_GRAPH_RUNTIME_MODULE") + opts["bin_opts"]["ldflags"].append("-DTVM_HOST_USE_GRAPH_EXECUTOR_MODULE") micro_binary = tvm.micro.build_static_runtime( workspace, @@ -374,7 +374,7 @@ def test_crt_link_params(): compiler_options=opts, extra_libs=[ tvm.micro.get_standalone_crt_lib(m) - for m in ("memory", "graph_runtime_module", "graph_runtime") + for m in ("memory", "graph_executor_module", "graph_executor") ], ) @@ -383,7 +383,7 @@ def test_crt_link_params(): } flasher = compiler.flasher(**flasher_kw) with tvm.micro.Session(binary=micro_binary, flasher=flasher) as sess: - graph_rt = tvm.micro.session.create_local_graph_runtime( + graph_rt = tvm.micro.session.create_local_graph_executor( graph_json, sess.get_system_lib(), sess.device ) @@ -397,7 +397,7 @@ def test_crt_link_params(): def _run_unlinked(lib): graph_json, mod, lowered_params = lib - graph_rt = tvm.contrib.graph_runtime.create(graph_json, mod, tvm.cpu(0)) + graph_rt = tvm.contrib.graph_executor.create(graph_json, mod, tvm.cpu(0)) graph_rt.set_input("rand_input", rand_input, **lowered_params) graph_rt.run() return graph_rt.get_output(0).asnumpy() diff --git a/tests/python/unittest/test_micro_model_library_format.py b/tests/python/unittest/test_micro_model_library_format.py index c999091cc3cc..db6c55bca12a 100644 --- a/tests/python/unittest/test_micro_model_library_format.py +++ b/tests/python/unittest/test_micro_model_library_format.py @@ -26,7 +26,7 @@ import tvm import tvm.relay -from tvm.relay.backend import graph_runtime_factory +from tvm.relay.backend import graph_executor_factory import tvm.runtime.module import tvm.testing from tvm.contrib import utils @@ -170,7 +170,7 @@ def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[ @tvm.testing.requires_micro def test_export_model(): module = tvm.support.FrontendTestModule() - factory = 
graph_runtime_factory.GraphRuntimeFactoryModule( + factory = graph_executor_factory.GraphExecutorFactoryModule( None, tvm.target.target.micro("host"), '"graph_json"', module, "test_module", {} ) diff --git a/tests/python/unittest/test_runtime_graph.py b/tests/python/unittest/test_runtime_graph.py index f37d4089a8ee..5f0c7837d4f5 100644 --- a/tests/python/unittest/test_runtime_graph.py +++ b/tests/python/unittest/test_runtime_graph.py @@ -21,7 +21,7 @@ import json from tvm import rpc from tvm import relay -from tvm.contrib import utils, graph_runtime +from tvm.contrib import utils, graph_executor @tvm.testing.requires_llvm @@ -59,7 +59,7 @@ def test_graph_simple(): def check_verify(): mlib = tvm.build(s, [A, B], "llvm", name="myadd") - mod = graph_runtime.create(graph, mlib, tvm.cpu(0)) + mod = graph_executor.create(graph, mlib, tvm.cpu(0)) a = np.random.uniform(size=(n,)).astype(A.dtype) mod.run(x=a) out = mod.get_output(0, tvm.nd.empty((n,))) @@ -75,7 +75,7 @@ def check_remote(): mlib.export_library(path_dso) remote.upload(path_dso) mlib = remote.load_module("dev_lib.so") - mod = graph_runtime.create(graph, mlib, remote.cpu(0)) + mod = graph_executor.create(graph, mlib, remote.cpu(0)) a = np.random.uniform(size=(n,)).astype(A.dtype) mod.run(x=tvm.nd.array(a, dev)) out = tvm.nd.empty((n,), device=dev) @@ -92,10 +92,10 @@ def check_sharing(): params = {"x": x_in} graph, lib, params = relay.build(func, target="llvm", params=params) - mod_shared = graph_runtime.create(graph, lib, tvm.cpu(0)) + mod_shared = graph_executor.create(graph, lib, tvm.cpu(0)) mod_shared.load_params(runtime.save_param_dict(params)) num_mods = 10 - mods = [graph_runtime.create(graph, lib, tvm.cpu(0)) for _ in range(num_mods)] + mods = [graph_executor.create(graph, lib, tvm.cpu(0)) for _ in range(num_mods)] for mod in mods: mod.share_params(mod_shared, runtime.save_param_dict(params)) @@ -120,7 +120,7 @@ def check_sharing(): def test_load_unexpected_params(): - # Test whether graph_runtime.load_params works if parameters + # Test whether graph_executor.load_params works if parameters # are provided that are not an expected input. 
mod = tvm.IRModule() params = {} @@ -130,7 +130,7 @@ def test_load_unexpected_params(): mod["main"] = relay.Function([x, y], z) graph_module = relay.build(mod, target="llvm", params=params) - rt_mod = tvm.contrib.graph_runtime.create( + rt_mod = tvm.contrib.graph_executor.create( graph_module.get_json(), graph_module.get_lib(), tvm.cpu(0) ) diff --git a/tests/python/unittest/test_runtime_graph_cuda_graph.py b/tests/python/unittest/test_runtime_graph_cuda_graph.py index 41c782a91d9b..ee7750e3e142 100644 --- a/tests/python/unittest/test_runtime_graph_cuda_graph.py +++ b/tests/python/unittest/test_runtime_graph_cuda_graph.py @@ -27,8 +27,8 @@ from tvm import te import numpy as np -from tvm.contrib import utils, graph_runtime -from tvm.contrib.cuda_graph import cuda_graph_runtime +from tvm.contrib import utils, graph_executor +from tvm.contrib.cuda_graph import cuda_graph_executor bx = te.thread_axis("blockIdx.x") @@ -75,7 +75,7 @@ def check_verify(): mlib = tvm.build(s, [A, B], "cuda", name="myadd") dev = tvm.gpu(0) try: - mod = cuda_graph_runtime.create(graph, mlib, dev) + mod = cuda_graph_executor.create(graph, mlib, dev) except ValueError: return diff --git a/tests/python/unittest/test_runtime_graph_debug.py b/tests/python/unittest/test_runtime_graph_debug.py index b0173d1e7b7f..6cab75d9b9fc 100644 --- a/tests/python/unittest/test_runtime_graph_debug.py +++ b/tests/python/unittest/test_runtime_graph_debug.py @@ -28,7 +28,7 @@ import numpy as np from tvm import rpc from tvm.contrib import utils -from tvm.contrib.debugger import debug_runtime +from tvm.contrib.debugger import debug_executor @tvm.testing.requires_llvm @@ -75,7 +75,7 @@ def myadd(*args): mlib_proxy = tvm.support.FrontendTestModule() mlib_proxy["myadd"] = myadd try: - mod = debug_runtime.create(graph, mlib_proxy, tvm.cpu(0)) + mod = debug_executor.create(graph, mlib_proxy, tvm.cpu(0)) except ValueError: return @@ -171,7 +171,7 @@ def check_remote(): remote.upload(path_dso) mlib = remote.load_module("dev_lib.so") try: - mod = debug_runtime.create(graph, mlib, remote.cpu(0)) + mod = debug_executor.create(graph, mlib, remote.cpu(0)) except ValueError: print("Skip because debug runtime not enabled") return diff --git a/tests/python/unittest/test_runtime_heterogeneous.py b/tests/python/unittest/test_runtime_heterogeneous.py index b3fbc2fdf248..e97b349af36e 100644 --- a/tests/python/unittest/test_runtime_heterogeneous.py +++ b/tests/python/unittest/test_runtime_heterogeneous.py @@ -21,7 +21,7 @@ import tvm from tvm import te -from tvm.contrib import graph_runtime, utils +from tvm.contrib import graph_executor, utils from tvm import topi @@ -172,7 +172,7 @@ def check_device(device, target_device): target_flist = {target_device: lower_add, target_host: lower_sub} mhost = tvm.build(target_flist, target_host=target_host) dev = [host_dev, device_dev] - mod = graph_runtime.create(graph, mhost, dev) + mod = graph_executor.create(graph, mhost, dev) params = {} params["A"] = tensor_a = np.random.uniform(size=shape).astype(tensor_a.dtype) params["B"] = tensor_b = np.random.uniform(size=shape).astype(tensor_b.dtype) @@ -408,7 +408,7 @@ def check_device(device, target_device): params["D"] = tensor_d = np.random.uniform(size=shape).astype(tensor_d.dtype) def check_verify(): - mod = graph_runtime.create(graph, mhost, dev) + mod = graph_executor.create(graph, mhost, dev) mod.set_input(**params) mod.run() out = mod.get_output(0, tvm.nd.empty(shape)) @@ -422,7 +422,7 @@ def check_load_module(): out_file.write(graph) loaded_lib = 
tvm.runtime.load_module(path_lib) loaded_graph = open(temp.relpath("deploy.json")).read() - mod = graph_runtime.create(loaded_graph, loaded_lib, dev) + mod = graph_executor.create(loaded_graph, loaded_lib, dev) mod.set_input(**params) mod.run() out = mod.get_output(0, tvm.nd.empty(shape)) diff --git a/tests/python/unittest/test_runtime_module_based_interface.py b/tests/python/unittest/test_runtime_module_based_interface.py index a9f8f6352d35..766338de3558 100644 --- a/tests/python/unittest/test_runtime_module_based_interface.py +++ b/tests/python/unittest/test_runtime_module_based_interface.py @@ -18,9 +18,9 @@ from tvm import relay, runtime from tvm.relay import testing import tvm -from tvm.contrib import graph_runtime -from tvm.contrib.debugger import debug_runtime -from tvm.contrib.cuda_graph import cuda_graph_runtime +from tvm.contrib import graph_executor +from tvm.contrib.debugger import debug_executor +from tvm.contrib.cuda_graph import cuda_graph_executor import tvm.testing @@ -37,7 +37,7 @@ def verify(data): graph, lib, graph_params = relay.build_module.build(mod, "llvm", params=params) dev = tvm.cpu() - module = graph_runtime.create(graph, lib, dev) + module = graph_executor.create(graph, lib, dev) module.set_input("data", data) module.set_input(**graph_params) module.run() @@ -55,7 +55,7 @@ def test_legacy_compatibility(): graph, lib, graph_params = relay.build_module.build(mod, "llvm", params=params) data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32") dev = tvm.cpu() - module = graph_runtime.create(graph, lib, dev) + module = graph_executor.create(graph, lib, dev) module.set_input("data", data) module.set_input(**graph_params) module.run() @@ -82,8 +82,8 @@ def test_cpu(): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # graph runtime wrapper - gmod = graph_runtime.GraphModule(complied_graph_lib["default"](dev)) + # graph executor wrapper + gmod = graph_executor.GraphModule(complied_graph_lib["default"](dev)) gmod.set_input("data", data) gmod.run() out = gmod.get_output(0).asnumpy() @@ -109,8 +109,8 @@ def test_gpu(): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # graph runtime wrapper - gmod = graph_runtime.GraphModule(complied_graph_lib["default"](dev)) + # graph executor wrapper + gmod = graph_executor.GraphModule(complied_graph_lib["default"](dev)) gmod.set_input("data", data) gmod.run() out = gmod.get_output(0).asnumpy() @@ -151,8 +151,8 @@ def verify_cpu_export(obj_format): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # graph runtime wrapper - gmod = graph_runtime.GraphModule(loaded_lib["default"](dev)) + # graph executor wrapper + gmod = graph_executor.GraphModule(loaded_lib["default"](dev)) gmod.set_input("data", data) gmod.run() out = gmod.get_output(0).asnumpy() @@ -190,8 +190,8 @@ def verify_gpu_export(obj_format): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # graph runtime wrapper - gmod = graph_runtime.GraphModule(loaded_lib["default"](dev)) + # graph executor wrapper + gmod = graph_executor.GraphModule(loaded_lib["default"](dev)) gmod.set_input("data", data) gmod.run() out = gmod.get_output(0).asnumpy() @@ -234,8 +234,8 @@ def verify_rpc_cpu_export(obj_format): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # graph runtime wrapper - gmod = graph_runtime.GraphModule(loaded_lib["default"](dev)) + # graph executor wrapper + gmod 
= graph_executor.GraphModule(loaded_lib["default"](dev)) gmod.set_input("data", data) gmod.run() out = gmod.get_output(0).asnumpy() @@ -279,8 +279,8 @@ def verify_rpc_gpu_export(obj_format): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # graph runtime wrapper - gmod = graph_runtime.GraphModule(loaded_lib["default"](dev)) + # graph executor wrapper + gmod = graph_executor.GraphModule(loaded_lib["default"](dev)) gmod.set_input("data", data) gmod.run() out = gmod.get_output(0).asnumpy() @@ -333,8 +333,8 @@ def verify_cpu_remove_package_params(obj_format): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # graph runtime wrapper - gmod = graph_runtime.GraphModule(loaded_lib["default"](dev)) + # graph executor wrapper + gmod = graph_executor.GraphModule(loaded_lib["default"](dev)) loaded_params = bytearray(open(temp.relpath("deploy_param.params"), "rb").read()) gmod.set_input("data", data) gmod.load_params(loaded_params) @@ -380,8 +380,8 @@ def verify_gpu_remove_package_params(obj_format): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # graph runtime wrapper - gmod = graph_runtime.GraphModule(loaded_lib["default"](dev)) + # graph executor wrapper + gmod = graph_executor.GraphModule(loaded_lib["default"](dev)) loaded_params = bytearray(open(temp.relpath("deploy_param.params"), "rb").read()) gmod.set_input("data", data) gmod.load_params(loaded_params) @@ -433,8 +433,8 @@ def verify_rpc_cpu_remove_package_params(obj_format): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # graph runtime wrapper - gmod = graph_runtime.GraphModule(loaded_lib["default"](dev)) + # graph executor wrapper + gmod = graph_executor.GraphModule(loaded_lib["default"](dev)) loaded_params = bytearray(open(path_params, "rb").read()) gmod.set_input("data", data) gmod.load_params(loaded_params) @@ -486,8 +486,8 @@ def verify_rpc_gpu_remove_package_params(obj_format): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # graph runtime wrapper - gmod = graph_runtime.GraphModule(loaded_lib["default"](dev)) + # graph executor wrapper + gmod = graph_executor.GraphModule(loaded_lib["default"](dev)) loaded_params = bytearray(open(path_params, "rb").read()) gmod.set_input("data", data) gmod.load_params(loaded_params) @@ -502,7 +502,7 @@ def verify_rpc_gpu_remove_package_params(obj_format): verify_rpc_gpu_remove_package_params(obj_format) -def test_debug_graph_runtime(): +def test_debug_graph_executor(): if not tvm.testing.device_enabled("llvm"): print("Skip because llvm is not enabled") return @@ -516,7 +516,7 @@ def test_debug_graph_runtime(): try: gmod = complied_graph_lib["debug_create"]("default", dev) except: - print("Skip because debug graph_runtime not enabled") + print("Skip because debug graph_executor not enabled") return set_input = gmod["set_input"] run = gmod["run"] @@ -526,8 +526,8 @@ def test_debug_graph_runtime(): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # debug graph runtime wrapper - debug_g_mod = debug_runtime.GraphModuleDebug( + # debug graph executor wrapper + debug_g_mod = debug_executor.GraphModuleDebug( complied_graph_lib["debug_create"]("default", dev), [dev], complied_graph_lib.get_json(), @@ -540,7 +540,7 @@ def test_debug_graph_runtime(): @tvm.testing.requires_cudagraph -def test_cuda_graph_runtime(): +def test_cuda_graph_executor(): mod, params = 
relay.testing.synthetic.get_workload() with tvm.transform.PassContext(opt_level=3): complied_graph_lib = relay.build_module.build(mod, "cuda", params=params) @@ -560,8 +560,8 @@ def test_cuda_graph_runtime(): out = get_output(0).asnumpy() tvm.testing.assert_allclose(out, verify(data), atol=1e-5) - # cuda graph runtime wrapper - cu_gmod = cuda_graph_runtime.GraphModuleCudaGraph(gmod) + # cuda graph executor wrapper + cu_gmod = cuda_graph_executor.GraphModuleCudaGraph(gmod) cu_gmod.set_input("data", data) cu_gmod.run() out = cu_gmod.get_output(0).asnumpy() @@ -602,5 +602,5 @@ def make_module(mod): test_gpu() test_mod_export() test_remove_package_params() - test_debug_graph_runtime() + test_debug_graph_executor() test_multiple_imported_modules() diff --git a/tests/python/unittest/test_target_codegen_blob.py b/tests/python/unittest/test_target_codegen_blob.py index 3429b39c6ac3..f1290ddd1e51 100644 --- a/tests/python/unittest/test_target_codegen_blob.py +++ b/tests/python/unittest/test_target_codegen_blob.py @@ -18,7 +18,7 @@ import numpy as np from tvm import relay from tvm.relay import testing -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor import tvm from tvm import te import ctypes @@ -39,7 +39,7 @@ def verify(data): with tvm.transform.PassContext(opt_level=3): lib = relay.build_module.build(mod, "llvm", params=params) dev = tvm.cpu() - module = graph_runtime.GraphModule(lib["default"](dev)) + module = graph_executor.GraphModule(lib["default"](dev)) module.set_input("data", data) module.run() out = module.get_output(0).asnumpy() @@ -58,7 +58,7 @@ def verify(data): loaded_lib = tvm.runtime.load_module(path_lib) data = np.random.uniform(-1, 1, size=input_shape).astype("float32") dev = tvm.gpu() - module = graph_runtime.GraphModule(loaded_lib["default"](dev)) + module = graph_executor.GraphModule(loaded_lib["default"](dev)) module.set_input("data", data) module.run() out = module.get_output(0).asnumpy() diff --git a/tests/python/unittest/test_tir_transform_hoist_if.py b/tests/python/unittest/test_tir_transform_hoist_if.py index 748a33d977ee..7d02e4f12c1d 100644 --- a/tests/python/unittest/test_tir_transform_hoist_if.py +++ b/tests/python/unittest/test_tir_transform_hoist_if.py @@ -765,7 +765,7 @@ def test_hoisting_op_conv(): for target, dev in enabled_targets(): with tvm.transform.PassContext(opt_level=3): lib = relay.build_module.build(mod, target=target, params=params) - m = tvm.contrib.graph_runtime.GraphModule(lib["default"](dev)) + m = tvm.contrib.graph_executor.GraphModule(lib["default"](dev)) x = np.random.uniform(size=dshape) data_tvm = tvm.nd.array(data) m.set_input("x", data_tvm) @@ -779,7 +779,7 @@ def test_hoisting_op_conv(): opt_level=3, config={"tir.HoistIfThenElse": {"support_block_scope_hosting": True}} ): lib = relay.build_module.build(mod, target=target, params=params) - m = tvm.contrib.graph_runtime.GraphModule(lib["default"](dev)) + m = tvm.contrib.graph_executor.GraphModule(lib["default"](dev)) x = np.random.uniform(size=dshape) data_tvm = tvm.nd.array(data) m.set_input("x", data_tvm) diff --git a/tests/scripts/task_config_build_gpu.sh b/tests/scripts/task_config_build_gpu.sh index 7338555c4c94..609325c9962b 100755 --- a/tests/scripts/task_config_build_gpu.sh +++ b/tests/scripts/task_config_build_gpu.sh @@ -34,7 +34,7 @@ echo set\(USE_NNPACK ON\) >> config.cmake echo set\(NNPACK_PATH /NNPACK/build/\) >> config.cmake echo set\(USE_RPC ON\) >> config.cmake echo set\(USE_SORT ON\) >> config.cmake -echo set\(USE_GRAPH_RUNTIME ON\) >> 
config.cmake +echo set\(USE_GRAPH_EXECUTOR ON\) >> config.cmake echo set\(USE_STACKVM_RUNTIME ON\) >> config.cmake echo set\(USE_PROFILER ON\) >> config.cmake echo set\(USE_ANTLR ON\) >> config.cmake diff --git a/tests/scripts/task_java_unittest.sh b/tests/scripts/task_java_unittest.sh index 7ab4afae3c2e..7818d7d458d6 100755 --- a/tests/scripts/task_java_unittest.sh +++ b/tests/scripts/task_java_unittest.sh @@ -32,7 +32,7 @@ TEMP_DIR=$(mktemp -d) python3 $SCRIPT_DIR/test_add_cpu.py $TEMP_DIR python3 $SCRIPT_DIR/test_add_gpu.py $TEMP_DIR -python3 $SCRIPT_DIR/test_graph_runtime.py $TEMP_DIR +python3 $SCRIPT_DIR/test_graph_executor.py $TEMP_DIR # start rpc proxy server PORT=$(( ( RANDOM % 1000 ) + 9000 )) diff --git a/tests/scripts/task_rust.sh b/tests/scripts/task_rust.sh index 2c87cceec8bb..c40585b62b47 100755 --- a/tests/scripts/task_rust.sh +++ b/tests/scripts/task_rust.sh @@ -58,14 +58,14 @@ cd $RUST_DIR/tvm-rt cargo build cargo test --tests -# Next we test the graph runtime crate. +# Next we test the graph executor crate. cd $RUST_DIR/tvm-graph-rt # We first we compile a model using the Python bindings then run the tests. python3 tests/build_model.py cargo test --tests -# Run some more tests involving the graph runtime API. +# Run some more tests involving the graph executor API. cd tests/test_tvm_basic cargo run cd - diff --git a/tutorials/auto_scheduler/tune_network_arm.py b/tutorials/auto_scheduler/tune_network_arm.py index 30bbce1e6b7e..153143dd4e94 100644 --- a/tutorials/auto_scheduler/tune_network_arm.py +++ b/tutorials/auto_scheduler/tune_network_arm.py @@ -49,7 +49,7 @@ import tvm from tvm import relay, auto_scheduler import tvm.relay.testing -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.contrib.utils import tempdir ################################################################# @@ -319,9 +319,9 @@ def tune_and_evaluate(): remote.upload(tmp.relpath(filename)) rlib = remote.load_module(filename) - # Create graph runtime + # Create graph executor dev = remote.cpu() - module = graph_runtime.GraphModule(rlib["default"](dev)) + module = graph_executor.GraphModule(rlib["default"](dev)) data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype)) module.set_input("data", data_tvm) diff --git a/tutorials/auto_scheduler/tune_network_cuda.py b/tutorials/auto_scheduler/tune_network_cuda.py index b5162fc85f14..7b5619c671be 100644 --- a/tutorials/auto_scheduler/tune_network_cuda.py +++ b/tutorials/auto_scheduler/tune_network_cuda.py @@ -49,7 +49,7 @@ import tvm from tvm import relay, auto_scheduler import tvm.relay.testing -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor ################################################################# # Define a Network @@ -280,9 +280,9 @@ def run_tuning(): with tvm.transform.PassContext(opt_level=3, config={"relay.backend.use_auto_scheduler": True}): lib = relay.build(mod, target=target, params=params) -# Create graph runtime +# Create graph executor dev = tvm.device(str(target), 0) -module = graph_runtime.GraphModule(lib["default"](dev)) +module = graph_executor.GraphModule(lib["default"](dev)) data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype)) module.set_input("data", data_tvm) diff --git a/tutorials/auto_scheduler/tune_network_mali.py b/tutorials/auto_scheduler/tune_network_mali.py index 3d3861263e4b..13d1e4793ffa 100644 --- a/tutorials/auto_scheduler/tune_network_mali.py +++ b/tutorials/auto_scheduler/tune_network_mali.py @@ -49,7 +49,7 
@@ import tvm from tvm import relay, auto_scheduler import tvm.relay.testing -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor import os ################################################################# @@ -242,7 +242,7 @@ def tune_and_evaluate(): ): lib = relay.build(mod, target=target, target_host=target_host, params=params) - # Create graph runtime + # Create graph executor print("=============== Request Remote ===============") from tvm.auto_scheduler.utils import request_remote @@ -256,7 +256,7 @@ def tune_and_evaluate(): lib.export_library(path_lib, ndk.create_shared) remote.upload(path_lib) loaded_lib = remote.load_module(filename) - module = graph_runtime.GraphModule(loaded_lib["default"](dev)) + module = graph_executor.GraphModule(loaded_lib["default"](dev)) data = (np.random.uniform(size=input_shape)).astype(dtype) data_tvm = tvm.nd.array(data) module.set_input("data", data_tvm) diff --git a/tutorials/auto_scheduler/tune_network_x86.py b/tutorials/auto_scheduler/tune_network_x86.py index ca2b8c197778..91dc64eec20e 100644 --- a/tutorials/auto_scheduler/tune_network_x86.py +++ b/tutorials/auto_scheduler/tune_network_x86.py @@ -49,7 +49,7 @@ import tvm from tvm import relay, auto_scheduler import tvm.relay.testing -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor ################################################################# # Define a Network @@ -279,9 +279,9 @@ def run_tuning(): with tvm.transform.PassContext(opt_level=3, config={"relay.backend.use_auto_scheduler": True}): lib = relay.build(mod, target=target, params=params) -# Create graph runtime +# Create graph executor dev = tvm.device(str(target), 0) -module = graph_runtime.GraphModule(lib["default"](dev)) +module = graph_executor.GraphModule(lib["default"](dev)) data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype)) module.set_input("data", data_tvm) diff --git a/tutorials/autotvm/tune_relay_arm.py b/tutorials/autotvm/tune_relay_arm.py index 9ddcf817c168..9223eb30cd9d 100644 --- a/tutorials/autotvm/tune_relay_arm.py +++ b/tutorials/autotvm/tune_relay_arm.py @@ -70,7 +70,7 @@ import tvm.relay.testing from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner from tvm.contrib.utils import tempdir -import tvm.contrib.graph_runtime as runtime +import tvm.contrib.graph_executor as runtime ################################################################# # Define network diff --git a/tutorials/autotvm/tune_relay_cuda.py b/tutorials/autotvm/tune_relay_cuda.py index 8ae5144eb31d..50485c4d7ff2 100644 --- a/tutorials/autotvm/tune_relay_cuda.py +++ b/tutorials/autotvm/tune_relay_cuda.py @@ -67,7 +67,7 @@ from tvm import relay, autotvm import tvm.relay.testing from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner -import tvm.contrib.graph_runtime as runtime +import tvm.contrib.graph_executor as runtime ################################################################# # Define Network diff --git a/tutorials/autotvm/tune_relay_mobile_gpu.py b/tutorials/autotvm/tune_relay_mobile_gpu.py index 08fc87d4da1b..aefa600e3c3f 100644 --- a/tutorials/autotvm/tune_relay_mobile_gpu.py +++ b/tutorials/autotvm/tune_relay_mobile_gpu.py @@ -69,7 +69,7 @@ import tvm.relay.testing from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner from tvm.contrib.utils import tempdir -import tvm.contrib.graph_runtime as runtime +import tvm.contrib.graph_executor as runtime 
################################################################# # Define network diff --git a/tutorials/autotvm/tune_relay_x86.py b/tutorials/autotvm/tune_relay_x86.py index b072a3adfea3..dd5d4057c211 100644 --- a/tutorials/autotvm/tune_relay_x86.py +++ b/tutorials/autotvm/tune_relay_x86.py @@ -36,7 +36,7 @@ from tvm.relay import testing from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner from tvm.autotvm.graph_tuner import DPTuner, PBQPTuner -import tvm.contrib.graph_runtime as runtime +import tvm.contrib.graph_executor as runtime ################################################################# # Define network diff --git a/tutorials/frontend/build_gcn.py b/tutorials/frontend/build_gcn.py index b21c09692a64..e73dc2dca287 100644 --- a/tutorials/frontend/build_gcn.py +++ b/tutorials/frontend/build_gcn.py @@ -175,7 +175,7 @@ def evaluate(data, logits): # = ((H * W)^t * A^t)^t # = ((W^t * H^t) * A^t)^t from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor import tvm from tvm import te @@ -335,9 +335,9 @@ def prepare_params(g, data): with tvm.transform.PassContext(opt_level=0): # Currently only support opt_level=0 lib = relay.build(mod, target, params=params) -# Generate graph runtime +# Generate graph executor dev = tvm.device(target, 0) -m = graph_runtime.GraphModule(lib["default"](dev)) +m = graph_executor.GraphModule(lib["default"](dev)) ###################################################################### # Run the TVM model, test for accuracy and verify with DGL diff --git a/tutorials/frontend/deploy_model_on_android.py b/tutorials/frontend/deploy_model_on_android.py index c69ac8d3f0e3..8efcb706b380 100644 --- a/tutorials/frontend/deploy_model_on_android.py +++ b/tutorials/frontend/deploy_model_on_android.py @@ -34,7 +34,7 @@ from tvm import te import tvm.relay as relay from tvm import rpc -from tvm.contrib import utils, ndk, graph_runtime as runtime +from tvm.contrib import utils, ndk, graph_executor as runtime from tvm.contrib.download import download_testdata @@ -71,7 +71,7 @@ # -DUSE_RPC=ON \ # -DUSE_SORT=ON \ # -DUSE_VULKAN=ON \ -# -DUSE_GRAPH_RUNTIME=ON \ +# -DUSE_GRAPH_EXECUTOR=ON \ # .. 
# make -j10 # diff --git a/tutorials/frontend/deploy_model_on_rasp.py b/tutorials/frontend/deploy_model_on_rasp.py index 75e142243a38..a59665f62f1c 100644 --- a/tutorials/frontend/deploy_model_on_rasp.py +++ b/tutorials/frontend/deploy_model_on_rasp.py @@ -30,7 +30,7 @@ from tvm import te import tvm.relay as relay from tvm import rpc -from tvm.contrib import utils, graph_runtime as runtime +from tvm.contrib import utils, graph_executor as runtime from tvm.contrib.download import download_testdata ###################################################################### diff --git a/tutorials/frontend/deploy_prequantized.py b/tutorials/frontend/deploy_prequantized.py index 681bc0741338..308027a4a193 100644 --- a/tutorials/frontend/deploy_prequantized.py +++ b/tutorials/frontend/deploy_prequantized.py @@ -90,7 +90,7 @@ def run_tvm_model(mod, params, input_name, inp, target="llvm"): with tvm.transform.PassContext(opt_level=3): lib = relay.build(mod, target=target, params=params) - runtime = tvm.contrib.graph_runtime.GraphModule(lib["default"](tvm.device(target, 0))) + runtime = tvm.contrib.graph_executor.GraphModule(lib["default"](tvm.device(target, 0))) runtime.set_input(input_name, inp) runtime.run() diff --git a/tutorials/frontend/deploy_prequantized_tflite.py b/tutorials/frontend/deploy_prequantized_tflite.py index 547704a72fc4..e0f9a6b2ebde 100644 --- a/tutorials/frontend/deploy_prequantized_tflite.py +++ b/tutorials/frontend/deploy_prequantized_tflite.py @@ -168,9 +168,9 @@ def run_tflite_model(tflite_model_buf, input_data): ############################################################################### # Lets run TVM compiled pre-quantized model inference and get the TVM prediction. def run_tvm(lib): - from tvm.contrib import graph_runtime + from tvm.contrib import graph_executor - rt_mod = graph_runtime.GraphModule(lib["default"](tvm.cpu(0))) + rt_mod = graph_executor.GraphModule(lib["default"](tvm.cpu(0))) rt_mod.set_input("input", data) rt_mod.run() tvm_res = rt_mod.get_output(0).asnumpy() diff --git a/tutorials/frontend/deploy_sparse.py b/tutorials/frontend/deploy_sparse.py index 4d96f4d5de9f..1fcb1b3246da 100644 --- a/tutorials/frontend/deploy_sparse.py +++ b/tutorials/frontend/deploy_sparse.py @@ -82,7 +82,7 @@ import numpy as np import tensorflow as tf from tvm import relay, runtime -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.relay import data_dep_optimization as ddo from tensorflow.python.framework.convert_to_constants import ( convert_variables_to_constants_v2, @@ -214,7 +214,7 @@ def run_relay_graph(mod, params, shape_dict, target, dev): input_shape = shape_dict["input_1"] dummy_data = np.random.uniform(size=input_shape, low=0, high=input_shape[1]).astype("int32") - m = graph_runtime.GraphModule(lib["default"](dev)) + m = graph_executor.GraphModule(lib["default"](dev)) m.set_input(0, dummy_data) m.run() tvm_output = m.get_output(0) diff --git a/tutorials/frontend/deploy_ssd_gluoncv.py b/tutorials/frontend/deploy_ssd_gluoncv.py index 9f31ab70731a..40b40ce1f441 100644 --- a/tutorials/frontend/deploy_ssd_gluoncv.py +++ b/tutorials/frontend/deploy_ssd_gluoncv.py @@ -28,7 +28,7 @@ from matplotlib import pyplot as plt from tvm import relay -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor from tvm.contrib.download import download_testdata from gluoncv import model_zoo, data, utils @@ -102,7 +102,7 @@ def build(target): def run(lib, dev): # Build TVM runtime - m = graph_runtime.GraphModule(lib["default"](dev)) + m 
= graph_executor.GraphModule(lib["default"](dev)) tvm_input = tvm.nd.array(x.asnumpy(), device=dev) m.set_input("data", tvm_input) # execute diff --git a/tutorials/frontend/from_caffe2.py b/tutorials/frontend/from_caffe2.py index 390fd2f32035..a3378de8b0e3 100644 --- a/tutorials/frontend/from_caffe2.py +++ b/tutorials/frontend/from_caffe2.py @@ -105,12 +105,12 @@ def transform_image(image): # The process is no different from other examples. import tvm from tvm import te -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor # context x86 CPU, use tvm.gpu(0) if you run on GPU dev = tvm.cpu(0) # create a runtime executor module -m = graph_runtime.GraphModule(lib["default"](dev)) +m = graph_executor.GraphModule(lib["default"](dev)) # set inputs m.set_input(input_name, tvm.nd.array(data.astype("float32"))) # execute diff --git a/tutorials/frontend/from_coreml.py b/tutorials/frontend/from_coreml.py index 5703f5742d3d..ea8817d3a0a8 100644 --- a/tutorials/frontend/from_coreml.py +++ b/tutorials/frontend/from_coreml.py @@ -81,11 +81,11 @@ # Execute on TVM # ------------------- # The process is no different from other example -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor dev = tvm.cpu(0) dtype = "float32" -m = graph_runtime.GraphModule(lib["default"](dev)) +m = graph_executor.GraphModule(lib["default"](dev)) # set inputs m.set_input("image", tvm.nd.array(x.astype(dtype))) # execute diff --git a/tutorials/frontend/from_darknet.py b/tutorials/frontend/from_darknet.py index b76c32e83137..356dc16bedf0 100644 --- a/tutorials/frontend/from_darknet.py +++ b/tutorials/frontend/from_darknet.py @@ -117,9 +117,9 @@ # Execute on TVM Runtime # ---------------------- # The process is no different from other examples. -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor -m = graph_runtime.GraphModule(lib["default"](dev)) +m = graph_executor.GraphModule(lib["default"](dev)) # set inputs m.set_input("data", tvm.nd.array(data.astype(dtype))) diff --git a/tutorials/frontend/from_mxnet.py b/tutorials/frontend/from_mxnet.py index 696af6b4a3a5..bfaac2c6c98e 100644 --- a/tutorials/frontend/from_mxnet.py +++ b/tutorials/frontend/from_mxnet.py @@ -104,11 +104,11 @@ def transform_image(image): # Execute the portable graph on TVM # --------------------------------- # Now, we would like to reproduce the same forward computation using TVM. -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor dev = tvm.gpu(0) dtype = "float32" -m = graph_runtime.GraphModule(lib["default"](dev)) +m = graph_executor.GraphModule(lib["default"](dev)) # set inputs m.set_input("data", tvm.nd.array(x.astype(dtype))) # execute diff --git a/tutorials/frontend/from_pytorch.py b/tutorials/frontend/from_pytorch.py index ecc3e9aef40c..a0db518025e3 100644 --- a/tutorials/frontend/from_pytorch.py +++ b/tutorials/frontend/from_pytorch.py @@ -110,10 +110,10 @@ # Execute the portable graph on TVM # --------------------------------- # Now we can try deploying the compiled model on target. 
-from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor dtype = "float32" -m = graph_runtime.GraphModule(lib["default"](dev)) +m = graph_executor.GraphModule(lib["default"](dev)) # Set inputs m.set_input(input_name, tvm.nd.array(img.astype(dtype))) # Execute diff --git a/tutorials/frontend/from_tensorflow.py b/tutorials/frontend/from_tensorflow.py index 5a7c98105715..96c001e4fd41 100644 --- a/tutorials/frontend/from_tensorflow.py +++ b/tutorials/frontend/from_tensorflow.py @@ -152,10 +152,10 @@ # --------------------------------- # Now we can try deploying the compiled model on target. -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor dtype = "uint8" -m = graph_runtime.GraphModule(lib["default"](dev)) +m = graph_executor.GraphModule(lib["default"](dev)) # set inputs m.set_input("DecodeJpeg/contents", tvm.nd.array(x.astype(dtype))) # execute diff --git a/tutorials/frontend/from_tflite.py b/tutorials/frontend/from_tflite.py index f7e8422c37b6..a85cfcea913c 100644 --- a/tutorials/frontend/from_tflite.py +++ b/tutorials/frontend/from_tflite.py @@ -148,7 +148,7 @@ def extract(path): # -------------- import tvm from tvm import te -from tvm.contrib import graph_runtime as runtime +from tvm.contrib import graph_executor as runtime # Create a runtime executor module module = runtime.GraphModule(lib["default"](tvm.cpu())) diff --git a/tutorials/frontend/using_external_lib.py b/tutorials/frontend/using_external_lib.py index 667dc2de1b9b..232f618bb28a 100644 --- a/tutorials/frontend/using_external_lib.py +++ b/tutorials/frontend/using_external_lib.py @@ -34,7 +34,7 @@ import tvm from tvm import te import numpy as np -from tvm.contrib import graph_runtime as runtime +from tvm.contrib import graph_executor as runtime from tvm import relay from tvm.relay import testing import tvm.testing diff --git a/tutorials/get_started/relay_quick_start.py b/tutorials/get_started/relay_quick_start.py index 86c4ac4399ed..fa9207604bac 100644 --- a/tutorials/get_started/relay_quick_start.py +++ b/tutorials/get_started/relay_quick_start.py @@ -43,7 +43,7 @@ from tvm.relay import testing import tvm from tvm import te -from tvm.contrib import graph_runtime +from tvm.contrib import graph_executor import tvm.testing ###################################################################### @@ -104,13 +104,13 @@ ##################################################################### # Run the generate library # ------------------------ -# Now we can create graph runtime and run the module on Nvidia GPU. +# Now we can create graph executor and run the module on Nvidia GPU. 
# create random input dev = tvm.gpu() data = np.random.uniform(-1, 1, size=data_shape).astype("float32") # create module -module = graph_runtime.GraphModule(lib["default"](dev)) +module = graph_executor.GraphModule(lib["default"](dev)) # set input and parameters module.set_input("data", data) # run @@ -143,7 +143,7 @@ loaded_lib = tvm.runtime.load_module(path_lib) input_data = tvm.nd.array(np.random.uniform(size=data_shape).astype("float32")) -module = graph_runtime.GraphModule(loaded_lib["default"](dev)) +module = graph_executor.GraphModule(loaded_lib["default"](dev)) module.run(data=input_data) out_deploy = module.get_output(0).asnumpy() diff --git a/tutorials/micro/micro_tflite.py b/tutorials/micro/micro_tflite.py index 53271b29e20d..f59b1c3723a8 100644 --- a/tutorials/micro/micro_tflite.py +++ b/tutorials/micro/micro_tflite.py @@ -127,7 +127,7 @@ import tvm import tvm.micro as micro from tvm.contrib.download import download_testdata -from tvm.contrib import graph_runtime, utils +from tvm.contrib import graph_executor, utils from tvm import relay model_url = "https://people.linaro.org/~tom.gall/sine_model.tflite" @@ -257,7 +257,7 @@ flasher = compiler.flasher() with tvm.micro.Session(binary=micro_binary, flasher=flasher) as session: - graph_mod = tvm.micro.create_local_graph_runtime( + graph_mod = tvm.micro.create_local_graph_executor( graph, session.get_system_lib(), session.device ) diff --git a/vta/scripts/tune_resnet.py b/vta/scripts/tune_resnet.py index a10d1de8c46b..dfb74b129718 100644 --- a/vta/scripts/tune_resnet.py +++ b/vta/scripts/tune_resnet.py @@ -28,8 +28,8 @@ from tvm import rpc, autotvm, relay from tvm.autotvm.measure.measure_methods import request_remote from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner -from tvm.contrib import graph_runtime, utils, download -from tvm.contrib.debugger import debug_runtime +from tvm.contrib import graph_executor, utils, download +from tvm.contrib.debugger import debug_executor import vta from vta.testing import simulator from vta.top import graph_pack @@ -325,9 +325,9 @@ def tune_tasks( # If detailed runtime info is needed build with debug runtime if opt.debug_profile: - m = debug_runtime.create(graph, lib, ctx) + m = debug_executor.create(graph, lib, ctx) else: - m = graph_runtime.create(graph, lib, ctx) + m = graph_executor.create(graph, lib, ctx) # Set the network parameters and synthetic input image = tvm.nd.array((np.random.uniform(size=(1, 3, 224, 224))).astype("float32")) diff --git a/vta/tutorials/autotvm/tune_relay_vta.py b/vta/tutorials/autotvm/tune_relay_vta.py index ed2671c75ae8..7deb7408479a 100644 --- a/vta/tutorials/autotvm/tune_relay_vta.py +++ b/vta/tutorials/autotvm/tune_relay_vta.py @@ -62,7 +62,7 @@ import tvm from tvm import te from tvm import rpc, autotvm, relay -from tvm.contrib import graph_runtime, utils, download +from tvm.contrib import graph_executor, utils, download from tvm.autotvm.measure.measure_methods import request_remote from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner @@ -431,9 +431,9 @@ def tune_and_evaluate(tuning_opt): remote.upload(temp.relpath("graphlib.tar")) lib = remote.load_module("graphlib.tar") - # Generate the graph runtime + # Generate the graph executor ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0) - m = graph_runtime.GraphModule(lib["default"](ctx)) + m = graph_executor.GraphModule(lib["default"](ctx)) # upload parameters to device image = tvm.nd.array((np.random.uniform(size=(1, 3, 224, 224))).astype("float32")) 
diff --git a/vta/tutorials/frontend/deploy_classification.py b/vta/tutorials/frontend/deploy_classification.py
index 1bf4161a3340..f9db824eafa3 100644
--- a/vta/tutorials/frontend/deploy_classification.py
+++ b/vta/tutorials/frontend/deploy_classification.py
@@ -52,8 +52,8 @@
 import tvm
 from tvm import te
 from tvm import rpc, autotvm, relay
-from tvm.contrib import graph_runtime, utils, download
-from tvm.contrib.debugger import debug_runtime
+from tvm.contrib import graph_executor, utils, download
+from tvm.contrib.debugger import debug_executor
 from tvm.relay import transform
 
 import vta
@@ -135,7 +135,7 @@
 ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0)
 
 ######################################################################
-# Build the inference graph runtime
+# Build the inference graph executor
 # ---------------------------------
 # Grab vision model from Gluon model zoo and compile with Relay.
 # The compilation steps are:
@@ -147,7 +147,7 @@
 # 4. Perform constant folding to reduce number of operators (e.g. eliminate batch norm multiply).
 # 5. Perform relay build to object file.
 # 6. Load the object file onto remote (FPGA device).
-# 7. Generate graph runtime, `m`.
+# 7. Generate graph executor, `m`.
 #
 
 # Load pre-configured AutoTVM schedules
@@ -209,8 +209,8 @@
     remote.upload(temp.relpath("graphlib.tar"))
     lib = remote.load_module("graphlib.tar")
 
-    # Graph runtime
-    m = graph_runtime.GraphModule(lib["default"](ctx))
+    # Graph executor
+    m = graph_executor.GraphModule(lib["default"](ctx))
 
 ######################################################################
 # Perform image classification inference
diff --git a/vta/tutorials/frontend/legacy/deploy_detection.py b/vta/tutorials/frontend/legacy/deploy_detection.py
index cbb320e98f13..696d0508b956 100644
--- a/vta/tutorials/frontend/legacy/deploy_detection.py
+++ b/vta/tutorials/frontend/legacy/deploy_detection.py
@@ -58,7 +58,7 @@
 from tvm import rpc, autotvm, relay
 from tvm.relay.testing import yolo_detection, darknet
 from tvm.relay.testing.darknet import __darknetffi__
-from tvm.contrib import graph_runtime, utils
+from tvm.contrib import graph_executor, utils
 from tvm.contrib.download import download_testdata
 from vta.testing import simulator
 from vta.top import graph_pack
@@ -178,7 +178,7 @@
 ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0)
 
 ####################################
-# Build the inference graph runtime.
+# Build the inference graph executor.
 # ----------------------------------
 # Using Darknet library load downloaded vision model and compile with Relay.
 # The compilation steps are:
@@ -190,7 +190,7 @@
 # 4. Perform constant folding to reduce number of operators (e.g. eliminate batch norm multiply).
 # 5. Perform relay build to object file.
 # 6. Load the object file onto remote (FPGA device).
-# 7. Generate graph runtime, `m`.
+# 7. Generate graph executor, `m`.
 #
 
 # Load pre-configured AutoTVM schedules
@@ -246,8 +246,8 @@
     remote.upload(temp.relpath("graphlib.tar"))
     lib = remote.load_module("graphlib.tar")
 
-    # Graph runtime
-    m = graph_runtime.GraphModule(lib["default"](ctx))
+    # Graph executor
+    m = graph_executor.GraphModule(lib["default"](ctx))
 
 ####################################
 # Perform image detection inference.
diff --git a/web/emcc/wasm_runtime.cc b/web/emcc/wasm_runtime.cc
index ba880e7b5c0a..bcad656678e5 100644
--- a/web/emcc/wasm_runtime.cc
+++ b/web/emcc/wasm_runtime.cc
@@ -34,7 +34,7 @@
 #include "src/runtime/c_runtime_api.cc"
 #include "src/runtime/cpu_device_api.cc"
 #include "src/runtime/file_utils.cc"
-#include "src/runtime/graph/graph_runtime.cc"
+#include "src/runtime/graph_executor/graph_executor.cc"
 #include "src/runtime/library_module.cc"
 #include "src/runtime/logging.cc"
 #include "src/runtime/module.cc"
diff --git a/web/src/runtime.ts b/web/src/runtime.ts
index 968dd9acbb5b..a76096ebba4d 100644
--- a/web/src/runtime.ts
+++ b/web/src/runtime.ts
@@ -570,13 +570,13 @@ export class Module implements Disposable {
 }
 
 /**
- * Graph runtime.
+ * Graph executor.
  *
  * This is a thin wrapper of the underlying TVM module.
  * you can also directly call set_input, run, and get_output
  * of underlying module functions
  */
-class GraphRuntime implements Disposable {
+class GraphExecutor implements Disposable {
   module: Module;
   private packedSetInput: PackedFunc;
   private packedRun: PackedFunc;
@@ -986,24 +986,20 @@ export class Instance implements Disposable {
   }
 
   /**
-   * Create a new graph runtime.
+   * Create a new graph executor.
    *
-   * @param graphJson The graph runtime json file.
+   * @param graphJson The graph executor json file.
    * @param lib The underlying library.
    * @param dev The execution device of the graph.
    */
-  createGraphRuntime(
-    graphJson: string,
-    lib: Module,
-    dev: DLDevice
-  ): GraphRuntime {
-    const fcreate = this.getGlobalFunc("tvm.graph_runtime.create");
+  createGraphExecutor(graphJson: string, lib: Module, dev: DLDevice): GraphExecutor {
+    const fcreate = this.getGlobalFunc("tvm.graph_executor.create");
     const module = fcreate(
       graphJson,
       lib,
       this.scalar(dev.deviceType, "int32"),
       this.scalar(dev.deviceId, "int32")) as Module;
-    return new GraphRuntime(module);
+    return new GraphExecutor(module);
   }