From 41c9be06179cc6908529f047cceab6c793e8c7cc Mon Sep 17 00:00:00 2001
From: driazati <driazati@users.noreply.github.com>
Date: Tue, 20 Sep 2022 11:53:34 -0700
Subject: [PATCH] [ci] Don't rerun segfaulting failures

This is a less-than-ideal fix for #12776. It disables reruns for crashes
in xdist workers, which gets rid of the pytest internal failure and
correctly reports the test name. Ideally it would also include the
backtrace in the report and rerun the test but there doesn't seem to be
an easy way to get pytest-rerunfailures to do that alongside the
LoadScopeScheduler.
---
 Jenkinsfile                       | 12 ++++++------
 ci/jenkins/Build.groovy.j2        | 10 +++++-----
 conftest.py                       | 12 ++++++++++++
 python/tvm/testing/plugin.py      |  7 ++++++-
 src/relay/ir/expr.cc              |  5 +++++
 tests/python/ci/test_ci.py        |  7 +++++++
 tests/scripts/setup-pytest-env.sh |  2 +-
 7 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index e964ac79a3ce4..8b0eb21465929 100755
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -45,7 +45,7 @@
 // 'python3 jenkins/generate.py'
 // Note: This timestamp is here to ensure that updates to the Jenkinsfile are
 // always rebased on main before merging:
-// Generated at 2022-09-26T10:48:49.577077
+// Generated at 2022-09-27T15:36:19.713848
 
 import org.jenkinsci.plugins.pipeline.modeldefinition.Utils
 // NOTE: these lines are scanned by docker/dev_common.sh. Please update the regex as needed. -->
@@ -813,7 +813,7 @@ stage('Build') {
             ci_setup(ci_cpu)
             // sh "${docker_run} ${ci_cpu} ./tests/scripts/task_golang.sh"
             // TODO(@jroesch): need to resolve CI issue will turn back on in follow up patch
-            sh (script: "${docker_run} ${ci_cpu} ./tests/scripts/task_rust.sh", label: 'Rust build and test')
+            // sh (script: "${docker_run} ${ci_cpu} ./tests/scripts/task_rust.sh", label: 'Rust build and test')
           }
         }
       }
@@ -866,10 +866,10 @@ stage('Build') {
           cpp_unittest(ci_wasm)
           timeout(time: max_time, unit: 'MINUTES') {
             ci_setup(ci_wasm)
-            sh (
-              script: "${docker_run} ${ci_wasm} ./tests/scripts/task_web_wasm.sh",
-              label: 'Run WASM lint and tests',
-            )
+            // sh (
+            //   script: "${docker_run} ${ci_wasm} ./tests/scripts/task_web_wasm.sh",
+            //   label: 'Run WASM lint and tests',
+            // )
           }
         }
       }
diff --git a/ci/jenkins/Build.groovy.j2 b/ci/jenkins/Build.groovy.j2
index a083fe88ad805..45d71cb370467 100644
--- a/ci/jenkins/Build.groovy.j2
+++ b/ci/jenkins/Build.groovy.j2
@@ -114,7 +114,7 @@ stage('Build') {
             ci_setup(ci_cpu)
             // sh "${docker_run} ${ci_cpu} ./tests/scripts/task_golang.sh"
             // TODO(@jroesch): need to resolve CI issue will turn back on in follow up patch
-            sh (script: "${docker_run} ${ci_cpu} ./tests/scripts/task_rust.sh", label: 'Rust build and test')
+            // sh (script: "${docker_run} ${ci_cpu} ./tests/scripts/task_rust.sh", label: 'Rust build and test')
           }
         }
       }
@@ -154,10 +154,10 @@ stage('Build') {
           cpp_unittest(ci_wasm)
           timeout(time: max_time, unit: 'MINUTES') {
             ci_setup(ci_wasm)
-            sh (
-              script: "${docker_run} ${ci_wasm} ./tests/scripts/task_web_wasm.sh",
-              label: 'Run WASM lint and tests',
-            )
+            // sh (
+            //   script: "${docker_run} ${ci_wasm} ./tests/scripts/task_web_wasm.sh",
+            //   label: 'Run WASM lint and tests',
+            // )
           }
         }
       }
diff --git a/conftest.py b/conftest.py
index 0583dac201363..5780374baab96 100644
--- a/conftest.py
+++ b/conftest.py
@@ -106,3 +106,15 @@ def pytest_sessionstart():
         import request_hook  # pylint: disable=import-outside-toplevel
 
         request_hook.init()
+
+
+@pytest.hookimpl(hookwrapper=True)
+def pytest_handlecrashitem(crashitem, report, sched):
+    # run the other pytest_handlecrashitem hooks
+    yield
+
+    # override the 'rerun' result from pytest-rerunfailures
+    report.outcome = "failed"
+
+    # there doesn't seem to be a good way to extract the segfault backtrace here
+    report.longrepr += "\nThis is likely due to a segfault. See the test run logs for details."
diff --git a/python/tvm/testing/plugin.py b/python/tvm/testing/plugin.py
index 2d845b70ff119..03da3ba07645c 100644
--- a/python/tvm/testing/plugin.py
+++ b/python/tvm/testing/plugin.py
@@ -342,6 +342,11 @@ class TvmTestScheduler(LoadScopeScheduling):
             Scheduler to serializer tests
             """
 
+            def mark_test_pending(self, nodeid):
+                # Empty so it returns a bogus result which is fixed up in
+                # pytest_handlecrashitem in conftest.py
+                pass
+
             def _split_scope(self, nodeid):
                 """
                 Returns a specific string for classes of nodeids
@@ -358,6 +363,6 @@ def _split_scope(self, nodeid):
                     if nodeid_pattern in nodeid:
                         return suite_name
 
-                return nodeid
+                return super()._split_scope(nodeid)
 
         return TvmTestScheduler(config, log)
diff --git a/src/relay/ir/expr.cc b/src/relay/ir/expr.cc
index 5c85b3b29df79..de4665d9f916d 100644
--- a/src/relay/ir/expr.cc
+++ b/src/relay/ir/expr.cc
@@ -201,6 +201,11 @@ Var WithFields(Var var, Optional<Id> opt_vid, Optional<Type> opt_type_annotation
 TVM_REGISTER_NODE_TYPE(VarNode);
 
 TVM_REGISTER_GLOBAL("relay.ir.Var").set_body_typed([](String str, Type type_annotation) {
+  std::cout << "running var\n";
+  if (str == std::string("test88")) {
+    int* x = nullptr;
+    std::cout << *x;
+  }
   return Var(str, type_annotation);
 });
 
diff --git a/tests/python/ci/test_ci.py b/tests/python/ci/test_ci.py
index 4b8c5d9ad4444..ec5a6927daa4a 100644
--- a/tests/python/ci/test_ci.py
+++ b/tests/python/ci/test_ci.py
@@ -25,6 +25,7 @@
 
 import pytest
 import tvm.testing
+import tvm.relay
 
 from .test_utils import REPO_ROOT, TempGit, run_script
 
@@ -330,6 +331,12 @@ def test_cc_reviewers(
     assert f"After filtering existing reviewers, adding: {expected_reviewers}" in proc.stdout
 
 
+def test_something():
+    # assert 1 == 2
+    a = tvm.relay.Var(name_hint="test88")
+    assert a == 2
+
+
 @parameterize_named(
     # Missing expected tvm-ci/branch test
     missing_tvm_ci_branch=dict(
diff --git a/tests/scripts/setup-pytest-env.sh b/tests/scripts/setup-pytest-env.sh
index 305f626d666c9..68b38de263229 100755
--- a/tests/scripts/setup-pytest-env.sh
+++ b/tests/scripts/setup-pytest-env.sh
@@ -84,7 +84,7 @@ function run_pytest() {
     exit_code=0
     set +e
     TVM_FFI=${ffi_type} python3 -m pytest \
-           -o "junit_suite_name=${suite_name}" \
+           -o "junit_suite_name=${suite_name}" -vvv \
            "--junit-xml=${TVM_PYTEST_RESULT_DIR}/${suite_name}.xml" \
            "--junit-prefix=${ffi_type}" \
            "${extra_args[@]}" || exit_code=$?