fix: handling infinite loop (#812)

harshilgajera-crest · web-flow · commit d3111a65b05c · 2024-04-19T16:43:39.000+02:00
There was a bug in PSA: if the first worker in splunk_ingest_data errors
out or throws an exception, it does not write the file that the other
workers wait for. Those workers would then wait indefinitely for the file
to be present, so the execution would never finish.

To handle this scenario, we handle the exception thrown by the first
worker and make sure that it writes the file in any case, so that the
other workers don't wait indefinitely.
diff --git a/.github/workflows/build-test-release.yml b/.github/workflows/build-test-release.yml
@@ -120,18 +120,15 @@ jobs:
         run: |
           pip install git+https://github.com/pixelb/crudini
           mkdir test-results-${{ matrix.splunk.version }}
-      - name: Splunk Up
+      - name: Test
         run: |
-          export SPLUNK_APP_PACKAGE=./tests/e2e/addons/TA_fiction
-          export SPLUNK_ADDON=TA_fiction
-          export SPLUNK_APP_ID=TA_fiction
+          export SPLUNK_APP_PACKAGE=./tests/e2e/addons/TA_fiction_indextime
+          export SPLUNK_ADDON=TA_fiction_indextime
+          export SPLUNK_APP_ID=TA_fiction_indextime
           export SPLUNK_VERSION=${{ matrix.splunk.version }}
+          export SPLUNK_HEC_TOKEN="9b741d03-43e9-4164-908b-e09102327d22"
           echo $SPLUNK_VERSION
           docker compose -f "docker-compose-ci.yml" build
-          SPLUNK_PASSWORD=Chang3d! docker compose -f docker-compose-ci.yml up -d splunk
-          sleep 90
-      - name: Test
-        run: |
           SPLUNK_PASSWORD=Chang3d! docker compose -f docker-compose-ci.yml up --abort-on-container-exit
           docker volume ls
       - name: Collect Results
diff --git a/pytest_splunk_addon/splunk.py b/pytest_splunk_addon/splunk.py
@@ -750,18 +750,21 @@ def splunk_ingest_data(request, splunk_hec_uri, sc4s, uf, splunk_events_cleanup)
         }
         thread_count = int(request.config.getoption("thread_count"))
         store_events = request.config.getoption("store_events")
-        IngestorHelper.ingest_events(
-            ingest_meta_data,
-            addon_path,
-            config_path,
-            thread_count,
-            store_events,
-        )
-        sleep(50)
-        if "PYTEST_XDIST_WORKER" in os.environ:
-            with open(os.environ.get("PYTEST_XDIST_TESTRUNUID") + "_wait", "w+"):
-                PYTEST_XDIST_TESTRUNUID = os.environ.get("PYTEST_XDIST_TESTRUNUID")
-
+        try:
+            IngestorHelper.ingest_events(
+                ingest_meta_data,
+                addon_path,
+                config_path,
+                thread_count,
+                store_events,
+            )
+            sleep(50)
+        except Exception as e:
+            raise e
+        finally:
+            if "PYTEST_XDIST_WORKER" in os.environ:
+                with open(os.environ.get("PYTEST_XDIST_TESTRUNUID") + "_wait", "w+"):
+                    PYTEST_XDIST_TESTRUNUID = os.environ.get("PYTEST_XDIST_TESTRUNUID")
     else:
         while not os.path.exists(os.environ.get("PYTEST_XDIST_TESTRUNUID") + "_wait"):
             sleep(1)
diff --git a/tests/e2e/test_splunk_addon.py b/tests/e2e/test_splunk_addon.py
@@ -84,7 +84,7 @@ def test_splunk_connection_external(testdir, request):
     # fnmatch_lines does an assertion internally
     result.assert_outcomes(passed=1, failed=0)
 
-    # make sure that that we get a '0' exit code for the testsuite
+    # make sure that we get a '0' exit code for the testsuite
     assert result.ret == 0
 
 
@@ -117,7 +117,7 @@ def test_splunk_connection_docker(testdir, request):
     # fnmatch_lines does an assertion internally
     result.assert_outcomes(passed=1, failed=0)
 
-    # make sure that that we get a '0' exit code for the testsuite
+    # make sure that we get a '0' exit code for the testsuite
     assert result.ret == 0
 
 
@@ -165,7 +165,7 @@ def empty_method():
         skipped=len(constants.TA_FICTION_SKIPPED),
     )
 
-    # make sure that that we get a '0' exit code for the testsuite
+    # make sure that we get a '0' exit code for the testsuite
     assert result.ret == 0
 
 
@@ -328,7 +328,7 @@ def empty_method():
         skipped=len(constants.TA_CIM_FICTION_SKIPPED),
     )
 
-    # make sure that that we get a '0' exit code for the testsuite
+    # make sure that we get a '0' exit code for the testsuite
     assert result.ret == 0
 
 
@@ -439,7 +439,7 @@ def empty_method():
         failed=0,
     )
 
-    # make sure that that we get a '0' exit code for the testsuite
+    # make sure that we get a '0' exit code for the testsuite
     assert result.ret == 0
 
 
@@ -627,7 +627,7 @@ def empty_method():
         skipped=len(constants.TA_REQ_TRANSITION_SKIPPED),
     )
 
-    # make sure that that we get a non '0' exit code for the testsuite as it contains failure
+    # make sure that we get a non '0' exit code for the testsuite as it contains failure
     assert result.ret == 0, "result not equal to 0"
 
 
@@ -682,7 +682,7 @@ def empty_method():
         skipped=len(constants.TA_REQ_BROKEN_SKIPPED),
     )
 
-    # make sure that that we get a non '0' exit code for the testsuite as it contains failure
+    # make sure that we get a non '0' exit code for the testsuite as it contains failure
     assert result.ret != 0
 
 
@@ -737,5 +737,50 @@ def empty_method():
         skipped=len(constants.TA_REQ_TRANSITION_SKIPPED),
     )
 
-    # make sure that that we get a non '0' exit code for the testsuite as it contains failure
+    # make sure that we get a non '0' exit code for the testsuite as it contains failure
     assert result.ret == 0, "result not equal to 0"
+
+
+@pytest.mark.test_infinite_loop_fixture
+@pytest.mark.external
+def test_infinite_loop_in_ingest_data_fixture(testdir, request):
+    """Make sure that pytest accepts our fixture."""
+
+    testdir.makepyfile(
+        """
+        from pytest_splunk_addon.standard_lib.addon_basic import Basic
+        class Test_App(Basic):
+            def empty_method():
+                pass
+    """
+    )
+
+    shutil.copytree(
+        os.path.join(testdir.request.fspath.dirname, "addons/TA_fiction_indextime"),
+        os.path.join(testdir.tmpdir, "package"),
+    )
+
+    shutil.copytree(
+        os.path.join(testdir.request.fspath.dirname, "test_data_models"),
+        os.path.join(testdir.tmpdir, "tests/data_models"),
+    )
+
+    setup_test_dir(testdir)
+    SampleGenerator.clean_samples()
+    Rule.clean_rules()
+
+    # run pytest with the following cmd args
+    # we are providing wrong sc4s service details here so that we can recreate the scenario where the first worker raises an exception and the other workers get stuck
+    result = testdir.runpytest(
+        "--splunk-app=addons/TA_fiction_indextime",
+        "--splunk-type=external",
+        "--splunk-host=splunk",
+        "--splunk-data-generator=tests/addons/TA_fiction_indextime/default",
+        "--sc4s-host=splunk",
+        "--sc4s-port=100",
+        "-n 2",
+    )
+
+    # Here we are not interested in the failures or errors,
+    # we are basically checking that we get results and test execution does not get stuck
+    assert result.parseoutcomes().get("passed") > 0