Terminate analyzer invocation once a timeout passes to prevent hanging

Ericsson · Dec 2, 2017 · 3528366 · 3528366
1 parent de66e70
commit 3528366
Show file tree

Hide file tree

Showing 8 changed files with 253 additions and 13 deletions.
diff --git a/docs/user_guide.md b/docs/user_guide.md
@@ -184,7 +184,7 @@ usage: CodeChecker check [-h] [-o OUTPUT_DIR] [-q] [-f]
                          [--analyzers ANALYZER [ANALYZER ...]]
                          [--add-compiler-defaults]
                          [--saargs CLANGSA_ARGS_CFG_FILE]
-                         [--tidyargs TIDY_ARGS_CFG_FILE]
+                         [--tidyargs TIDY_ARGS_CFG_FILE] [--timeout TIMEOUT]
                          [-e checker/group/profile] [-d checker/group/profile]
                          [--print-steps]
                          [--verbose {info,debug,debug_analyzer}]
@@ -228,6 +228,7 @@ analyzer arguments:
   --capture-analysis-output
   --saargs CLANGSA_ARGS_CFG_FILE
   --tidyargs TIDY_ARGS_CFG_FILE
+  --timeout TIMEOUT
 
 cross translation unit analysis arguments:
   These arguments are only available if the Clang Static Analyzer supports
@@ -360,7 +361,7 @@ usage: CodeChecker analyze [-h] [-j JOBS] [-i SKIPFILE] -o OUTPUT_PATH
                            [--add-compiler-defaults]
                            [--capture-analysis-output]
                            [--saargs CLANGSA_ARGS_CFG_FILE]
-                           [--tidyargs TIDY_ARGS_CFG_FILE]
+                           [--tidyargs TIDY_ARGS_CFG_FILE] [--timeout TIMEOUT]
                            [-e checker/group/profile]
                            [-d checker/group/profile] [--enable-all]
                            [--verbose {info,debug,debug_analyzer}]
@@ -453,6 +454,11 @@ analyzer arguments:
   --tidyargs TIDY_ARGS_CFG_FILE
                         File containing argument which will be forwarded
                         verbatim for Clang-Tidy.
+  --timeout TIMEOUT     The amount of time (in seconds) that each analyzer can
+                        spend, individually, to analyze the project. If the
+                        analysis of a particular file takes longer than this
+                        time, the analyzer is killed and the analysis is
+                        considered as a failed one.
 ~~~~~~~~~~~~~~~~~~~~~
 
 CodeChecker supports several analyzer tools. Currently, these analyzers are

diff --git a/libcodechecker/analyze/analysis_manager.py b/libcodechecker/analyze/analysis_manager.py
@@ -189,7 +189,7 @@ def check(check_data):
 
     action, context, analyzer_config_map, \
         output_dir, skip_handler, quiet_output_on_stdout, \
-        capture_analysis_output = check_data
+        capture_analysis_output, analysis_timeout = check_data
 
     skipped = False
     reanalyzed = False
@@ -250,8 +250,37 @@ def check(check_data):
             if os.path.exists(rh.analyzer_result_file):
                 reanalyzed = True
 
-            # Fills up the result handler with the analyzer information.
-            source_analyzer.analyze(rh, analyzer_environment)
+            # Need to capture the "function pointer" returned by
+            # setup_process_timeout as reference, so that we may call it later.
+            # (By default, let's say that the process finished gracefully.)
+            timeout_cleanup = [lambda _: False]
+
+            if analysis_timeout:
+                def __create_timeout(analyzer_process):
+                    """
+                    Once the analyzer process is started, this method is
+                    called. Set up a timeout for the analysis.
+                    """
+                    timeout_cleanup[0] = util.setup_process_timeout(
+                        analyzer_process, analysis_timeout, signal.SIGKILL)
+            else:
+                def __create_timeout(analyzer_process):
+                    # If no timeout is given by the client, this callback
+                    # shouldn't do anything.
+                    pass
+
+            source_analyzer.analyze(rh, analyzer_environment,
+                                    __create_timeout)
+
+            # If execution reaches this line, the analyzer process has quit.
+            if timeout_cleanup[0]():
+                LOG.warning("Analyzer ran too long, exceeding time limit "
+                            "of {0} seconds.".format(analysis_timeout))
+                LOG.warning("Considering this analysis as failed...")
+                rh.analyzer_returncode = -1
+                rh.analyzer_stderr = (">>> CodeChecker: Analysis timed out "
+                                      "after {0} seconds. <<<\n{1}") \
+                    .format(analysis_timeout, rh.analyzer_stderr)
 
             # If source file contains escaped spaces ("\ " tokens), then
             # clangSA writes the plist file with removing this escape
@@ -435,7 +464,7 @@ def check(check_data):
 
 def start_workers(actions, context, analyzer_config_map,
                   jobs, output_path, skip_handler, metadata,
-                  quiet_analyze, capture_analysis_output):
+                  quiet_analyze, capture_analysis_output, timeout):
     """
     Start the workers in the process pool.
     For every build action there is worker which makes the analysis.
@@ -471,7 +500,8 @@ def signal_handler(*arg, **kwarg):
                              output_path,
                              skip_handler,
                              quiet_analyze,
-                             capture_analysis_output)
+                             capture_analysis_output,
+                             timeout)
                             for build_action in actions]
 
         pool.map_async(check,

diff --git a/libcodechecker/analyze/analyzer.py b/libcodechecker/analyze/analyzer.py
@@ -132,7 +132,9 @@ def perform_analysis(args, context, actions, metadata):
                                        __get_skip_handler(args),
                                        metadata,
                                        'quiet' in args,
-                                       'capture_analysis_output' in args)
+                                       'capture_analysis_output' in args,
+                                       args.timeout if 'timeout' in args
+                                       else None)
 
     end_time = time.time()
     LOG.info("Analysis length: " + str(end_time - start_time) + " sec.")

diff --git a/libcodechecker/analyze/analyzers/analyzer_base.py b/libcodechecker/analyze/analyzers/analyzer_base.py
@@ -78,7 +78,7 @@ def get_analyzer_mentioned_files(self, output):
         """
         pass
 
-    def analyze(self, res_handler, env=None):
+    def analyze(self, res_handler, env=None, proc_callback=None):
         """
         Run the analyzer.
         """
@@ -101,7 +101,8 @@ def analyze(self, res_handler, env=None):
             ret_code, stdout, stderr \
                 = SourceAnalyzer.run_proc(analyzer_cmd,
                                           env,
-                                          res_handler.buildaction.directory)
+                                          res_handler.buildaction.directory,
+                                          proc_callback)
             res_handler.analyzer_returncode = ret_code
             res_handler.analyzer_stdout = stdout
             res_handler.analyzer_stderr = stderr
@@ -120,7 +121,7 @@ def get_analyzer_checkers(self, config_handler, env):
         pass
 
     @staticmethod
-    def run_proc(command, env=None, cwd=None):
+    def run_proc(command, env=None, cwd=None, proc_callback=None):
         """
         Just run the given command and return the return code
         and the stdout and stderr outputs of the process.
@@ -145,5 +146,9 @@ def signal_handler(*args, **kwargs):
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
 
-        (stdout, stderr) = proc.communicate()
+        # Send the created analyzer process' object if somebody wanted it.
+        if proc_callback:
+            proc_callback(proc)
+
+        stdout, stderr = proc.communicate()
         return proc.returncode, stdout, stderr
diff --git a/libcodechecker/libhandlers/analyze.py b/libcodechecker/libhandlers/analyze.py
@@ -222,6 +222,19 @@ def add_arguments_to_parser(parser):
                                help="File containing argument which will be "
                                     "forwarded verbatim for Clang-Tidy.")
 
+    analyzer_opts.add_argument('--timeout',
+                               type=int,
+                               dest='timeout',
+                               required=False,
+                               default=argparse.SUPPRESS,
+                               help="The amount of time (in seconds) that "
+                                    "each analyzer can spend, individually, "
+                                    "to analyze the project. If the analysis "
+                                    "of a particular file takes longer than "
+                                    "this time, the analyzer is killed and "
+                                    "the analysis is considered as a failed "
+                                    "one.")
+
     if host_check.is_ctu_capable():
         ctu_opts = parser.add_argument_group(
             "cross translation unit analysis arguments",

diff --git a/libcodechecker/libhandlers/check.py b/libcodechecker/libhandlers/check.py
@@ -243,6 +243,19 @@ def add_arguments_to_parser(parser):
                                     "forwarded verbatim for the Clang-Tidy "
                                     "analyzer.")
 
+    analyzer_opts.add_argument('--timeout',
+                               type=int,
+                               dest='timeout',
+                               required=False,
+                               default=argparse.SUPPRESS,
+                               help="The amount of time (in seconds) that "
+                                    "each analyzer can spend, individually, "
+                                    "to analyze the project. If the analysis "
+                                    "of a particular file takes longer than "
+                                    "this time, the analyzer is killed and "
+                                    "the analysis is considered as a failed "
+                                    "one.")
+
     if host_check.is_ctu_capable():
         ctu_opts = parser.add_argument_group(
             "cross translation unit analysis arguments",
@@ -440,7 +453,8 @@ def __update_if_key_exists(source, target, key):
                           'ctu_phases',
                           'ctu_in_memory',
                           'enable_all',
-                          'ordered_checkers'  # --enable and --disable.
+                          'ordered_checkers',  # --enable and --disable.
+                          'timeout'
                           ]
         for key in args_to_update:
             __update_if_key_exists(args, analyze_args, key)

diff --git a/libcodechecker/util.py b/libcodechecker/util.py
@@ -11,8 +11,10 @@
 import hashlib
 import os
 import re
+import signal
 import socket
 import subprocess
+from threading import Timer
 
 import psutil
 
@@ -150,6 +152,87 @@ def call_command(command, env=None, cwd=None):
         return oerr.strerror, oerr.errno
 
 
+def setup_process_timeout(proc, timeout,
+                          signal_at_timeout=signal.SIGTERM,
+                          failure_callback=None):
+    """
+    Setup a timeout on a process. After `timeout` seconds, the process is
+    killed by `signal_at_timeout` signal.
+
+    :param proc: The subprocess.Process object representing the process to
+      attach the watcher to.
+    :param timeout: The timeout the process is allowed to run for, in seconds.
+      This timeout is counted down some moments after calling
+      setup_process_timeout(), and due to OS scheduling, might not be exact.
+    :param signal_at_timeout: The signal to use when the process exceeds the
+      timeout.
+    :param failure_callback: An optional function which is called when the
+      process is killed by the timeout. This is called with no arguments
+      passed.
+
+    :return: A function is returned which should be called when the client code
+      (usually via subprocess.Process.wait() or
+      subprocess.Process.communicate())) figures out that the called process
+      has terminated. (See below for what this called function returns.)
+    """
+
+    # The watch dict encapsulated the captured variables for the timeout watch.
+    watch = {'pid': proc.pid,
+             'timer': None,  # Will be created later.
+             'counting': False,
+             'killed': False}
+
+    def __kill():
+        """
+        Helper function to execute the killing of a hung process.
+        """
+        LOG.debug("Process {0} has ran for too long, killing it!"
+                  .format(watch['pid']))
+        os.kill(watch['pid'], signal_at_timeout)
+        watch['counting'] = False
+        watch['killed'] = True
+
+        if failure_callback:
+            failure_callback()
+
+    timer = Timer(timeout, __kill)
+    watch['timer'] = timer
+
+    LOG.debug("Setup timeout of {1} for PID {0}".format(proc.pid, timeout))
+    timer.start()
+    watch['counting'] = True
+
+    def __cleanup_timeout():
+        """
+        Stops the timer associated with the timeout watch if the process
+        finished within the grace period.
+
+        Due to race conditions and the possibility of the OS, or another
+        process also using signals to kill the watched process in particular,
+        it is possible that checking subprocess.Process.returncode on the
+        watched process is not enough to see if the timeout watched killed it,
+        or something else.
+
+        (Note: returncode is -N where N is the signal's value, but only on Unix
+        systems!)
+
+        It is safe to call this function multiple times to check for the result
+        of the watching.
+
+        :return: Whether or not the process was killed by the watcher. If this
+          is False, the process could have finished gracefully, or could have
+          been destroyed by other means.
+        """
+        if watch['counting'] and not watch['killed'] and watch['timer']:
+            watch['timer'].cancel()
+            watch['timer'] = None
+            watch['counting'] = False
+
+        return watch['killed']
+
+    return __cleanup_timeout
+
+
 def kill_process_tree(parent_pid):
     proc = psutil.Process(parent_pid)
     children = proc.children()

diff --git a/tests/unit/test_subprocess_timeout.py b/tests/unit/test_subprocess_timeout.py
@@ -0,0 +1,87 @@
+# -----------------------------------------------------------------------------
+#                     The CodeChecker Infrastructure
+#   This file is distributed under the University of Illinois Open Source
+#   License. See LICENSE.TXT for details.
+# -----------------------------------------------------------------------------
+
+"""
+Test if the subprocess timeout watcher works properly.
+"""
+
+import signal
+import subprocess
+import unittest
+
+import psutil
+
+from libcodechecker.util import setup_process_timeout
+
+
+class subprocess_timeoutTest(unittest.TestCase):
+    """
+    Test the process timeout watcher functionality.
+    """
+
+    def testTimeoutWithProcessFinishing(self):
+        """
+        Test if process timeout watcher recognises if a process ended
+        gracefully before the timeout expired.
+        """
+        # Create a process that executes quickly.
+        proc = subprocess.Popen(['echo', 'This process executes quickly!'],
+                                stdout=subprocess.PIPE,
+                                stderr=subprocess.PIPE)
+        print("Started `echo` with PID {0}".format(proc.pid))
+
+        future = setup_process_timeout(proc, 5, signal.SIGKILL)
+
+        # Simulate waiting for the process.
+        proc.wait()
+
+        # Execution reaches this spot, the process exited, one way or another.
+        killed = future()
+        self.assertFalse(killed,
+                         "Process timeout watcher said it killed the "
+                         "process, but it should have exited long beforehand.")
+
+    def testTimeoutWithLongRunning(self):
+        """
+        Test if process timeout watcher kills the process that runs too long,
+        and properly reports that it was killed.
+        """
+        # Create a process that runs infinitely.
+        proc = subprocess.Popen(['yes'],
+                                stdout=subprocess.PIPE,
+                                stderr=subprocess.PIPE)
+        print("Started `yes` with PID {0}".format(proc.pid))
+
+        future = setup_process_timeout(proc, 5, signal.SIGKILL)
+
+        # Simulate waiting for the process.
+        proc.wait()
+
+        # Execution reaches this spot, which means the process was killed.
+        # (Or it ran way too long and the OS killed it. Usually tests run
+        # quick enough this isn't the case...)
+        killed = future()
+        self.assertTrue(killed,
+                        "Process timeout watcher said it did not kill the "
+                        "process, but it should have.")
+
+        with self.assertRaises(psutil.NoSuchProcess):
+            # Try to fetch the process from the system. It shouldn't exist.
+            osproc = psutil.Process(proc.pid)
+
+            # There can be rare cases that the OS so quickly recycles the PID.
+            if osproc.exe() != 'yes':
+                # (Let's just say it's very, very, VERY rare that it recycles
+                # the PID to another execution of 'yes'...)
+
+                # If the process exists but it isn't the process we started,
+                # it's the same as if it doesn't existed.
+                raise psutil.NoSuchProcess(proc.pid)
+
+        # NOTE: This assertion is only viable on Unix systems!
+        self.assertEquals(proc.returncode, -signal.SIGKILL,
+                          "`yes` died in a way that it wasn't the process "
+                          "timeout watcher killing it.")