Fix reported times for batched instrumentation tests

Changes the test runner to add a status report to each test, reporting the duration in ms of the test. I've chosen to account for the overhead of starting instrumentation, etc. by adding the overhead duration to the runtime of the first test in the batch. This means that one unlucky test in the batch will seem really slow from looking at reported durations, but means the overall duration for the batch will be correct. See bot results for android-marshmallow-arm64-rel, chrome_public_test_apk, org.chromium.components.external_intents.ExternalNavigationHandlerTest to see what this looks like in practice. Bug: 989569 Change-Id: I93aae5e52ea51bb635bb13ce6b6db6e4e8bb55ca Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2212491 Commit-Queue: Michael Thiessen <mthiesse@chromium.org> Reviewed-by: Andrew Grieve <agrieve@chromium.org> Cr-Commit-Position: refs/heads/master@{#771855}
marcosholgado · May 26, 2020 · 56935ae · 56935ae
1 parent 1849602
commit 56935ae
Show file tree

Hide file tree

Showing 8 changed files with 109 additions and 55 deletions.
diff --git a/base/test/android/javatests/src/org/chromium/base/test/BaseJUnit4ClassRunner.java b/base/test/android/javatests/src/org/chromium/base/test/BaseJUnit4ClassRunner.java
@@ -6,6 +6,8 @@
 
 import android.app.Application;
 import android.content.Context;
+import android.os.Bundle;
+import android.os.SystemClock;
 import android.support.test.InstrumentationRegistry;
 import android.support.test.internal.runner.junit4.AndroidJUnit4ClassRunner;
 import android.support.test.internal.util.AndroidRunnerParams;
@@ -16,6 +18,7 @@
 import org.junit.rules.RuleChain;
 import org.junit.rules.TestRule;
 import org.junit.runner.Description;
+import org.junit.runner.notification.RunListener;
 import org.junit.runner.notification.RunNotifier;
 import org.junit.runners.model.FrameworkMethod;
 import org.junit.runners.model.InitializationError;
@@ -51,6 +54,11 @@ public class BaseJUnit4ClassRunner extends AndroidJUnit4ClassRunner {
     private static final String EXTRA_TRACE_FILE =
             "org.chromium.base.test.BaseJUnit4ClassRunner.TraceFile";
 
+    // Arbirary int that must not overlap with status codes defined by
+    // https://developer.android.com/reference/android/test/InstrumentationTestRunner.html#REPORT_VALUE_ID
+    private static final int STATUS_CODE_TEST_DURATION = 1337;
+    private static final String DURATION_BUNDLE_ID = "duration_ms";
+
     /**
      * Create a BaseJUnit4ClassRunner to run {@code klass} and initialize values.
      *
@@ -250,7 +258,25 @@ protected void runChild(FrameworkMethod method, RunNotifier notifier) {
 
         runPreTestHooks(method);
 
+        final long start = SystemClock.uptimeMillis();
+        RunListener listener = new RunListener() {
+            @Override
+            public void testFinished(Description description) {
+                long end = SystemClock.uptimeMillis();
+                Bundle b = new Bundle();
+                b.putLong(DURATION_BUNDLE_ID, end - start);
+                InstrumentationRegistry.getInstrumentation().sendStatus(
+                        STATUS_CODE_TEST_DURATION, b);
+            }
+        };
+        // Add listener ahead of other listeners to make parsing easier -
+        // Instrumentation prints the status code after the values corresponding
+        // to that status code, so if we output this status first, we can just
+        // ignore the status code line and have the parser include the values
+        // with the test compete bundle.
+        notifier.addFirstListener(listener);
         super.runChild(method, notifier);
+        notifier.removeListener(listener);
 
         TestTraceEvent.end(testName);
 

diff --git a/base/test/android/javatests/src/org/chromium/base/test/util/Batch.java b/base/test/android/javatests/src/org/chromium/base/test/util/Batch.java
@@ -10,43 +10,39 @@
 import java.lang.annotation.Target;
 
 /**
- * Annotation to indicate that this collection of tests is safe to run in batches, where the
- * Instrumentation Runner (and hence the process) does not need to be restarted between these tests.
+ * Annotation to indicate that this collection of tests is safe to run in
+ * batches, where the Instrumentation Runner (and hence the process) does not
+ * need to be restarted between these tests.
  *
- * The value passed to this annotation determines which test suites may be run together in the same
- * batch - batches that share a batch name will run in the same Instrumentation invocation. The
- * default value (empty) means the suite runs as its own batch, and the process is restarted before
- * and after the suite.
+ * The value passed to this annotation determines which test suites may be run
+ * together in the same batch - batches that share a batch name will run in the
+ * same Instrumentation invocation. The default value (empty) means the suite
+ * runs as its own batch, and the process is restarted before and after the
+ * suite.
  *
- * This makes the tests run significantly faster, but you must be careful not to persist changes to
- * global state that could cause other tests in the batch to fail.
+ * This makes the tests run significantly faster, but you must be careful not to
+ * persist changes to global state that could cause other tests in the batch to
+ * fail.
  *
  * @Before/@After will run as expected before/after each test method.
- * @BeforeClass/@AfterClass may be used for one-time initialization across all tests within a single
- * suite. Tests wishing to share one-time initialization across suites in the same batch will need
- * to explicitly coordinate.
+ * @BeforeClass/@AfterClass may be used for one-time initialization across all
+ * tests within a single suite. Tests wishing to share one-time initialization
+ * across suites in the same batch will need to explicitly coordinate.
  */
 @Target(ElementType.TYPE)
 @Retention(RetentionPolicy.RUNTIME)
 public @interface Batch {
-    public String value();
-
-    /**
-     * Batch name for test suites that are not safe to run batched across multiple suites. The
-     * process will not be restarted before each test within this suite, but will be restarted
-     * before and after this suite runs.
-     */
-    public static final String PER_CLASS = "";
+    public String value() default "";
 
     /**
      * Globally shared name for unit tests that can all be batched together.
      *
-     * Unit tests must be careful not to persist any changes to global state, or flakes are likely
-     * to occur.
+     * Unit tests must be careful not to persist any changes to global state, or
+     * flakes are likely to occur.
      *
-     * An exception to this is loading Chrome's native library (eg. using NativeLibraryTestRule).
-     * Your unit tests must assume that the native library may have already been loaded by another
-     * test.
+     * An exception to this is loading Chrome's native library (eg. using
+     * NativeLibraryTestRule). Your unit tests must assume that the native
+     * library may have already been loaded by another test.
      */
     public static final String UNIT_TESTS = "UnitTests";
 }
diff --git a/build/android/pylib/instrumentation/instrumentation_parser.py b/build/android/pylib/instrumentation/instrumentation_parser.py
@@ -20,6 +20,8 @@
 # http://junit.org/junit4/javadoc/4.12/org/junit/AssumptionViolatedException.html
 STATUS_CODE_ASSUMPTION_FAILURE = -4
 
+STATUS_CODE_TEST_DURATION = 1337
+
 # http://developer.android.com/reference/android/app/Activity.html
 RESULT_CODE_OK = -1
 RESULT_CODE_CANCELED = 0
@@ -77,6 +79,10 @@ def join_bundle_values(bundle):
           key, value = value.split('=', 1)
           bundle[header][key] = [value]
         elif header == 'STATUS_CODE':
+          # To make parsing easier treat this as part of the next status by
+          # skipping it.
+          if int(value) == STATUS_CODE_TEST_DURATION:
+            continue
           yield int(value), join_bundle_values(bundle['STATUS'])
           bundle['STATUS'] = {}
         elif header == 'CODE':

diff --git a/build/android/pylib/instrumentation/instrumentation_test_instance.py b/build/android/pylib/instrumentation/instrumentation_test_instance.py
@@ -61,6 +61,23 @@
 _NATIVE_CRASH_RE = re.compile('(process|native) crash', re.IGNORECASE)
 _PICKLE_FORMAT_VERSION = 12
 
+# The ID of the bundle value Instrumentation uses to report which test index the
+# results are for in a collection of tests. Note that this index is 1-based.
+_BUNDLE_CURRENT_ID = 'current'
+# The ID of the bundle value Instrumentation uses to report the test class.
+_BUNDLE_CLASS_ID = 'class'
+# The ID of the bundle value Instrumentation uses to report the test name.
+_BUNDLE_TEST_ID = 'test'
+# The ID of the bundle value Instrumentation uses to report if a test was
+# skipped.
+_BUNDLE_SKIPPED_ID = 'test_skipped'
+# The ID of the bundle value Instrumentation uses to report the crash stack, if
+# the test crashed.
+_BUNDLE_STACK_ID = 'stack'
+
+# The ID of the bundle value Chrome uses to report the test duration.
+_BUNDLE_DURATION_ID = 'duration_ms'
+
 
 class MissingSizeAnnotationError(test_exception.TestException):
   def __init__(self, class_name):
@@ -103,9 +120,8 @@ def ParseAmInstrumentRawOutput(raw_output):
   return (code, bundle, statuses)
 
 
-def GenerateTestResults(
-    result_code, result_bundle, statuses, start_ms, duration_ms, device_abi,
-    symbolizer):
+def GenerateTestResults(result_code, result_bundle, statuses, duration_ms,
+                        device_abi, symbolizer):
   """Generate test results from |statuses|.
 
   Args:
@@ -116,7 +132,6 @@ def GenerateTestResults(
       - the bundle dump as a dict mapping string keys to string values
       Note that this is the same as the third item in the 3-tuple returned by
       |_ParseAmInstrumentRawOutput|.
-    start_ms: The start time of the test in milliseconds.
     duration_ms: The duration of the test in milliseconds.
     device_abi: The device_abi, which is needed for symbolization.
     symbolizer: The symbolizer used to symbolize stack.
@@ -129,10 +144,11 @@ def GenerateTestResults(
   results = []
 
   current_result = None
+  cumulative_duration = 0
 
   for status_code, bundle in statuses:
-    test_class = bundle.get('class', '')
-    test_method = bundle.get('test', '')
+    test_class = bundle.get(_BUNDLE_CLASS_ID, '')
+    test_method = bundle.get(_BUNDLE_TEST_ID, '')
     if test_class and test_method:
       test_name = '%s#%s' % (test_class, test_method)
     else:
@@ -142,10 +158,18 @@ def GenerateTestResults(
       if current_result:
         results.append(current_result)
       current_result = test_result.InstrumentationTestResult(
-          test_name, base_test_result.ResultType.UNKNOWN, start_ms, duration_ms)
+          test_name, base_test_result.ResultType.UNKNOWN, duration_ms)
     else:
+      # For the first result, duration will be set below to the difference
+      # between the reported and actual durations to account for overhead like
+      # starting instrumentation.
+      if bundle.get(_BUNDLE_CURRENT_ID, 1) != 1:
+        current_duration = int(bundle.get(_BUNDLE_DURATION_ID, duration_ms))
+        current_result.SetDuration(current_duration)
+        cumulative_duration += current_duration
+
       if status_code == instrumentation_parser.STATUS_CODE_OK:
-        if bundle.get('test_skipped', '').lower() in ('true', '1', 'yes'):
+        if bundle.get(_BUNDLE_SKIPPED_ID, '').lower() in ('true', '1', 'yes'):
           current_result.SetType(base_test_result.ResultType.SKIP)
         elif current_result.GetType() == base_test_result.ResultType.UNKNOWN:
           current_result.SetType(base_test_result.ResultType.PASS)
@@ -159,15 +183,13 @@ def GenerateTestResults(
           logging.error('Unrecognized status code %d. Handling as an error.',
                         status_code)
         current_result.SetType(base_test_result.ResultType.FAIL)
-    if 'stack' in bundle:
+    if _BUNDLE_STACK_ID in bundle:
       if symbolizer and device_abi:
-        current_result.SetLog(
-            '%s\n%s' % (
-              bundle['stack'],
-              '\n'.join(symbolizer.ExtractAndResolveNativeStackTraces(
-                  bundle['stack'], device_abi))))
+        current_result.SetLog('%s\n%s' % (bundle[_BUNDLE_STACK_ID], '\n'.join(
+            symbolizer.ExtractAndResolveNativeStackTraces(
+                bundle[_BUNDLE_STACK_ID], device_abi))))
       else:
-        current_result.SetLog(bundle['stack'])
+        current_result.SetLog(bundle[_BUNDLE_STACK_ID])
 
   if current_result:
     if current_result.GetType() == base_test_result.ResultType.UNKNOWN:
@@ -179,6 +201,9 @@ def GenerateTestResults(
 
     results.append(current_result)
 
+  if results:
+    results[0].SetDuration(duration_ms - cumulative_duration)
+
   return results
 
 
@@ -1016,11 +1041,10 @@ def ParseAmInstrumentRawOutput(raw_output):
     return ParseAmInstrumentRawOutput(raw_output)
 
   @staticmethod
-  def GenerateTestResults(
-      result_code, result_bundle, statuses, start_ms, duration_ms,
-      device_abi, symbolizer):
+  def GenerateTestResults(result_code, result_bundle, statuses, duration_ms,
+                          device_abi, symbolizer):
     return GenerateTestResults(result_code, result_bundle, statuses,
-                               start_ms, duration_ms, device_abi, symbolizer)
+                               duration_ms, device_abi, symbolizer)
 
   #override
   def TearDown(self):

diff --git a/build/android/pylib/instrumentation/instrumentation_test_instance_test.py b/build/android/pylib/instrumentation/instrumentation_test_instance_test.py
@@ -761,7 +761,7 @@ def testGetTests_multipleAnnotationValuesRequested(self):
 
   def testGenerateTestResults_noStatus(self):
     results = instrumentation_test_instance.GenerateTestResults(
-        None, None, [], 0, 1000, None, None)
+        None, None, [], 1000, None, None)
     self.assertEqual([], results)
 
   def testGenerateTestResults_testPassed(self):
@@ -776,7 +776,7 @@ def testGenerateTestResults_testPassed(self):
       }),
     ]
     results = instrumentation_test_instance.GenerateTestResults(
-        None, None, statuses, 0, 1000, None, None)
+        None, None, statuses, 1000, None, None)
     self.assertEqual(1, len(results))
     self.assertEqual(base_test_result.ResultType.PASS, results[0].GetType())
 
@@ -797,7 +797,7 @@ def testGenerateTestResults_testSkipped_true(self):
       }),
     ]
     results = instrumentation_test_instance.GenerateTestResults(
-        None, None, statuses, 0, 1000, None, None)
+        None, None, statuses, 1000, None, None)
     self.assertEqual(1, len(results))
     self.assertEqual(base_test_result.ResultType.SKIP, results[0].GetType())
 
@@ -816,7 +816,7 @@ def testGenerateTestResults_testSkipped_false(self):
       }),
     ]
     results = instrumentation_test_instance.GenerateTestResults(
-        None, None, statuses, 0, 1000, None, None)
+        None, None, statuses, 1000, None, None)
     self.assertEqual(1, len(results))
     self.assertEqual(base_test_result.ResultType.PASS, results[0].GetType())
 
@@ -832,7 +832,7 @@ def testGenerateTestResults_testFailed(self):
       }),
     ]
     results = instrumentation_test_instance.GenerateTestResults(
-        None, None, statuses, 0, 1000, None, None)
+        None, None, statuses, 1000, None, None)
     self.assertEqual(1, len(results))
     self.assertEqual(base_test_result.ResultType.FAIL, results[0].GetType())
 
@@ -850,7 +850,7 @@ def testGenerateTestResults_testUnknownException(self):
       }),
     ]
     results = instrumentation_test_instance.GenerateTestResults(
-        None, None, statuses, 0, 1000, None, None)
+        None, None, statuses, 1000, None, None)
     self.assertEqual(1, len(results))
     self.assertEqual(base_test_result.ResultType.FAIL, results[0].GetType())
     self.assertEqual(stacktrace, results[0].GetLog())
@@ -867,7 +867,7 @@ def testGenerateJUnitTestResults_testSkipped_true(self):
       }),
     ]
     results = instrumentation_test_instance.GenerateTestResults(
-        None, None, statuses, 0, 1000, None, None)
+        None, None, statuses, 1000, None, None)
     self.assertEqual(1, len(results))
     self.assertEqual(base_test_result.ResultType.SKIP, results[0].GetType())
 

diff --git a/build/android/pylib/instrumentation/test_result.py b/build/android/pylib/instrumentation/test_result.py
@@ -8,13 +8,12 @@
 class InstrumentationTestResult(base_test_result.BaseTestResult):
   """Result information for a single instrumentation test."""
 
-  def __init__(self, full_name, test_type, start_date, dur, log=''):
+  def __init__(self, full_name, test_type, dur, log=''):
     """Construct an InstrumentationTestResult object.
 
     Args:
       full_name: Full name of the test.
       test_type: Type of the test result as defined in ResultType.
-      start_date: Date in milliseconds when the test began running.
       dur: Duration of the test run in milliseconds.
       log: A string listing any errors.
     """
@@ -27,4 +26,7 @@ def __init__(self, full_name, test_type, start_date, dur, log=''):
     else:
       self._class_name = full_name
       self._test_name = full_name
-    self._start_date = start_date
+
+  def SetDuration(self, duration):
+    """Set the test duration."""
+    self._duration = duration
diff --git a/build/android/pylib/local/device/local_device_instrumentation_test_run.py b/build/android/pylib/local/device/local_device_instrumentation_test_run.py
@@ -624,7 +624,7 @@ def name_and_timeout(t):
         result_code, result_bundle, statuses = (
             self._test_instance.ParseAmInstrumentRawOutput(output))
         results = self._test_instance.GenerateTestResults(
-            result_code, result_bundle, statuses, start_ms, duration_ms,
+            result_code, result_bundle, statuses, duration_ms,
             device.product_cpu_abi, self._test_instance.symbolizer)
 
       if self._env.trace_output:

diff --git a/testing/android/docs/instrumentation.md b/testing/android/docs/instrumentation.md
@@ -74,7 +74,7 @@ others are pulled in from outside. They include:
 
 #### Test Batching
 
-The [`@Batch("group_name")`](https://chromium.googlesource.com/chromium/src/+/master/base/test/android/javatests/src/org/chromium/base/test/util/UnitTest.java)
+The [`@Batch("group_name")`](https://chromium.googlesource.com/chromium/src/+/master/base/test/android/javatests/src/org/chromium/base/test/util/Batch.java)
 annotation is used to run all tests with the same batch group name in the same
 instrumentation invocation. In other words, the browser process is not
 restarted between these tests, and so any changes to global state, like