Requiring confidence in initial regression range before bisecting.
BUG=422727

Review URL: https://codereview.chromium.org/644323002

Cr-Commit-Position: refs/heads/master@{#300196}
ro-berto authored and Commit bot committed Oct 18, 2014
1 parent 7beb8e1 commit 4436048
Showing 2 changed files with 110 additions and 4 deletions.
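A note for readers of this change: ConfidenceScore is imported from bisect_results below, but its implementation is not part of this diff. As a rough mental model (an assumption, not the actual bisect_results code), the score can be read as 100 * (1 - p) for a Welch's t-test over the flattened good and bad samples. A minimal sketch, with scipy.stats standing in for the project's own ttest helper:

# Illustrative sketch only: bisect_results.ConfidenceScore is not shown in
# this diff; scipy stands in for Chromium's own ttest module, so exact
# scores will differ from the project's.
from scipy import stats

def confidence_score(bad_values, good_values):
  """Scores from 0 to 100 how confidently two samples are distinct."""
  # Each argument is a list of per-repeat value lists, matching the
  # 'values' entries passed to ConfidenceScore in the change below.
  bad = [v for repeat in bad_values for v in repeat]
  good = [v for repeat in good_values for v in repeat]
  # Welch's t-test: no equal-variance assumption between the two samples.
  _, p_value = stats.ttest_ind(bad, good, equal_var=False)
  return 100.0 * (1.0 - p_value)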
22 changes: 22 additions & 0 deletions tools/auto_bisect/bisect_perf_regression.py
@@ -50,6 +50,7 @@
os.path.dirname(__file__), os.path.pardir, 'telemetry'))

from bisect_results import BisectResults
from bisect_results import ConfidenceScore
import bisect_utils
import builder
import math_utils
@@ -169,6 +170,9 @@

# The percentage at which confidence is considered high.
HIGH_CONFIDENCE = 95
# The confidence percentage we require to consider the initial range a
# regression based on the test results of the initial good and bad revisions.
REGRESSION_CONFIDENCE = 95

# Patch template to add a new file, DEPS.sha, under the src folder.
# This file contains the SHA1 value of the DEPS changes made while bisecting
@@ -2471,6 +2475,19 @@ def Run(self, command_to_run, bad_revision_in, good_revision_in, metric):
return results
print message, "Therefore we continue to bisect."

# Check how likely it is that the good and bad results are different
# beyond chance-induced variation.
if not self.opts.debug_ignore_regression_confidence:
  regression_confidence = ConfidenceScore(known_bad_value['values'],
                                          known_good_value['values'])
  if regression_confidence < REGRESSION_CONFIDENCE:
    results.error = ('We could not reproduce the regression with this '
                     'test/metric/platform combination with enough '
                     'confidence. There\'s still a chance that this is '
                     'actually a regression, but you may need to bisect '
                     'a different platform.')
    return results

# Can just mark the good and bad revisions explicitly here since we
# already know the results.
bad_revision_data = revision_data[revision_list[0]]
@@ -2968,6 +2985,7 @@ def __init__(self):
self.debug_ignore_build = None
self.debug_ignore_sync = None
self.debug_ignore_perf_test = None
self.debug_ignore_regression_confidence = None
self.debug_fake_first_test_mean = 0
self.gs_bucket = None
self.target_arch = 'ia32'
@@ -3135,6 +3153,10 @@ def _CreateCommandLineParser():
group.add_option('--debug_ignore_perf_test',
action='store_true',
help='DEBUG: Don\'t perform performance tests.')
group.add_option('--debug_ignore_regression_confidence',
action='store_true',
help='DEBUG: Don\'t score the confidence of the initial '
'good and bad revisions\' test results.')
group.add_option('--debug_fake_first_test_mean',
type='int',
default='0',
92 changes: 88 additions & 4 deletions tools/auto_bisect/bisect_perf_regression_test.py
@@ -17,17 +17,75 @@
import source_control


# Regression confidence: 0%
CLEAR_NON_REGRESSION = [
# Mean: 30.223 Std. Dev.: 11.383
[[16.886], [16.909], [16.99], [17.723], [17.952], [18.118], [19.028],
[19.552], [21.954], [38.573], [38.839], [38.965], [40.007], [40.572],
[41.491], [42.002], [42.33], [43.109], [43.238]],
# Mean: 34.76 Std. Dev.: 11.516
[[16.426], [17.347], [20.593], [21.177], [22.791], [27.843], [28.383],
[28.46], [29.143], [40.058], [40.303], [40.558], [41.918], [42.44],
[45.223], [46.494], [50.002], [50.625], [50.839]]
]
# Regression confidence: ~ 90%
ALMOST_REGRESSION = [
# Mean: 30.042 Std. Dev.: 2.002
[[26.146], [28.04], [28.053], [28.074], [28.168], [28.209], [28.471],
[28.652], [28.664], [30.862], [30.973], [31.002], [31.897], [31.929],
[31.99], [32.214], [32.323], [32.452], [32.696]],
# Mean: 33.008 Std. Dev.: 4.265
[[34.963], [30.741], [39.677], [39.512], [34.314], [31.39], [34.361],
[25.2], [30.489], [29.434]]
]
# Regression confidence: ~ 98%
BARELY_REGRESSION = [
# Mean: 28.828 Std. Dev.: 1.993
[[26.96], [27.605], [27.768], [27.829], [28.006], [28.206], [28.393],
[28.911], [28.933], [30.38], [30.462], [30.808], [31.74], [31.805],
[31.899], [32.077], [32.454], [32.597], [33.155]],
# Mean: 31.156 Std. Dev.: 1.980
[[28.729], [29.112], [29.258], [29.454], [29.789], [30.036], [30.098],
[30.174], [30.534], [32.285], [32.295], [32.552], [32.572], [32.967],
[33.165], [33.403], [33.588], [33.744], [34.147], [35.84]]
]
# Regression confidence: 99.5%
CLEAR_REGRESSION = [
# Mean: 30.254 Std. Dev.: 2.987
[[26.494], [26.621], [26.701], [26.997], [26.997], [27.05], [27.37],
[27.488], [27.556], [31.846], [32.192], [32.21], [32.586], [32.596],
[32.618], [32.95], [32.979], [33.421], [33.457], [34.97]],
# Mean: 33.190 Std. Dev.: 2.972
[[29.547], [29.713], [29.835], [30.132], [30.132], [30.33], [30.406],
[30.592], [30.72], [34.486], [35.247], [35.253], [35.335], [35.378],
[35.934], [36.233], [36.41], [36.947], [37.982]]
]
# Default options for the dry run
DEFAULT_OPTIONS = {
'debug_ignore_build': True,
'debug_ignore_sync': True,
'debug_ignore_perf_test': True,
'debug_ignore_regression_confidence': True,
'command': 'fake_command',
'metric': 'fake/metric',
'good_revision': 280000,
'bad_revision': 280005,
}

# This global is a placeholder for a generator, redefined by the test cases
# that use _MockRunTests.
_MockResultsGenerator = (x for x in [])

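# Builds the (results_dict, return_code) pair that
# RunPerformanceTestAndParseResults normally returns; _MockRunTests below
# feeds these canned results into the bisect in place of real test runs.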
def _FakeTestResult(values):
result_dict = {'mean': 0.0, 'std_err': 0.0, 'std_dev': 0.0, 'values': values}
success_code = 0
return (result_dict, success_code)


def _MockRunTests(*args, **kwargs):
_, _ = args, kwargs
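# Pull the next canned result from the module-level generator; .next() is
# the Python 2 generator API (next(gen) in Python 3).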
return _FakeTestResult(_MockResultsGenerator.next())


def _GetBisectPerformanceMetricsInstance(options_dict):
"""Returns an instance of the BisectPerformanceMetrics class."""
@@ -37,12 +95,13 @@ def _GetBisectPerformanceMetricsInstance(options_dict):
return bisect_instance


def _GetExtendedOptions(d, f):
def _GetExtendedOptions(improvement_dir, fake_first, ignore_confidence=True):
"""Returns the a copy of the default options dict plus some options."""
result = dict(DEFAULT_OPTIONS)
result.update({
'improvement_direction': d,
'debug_fake_first_test_mean': f})
'improvement_direction': improvement_dir,
'debug_fake_first_test_mean': fake_first,
'debug_ignore_regression_confidence': ignore_confidence})
return result


@@ -301,11 +360,11 @@ def testDryRun(self):

def testBisectImprovementDirectionFails(self):
"""Dry run of a bisect with an improvement instead of regression."""

# Test result goes from 0 to 100 where higher is better
results = _GenericDryRun(_GetExtendedOptions(1, 100))
self.assertIsNotNone(results.error)
self.assertIn('not a regression', results.error)

# Test result goes from 0 to -100 where lower is better
results = _GenericDryRun(_GetExtendedOptions(-1, -100))
self.assertIsNotNone(results.error)
@@ -320,6 +379,31 @@ def testBisectImprovementDirectionSucceeds(self):
results = _GenericDryRun(_GetExtendedOptions(1, -100))
self.assertIsNone(results.error)

@mock.patch('bisect_perf_regression.BisectPerformanceMetrics.'
'RunPerformanceTestAndParseResults', _MockRunTests)
def testBisectStopsOnDoubtfulRegression(self):
global _MockResultsGenerator
_MockResultsGenerator = (rs for rs in CLEAR_NON_REGRESSION)
results = _GenericDryRun(_GetExtendedOptions(0, 0, False))
self.assertIsNotNone(results.error)
self.assertIn('could not reproduce the regression', results.error)

_MockResultsGenerator = (rs for rs in ALMOST_REGRESSION)
results = _GenericDryRun(_GetExtendedOptions(0, 0, False))
self.assertIsNotNone(results.error)
self.assertIn('could not reproduce the regression', results.error)

@mock.patch('bisect_perf_regression.BisectPerformanceMetrics.'
'RunPerformanceTestAndParseResults', _MockRunTests)
def testBisectContinuesOnClearRegression(self):
global _MockResultsGenerator
_MockResultsGenerator = (rs for rs in CLEAR_REGRESSION)
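# Each fixture supplies exactly two fake results, consumed by the initial
# bad- and good-revision test runs. Once the confidence gate passes, the
# bisect asks the mock for a third result and exhausts the generator, so
# StopIteration here means the bisection continued past the initial check.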
with self.assertRaises(StopIteration):
  _GenericDryRun(_GetExtendedOptions(0, 0, False))

_MockResultsGenerator = (rs for rs in BARELY_REGRESSION)
with self.assertRaises(StopIteration):
  _GenericDryRun(_GetExtendedOptions(0, 0, False))

def testGetCommitPosition(self):
cp_git_rev = '7017a81991de983e12ab50dfc071c70e06979531'
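As a rough sanity check on the fixtures above, the four sample pairs can be run through the confidence_score sketch near the top of this page. That stand-in will not reproduce the quoted percentages exactly (those come from the project's own scoring), but under it the first two fixtures fall below the new REGRESSION_CONFIDENCE threshold of 95 and the last two clear it, matching what the tests expect:

# Hypothetical check, reusing the confidence_score sketch from the top of
# this page together with the fixture constants above.
for name, (first, second) in [
    ('CLEAR_NON_REGRESSION', CLEAR_NON_REGRESSION),
    ('ALMOST_REGRESSION', ALMOST_REGRESSION),
    ('BARELY_REGRESSION', BARELY_REGRESSION),
    ('CLEAR_REGRESSION', CLEAR_REGRESSION)]:
  # The first two should score under 95 and the last two over it; absolute
  # numbers will differ from the percentages quoted in the fixture comments.
  print name, round(confidence_score(first, second), 1)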
