Added documentation and test coverage. #10035

Merged
6 commits merged on Jun 5, 2017

2 changes: 2 additions & 0 deletions .gitignore
@@ -51,3 +51,5 @@ compile_commands.json
8
4
SortedCFDatabase.def
htmlcov
.coverage
206 changes: 126 additions & 80 deletions benchmark/scripts/compare_perf_tests.py
@@ -21,8 +21,22 @@
from math import sqrt


class PerformanceTestResult:
class PerformanceTestResult(object):
"""PerformanceTestResult holds results from executing an individual
benchmark from the Swift Benchmark Suite as reported by the test driver
(Benchmark_O, Benchmark_Onone, Benchmark_Ounchecked or Benchmark_Driver).

It depends on the log format emitted by the test driver in the form:
#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),MAX_RSS(B)

The last column, MAX_RSS, is emitted only for runs instrumented by the
Benchmark_Driver to measure rough memory use during the execution of the
benchmark.
"""
def __init__(self, csv_row):
"""PerformanceTestResult instance is created from an iterable with
length of 8 or 9. (Like a row provided by the CSV parser.)
"""
# csv_row[0] is just an ordinal number of the test - skip that
self.name = csv_row[1] # Name of the performance test
self.samples = int(csv_row[2]) # Number of measurement samples taken
@@ -36,25 +50,41 @@ def __init__(self, csv_row):
self.median = int(csv_row[7]) # Median runtime (ms)
self.max_rss = ( # Maximum Resident Set Size (B)
int(csv_row[8]) if len(csv_row) > 8 else None)
# TODO if we really want to compute mean MAX_RSS: self.S_memory

def __repr__(self):
return (
'<PerformanceTestResult name:{0.name!r} '
'samples:{0.samples!r} min:{0.min!r} max:{0.max!r} '
'mean:{0.mean!r} sd:{0.sd!r} median:{0.median!r}>'.format(self))
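
# Illustrative usage sketch (not part of this patch; the log line is made up):
# a PerformanceTestResult is built from one parsed row in the log format
# documented in the class docstring above.
log_line = '34,BitCount,20,3,4,4,0,4,10192859'
result = PerformanceTestResult(log_line.split(','))
print(result)
# roughly: <PerformanceTestResult name:'BitCount' samples:20 min:3 max:4 ...>
print(result.max_rss)   # 10192859 (B); parsed only when the 9th column is present
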

@property
def sd(self): # Standard Deviation (ms)
def sd(self):
"""Standard Deviation (ms)"""
return (0 if self.samples < 2 else
sqrt(self.S_runtime / (self.samples - 1)))

# Compute running variance, B. P. Welford's method
# See Knuth TAOCP vol 2, 3rd edition, page 232, or
# https://www.johndcook.com/blog/standard_deviation/
# M is mean, Standard Deviation is defined as sqrt(S/k-1)
@staticmethod
def running_mean_variance((k, M_, S_), x):
"""
Compute running variance, B. P. Welford's method
See Knuth TAOCP vol 2, 3rd edition, page 232, or
https://www.johndcook.com/blog/standard_deviation/
M is mean, Standard Deviation is defined as sqrt(S/(k-1))
"""
k = float(k + 1)
M = M_ + (x - M_) / k
S = S_ + (x - M_) * (x - M)
return (k, M, S)
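
# Minimal sketch of the Welford update above (sample values are made up):
# folding samples through running_mean_variance yields k = count, M = mean,
# and S such that the sample standard deviation is sqrt(S / (k - 1)).
state = (0, 0.0, 0.0)                   # (k, M, S)
for x in [335, 338, 349]:
    state = PerformanceTestResult.running_mean_variance(state, x)
k, M, S = state
# k == 3.0, M == 340.666..., sqrt(S / (k - 1)) == 7.37... -- the same result as
# computing the sample standard deviation of [335, 338, 349] directly
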

def merge(self, r):
"""Merging test results recomputes min and max.
It attempts to recompute mean and standard deviation when all samples
are available. There is no correct way to compute these values from
test results that are summaries from more than 3 samples.

The use case here is comparing tests results parsed from concatenated
log files from multiple runs of benchmark driver.
"""
self.min = min(self.min, r.min)
self.max = max(self.max, r.max)
# self.median = None # unclear what to do here
@@ -65,23 +95,31 @@ def push(x):
(self.samples, self.mean, self.S_runtime) = state

# Merging test results with up to 3 samples is exact
# TODO investigate how to best handle merge of higher sample counts
values = [r.min, r.max, r.median, r.mean][:min(r.samples, 4)]
values = [r.min, r.max, r.median][:min(r.samples, 3)]
map(push, values)

# Column labels for header row in results table
header = ('TEST', 'MIN', 'MAX', 'MEAN', 'MAX_RSS')

# Tuple of values formatted for display in results table:
# (name, min value, max value, mean value, max_rss)
def values(self):
return (self.name, str(self.min), str(self.max), str(int(self.mean)),
str(self.max_rss) if self.max_rss else '-')


class ResultComparison:
"""Values property for display in results table comparisons
in format: ('TEST', 'MIN', 'MAX', 'MEAN', 'MAX_RSS').
"""
return (
self.name,
str(self.min), str(self.max), str(int(self.mean)),
str(self.max_rss) if self.max_rss else '—'
)
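
# Sketch of merge() with made-up rows (assumes the hidden part of __init__
# seeds S_runtime from the reported SD): merging two single-sample results of
# the same test recomputes MIN/MAX and re-estimates the mean via the Welford
# updates above.
a = PerformanceTestResult('1,Ackermann,1,153,153,153,0,153'.split(','))
b = PerformanceTestResult('1,Ackermann,1,149,149,149,0,149'.split(','))
a.merge(b)
print(a.values())   # ('Ackermann', '149', '153', '151', '—')
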


class ResultComparison(object):
"""ResultComparison compares MINs from new and old PerformanceTestResult.
It computes speedup ratio and improvement delta (%).
"""
def __init__(self, old, new):
self.old = old
self.new = new
assert(old.name == new.name)
self.name = old.name # Test name, convenience accessor

# Speedup ratio
@@ -91,27 +129,43 @@ def __init__(self, old, new):
ratio = (new.min + 0.001) / (old.min + 0.001)
self.delta = ((ratio - 1) * 100)

self.is_dubious = ( # FIXME this is legacy
# Add ' (?)' to the speedup column as indication of dubious changes:
# result's MIN falls inside the (MIN, MAX) interval of result they are
# being compared with.
self.is_dubious = (
' (?)' if ((old.min < new.min and new.min < old.max) or
(new.min < old.min and old.min < new.max))
else '')

# Column labels for header row in results table
header = ('TEST', 'OLD', 'NEW', 'DELTA', 'SPEEDUP')

# Tuple of values formatted for display in results table:
# (name, old value, new value, delta [%], speedup ratio)
def values(self):
return (self.name, str(self.old.min), str(self.new.min),
"""Values property for display in results table comparisons
in format: ('TEST', 'OLD', 'NEW', 'DELTA', 'SPEEDUP').
"""
return (self.name,
str(self.old.min), str(self.new.min),
'{0:+.1f}%'.format(self.delta),
'{0:.2f}x{1}'.format(self.ratio, self.is_dubious))
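
# Worked example for ResultComparison (rows are made up; assumes the speedup
# ratio hidden above is old.min / new.min with the same 0.001 epsilon): a MIN
# dropping from 100 to 80 reports a -20.0% delta and a 1.25x speedup, with no
# ' (?)' marker because the old and new (MIN, MAX) ranges do not overlap.
old = PerformanceTestResult('0,Ackermann,3,100,120,110,5,108'.split(','))
new = PerformanceTestResult('0,Ackermann,3,80,90,85,3,84'.split(','))
print(ResultComparison(old, new).values())
# ('Ackermann', '100', '80', '-20.0%', '1.25x')
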


class TestComparator:
def __init__(self, old_file, new_file, delta_threshold, changes_only):
class TestComparator(object):
"""TestComparator parses `PerformanceTestResult`s from CSV log files.
Then it determines which tests were `added`, `removed` and which can be
compared. It then splits the `ResultComparison`s into 3 groups according to
the `delta_threshold` by the change in performance: `increased`,
`decreased` and `unchanged`.

The lists of `added`, `removed` and `unchanged` tests are sorted
alphabetically. The `increased` and `decreased` lists are sorted in
descending order by the amount of change.
"""
def __init__(self, old_file, new_file, delta_threshold):

def load_from_CSV(filename): # handles output from Benchmark_O and
def skip_totals(row): # Benchmark_Driver (added MAX_RSS column)
return len(row) > 7 and row[0].isdigit()
return len(row) > 7 and row[0].isdigit()
tests = map(PerformanceTestResult,
filter(skip_totals, csv.reader(open(filename))))

@@ -131,9 +185,9 @@ def add_or_merge(names, r):
added_tests = new_tests.difference(old_tests)
removed_tests = old_tests.difference(new_tests)

self.added = sorted(map(lambda t: new_results[t], added_tests),
self.added = sorted([new_results[t] for t in added_tests],
key=lambda r: r.name)
self.removed = sorted(map(lambda t: old_results[t], removed_tests),
self.removed = sorted([old_results[t] for t in removed_tests],
key=lambda r: r.name)

def compare(name):
@@ -144,24 +198,28 @@ def compare(name):
def partition(l, p):
return reduce(lambda x, y: x[not p(y)].append(y) or x, l, ([], []))
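
# Sketch of the reduce-based partition helper above: a single pass over the
# list, appending each element to the first tuple slot when the predicate
# holds and to the second slot when it does not. (reduce is a builtin on
# Python 2, which this script targets.)
def partition(l, p):
    return reduce(lambda x, y: x[not p(y)].append(y) or x, l, ([], []))

partition([1.2, 0.9, 1.1, 0.8], lambda r: r < 1.0)
# -> ([0.9, 0.8], [1.2, 1.1])
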

# TODO take standard deviation (SD) into account
decreased, not_decreased = partition(
comparisons, lambda c: c.ratio < (1 - delta_threshold))
increased, unchanged = partition(
not_decreased, lambda c: c.ratio > (1 + delta_threshold))

# sorted partitions
names = map(lambda c: c.name, comparisons)
names = [c.name for c in comparisons]
comparisons = dict(zip(names, comparisons))
self.decreased = map(lambda c: comparisons[c.name],
sorted(decreased, key=lambda c: -c.delta))
self.increased = map(lambda c: comparisons[c.name],
sorted(increased, key=lambda c: c.delta))
self.unchanged = map(lambda c: comparisons[c.name],
sorted(unchanged, key=lambda c: c.name))


class ReportFormatter:
self.decreased = [comparisons[c.name]
for c in sorted(decreased, key=lambda c: -c.delta)]
self.increased = [comparisons[c.name]
for c in sorted(increased, key=lambda c: c.delta)]
self.unchanged = [comparisons[c.name]
for c in sorted(unchanged, key=lambda c: c.name)]


class ReportFormatter(object):
"""ReportFormatter formats the `PerformanceTestResult`s and
`ResultComparison`s provided by `TestComparator` using their `header` and
`values()` into report table. Supported formats are: `markdown` (used for
displaying benchmark results on GitHub), `git` and `html`.
"""
def __init__(self, comparator, old_branch, new_branch, changes_only):
self.comparator = comparator
self.old_branch = old_branch
@@ -178,38 +236,39 @@ def __init__(self, comparator, old_branch, new_branch, changes_only):
{0} ({1}): {2}"""

def markdown(self):
return self.__formatted_text(
return self._formatted_text(
ROW='{0} | {1} | {2} | {3} | {4} \n',
HEADER_SEPARATOR='---',
DETAIL=self.MARKDOWN_DETAIL)

def git(self):
return self.__formatted_text(
return self._formatted_text(
ROW='{0} {1} {2} {3} {4} \n',
HEADER_SEPARATOR=' ',
DETAIL=self.GIT_DETAIL)
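
# End-to-end usage sketch (hypothetical file and branch names); this is what
# main() below wires together from the command line arguments.
comparator = TestComparator('old_run.csv', 'new_run.csv', 0.05)
formatter = ReportFormatter(comparator, 'OLD_MIN', 'NEW_MIN', False)
print(formatter.markdown())   # or formatter.git() / formatter.html()
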

def __column_widths(self):
def _column_widths(self):
changed = self.comparator.decreased + self.comparator.increased
comparisons = (changed if self.changes_only else
changed + self.comparator.unchanged)
comparisons += self.comparator.added + self.comparator.removed

values = map(lambda c: c.values(), comparisons)
widths = map(lambda columns: map(len, columns),
[PerformanceTestResult.header, ResultComparison.header] +
values)
widths = [
map(len, columns) for columns in
[PerformanceTestResult.header, ResultComparison.header] +
[c.values() for c in comparisons]
]

def max_widths(maximum, widths):
return tuple(map(max, zip(maximum, widths)))

return reduce(max_widths, widths, tuple([0] * 5))

def __formatted_text(self, ROW, HEADER_SEPARATOR, DETAIL):
widths = self.__column_widths()
def _formatted_text(self, ROW, HEADER_SEPARATOR, DETAIL):
widths = self._column_widths()

def justify_columns(contents):
return tuple(map(lambda (w, c): c.ljust(w), zip(widths, contents)))
return tuple([c.ljust(w) for w, c in zip(widths, contents)])

def row(contents):
return ROW.format(*justify_columns(contents))
@@ -318,8 +377,8 @@ def table(title, results, speedup_color):
]))


def main():

def parse_args(args):
"""Parse command line arguments and set default values."""
parser = argparse.ArgumentParser(description='Compare Performance tests.')
parser.add_argument('--old-file',
help='Baseline performance test suite (csv file)',
@@ -339,42 +398,29 @@ def main():
parser.add_argument('--old-branch',
help='Name of the old branch', default='OLD_MIN')
parser.add_argument('--delta-threshold',
help='Delta threshold. Default 0.05.', default='0.05')
help='Delta threshold. Default 0.05.',
type=float, default=0.05)
return parser.parse_args(args)


args = parser.parse_args()
def main():
args = parse_args(sys.argv[1:])
comparator = TestComparator(args.old_file, args.new_file,
float(args.delta_threshold), args.changes_only)
args.delta_threshold)
formatter = ReportFormatter(comparator, args.old_branch, args.new_branch,
args.changes_only)

if args.format:
if args.format.lower() != 'markdown':
print(formatter.git())
else:
print(formatter.markdown())

if args.format:
if args.format.lower() == 'html':
if args.output:
write_to_file(args.output, formatter.html())
else:
print('Error: missing --output flag.')
sys.exit(1)
elif args.format.lower() == 'markdown':
if args.output:
write_to_file(args.output, formatter.markdown())
elif args.format.lower() != 'git':
print('{0} is unknown format.'.format(args.format))
sys.exit(1)


def write_to_file(file_name, data):
"""
Write data to given file
"""
file = open(file_name, 'w')
file.write(data)
file.close
formats = {
'markdown': formatter.markdown,
'git': formatter.git,
'html': formatter.html
}

report = formats[args.format]()
print(report)

if args.output:
with open(args.output, 'w') as f:
f.write(report)


if __name__ == '__main__':
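
# Example invocation of the reworked CLI (hypothetical file paths; flags per
# parse_args() above, some of which sit in the collapsed part of the diff):
#   python compare_perf_tests.py \
#       --old-file old_run.csv --new-file new_run.csv \
#       --format markdown --output report.md --changes-only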