#!/usr/bin/perl
# Copyright (C) 2004-2011, The Perl Foundation.
## The "make spectest" target tells us how many tests we failed
## (hopefully zero!), but doesn't say how many were actually passed.
## This script runs the spectest tests and summarizes
## passed, failed, todoed, skipped, executed and planned test results.
##
## Usage:
## tools/test_summary [--timing | --view] <implementation> [testlist]
##
## The --timing option enables per-test timing in microseconds, saved
## in docs/test_summary.times.
## The --view option renders docs/test_summary.times as various reports.
## If supplied, C<testlist> identifies an alternate list of tests
## to use (e.g., t/localtest.data).
## Rakudo note:
## Using this script with Rakudo requires setting the PERL6LIB env var
## to point to its lib directory so that Test.pm can be found.
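## For example, to run the spectests under Rakudo with timing enabled
## (PERL6LIB=lib assumes Test.pm lives in Rakudo's lib directory, per
## the note above):
##   PERL6LIB=lib tools/test_summary --timing rakudo
## or with an alternate test list:
##   PERL6LIB=lib tools/test_summary rakudo t/localtest.data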
use strict;
use warnings;
use Time::Local;
use Time::HiRes;
use Getopt::Long;
my $timing;
my $view;
unless (GetOptions('timing' => \$timing, 'view' => \$view)) {
die "$0 cannot handle the unknown option\n";
}
if ($view) { Simple::Relative::Benchmarking::view(); exit(0); }
my $benchmark;
# Comment out the next line to skip benchmarking; see docs below
$benchmark = Simple::Relative::Benchmarking::begin() if $timing;
# Which implementation are we running?
my $implementation = $ARGV[0] ||
die "Must specify an implementation";
# Build the list of test scripts to run in @tfiles
my $testlist = $ARGV[1] || 't/spectest.data';
my $fh;
open($fh, '<', $testlist) || die "Can't read $testlist: $!";
my (@tfiles, %tname); # @tfiles lists all test file names before fudging
while (<$fh>) {
/^ *#/ && next;
my ($specfile) = split ' ', $_;
next unless $specfile;
push @tfiles, "t/spec/$specfile";
}
close $fh or die $!;
# Fudge any implementation specific tests by running the fudgeall script
{
my $cmd = join ' ', $^X, 't/spec/fudgeall', $implementation, @tfiles;
# Fudgeall prints the name of each test script, but changes the name
# ending to match the implementation instead of .t if tests were fudged.
print "$cmd\n";
@tfiles = split ' ', `$cmd`; # execute fudgeall, collect test names
}
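# For example, when the implementation is 'rakudo', a fudged
# t/spec/S02-names/our.t is listed as t/spec/S02-names/our.rakudo,
# while unfudged scripts keep their .t names.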
# Put test names in %tname, with the 't/spec/' removed from the start
# and truncated to 49 characters. Keep track of the maximum name length.
@tfiles = sort @tfiles;
my $max = 0;
for my $tfile (@tfiles) {
my $tname = $tfile;
$tname =~ s{^t/spec/}{};
$tname = substr($tname, 0, 49);
if (length($tname) > $max) {
$max = length($tname);
}
$tname{$tfile} = $tname;
}
# Prepare arrays and hashes to gather and accumulate test statistics
my @col = qw(pass fail todo skip plan spec);
my @syn = qw(S01 S02 S03 S04 S05 S06 S07 S09 S10 S11 S12 S13 S14 S16 S17 S19 S24 S26 S28 S29 S32 int);
my %syn; # number of test scripts per Synopsis
my %sum; # total pass/fail/todo/skip/test/plan per Synopsis
my $syn;
for $syn (@syn) {
$syn{$syn} = 0;
for my $col (@col) {
$sum{"$syn-$col"} = 0;
}
}
$syn = ''; # to reliably trigger the display of column headings
# Execute all test scripts, aggregate the results, display the failures
$| = 1;
my ( @fail, @plan_hint );
my %plan_per_file;
for my $tfile (@tfiles) {
my $th;
open($th, '<', $tfile) || die "Can't read $tfile: $!\n";
my ($pass,$fail,$todo,$skip,$plan,$abort,$bonus) = (0,0,0,0,0,0,0);
my $no_plan = 0; # planless works, but is unhelpful for statistics
# http://www.shadowcat.co.uk/blog/matt-s-trout/a-cunning-no_plan/
while (<$th>) { # extract the number of tests planned
if (/^\s*plan\D*(\d+)/) { $plan = $1; last; }
elsif (/^\s*plan\s+\*;/) { $no_plan = 1; last; }
}
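# For example, a line 'plan 42;' sets $plan to 42, while 'plan *;'
# sets $no_plan instead.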
close $th or die $!;
my $tname = $tname{$tfile};
# Repeat the column headings at the start of each Synopsis
if ( $syn ne substr($tname, 0, 3) ) {
$syn = substr($tname, 0, 3);
printf( "%s pass fail todo skip plan\n", ' ' x $max );
unless ( exists $syn{$syn} ) {
push @fail, "note: test_summary.pl \@syn does not have $syn";
}
}
$syn{$syn}++;
printf "%s%s..", $tname, '.' x ($max - length($tname));
my $cmd = "./perl6 $tfile";
# Run the test, collecting all stdout in @results
my @results = split "\n", qx{$cmd};
my (%skip, %todopass, %todofail);
my ($time1, $time2, $testnumber, $test_comment ) = ( 0, 0, 0, '' );
my @times = (); my @comments = ();
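# Example TAP lines classified by the loop below (descriptions are
# illustrative):
#   1..140
#   ok 3 - some description
#   not ok 4 - some description
#   ok 5 - some description # TODO reason
#   ok 6 - some description # SKIP reason
#   # t=1234            (timing line, when PERL6_TEST_TIMES is set)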
for (@results) {
# Pass over the optional line containing "1..$planned"
if (/^1\.\.(\d+)/) { $plan = $1 if $1 > 0; next; }
# Handle lines containing test times
if (/^# t=(\d+)/) {
my $microseconds = $1;
if ( $testnumber > 0 ) {
# Do this only if the time was after a test result
$times[ $testnumber] = $microseconds;
$comments[$testnumber] = $test_comment;
$testnumber = 0; # must see another "ok $n" first
}
next;
}
# Ignore lines not beginning with "ok $test" or "not ok $test"
next unless /^(not )?ok +(\d+)/;
if (/#\s*SKIP\s*(.*)/i) { $skip++; $skip{$1}++; }
elsif (/#\s*TODO\s*(.*)/i) { $todo++;
my $reason = $1;
if (/^ok /) { $todopass{$reason}++ }
else { $todofail{$reason}++ }
}
elsif (/^not ok +(.*)/) { $fail++; push @fail, "$tname $1"; }
elsif (/^ok +(\d+) - (.*)$/) {
$pass++; $testnumber = $1; $test_comment = $2;
}
elsif (/^ok +(\d+)$/) {
$pass++; $testnumber = $1; $test_comment = "";
}
}
my $test = $pass + $fail + $todo + $skip;
if ($plan > $test) {
$abort = $plan - $test;
$fail += $abort;
push @fail, "$tname aborted $abort test(s)";
}
elsif ($plan < $test) {
$bonus = $test - $plan;
push @fail, "$tname passed $bonus unplanned test(s)";
}
if ($no_plan) {
push @plan_hint, "'plan *;' could become 'plan $plan;' in $tname";
}
printf "%4d %4d %4d %4d %4d\n",
$pass, $fail, $todo, $skip, $plan;
$sum{'pass'} += $pass; $sum{"$syn-pass"} += $pass;
$sum{'fail'} += $fail; $sum{"$syn-fail"} += $fail;
$sum{'todo'} += $todo; $sum{"$syn-todo"} += $todo;
$sum{'skip'} += $skip; $sum{"$syn-skip"} += $skip;
$sum{'plan'} += $plan; $sum{"$syn-plan"} += $plan;
{
my $f = $tfile;
$f =~ s/\.$implementation$/.t/;
$plan_per_file{$f} = $plan;
}
for (keys %skip) {
printf " %3d skipped: %s\n", $skip{$_}, $_;
}
for (keys %todofail) {
printf " %3d todo : %s\n", $todofail{$_}, $_;
}
for (keys %todopass) {
printf " %3d todo PASSED: %s\n", $todopass{$_}, $_;
}
if ($abort) {
printf " %3d tests aborted (missing ok/not ok)\n", $abort;
}
if ($bonus) {
printf " %3d tests more than planned were run\n", $bonus;
}
defined $benchmark && $benchmark->log_script_times($tfile,\@times,\@comments);
} # for my $tfile (@tfiles)
defined $benchmark && $benchmark->end(); # finish simple relative benchmarking
# Calculate plan totals from test scripts grouped by Synopsis and overall.
# This ignores any test list and processes all unfudged files in t/spec/.
# Test scripts that use 'no_plan' or 'plan *' make this total
# inaccurate.
for my $syn (sort keys %syn) {
my $grepcmd = "grep ^plan t/spec/$syn*/* -rHn"; # recurse, always say filename, include line number for troubleshooting
my @grep_output = `$grepcmd`; # gets an array of all the plan lines
my $total_tests_planned_per_synopsis = 0;
for (@grep_output) {
# Most test scripts have a conventional 'plan 42;' or so near
# the beginning which is what we need. Unfortunately some have
# 'plan $x*$y;' or so, which we cannot dynamically figure out.
# Example grep output: t/spec/S02-names/our.t:4:plan 10;
# Extract the filename and plan count from that if possible.
if ( m/ ^ ([^:]*) : \d+ : plan (.*) $ /x ) {
my ( $filename, $planexpression ) = ( $1, $2 );
my $script_planned_tests = 0;
if ( $filename =~ m/\.t$/ ) {
if ( $planexpression =~ m/ ^ \s* (\d+) \s* ; $ /x ) {
# A conventional 'plan 42;' type of line
$script_planned_tests = $1;
}
else {
# It is some other plan argument, either * or variables.
# A workaround is to take the actual number of tests run
# from the output and assume it is the same number,
# but sometimes that is missing too.
if ( exists $plan_per_file{$filename} ) {
$script_planned_tests = $plan_per_file{$filename};
}
}
}
$total_tests_planned_per_synopsis += $script_planned_tests;
}
}
$sum{"$syn-spec"} = $total_tests_planned_per_synopsis;
$sum{'spec'} += $total_tests_planned_per_synopsis;
}
# Planless testing (eg 'plan *;') is useless for static analysis, making
# tools jump through hoops to calculate the number of planned tests.
# This part displays hints about the scripts that could easily be edited
# to make life easier on the reporting side.
# A test suite author can follow the hints and write the automatically
# counted number of tests into the test script, changing it back from
# planless to planned.
if (@plan_hint) {
print "----------------\n";
foreach (@plan_hint) {
print " $_\n";
}
}
# Show test totals grouped by Synopsis, followed by overall totals
print "----------------\n";
my $sumfmt = qq(%-11.11s %6s,%6s,%6s,%6s,%6s,%6s\n);
printf $sumfmt, qq{"Synopsis",}, map { qq{"$_"} } @col;
for my $syn (sort keys %syn) {
printf $sumfmt, qq{"$syn",}, map { $sum{"$syn-$_"} } @col;
}
my $total = scalar(@tfiles).' regression files';
printf $sumfmt, qq{"total",}, map { $sum{$_} } @col;
print "----------------\n";
# Optionally show the statistics that can be manually appended to
# docs/spectest-progress.csv
if ($ENV{'REV'}) {
my @gmt = gmtime;
my $testdate = sprintf '"%4d-%02d-%02d %02d:%02d"', $gmt[5]+1900,
$gmt[4]+1, $gmt[3], $gmt[2], $gmt[1];
my $filecount = scalar(@tfiles);
my $passpercent = 100 * $sum{'pass'} / $sum{'spec'};
print join(',', $ENV{'REV'}, (map { $sum{$_} } @col),
$filecount), "\n";
printf "spectest-progress.csv update: " .
"%d files, %d (%.1f%% of %d) pass, %d fail\n",
$filecount, $sum{'pass'}, $passpercent, $sum{'spec'}, $sum{'fail'};
}
# List descriptions of the tests that failed
if (@fail) {
print "Failure summary:\n";
foreach (@fail) {
print "$_\n";
}
}
else {
print "No failures!\n";
}
# End of main program
#-------------------- Simple Relative Benchmarking ---------------------
package Simple::Relative::Benchmarking;
# Initialize simple relative benchmarking. Called before the first test
# script runs.
sub begin {
my $timings = shift || 5; # number of timings to keep (default 5)
my $self = {};
my @test_history;
$self->{'Timings'} = $timings;
$self->{'Last_test_loaded'} = '';
if ( open( $self->{'file_in'}, '<', 'docs/test_summary.times') ) {
my $file_in = $self->{'file_in'};
my $line = <$file_in>; chomp $line;
if ( $line =~ m/{"test_.+":\[/i ) { # should be Test_history
$line = <$file_in>; chomp $line;
while ( $line =~ m/\s\s(.+\d\d\d\d-\d\d-\d\d.\d\d:\d\d:\d\d.+)/ ) {
my $history_line = $1;
$history_line =~ s/,$//; # trim possible trailing comma
push @test_history, $history_line;
$line = <$file_in>; chomp $line;
} # ends on the ' ],' line after the test_history
$line = <$file_in>; chomp $line;
# if ( $line =~ m/ "test_microseconds":{/i ) {
# warn "begin reached 'test_microseconds'\n";
# }
}
}
open( $self->{'file_out'}, '>', 'docs/test_summary.times.tmp') or die "cannot create docs/test_summary.times.tmp: $!";
my $parrot_version = qx{./perl6 -e'print \$*VM<config><revision>'};
my $impl_version = qx{git log --pretty=oneline --abbrev-commit --max-count=1 .}; chomp $impl_version;
$impl_version =~ s/^([0-9a-f]+)\.\.\./$1/; # delete a possible trailing ...
$impl_version =~ s/\\/\\\\/g; # escape all backslashes
$impl_version =~ s/\"/\\\"/g; # escape all double quotes
my $file_out = $self->{'file_out'};
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime(time());
push @test_history, sprintf("[\"%4d-%02d-%02d %02d:%02d:%02d\",%d,\"%s\"]",
$year+1900, $mon+1, $mday, $hour, $min, $sec,
$parrot_version, $impl_version );
# Delete the oldest test_history entries if there are too many.
while ( @test_history > $self->{'Timings'} ) { shift @test_history; }
print $file_out qq!{"test_history":[\n!;
print $file_out " " . join(",\n ",@test_history) . "\n ],\n";
print $file_out qq! "test_microseconds":{!;
# tell Test.pm to output per-test timestamps
$ENV{'PERL6_TEST_TIMES'} = 'true';
return bless $self;
}
# Track simple relative benchmarking. Called after running each test script.
sub log_script_times {
my $self = shift;
my $test_name = shift;
my $ref_times = shift;
my $ref_comments = shift;
# Make local arrays of the execution times in microseconds, and test
# comments (or descriptions). Since tests are being added to and
# removed from the test suite, test numbers change over time. The
# comments are sometimes empty or duplicated, but they are the only
# way to correlate test results if the test suite is edited.
my (@times) = @$ref_times;
my (@comments) = @$ref_comments;
shift @times; # offset by 1: the first result becomes $times[0];
shift @comments;
for ( my $i=0; $i<@times; $i++ ) {
if ( not defined $comments[$i] ) { $comments[$i] = ''; }
$comments[$i] =~ s/\\/\\\\/g; # escape all backslashes
$comments[$i] =~ s/\"/\\\"/g; # escape all double quotes
}
my ( $line );
my $file_in = $self->{'file_in'};
my $file_out = $self->{'file_out'};
$test_name =~ s{^t/spec/}{}; # eg 'S02-literals/numeric.t'
my $test_separator;
if ( $self->{'Last_test_loaded'} eq '' ) {
$test_separator = "\n";
}
else {
$test_separator = ",\n";
}
while ( not eof($file_in) and $self->{'Last_test_loaded'} lt $test_name ) {
$line = <$file_in>; chomp $line;
if ( $line =~ m/^\s\s"(.+)":.$/ ) {
$self->{'Last_test_loaded'} = $1;
}
}
my @logged_results;
if ( not eof($file_in) and $self->{'Last_test_loaded'} eq $test_name ) {
my $line = <$file_in>; chomp $line;
while ( not eof($file_in) and $line =~ m/^\s\s\s\[(\d+),\[(.+?)\],?/ ) {
my $test_number = $1;
my @timings = split /,/ , $2;
$logged_results[$test_number-1] = [ @timings ];
$line = <$file_in>; chomp $line;
}
}
my $microseconds = [];
my $testcount = @times;
for ( my $test_number=0; $test_number<$testcount; $test_number++) {
unless ( defined($times[$test_number]) ) { $times[$test_number] = 0; }
my ( @times_in_file );
if ( defined $logged_results[$test_number] ) {
@times_in_file = ( @{$logged_results[$test_number]} );
}
push @times_in_file, $times[$test_number];
if ( not defined( $times_in_file[0] ) ) { shift @times_in_file; }
# Delete the oldest test timings if there are too many.
while ( @times_in_file > $self->{'Timings'} ) { shift @times_in_file; }
$$microseconds[$test_number] = [ @times_in_file ];
}
my $test_number = 1; # start from number 1 again
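# Note for the map below: the concatenation evaluates left to right in
# practice, so $test_number++ has already incremented by the time
# $comments[$test_number-2] is read; for test N that indexes
# $comments[N-1], which (after the shift above) is test N's comment.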
print $file_out
$test_separator .
qq' "$test_name":[\n' .
join(",\n", map {' ['.$test_number++.',['.join(',',@$_).'],"'.$comments[$test_number-2].'"]'} @$microseconds) .
qq'\n ]';
}
# Finish simple relative benchmarking. Called after the last test script
sub end {
my $self = shift;
my $file_in = $self->{'file_in'};
my $file_out = $self->{'file_out'};
print $file_out "\n }\n}\n";
close $file_out or warn $!;
close $file_in or warn $!;
unlink 'docs/test_summary.times';
rename 'docs/test_summary.times.tmp', 'docs/test_summary.times';
}
# Report on simple relative benchmarking. Implements the --view option
sub view
{
my $choice = '1'; my $choiceA; my $choiceB;
do {
my ($input, $output, $t, @timings, $script, @z, @sorted, @runs);
my @ordername = ('', 'sorted by time', 'sorted by %change', 'sorted by time change', 'in test order' );
open($input, '<', 'docs/test_summary.times') or die "$0 cannot open docs/test_summary.times\n";
while (<$input>) { # custom parser to avoid dependency on JSON.pm
# a commit identification line
if (/^\s\s\[\"([^"]*)\",\d+,\"([^"]*)\"/) { push @runs, { 'time'=>$1, 'comment'=>$2 }; }
# test script name
if (/^\s\s\"(.+)\":\[$/x) { $script = $1; }
# individual test times
if (/^\s\s\s\[(\d+),\[([0-9,]+)\],\"(.*)\"\],/x) {
unless (defined $choiceA) { $choiceB = $#runs; $choiceA = $choiceB-1; }
my $testnumber = $1;
my @times = split /,/, $2;
push @times, 0 while @times < 5; # pad short rows to 5 timings (the number normally kept)
my $testcomment = $3;
if ($times[$choiceA] > 0 && $times[$choiceB] > 0) {
push @timings, [ [@times], $testcomment, $testnumber, $script];
}
}
}
close($input);
@z=(); # Prepare to sort using a Schwartzian transform
if ($choice eq '1') { # by execution time
for my $t ( @timings ) { push @z, $$t[0][$choiceB]; }
}
elsif ($choice eq '2') { # by relative speedup/slowdown
for my $t ( @timings ) { push @z, ($$t[0][$choiceB]-$$t[0][$choiceA])/$$t[0][$choiceA]; }
}
elsif ($choice eq '3') { # by absolute speedup/slowdown
for my $t ( @timings ) { push @z, ($$t[0][$choiceB]-$$t[0][$choiceA]); }
}
else {
@sorted = @timings; # choice '4' is unsorted, meaning in order of execution
}
@sorted = @timings[ sort { $z[$a] <=> $z[$b] } 0..$#timings ] if @z;
# Send the results to 'less' for viewing
open $output, ">", "/tmp/test_summary.$$" or die "$0 cannot write /tmp/test_summary.$$: $!\n";
print $output "Microseconds and relative change of spec tests $ordername[$choice]. Commits:\n";
print $output "A: $runs[$choiceA]{'time'} $runs[$choiceA]{'comment'}\n";
print $output "B: $runs[$choiceB]{'time'} $runs[$choiceB]{'comment'}\n";
print $output " A B Chg Test description (script#test)\n";
for $t (@sorted) {
printf $output "%6d %5d %+3.0f%% %s (%s#%d)\n", $$t[0][$choiceA], $$t[0][$choiceB],
($$t[0][$choiceB]-$$t[0][$choiceA])*100/$$t[0][$choiceA], $$t[1], $$t[3], $$t[2];
}
close $output;
system "less --chop-long-lines /tmp/test_summary.$$";
do { # Prompt for user choice of sort order or commits
print 'view: sort by 1)time 2)%change 3)change 4)none, other 5)commits q)uit> ';
$choice = <STDIN>; chomp $choice;
if ($choice eq '5') { # choose a commit
for (my $r=0; $r<@runs; ++$r) {
print "$r: $runs[$r]{'time'} $runs[$r]{'comment'}\n";
}
print 'commit for column A: ';
$choiceA = <STDIN>; chomp $choiceA;
print 'commit for column B: ';
$choiceB = <STDIN>; chomp $choiceB;
}
} while $choice eq '5'; # if the user chose commits, a sort order must still be chosen
} while $choice =~ /^[1-4]$/; # while a valid sort order (not 'q') is chosen, do another report
}
package main;
=pod
=head1 NAME
tools/test_summary.pl -- run spectests and make statistical reports
=head1 DESCRIPTION
This test harness, written in Perl 5, runs the Perl 6 specification
test suite. It consumes the same Test Anything Protocol (TAP) output
as, for example, L<TAP::Harness>, but does not depend on those modules.
The names of the test scripts are listed in F<t/spectest.data>, or in
another file whose name is passed on the command line.
=head2 OUTPUT
The harness prints the name of each test script before running it.
After each script completes, it prints the number of tests passed,
failed, marked to-do, skipped, and planned. The descriptions of any
failed, skipped, or to-do tests are also listed.
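For example, a per-script line might look like this (the script name is
real, the figures hypothetical):
  S02-literals/numeric.t....................  137    0    1    2  140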
After running all the tests listed, the harness prints a set of
subtotals per Synopsis.
If you set the REV environment variable (with the first 7 characters of
a git commit id), the harness prints an additional set of grand totals
suitable for appending to F<docs/spectest-progress.csv>; the appended
line has the form C<REV,pass,fail,todo,skip,plan,spec,files>.
=head1 SIMPLE RELATIVE BENCHMARKING
Too little information can mislead, hence this self-deprecating title.
For example, these measurements overlook variation in test times
('jitter'), kernel versus user process times, and measurement overheads.
But these results are better than no information at all.
If activated, this tool logs the most recent 5 timings in microseconds
in F<docs/test_summary.times>, in a specific JSON format, for later
analysis. Measurement and logging add less than 2% to the testing time
and produce a log file of about 2.5 MB.
=head2 Methods
=head3 begin
Accepts an optional parameter, the number of timings to keep per test
(default 5). Creates a temporary file for new results, and returns an
object that updates the log file. (C<begin> acts as the constructor.)
=head3 log_script_times
Takes these parameters: test script name, reference to an array of times
in microseconds, reference to an array of test description strings.
Appends the results to the temporary log file.
=head3 end
Closes and renames the temporary log file.
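Taken together, the harness drives these methods as in the following
condensed sketch, mirroring the calls made in the main program above:
  my $benchmark = Simple::Relative::Benchmarking::begin(); # keeps 5 timings by default
  for my $tfile (@tfiles) {
      # ... run the test script, filling @times and @comments ...
      $benchmark->log_script_times($tfile, \@times, \@comments);
  }
  $benchmark->end(); # close and rename docs/test_summary.times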
=head2 Timing results file
All results are stored in F<docs/test_summary.times> in a specific JSON
format. With 35000 test result lines and 5 runs it occupies just under
2.5 MB.
Here is an example with a few semi-fictitious results:
{"test_history":[
["2010-05-05 10:15:45",46276,"925629d Make $x does (R1, R2) work."],
["2010-05-07 08:58:07",46276,"5713af2 run two more test files"],
["2010-05-08 18:08:43",46405,"ab23221 bump PARROT_REVISION"],
["2010-05-09 05:53:25",46405,"c49d32b run S04-phasers/rvalues.t"],
["2010-05-10 00:44:46",46405,"118f4aa Overhaul sqrt for Numeric / Real."]
],
"test_microseconds":{
"S02-builtin_data_types/anon_block.rakudo":[
[1,[6139,7559,6440,6289,5520],"The object is-a 'Sub()'"],
[2,[6610,6599,6690,6580,6010],"sub { } works"]
],
"S02-builtin_data_types/array.rakudo":[
[1,[9100,8889,9739,9140,9169],"for 1, 2, 3 does 3 iterations"],
[2,[5650,5599,6119,9819,5140],"for (1, 2, 3).item 3 iterations"],
[3,[3920,3770,4190,4410,3350],"for [1, 2, 3] does one iteration"]
]
}
}
The "test_history" section lists the starting times for all the runs of
F<tools/test_summary.pl> that are recorded in the file. Then the
"test_microseconds" records show each test filename, possibly fudged,
followed by the test numbers, followed by the times obtained from each
run. If a test has fewer than the usual number of timings, the timings
will be from the most recent test runs.
The file is read and written by custom code, not by a JSON module, to
reduce dependencies. Altering the file format might cause reading to
fail and could result in data loss. General-purpose JSON parsers should
be able to read the data. For example, the following script ranks the
tests from best speedup to worst slowdown.
#!/usr/bin/perl
use strict; use warnings;
use File::Slurp qw( slurp );
use JSON;
my $log_text = slurp('docs/test_summary.times');
my $log = JSON->new->decode( $log_text );
# Flatten the data structure to a 2-D array of nonzero test times
my @timings;
my $script_hash = $$log{'test_microseconds'};
for my $script_name ( sort keys %$script_hash ) {
my $test_list = $$script_hash{$script_name};
for my $t ( @$test_list ) {
my $times_count = @{$$t[1]};
# require the last two timings to be nonzero, avoiding division by zero
if ( $times_count >= 2 and ${$$t[1]}[$times_count-2] > 0
and ${$$t[1]}[$times_count-1] > 0 ) {
push @timings, [$script_name, $$t[0], $$t[2], ${$$t[1]}[$times_count-2], ${$$t[1]}[$times_count-1] ];
}
}
}
# Sort the timings into improved/worsened order with a Schwartzian transform
my @z; for my $t ( @timings ) { push @z, ($$t[4]-$$t[3])/$$t[4]; }
my @sorted = @timings[ sort { $z[$a] <=> $z[$b] } 0..$#timings ];
# Display the results: quicker is minus, slower is plus.
for my $s ( @sorted ) {
printf "%+3.0f%% %6d %6d %s:%d:%s\n",
($$s[4]-$$s[3])*100/$$s[3], $$s[3], $$s[4], $$s[0], $$s[1], $$s[2];
} # %change, prev-time, latest-time, script, test-num, test-desc
A second example shows another way to read the results file, and ranks
the tests from most to least consistent in execution time.
#!/usr/bin/perl
use strict; use warnings;
use JSON;
my $log_text = qx{$^X -MExtUtils::Command -e cat docs/test_summary.times};
my $log = JSON->new->decode( $log_text );
# Flatten the data structure to a 2-D array of nonzero test times
my @timings;
my $script_hash = $$log{'test_microseconds'};
for my $script_name ( sort keys %$script_hash ) {
my $test_list = $$script_hash{$script_name};
for my $t ( @$test_list ) {
my $times_count = @{$$t[1]};
if ( $times_count >= 2 and ${$$t[1]}[$times_count-1] > 0 ) {
my $min = my $max = ${$$t[1]}[0];
for my $i (1..$times_count-1) {
$min = ${$$t[1]}[$i] if $min > ${$$t[1]}[$i];
$max = ${$$t[1]}[$i] if $max < ${$$t[1]}[$i];
}
push @timings, [$script_name, $$t[0], $$t[2], $min, $max ] if $min > 0;
}
}
}
# Sort the timings into most/least consistent order by Schwartzian transform
my @z; for my $t ( @timings ) { push @z, ($$t[4]-$$t[3])/$$t[3]; }
my @sorted = @timings[ sort { $z[$a] <=> $z[$b] } 0..$#timings ];
# Display the results from most to least consistent
for my $s ( @sorted ) {
printf "%3.1f%% %6d %6d %s:%d:%s\n",
($$s[4]-$$s[3])*100/$$s[3], $$s[3], $$s[4], $$s[0], $$s[1], $$s[2];
} # %difference, min-time, max-time, script, test-num, test-desc
=head2 TODO
Detect changes in number of tests or descriptions of tests in each
test script, and discard all previous results for that script if there
has been a change. Consider whether to log total execution time per
test script.
Analyse and report useful results, such as the slowest n tests.
Parse the C<say now> output as well as C<print pir::__time()>.
=head1 SEE ALSO
The L<perlperf> documentation. The L<http://json.org/> site.
=cut