Skip to content

Commit 69186d5

Browse files
committed
feat: enhance performance metrics with P95 and statistical calculations
1 parent f6a4b18 commit 69186d5

File tree

2 files changed

+532
-184
lines changed

2 files changed

+532
-184
lines changed

.github/workflows/perf-tests.yml

Lines changed: 72 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -151,20 +151,28 @@ jobs:
151151
if: startsWith(matrix.os, 'ubuntu')
152152
id: perf-linux
153153
run: |
154-
# Extract PR metrics
154+
# Extract PR metrics (P50 values at top level for backwards compatibility)
155155
PR_STARTUP=$(jq -r '.server_startup_ms // 0' metrics.json)
156156
PR_REFRESH=$(jq -r '.full_refresh_ms // 0' metrics.json)
157157
PR_ENVS=$(jq -r '.environments_count // 0' metrics.json)
158158
159+
# Extract P95 values from the stats object; fall back to the top-level value, then 0, when stats are absent
160+
PR_STARTUP_P95=$(jq -r '.stats.server_startup.p95 // .server_startup_ms // 0' metrics.json)
161+
PR_REFRESH_P95=$(jq -r '.stats.full_refresh.p95 // .full_refresh_ms // 0' metrics.json)
162+
159163
# Extract baseline metrics (default to 0 if not available)
160164
if [ -f baseline-perf/metrics.json ]; then
161165
BASELINE_STARTUP=$(jq -r '.server_startup_ms // 0' baseline-perf/metrics.json)
162166
BASELINE_REFRESH=$(jq -r '.full_refresh_ms // 0' baseline-perf/metrics.json)
163167
BASELINE_ENVS=$(jq -r '.environments_count // 0' baseline-perf/metrics.json)
168+
BASELINE_STARTUP_P95=$(jq -r '.stats.server_startup.p95 // .server_startup_ms // 0' baseline-perf/metrics.json)
169+
BASELINE_REFRESH_P95=$(jq -r '.stats.full_refresh.p95 // .full_refresh_ms // 0' baseline-perf/metrics.json)
164170
else
165171
BASELINE_STARTUP=0
166172
BASELINE_REFRESH=0
167173
BASELINE_ENVS=0
174+
BASELINE_STARTUP_P95=0
175+
BASELINE_REFRESH_P95=0
168176
fi
169177
170178
# Calculate diff (positive means slowdown, negative means speedup)
@@ -200,8 +208,12 @@ jobs:
200208
# Set outputs
201209
echo "pr_startup=$PR_STARTUP" >> $GITHUB_OUTPUT
202210
echo "pr_refresh=$PR_REFRESH" >> $GITHUB_OUTPUT
211+
echo "pr_startup_p95=$PR_STARTUP_P95" >> $GITHUB_OUTPUT
212+
echo "pr_refresh_p95=$PR_REFRESH_P95" >> $GITHUB_OUTPUT
203213
echo "baseline_startup=$BASELINE_STARTUP" >> $GITHUB_OUTPUT
204214
echo "baseline_refresh=$BASELINE_REFRESH" >> $GITHUB_OUTPUT
215+
echo "baseline_startup_p95=$BASELINE_STARTUP_P95" >> $GITHUB_OUTPUT
216+
echo "baseline_refresh_p95=$BASELINE_REFRESH_P95" >> $GITHUB_OUTPUT
205217
echo "startup_diff=$STARTUP_DIFF" >> $GITHUB_OUTPUT
206218
echo "refresh_diff=$REFRESH_DIFF" >> $GITHUB_OUTPUT
207219
echo "startup_pct=$STARTUP_PCT" >> $GITHUB_OUTPUT
@@ -211,33 +223,41 @@ jobs:
211223
# Write step summary
212224
echo "## Performance Report (Linux)" >> $GITHUB_STEP_SUMMARY
213225
echo "" >> $GITHUB_STEP_SUMMARY
214-
echo "| Metric | PR | Baseline | Delta | Change |" >> $GITHUB_STEP_SUMMARY
215-
echo "|--------|-----|----------|-------|--------|" >> $GITHUB_STEP_SUMMARY
216-
echo "| Server Startup | ${PR_STARTUP}ms | ${BASELINE_STARTUP}ms | ${STARTUP_DIFF}ms | ${STARTUP_PCT}% |" >> $GITHUB_STEP_SUMMARY
217-
echo "| Full Refresh | ${PR_REFRESH}ms | ${BASELINE_REFRESH}ms | ${REFRESH_DIFF}ms | ${REFRESH_PCT}% ${DELTA_INDICATOR} |" >> $GITHUB_STEP_SUMMARY
218-
echo "| Environments | ${PR_ENVS} | ${BASELINE_ENVS} | - | - |" >> $GITHUB_STEP_SUMMARY
226+
echo "| Metric | PR (P50) | PR (P95) | Baseline (P50) | Delta | Change |" >> $GITHUB_STEP_SUMMARY
227+
echo "|--------|----------|----------|----------------|-------|--------|" >> $GITHUB_STEP_SUMMARY
228+
echo "| Server Startup | ${PR_STARTUP}ms | ${PR_STARTUP_P95}ms | ${BASELINE_STARTUP}ms | ${STARTUP_DIFF}ms | ${STARTUP_PCT}% |" >> $GITHUB_STEP_SUMMARY
229+
echo "| Full Refresh | ${PR_REFRESH}ms | ${PR_REFRESH_P95}ms | ${BASELINE_REFRESH}ms | ${REFRESH_DIFF}ms | ${REFRESH_PCT}% ${DELTA_INDICATOR} |" >> $GITHUB_STEP_SUMMARY
230+
echo "| Environments | ${PR_ENVS} | - | ${BASELINE_ENVS} | - | - |" >> $GITHUB_STEP_SUMMARY
219231
shell: bash
220232

221233
- name: Generate Performance Report (Windows)
222234
if: startsWith(matrix.os, 'windows')
223235
id: perf-windows
224236
run: |
225-
# Extract PR metrics
237+
# Extract PR metrics (P50 values at top level for backwards compatibility)
226238
$prMetrics = Get-Content -Path "metrics.json" -Raw | ConvertFrom-Json
227239
$prStartup = $prMetrics.server_startup_ms
228240
$prRefresh = $prMetrics.full_refresh_ms
229241
$prEnvs = $prMetrics.environments_count
230242
243+
# Extract P95 values from the stats object; fall back to the top-level value when p95 is missing or falsy
244+
$prStartupP95 = if ($prMetrics.stats.server_startup.p95) { $prMetrics.stats.server_startup.p95 } else { $prStartup }
245+
$prRefreshP95 = if ($prMetrics.stats.full_refresh.p95) { $prMetrics.stats.full_refresh.p95 } else { $prRefresh }
246+
231247
# Extract baseline metrics (default to 0 if not available)
232248
if (Test-Path "baseline-perf/metrics.json") {
233249
$baselineMetrics = Get-Content -Path "baseline-perf/metrics.json" -Raw | ConvertFrom-Json
234250
$baselineStartup = $baselineMetrics.server_startup_ms
235251
$baselineRefresh = $baselineMetrics.full_refresh_ms
236252
$baselineEnvs = $baselineMetrics.environments_count
253+
$baselineStartupP95 = if ($baselineMetrics.stats.server_startup.p95) { $baselineMetrics.stats.server_startup.p95 } else { $baselineStartup }
254+
$baselineRefreshP95 = if ($baselineMetrics.stats.full_refresh.p95) { $baselineMetrics.stats.full_refresh.p95 } else { $baselineRefresh }
237255
} else {
238256
$baselineStartup = 0
239257
$baselineRefresh = 0
240258
$baselineEnvs = 0
259+
$baselineStartupP95 = 0
260+
$baselineRefreshP95 = 0
241261
}
242262
243263
# Calculate diff
@@ -273,8 +293,12 @@ jobs:
273293
# Set outputs
274294
echo "pr_startup=$prStartup" >> $env:GITHUB_OUTPUT
275295
echo "pr_refresh=$prRefresh" >> $env:GITHUB_OUTPUT
296+
echo "pr_startup_p95=$prStartupP95" >> $env:GITHUB_OUTPUT
297+
echo "pr_refresh_p95=$prRefreshP95" >> $env:GITHUB_OUTPUT
276298
echo "baseline_startup=$baselineStartup" >> $env:GITHUB_OUTPUT
277299
echo "baseline_refresh=$baselineRefresh" >> $env:GITHUB_OUTPUT
300+
echo "baseline_startup_p95=$baselineStartupP95" >> $env:GITHUB_OUTPUT
301+
echo "baseline_refresh_p95=$baselineRefreshP95" >> $env:GITHUB_OUTPUT
278302
echo "startup_diff=$startupDiff" >> $env:GITHUB_OUTPUT
279303
echo "refresh_diff=$refreshDiff" >> $env:GITHUB_OUTPUT
280304
echo "startup_pct=$startupPct" >> $env:GITHUB_OUTPUT
@@ -284,31 +308,39 @@ jobs:
284308
# Write step summary
285309
echo "## Performance Report (Windows)" >> $env:GITHUB_STEP_SUMMARY
286310
echo "" >> $env:GITHUB_STEP_SUMMARY
287-
echo "| Metric | PR | Baseline | Delta | Change |" >> $env:GITHUB_STEP_SUMMARY
288-
echo "|--------|-----|----------|-------|--------|" >> $env:GITHUB_STEP_SUMMARY
289-
echo "| Server Startup | ${prStartup}ms | ${baselineStartup}ms | ${startupDiff}ms | ${startupPct}% |" >> $env:GITHUB_STEP_SUMMARY
290-
echo "| Full Refresh | ${prRefresh}ms | ${baselineRefresh}ms | ${refreshDiff}ms | ${refreshPct}% ${deltaIndicator} |" >> $env:GITHUB_STEP_SUMMARY
291-
echo "| Environments | ${prEnvs} | ${baselineEnvs} | - | - |" >> $env:GITHUB_STEP_SUMMARY
311+
echo "| Metric | PR (P50) | PR (P95) | Baseline (P50) | Delta | Change |" >> $env:GITHUB_STEP_SUMMARY
312+
echo "|--------|----------|----------|----------------|-------|--------|" >> $env:GITHUB_STEP_SUMMARY
313+
echo "| Server Startup | ${prStartup}ms | ${prStartupP95}ms | ${baselineStartup}ms | ${startupDiff}ms | ${startupPct}% |" >> $env:GITHUB_STEP_SUMMARY
314+
echo "| Full Refresh | ${prRefresh}ms | ${prRefreshP95}ms | ${baselineRefresh}ms | ${refreshDiff}ms | ${refreshPct}% ${deltaIndicator} |" >> $env:GITHUB_STEP_SUMMARY
315+
echo "| Environments | ${prEnvs} | - | ${baselineEnvs} | - | - |" >> $env:GITHUB_STEP_SUMMARY
292316
shell: pwsh
293317

294318
- name: Generate Performance Report (macOS)
295319
if: startsWith(matrix.os, 'macos')
296320
id: perf-macos
297321
run: |
298-
# Extract PR metrics
322+
# Extract PR metrics (P50 values at top level for backwards compatibility)
299323
PR_STARTUP=$(jq -r '.server_startup_ms // 0' metrics.json)
300324
PR_REFRESH=$(jq -r '.full_refresh_ms // 0' metrics.json)
301325
PR_ENVS=$(jq -r '.environments_count // 0' metrics.json)
302326
327+
# Extract P95 values from the stats object; fall back to the top-level value, then 0, when stats are absent
328+
PR_STARTUP_P95=$(jq -r '.stats.server_startup.p95 // .server_startup_ms // 0' metrics.json)
329+
PR_REFRESH_P95=$(jq -r '.stats.full_refresh.p95 // .full_refresh_ms // 0' metrics.json)
330+
303331
# Extract baseline metrics (default to 0 if not available)
304332
if [ -f baseline-perf/metrics.json ]; then
305333
BASELINE_STARTUP=$(jq -r '.server_startup_ms // 0' baseline-perf/metrics.json)
306334
BASELINE_REFRESH=$(jq -r '.full_refresh_ms // 0' baseline-perf/metrics.json)
307335
BASELINE_ENVS=$(jq -r '.environments_count // 0' baseline-perf/metrics.json)
336+
BASELINE_STARTUP_P95=$(jq -r '.stats.server_startup.p95 // .server_startup_ms // 0' baseline-perf/metrics.json)
337+
BASELINE_REFRESH_P95=$(jq -r '.stats.full_refresh.p95 // .full_refresh_ms // 0' baseline-perf/metrics.json)
308338
else
309339
BASELINE_STARTUP=0
310340
BASELINE_REFRESH=0
311341
BASELINE_ENVS=0
342+
BASELINE_STARTUP_P95=0
343+
BASELINE_REFRESH_P95=0
312344
fi
313345
314346
# Calculate diff
@@ -318,19 +350,23 @@ jobs:
318350
# Set outputs
319351
echo "pr_startup=$PR_STARTUP" >> $GITHUB_OUTPUT
320352
echo "pr_refresh=$PR_REFRESH" >> $GITHUB_OUTPUT
353+
echo "pr_startup_p95=$PR_STARTUP_P95" >> $GITHUB_OUTPUT
354+
echo "pr_refresh_p95=$PR_REFRESH_P95" >> $GITHUB_OUTPUT
321355
echo "baseline_startup=$BASELINE_STARTUP" >> $GITHUB_OUTPUT
322356
echo "baseline_refresh=$BASELINE_REFRESH" >> $GITHUB_OUTPUT
357+
echo "baseline_startup_p95=$BASELINE_STARTUP_P95" >> $GITHUB_OUTPUT
358+
echo "baseline_refresh_p95=$BASELINE_REFRESH_P95" >> $GITHUB_OUTPUT
323359
echo "startup_diff=$STARTUP_DIFF" >> $GITHUB_OUTPUT
324360
echo "refresh_diff=$REFRESH_DIFF" >> $GITHUB_OUTPUT
325361
326362
# Write step summary
327363
echo "## Performance Report (macOS)" >> $GITHUB_STEP_SUMMARY
328364
echo "" >> $GITHUB_STEP_SUMMARY
329-
echo "| Metric | PR | Baseline | Delta |" >> $GITHUB_STEP_SUMMARY
330-
echo "|--------|-----|----------|-------|" >> $GITHUB_STEP_SUMMARY
331-
echo "| Server Startup | ${PR_STARTUP}ms | ${BASELINE_STARTUP}ms | ${STARTUP_DIFF}ms |" >> $GITHUB_STEP_SUMMARY
332-
echo "| Full Refresh | ${PR_REFRESH}ms | ${BASELINE_REFRESH}ms | ${REFRESH_DIFF}ms |" >> $GITHUB_STEP_SUMMARY
333-
echo "| Environments | ${PR_ENVS} | ${BASELINE_ENVS} | - |" >> $GITHUB_STEP_SUMMARY
365+
echo "| Metric | PR (P50) | PR (P95) | Baseline (P50) | Delta |" >> $GITHUB_STEP_SUMMARY
366+
echo "|--------|----------|----------|----------------|-------|" >> $GITHUB_STEP_SUMMARY
367+
echo "| Server Startup | ${PR_STARTUP}ms | ${PR_STARTUP_P95}ms | ${BASELINE_STARTUP}ms | ${STARTUP_DIFF}ms |" >> $GITHUB_STEP_SUMMARY
368+
echo "| Full Refresh | ${PR_REFRESH}ms | ${PR_REFRESH_P95}ms | ${BASELINE_REFRESH}ms | ${REFRESH_DIFF}ms |" >> $GITHUB_STEP_SUMMARY
369+
echo "| Environments | ${PR_ENVS} | - | ${BASELINE_ENVS} | - |" >> $GITHUB_STEP_SUMMARY
334370
shell: bash
335371

336372
- name: Post Performance Comment (Linux)
@@ -341,10 +377,12 @@ jobs:
341377
message: |
342378
## Performance Report (Linux) ${{ steps.perf-linux.outputs.delta_indicator }}
343379
344-
| Metric | PR | Baseline | Delta | Change |
345-
|--------|-----|----------|-------|--------|
346-
| Server Startup | ${{ steps.perf-linux.outputs.pr_startup }}ms | ${{ steps.perf-linux.outputs.baseline_startup }}ms | ${{ steps.perf-linux.outputs.startup_diff }}ms | ${{ steps.perf-linux.outputs.startup_pct }}% |
347-
| Full Refresh | ${{ steps.perf-linux.outputs.pr_refresh }}ms | ${{ steps.perf-linux.outputs.baseline_refresh }}ms | ${{ steps.perf-linux.outputs.refresh_diff }}ms | ${{ steps.perf-linux.outputs.refresh_pct }}% |
380+
| Metric | PR (P50) | PR (P95) | Baseline (P50) | Delta | Change |
381+
|--------|----------|----------|----------------|-------|--------|
382+
| Server Startup | ${{ steps.perf-linux.outputs.pr_startup }}ms | ${{ steps.perf-linux.outputs.pr_startup_p95 }}ms | ${{ steps.perf-linux.outputs.baseline_startup }}ms | ${{ steps.perf-linux.outputs.startup_diff }}ms | ${{ steps.perf-linux.outputs.startup_pct }}% |
383+
| Full Refresh | ${{ steps.perf-linux.outputs.pr_refresh }}ms | ${{ steps.perf-linux.outputs.pr_refresh_p95 }}ms | ${{ steps.perf-linux.outputs.baseline_refresh }}ms | ${{ steps.perf-linux.outputs.refresh_diff }}ms | ${{ steps.perf-linux.outputs.refresh_pct }}% |
384+
385+
> Results based on 10 iterations. P50 = median, P95 = 95th percentile.
348386
349387
---
350388
<details>
@@ -365,10 +403,12 @@ jobs:
365403
message: |
366404
## Performance Report (Windows) ${{ steps.perf-windows.outputs.delta_indicator }}
367405
368-
| Metric | PR | Baseline | Delta | Change |
369-
|--------|-----|----------|-------|--------|
370-
| Server Startup | ${{ steps.perf-windows.outputs.pr_startup }}ms | ${{ steps.perf-windows.outputs.baseline_startup }}ms | ${{ steps.perf-windows.outputs.startup_diff }}ms | ${{ steps.perf-windows.outputs.startup_pct }}% |
371-
| Full Refresh | ${{ steps.perf-windows.outputs.pr_refresh }}ms | ${{ steps.perf-windows.outputs.baseline_refresh }}ms | ${{ steps.perf-windows.outputs.refresh_diff }}ms | ${{ steps.perf-windows.outputs.refresh_pct }}% |
406+
| Metric | PR (P50) | PR (P95) | Baseline (P50) | Delta | Change |
407+
|--------|----------|----------|----------------|-------|--------|
408+
| Server Startup | ${{ steps.perf-windows.outputs.pr_startup }}ms | ${{ steps.perf-windows.outputs.pr_startup_p95 }}ms | ${{ steps.perf-windows.outputs.baseline_startup }}ms | ${{ steps.perf-windows.outputs.startup_diff }}ms | ${{ steps.perf-windows.outputs.startup_pct }}% |
409+
| Full Refresh | ${{ steps.perf-windows.outputs.pr_refresh }}ms | ${{ steps.perf-windows.outputs.pr_refresh_p95 }}ms | ${{ steps.perf-windows.outputs.baseline_refresh }}ms | ${{ steps.perf-windows.outputs.refresh_diff }}ms | ${{ steps.perf-windows.outputs.refresh_pct }}% |
410+
411+
> Results based on 10 iterations. P50 = median, P95 = 95th percentile.
372412
373413
---
374414
<details>
@@ -389,10 +429,12 @@ jobs:
389429
message: |
390430
## Performance Report (macOS)
391431
392-
| Metric | PR | Baseline | Delta |
393-
|--------|-----|----------|-------|
394-
| Server Startup | ${{ steps.perf-macos.outputs.pr_startup }}ms | ${{ steps.perf-macos.outputs.baseline_startup }}ms | ${{ steps.perf-macos.outputs.startup_diff }}ms |
395-
| Full Refresh | ${{ steps.perf-macos.outputs.pr_refresh }}ms | ${{ steps.perf-macos.outputs.baseline_refresh }}ms | ${{ steps.perf-macos.outputs.refresh_diff }}ms |
432+
| Metric | PR (P50) | PR (P95) | Baseline (P50) | Delta |
433+
|--------|----------|----------|----------------|-------|
434+
| Server Startup | ${{ steps.perf-macos.outputs.pr_startup }}ms | ${{ steps.perf-macos.outputs.pr_startup_p95 }}ms | ${{ steps.perf-macos.outputs.baseline_startup }}ms | ${{ steps.perf-macos.outputs.startup_diff }}ms |
435+
| Full Refresh | ${{ steps.perf-macos.outputs.pr_refresh }}ms | ${{ steps.perf-macos.outputs.pr_refresh_p95 }}ms | ${{ steps.perf-macos.outputs.baseline_refresh }}ms | ${{ steps.perf-macos.outputs.refresh_diff }}ms |
436+
437+
> Results based on 10 iterations. P50 = median, P95 = 95th percentile.
396438
397439
---
398440
<details>

0 commit comments

Comments
 (0)