29 changes: 29 additions & 0 deletions .claude/skills/adding-benchmarks/SKILL.md
@@ -81,6 +81,35 @@ All benchmark files must be arrays using the `customSmallerIsBetter` format:
- `value` must be numeric (lower is better)
- File must end with `.bench.json`

**Optional fields** (preserved by benchmark-action):
- `range` (string): Variance info (e.g., `"± 5%"`)
- `extra` (string): Metadata — used for stacked chart grouping (see below)

## Stacked Charts

To render multiple metrics as a **single stacked area chart** (e.g., component breakdowns), add an `extra` field with a `stacked:GROUP_NAME` value. Entries sharing the same GROUP_NAME are overlaid on one chart.

```json
[
{"name": "proving/cpus-8/total_ms", "value": 31663, "unit": "ms"},
{"name": "proving/cpus-8/oink_prove_ms", "value": 4992, "unit": "ms", "extra": "stacked:proving/cpus-8/components"},
{"name": "proving/cpus-8/sumcheck_ms", "value": 3318, "unit": "ms", "extra": "stacked:proving/cpus-8/components"},
{"name": "proving/cpus-8/circuit_ms", "value": 4642, "unit": "ms", "extra": "stacked:proving/cpus-8/components"}
]
```

**How it works:**
- `extra: "stacked:GROUP_NAME"` → entries with the same GROUP_NAME are rendered as one stacked chart
- No `extra` field → individual line chart (default behavior)
- Stacked entries still appear as individual charts on the main benchmark-action dashboard; the stacked view is rendered by a custom dashboard page (see the sketch after this list)
- The GROUP_NAME becomes the chart title (after `bench_merge` prefixing, same as `name`)
- The `extra` field is one of the 5 fields preserved by the benchmark-action Zod schema (`name`, `value`, `unit`, `range`, `extra`); any other custom fields will be stripped
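
For illustration, here is a minimal sketch of the grouping step such a custom dashboard page might apply. The `group_stacked` helper and the inline sample data are hypothetical, not part of benchmark-action:

```python
from collections import defaultdict

STACKED_PREFIX = "stacked:"

def group_stacked(entries):
    """Split entries into stacked groups (keyed by GROUP_NAME) and singles."""
    groups, singles = defaultdict(list), []
    for entry in entries:
        extra = entry.get("extra", "")
        if extra.startswith(STACKED_PREFIX):
            groups[extra[len(STACKED_PREFIX):]].append(entry)
        else:
            singles.append(entry)
    return groups, singles

# Sample entries in the customSmallerIsBetter format shown above.
entries = [
    {"name": "proving/cpus-8/total_ms", "value": 31663, "unit": "ms"},
    {"name": "proving/cpus-8/oink_prove_ms", "value": 4992, "unit": "ms",
     "extra": "stacked:proving/cpus-8/components"},
    {"name": "proving/cpus-8/sumcheck_ms", "value": 3318, "unit": "ms",
     "extra": "stacked:proving/cpus-8/components"},
]

groups, singles = group_stacked(entries)
# One stacked area chart per group name; each entry becomes one band.
for group_name, series in groups.items():
    print(group_name, "->", [e["name"] for e in series])
# `singles` keep the default per-metric line charts.
```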

**When to use stacked charts:**
- Component-level timing breakdowns (e.g., sumcheck, PCS, circuit construction)
- Resource allocation views (e.g., memory by subsystem)
- Any case where you want to see how a total decomposes into parts over time

## Adding a New Benchmark

### Step 1: Create the Benchmark
195 changes: 195 additions & 0 deletions barretenberg/cpp/scripts/ci_benchmark_ultrahonk_circuits.sh
@@ -0,0 +1,195 @@
#!/usr/bin/env bash
# Benchmarks UltraHonk proving for protocol circuits (e.g., base parity).
# Runs bb prove with the given HARDWARE_CONCURRENCY value and captures a hierarchical timing breakdown; CI invokes it once per CPU count.
#
# Usage: ci_benchmark_ultrahonk_circuits.sh <circuit_name> <inputs_folder> <cpus>
# Example: ci_benchmark_ultrahonk_circuits.sh parity_base ../../yarn-project/end-to-end/ultrahonk-bench-inputs 8
#
# The inputs_folder should contain:
# - <circuit_name>.json (the circuit artifact with bytecode)
# - witness.gz (the compressed witness)

source $(git rev-parse --show-toplevel)/ci3/source
source $(git rev-parse --show-toplevel)/ci3/source_redis
source $(git rev-parse --show-toplevel)/ci3/source_cache

if [[ $# -ne 3 ]]; then
  echo "Usage: $0 <circuit_name> <inputs_folder> <cpus>"
  echo "Example: $0 parity_base ./bench-inputs/base-parity 8"
  exit 1
fi

cd ..

circuit_name="$1"
inputs_folder="$2"
cpus="$3"

echo_header "UltraHonk benchmark: $circuit_name (CPUS=$cpus)"

export HARDWARE_CONCURRENCY="$cpus"
export native_build_dir=$(scripts/native-preset-build-dir)

# Verify inputs exist
bytecode_path="$inputs_folder/${circuit_name}.json"
witness_path="$inputs_folder/witness.gz"

if [[ ! -f "$bytecode_path" ]]; then
echo "Error: Bytecode not found at $bytecode_path"
exit 1
fi

if [[ ! -f "$witness_path" ]]; then
echo "Error: Witness not found at $witness_path"
exit 1
fi

# Set up output directory
name_path="ultrahonk-bench/$circuit_name/cpus-$cpus"
output="bench-out/$name_path"
rm -rf "$output"
mkdir -p "$output"

export MEMUSAGE_OUT="$output/peak-memory-mb.txt"

# Run bb prove with hierarchical benchmark output
# Use --write_vk to compute and write the correct VK for this proving run
echo "Running bb prove --scheme ultra_honk --verifier_target noir-rollup with HARDWARE_CONCURRENCY=$cpus..."
start=$(date +%s%N)

memusage "./$native_build_dir/bin/bb" prove \
--scheme ultra_honk \
--verifier_target noir-rollup \
-b "$bytecode_path" \
-w "$witness_path" \
-o "$output" \
--write_vk \
--bench_out_hierarchical "$output/benchmark_breakdown.json" \
-v || {
echo "bb prove failed"
exit 1
}

end=$(date +%s%N)
elapsed_ns=$(( end - start ))
elapsed_ms=$(( elapsed_ns / 1000000 ))
memory_taken_mb=$(cat "$MEMUSAGE_OUT")

echo "$circuit_name (cpus=$cpus) proved in $((elapsed_ms / 1000))s with peak memory ${memory_taken_mb}MB"

# Verify the proof (use the VK from the output directory since we computed it with --write_vk)
echo "Verifying proof..."
"./$native_build_dir/bin/bb" verify \
--scheme ultra_honk \
--verifier_target noir-rollup \
-p "$output/proof" \
-i "$output/public_inputs" \
-k "$output/vk" || {
echo "Proof verification failed"
exit 1
}
echo "Proof verified successfully"

# Get proof size
proof_size_bytes=$(stat -c%s "$output/proof" 2>/dev/null || stat -f%z "$output/proof")
proof_size_kb=$(( proof_size_bytes / 1024 ))

# Generate benchmark JSON output
cat > "$output/benchmarks.bench.json" <<EOF
[
{
"name": "$name_path/total_ms",
"unit": "ms",
"value": ${elapsed_ms}
},
{
"name": "$name_path/memory_mb",
"unit": "MB",
"value": ${memory_taken_mb}
},
{
"name": "$name_path/proof_size_kb",
"unit": "KB",
"value": ${proof_size_kb}
}
]
EOF

# Extract component timings from hierarchical breakdown if available
if [[ -f "$output/benchmark_breakdown.json" ]]; then
echo "Extracting component timings from hierarchical breakdown..."

# Use Python to extract key component timings
# The breakdown JSON format is: { "operation_name": [{"parent": "...", "time": nanoseconds, ...}], ... }
python3 << PYTHON_SCRIPT
import json
import sys

try:
with open("$output/benchmark_breakdown.json", "r") as f:
data = json.load(f)

benchmarks = []

# Key components to track (case-insensitive matching)
key_components = ["sumcheck", "pcs", "pippenger", "commitment", "circuit", "oink", "compute"]

for op_name, entries in data.items():
# Check if this is a key component we want to track
if any(comp.lower() in op_name.lower() for comp in key_components):
# Sum up all timings for this operation (there may be multiple entries with different parents)
total_time_ns = sum(entry.get("time", 0) for entry in entries)
time_ms = total_time_ns / 1_000_000

# Create a safe benchmark name (replace special chars)
safe_name = op_name.replace("::", "_").replace(" ", "_")

benchmarks.append({
"name": f"$name_path/{safe_name}_ms",
"unit": "ms",
"value": round(time_ms, 2),
"extra": f"stacked:$name_path/components"
})

# Append to existing benchmarks file
with open("$output/benchmarks.bench.json", "r") as f:
existing = json.load(f)

existing.extend(benchmarks)

with open("$output/benchmarks.bench.json", "w") as f:
json.dump(existing, f, indent=2)

print(f"Extracted {len(benchmarks)} component timings")
except Exception as e:
print(f"Warning: Could not extract component timings: {e}", file=sys.stderr)
PYTHON_SCRIPT
fi

echo "Benchmark complete. Results in $output/"
echo " - benchmarks.bench.json (benchmark results)"
echo " - benchmark_breakdown.json (hierarchical timing breakdown)"
echo " - proof (the generated proof)"

# Upload benchmark breakdown to disk if running in CI
if [[ "${CI:-}" == "1" ]] && [[ "${CI_USE_BUILD_INSTANCE_KEY:-0}" == "1" ]]; then
echo_header "Uploading UltraHonk benchmark breakdown for $circuit_name (cpus=$cpus)"

if [[ -f "$output/benchmark_breakdown.json" ]]; then
set +e
current_sha=$(git rev-parse HEAD)

# Copy to /tmp with unique name
tmp_breakdown_file="/tmp/benchmark_breakdown_ultrahonk_${circuit_name}_cpus${cpus}_$$.json"
cp "$output/benchmark_breakdown.json" "$tmp_breakdown_file"

# Upload to disk
disk_key="ultrahonk-${circuit_name}-cpus${cpus}-${current_sha}"
{
cat "$tmp_breakdown_file" | gzip | cache_disk_transfer_to "bench/ultrahonk-breakdown" "$disk_key"
rm -f "$tmp_breakdown_file"
} &

echo "Uploaded benchmark breakdown to disk: bench/ultrahonk-breakdown/$disk_key"
fi
fi
1 change: 1 addition & 0 deletions yarn-project/end-to-end/.gitignore
@@ -3,6 +3,7 @@ results
bench-out
example-app-ivc-inputs-out
dumped-avm-circuit-inputs
ultrahonk-bench-inputs
web/main.js*
consensys_web3signer_25.11.0
scripts/ha/postgres_data/
38 changes: 27 additions & 11 deletions yarn-project/end-to-end/bootstrap.sh
@@ -4,6 +4,7 @@ source $(git rev-parse --show-toplevel)/ci3/source_bootstrap
hash=$(../bootstrap.sh hash)
bench_fixtures_dir=example-app-ivc-inputs-out
default_avm_inputs_dump_dir=dumped-avm-circuit-inputs
ultrahonk_bench_dir=ultrahonk-bench-inputs

function build {
  cache_load_image consensys/web3signer:25.11.0
@@ -125,26 +126,41 @@ function bench_cmds {
    done
  done
  echo "$hash:ISOLATE=1:NET=1:CPUS=8 barretenberg/cpp/scripts/ci_benchmark_browser_memory.sh ../../yarn-project/end-to-end/example-app-ivc-inputs-out/ecdsar1+transfer_0_recursions+sponsored_fpc"

  # UltraHonk circuit benchmarks at different CPU counts
  for cpus in 8 16 32; do
    echo "$hash:CPUS=$cpus barretenberg/cpp/scripts/ci_benchmark_ultrahonk_circuits.sh parity_base ../../yarn-project/end-to-end/$ultrahonk_bench_dir $cpus"
  done
}

# Builds all benchmark fixtures (chonk IVC captures + UltraHonk circuit inputs).
function build_bench {
  rm -rf bench-out && mkdir -p bench-out

  # Build chonk IVC captures
  export CAPTURE_IVC_FOLDER=$bench_fixtures_dir
  export BENCHMARK_CONFIG=key_flows
  export LOG_LEVEL=error
  export ENV_VARS_TO_INJECT="BENCHMARK_CONFIG CAPTURE_IVC_FOLDER LOG_LEVEL"
  rm -rf $CAPTURE_IVC_FOLDER && mkdir -p $CAPTURE_IVC_FOLDER
  if ! cache_download bb-chonk-captures-$hash.tar.gz; then
    parallel --tag --line-buffer --halt now,fail=1 'docker_isolate "scripts/run_test.sh simple {}"' ::: \
      client_flows/account_deployments \
      client_flows/deployments \
      client_flows/bridging \
      client_flows/transfers \
      client_flows/amm
    cache_upload bb-chonk-captures-$hash.tar.gz $CAPTURE_IVC_FOLDER
  fi

  # Build UltraHonk circuit benchmark inputs (bytecode + witness pairs)
  rm -rf $ultrahonk_bench_dir && mkdir -p $ultrahonk_bench_dir
  if ! cache_download bb-ultrahonk-bench-inputs-$hash.tar.gz; then
    # Generate base parity circuit inputs (use absolute path since test runs from ivc-integration)
    export BASE_PARITY_BENCH_DIR=$(pwd)/$ultrahonk_bench_dir
    yarn workspace @aztec/ivc-integration test src/base_parity_inputs.test.ts
    cache_upload bb-ultrahonk-bench-inputs-$hash.tar.gz $ultrahonk_bench_dir
  fi
}

function bench {
76 changes: 76 additions & 0 deletions yarn-project/ivc-integration/src/base_parity_inputs.test.ts
@@ -0,0 +1,76 @@
/**
* Generates base parity circuit inputs (bytecode + witness) for UltraHonk benchmarks.
* Only runs when BASE_PARITY_BENCH_DIR env var is set (during build_bench).
*
* Run with: BASE_PARITY_BENCH_DIR=./bench-out yarn workspace @aztec/ivc-integration test src/base_parity_inputs.test.ts
*/
import { NUMBER_OF_L1_L2_MESSAGES_PER_ROLLUP } from '@aztec/constants';
import { Fr } from '@aztec/foundation/curves/bn254';
import { createLogger } from '@aztec/foundation/log';
import { Noir } from '@aztec/noir-noir_js';
import { ServerCircuitArtifacts } from '@aztec/noir-protocol-circuits-types/server';
import { getVKTreeRoot } from '@aztec/noir-protocol-circuits-types/vk-tree';
import { ParityBasePrivateInputs } from '@aztec/stdlib/parity';

import { jest } from '@jest/globals';
import * as fs from 'fs/promises';
import * as path from 'path';

const logger = createLogger('bench:base-parity');

jest.setTimeout(120_000);

describe('Base Parity Benchmark Inputs', () => {
  it('generates bytecode and witness files for base parity benchmarking', async () => {
    const outputDir = process.env.BASE_PARITY_BENCH_DIR;
    if (!outputDir) {
      logger.info('Skipping base parity bench input generation (BASE_PARITY_BENCH_DIR not set)');
      return;
    }
    logger.info(`Generating base parity bench inputs to ${outputDir}`);

    await fs.mkdir(outputDir, { recursive: true });

    // Generate random L1-to-L2 messages
    logger.info(`Generating ${NUMBER_OF_L1_L2_MESSAGES_PER_ROLLUP} random L1-to-L2 messages...`);
    const l1ToL2Messages = new Array(NUMBER_OF_L1_L2_MESSAGES_PER_ROLLUP).fill(null).map(() => Fr.random());

    // Create base parity inputs for the first slice
    const vkTreeRoot = getVKTreeRoot();
    const baseParityInputs = ParityBasePrivateInputs.fromSlice(l1ToL2Messages, 0, vkTreeRoot);
    logger.info('Created base parity inputs');

    // Convert inputs to Noir format (inline the mapping since it's simple)
    const noirInputs = {
      msgs: baseParityInputs.msgs.map(m => m.toString()),
      // eslint-disable-next-line camelcase
      vk_tree_root: baseParityInputs.vkTreeRoot.toString(),
    };
    logger.info('Converted inputs to Noir format');

    // Get the circuit artifact
    const artifact = ServerCircuitArtifacts.ParityBaseArtifact;

    // Execute the circuit with Noir to generate the witness
    logger.info('Executing circuit with Noir to generate witness...');
    const program = new Noir(artifact as any);
    const { witness } = await program.execute({ inputs: noirInputs });
    logger.info('Witness generated');

    // Save bytecode as JSON (bb expects the full JSON artifact)
    const bytecodeOutputPath = path.join(outputDir, 'parity_base.json');
    await fs.writeFile(bytecodeOutputPath, JSON.stringify(artifact));
    logger.info(`Wrote bytecode to ${bytecodeOutputPath}`);

    // Save witness (already gzipped by Noir) - bb expects .gz format
    const witnessOutputPath = path.join(outputDir, 'witness.gz');
    await fs.writeFile(witnessOutputPath, witness);
    logger.info(`Wrote witness to ${witnessOutputPath}`);

    logger.info('Base parity bench inputs generated successfully');
    logger.info(`Output directory: ${outputDir}`);
    logger.info('Files:');
    logger.info(`  - ${bytecodeOutputPath} (circuit bytecode)`);
    logger.info(`  - ${witnessOutputPath} (compressed witness)`);
  });
});