|
| 1 | +# build_haiku_20260223_124805 |
| 2 | + |
| 3 | +## baseline-local-direct |
| 4 | + |
| 5 | +- Valid tasks: `19` |
| 6 | +- Mean reward: `0.511` |
| 7 | +- Pass rate: `0.789` |
| 8 | + |
| 9 | +| Task | Status | Reward | MCP Ratio | Tool Calls | Trace | |
| 10 | +|---|---|---:|---:|---:|---| |
| 11 | +| [cgen-deps-install-001](../tasks/build_haiku_20260223_124805--baseline-local-direct--cgen-deps-install-001.md) | `failed` | 0.000 | 0.000 | 32 | traj, tx | |
| 12 | +| [codecoverage-deps-install-001](../tasks/build_haiku_20260223_124805--baseline-local-direct--codecoverage-deps-install-001.md) | `failed` | 0.000 | 0.000 | 16 | traj, tx | |
| 13 | +| [dotnetkoans-deps-install-001](../tasks/build_haiku_20260223_124805--baseline-local-direct--dotnetkoans-deps-install-001.md) | `failed` | 0.000 | 0.000 | 63 | traj, tx | |
| 14 | +| [vscode-stale-diagnostics-feat-001](../tasks/build_haiku_20260223_124805--baseline-local-direct--vscode-stale-diagnostics-feat-001.md) | `failed` | 0.000 | 0.000 | 44 | traj, tx | |
| 15 | +| [bustub-hyperloglog-impl-001](../tasks/build_haiku_20260223_124805--baseline-local-direct--bustub-hyperloglog-impl-001.md) | `passed` | 0.167 | 0.000 | 175 | traj, tx | |
| 16 | +| [dotenv-expand-deps-install-001](../tasks/build_haiku_20260223_124805--baseline-local-direct--dotenv-expand-deps-install-001.md) | `passed` | 1.000 | 0.000 | 20 | traj, tx | |
| 17 | +| [envoy-grpc-server-impl-001](../tasks/build_haiku_20260223_124805--baseline-local-direct--envoy-grpc-server-impl-001.md) | `passed` | 0.400 | 0.000 | 32 | traj, tx | |
| 18 | +| [eslint-markdown-deps-install-001](../tasks/build_haiku_20260223_124805--baseline-local-direct--eslint-markdown-deps-install-001.md) | `passed` | 1.000 | 0.000 | 32 | traj, tx | |
| 19 | +| [flipt-dep-refactor-001](../tasks/build_haiku_20260223_124805--baseline-local-direct--flipt-dep-refactor-001.md) | `passed` | 0.700 | 0.000 | 69 | traj, tx | |
| 20 | +| [flipt-flagexists-refactor-001](../tasks/build_haiku_20260223_124805--baseline-local-direct--flipt-flagexists-refactor-001.md) | `passed` | 0.450 | 0.000 | 86 | traj, tx | |
| 21 | +| [iamactionhunter-deps-install-001](../tasks/build_haiku_20260223_124805--baseline-local-direct--iamactionhunter-deps-install-001.md) | `passed` | 1.000 | 0.000 | 30 | traj, tx | |
| 22 | +| [k8s-runtime-object-impl-001](../tasks/build_haiku_20260223_124805--baseline-local-direct--k8s-runtime-object-impl-001.md) | `passed` | 0.110 | 0.000 | 53 | traj, tx | |
| 23 | +| [kafka-batch-accumulator-refac-001](../tasks/build_haiku_20260223_124805--baseline-local-direct--kafka-batch-accumulator-refac-001.md) | `passed` | 0.320 | 0.000 | 96 | traj, tx | |
| 24 | +| [pcap-parser-deps-install-001](../tasks/build_haiku_20260223_124805--baseline-local-direct--pcap-parser-deps-install-001.md) | `passed` | 1.000 | 0.000 | 26 | traj, tx | |
| 25 | +| [python-http-class-naming-refac-001](../tasks/build_haiku_20260223_124805--baseline-local-direct--python-http-class-naming-refac-001.md) | `passed` | 0.840 | 0.000 | 53 | traj, tx | |
| 26 | +| [similar-asserts-deps-install-001](../tasks/build_haiku_20260223_124805--baseline-local-direct--similar-asserts-deps-install-001.md) | `passed` | 1.000 | 0.000 | 40 | traj, tx | |
| 27 | +| [strata-cds-tranche-feat-001](../tasks/build_haiku_20260223_124805--baseline-local-direct--strata-cds-tranche-feat-001.md) | `passed` | 0.410 | 0.000 | 65 | traj, tx | |
| 28 | +| [strata-fx-european-refac-001](../tasks/build_haiku_20260223_124805--baseline-local-direct--strata-fx-european-refac-001.md) | `passed` | 0.320 | 0.000 | 29 | traj, tx | |
| 29 | +| [tensorrt-mxfp4-quant-feat-001](../tasks/build_haiku_20260223_124805--baseline-local-direct--tensorrt-mxfp4-quant-feat-001.md) | `passed` | 1.000 | 0.000 | 97 | traj, tx | |
| 30 | + |
| 31 | +## mcp-remote-direct |
| 32 | + |
| 33 | +- Valid tasks: `25` |
| 34 | +- Mean reward: `0.372` |
| 35 | +- Pass rate: `0.640` |
| 36 | + |
| 37 | +| Task | Status | Reward | MCP Ratio | Tool Calls | Trace | |
| 38 | +|---|---|---:|---:|---:|---| |
| 39 | +| [sgonly_bustub-hyperloglog-impl-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_bustub-hyperloglog-impl-001.md) | `failed` | 0.000 | 0.100 | 120 | traj, tx | |
| 40 | +| [sgonly_cgen-deps-install-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_cgen-deps-install-001.md) | `failed` | 0.000 | 0.929 | 14 | traj, tx | |
| 41 | +| [sgonly_codecoverage-deps-install-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_codecoverage-deps-install-001.md) | `failed` | 0.000 | 0.654 | 26 | traj, tx | |
| 42 | +| [sgonly_dotenv-expand-deps-install-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_dotenv-expand-deps-install-001.md) | `failed` | 0.000 | 0.692 | 13 | traj, tx | |
| 43 | +| [sgonly_dotnetkoans-deps-install-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_dotnetkoans-deps-install-001.md) | `failed` | 0.000 | 0.632 | 38 | traj, tx | |
| 44 | +| [sgonly_eslint-markdown-deps-install-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_eslint-markdown-deps-install-001.md) | `failed` | 0.000 | 0.711 | 38 | traj, tx | |
| 45 | +| [sgonly_pcap-parser-deps-install-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_pcap-parser-deps-install-001.md) | `failed` | 0.000 | 0.360 | 25 | traj, tx | |
| 46 | +| [sgonly_servo-scrollend-event-feat-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_servo-scrollend-event-feat-001.md) | `failed` | 0.000 | 0.544 | 79 | traj, tx | |
| 47 | +| [sgonly_vscode-stale-diagnostics-feat-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_vscode-stale-diagnostics-feat-001.md) | `failed` | 0.000 | 0.625 | 32 | traj, tx | |
| 48 | +| [sgonly_camel-fix-protocol-feat-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_camel-fix-protocol-feat-001.md) | `passed` | 0.130 | 0.500 | 40 | traj, tx | |
| 49 | +| [sgonly_envoy-grpc-server-impl-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_envoy-grpc-server-impl-001.md) | `passed` | 0.440 | 0.938 | 32 | traj, tx | |
| 50 | +| [sgonly_flink-pricing-window-feat-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_flink-pricing-window-feat-001.md) | `passed` | 0.510 | 0.565 | 46 | traj, tx | |
| 51 | +| [sgonly_flipt-dep-refactor-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_flipt-dep-refactor-001.md) | `passed` | 0.030 | 0.418 | 122 | traj, tx | |
| 52 | +| [sgonly_flipt-flagexists-refactor-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_flipt-flagexists-refactor-001.md) | `passed` | 0.750 | 0.352 | 71 | traj, tx | |
| 53 | +| [sgonly_iamactionhunter-deps-install-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_iamactionhunter-deps-install-001.md) | `passed` | 1.000 | 0.390 | 41 | traj, tx | |
| 54 | +| [sgonly_k8s-noschedule-taint-feat-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_k8s-noschedule-taint-feat-001.md) | `passed` | 0.500 | 0.279 | 104 | traj, tx | |
| 55 | +| [sgonly_k8s-runtime-object-impl-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_k8s-runtime-object-impl-001.md) | `passed` | 0.120 | 0.831 | 77 | traj, tx | |
| 56 | +| [sgonly_k8s-score-normalizer-refac-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_k8s-score-normalizer-refac-001.md) | `passed` | 0.780 | 0.385 | 39 | traj, tx | |
| 57 | +| [sgonly_kafka-batch-accumulator-refac-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_kafka-batch-accumulator-refac-001.md) | `passed` | 0.680 | 0.333 | 15 | traj, tx | |
| 58 | +| [sgonly_python-http-class-naming-refac-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_python-http-class-naming-refac-001.md) | `passed` | 0.880 | 0.293 | 58 | traj, tx | |
| 59 | +| [sgonly_rust-subtype-relation-refac-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_rust-subtype-relation-refac-001.md) | `passed` | 0.710 | 0.464 | 28 | traj, tx | |
| 60 | +| [sgonly_similar-asserts-deps-install-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_similar-asserts-deps-install-001.md) | `passed` | 1.000 | 0.500 | 34 | traj, tx | |
| 61 | +| [sgonly_strata-cds-tranche-feat-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_strata-cds-tranche-feat-001.md) | `passed` | 0.280 | 0.567 | 30 | traj, tx | |
| 62 | +| [sgonly_strata-fx-european-refac-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_strata-fx-european-refac-001.md) | `passed` | 0.800 | 0.312 | 48 | traj, tx | |
| 63 | +| [sgonly_tensorrt-mxfp4-quant-feat-001](../tasks/build_haiku_20260223_124805--mcp-remote-direct--sgonly_tensorrt-mxfp4-quant-feat-001.md) | `passed` | 0.700 | 0.537 | 41 | traj, tx | |
0 commit comments