jeremyeder · jeremyeder · Dec 5, 2025 · Dec 5, 2025 · Dec 5, 2025 · Dec 5, 2025
diff --git a/.github/workflows/continuous-learning.yml b/.github/workflows/continuous-learning.yml
@@ -106,12 +106,12 @@ jobs:
 
           # Commit and push
           git add .claude/skills
-          git commit -m "feat: add discovered skills from continuous learning
-
-Automatically extracted skills from latest assessment.
-
-🤖 Generated with Claude Code
-Co-Authored-By: Claude <noreply@anthropic.com>"
+          git commit -m "feat: add discovered skills from continuous learning" \
+                     -m "" \
+                     -m "Automatically extracted skills from latest assessment." \
+                     -m "" \
+                     -m "🤖 Generated with Claude Code" \
+                     -m "Co-Authored-By: Claude <noreply@anthropic.com>"
 
           git push origin "$BRANCH_NAME"
 

diff --git a/.github/workflows/update-docs.yml b/.github/workflows/update-docs.yml
@@ -34,57 +34,60 @@ jobs:
             const revisionReason = process.env.REVISION_REASON;
             const actor = process.env.ACTOR;
 
+            const bodyLines = [
+              '## Documentation Update Request',
+              '',
+              '**Reason**: ' + revisionReason,
+              '',
+              '**Triggered by**: @' + actor,
+              '',
+              '**Action Required**:',
+              'This workflow creates a reminder to update the GitHub Pages documentation using the `github-pages-docs` agent in Claude Code.',
+              '',
+              '### Steps to Update Documentation:',
+              '',
+              '1. **Open Claude Code** in this repository',
+              '2. **Run the command**:',
+              '   ```',
+              '   Use the @agent-github-pages-docs to revise all documentation in docs/ based on:',
+              '   - Latest CLAUDE.md updates',
+              '   - Bootstrap feature implementation status',
+              '   - Recent code changes',
+              '   - ' + revisionReason,
+              '   ```',
+              '3. **Review the changes** generated by the agent',
+              '4. **Commit and push** to this branch or create a PR',
+              '5. **Close this issue** when complete',
+              '',
+              '### Why Manual Update?',
+              '',
+              'The `github-pages-docs` agent requires Claude Code\'s specialized documentation capabilities. We\'re tracking this issue to ensure documentation stays synchronized with codebase changes.',
+              '',
+              '### Future Automation',
+              '',
+              'See `BACKLOG.md` - "Documentation Source Truth and Cascade System" (P2) for plans to automate this workflow.',
+              '',
+              '---',
+              '',
+              '**Related Files**:',
+              '- `docs/` - GitHub Pages documentation',
+              '- `CLAUDE.md` - Project guide (source of truth)',
+              '- `README.md` - User-facing documentation',
+              '- `agent-ready-codebase-attributes.md` - Research report (source of truth)',
+              '',
+              '**Labels**: documentation, automation-needed, agent-task'
+            ];
+
             const issue = await github.rest.issues.create({
               owner: context.repo.owner,
               repo: context.repo.repo,
               title: 'docs: Update GitHub Pages documentation',
-              body: `## Documentation Update Request
-
-**Reason**: ${revisionReason}
-
-**Triggered by**: @${actor}
-
-**Action Required**:
-This workflow creates a reminder to update the GitHub Pages documentation using the \`github-pages-docs\` agent in Claude Code.
-
-### Steps to Update Documentation:
-
-1. **Open Claude Code** in this repository
-2. **Run the command**:
-   \`\`\`
-   Use the @agent-github-pages-docs to revise all documentation in docs/ based on:
-   - Latest CLAUDE.md updates
-   - Bootstrap feature implementation status
-   - Recent code changes
-   - ${revisionReason}
-   \`\`\`
-3. **Review the changes** generated by the agent
-4. **Commit and push** to this branch or create a PR
-5. **Close this issue** when complete
-
-### Why Manual Update?
-
-The \`github-pages-docs\` agent requires Claude Code's specialized documentation capabilities. We're tracking this issue to ensure documentation stays synchronized with codebase changes.
-
-### Future Automation
-
-See \`BACKLOG.md\` - "Documentation Source Truth and Cascade System" (P2) for plans to automate this workflow.
-
----
-
-**Related Files**:
-- \`docs/\` - GitHub Pages documentation
-- \`CLAUDE.md\` - Project guide (source of truth)
-- \`README.md\` - User-facing documentation
-- \`agent-ready-codebase-attributes.md\` - Research report (source of truth)
-
-**Labels**: documentation, automation-needed, agent-task
-              `,
+              body: bodyLines.join('\n'),
               labels: ['documentation', 'automation-needed', 'agent-task']
             });
 
             core.setOutput('issue_number', issue.data.number);
-            core.notice(`Created documentation update issue #${issue.data.number}`);
+            core.notice('Created documentation update issue #' + issue.data.number);
             return issue.data.number;
 
       - name: Check for recent source file changes
@@ -100,11 +103,11 @@ See \`BACKLOG.md\` - "Documentation Source Truth and Cascade System" (P2) for pl
             || true)
 
           if [ -n "$CHANGES" ]; then
-            echo "has_changes=true" >> $GITHUB_OUTPUT
+            echo "has_changes=true" >> "$GITHUB_OUTPUT"
             # Save to file to avoid injection issues
             echo "$CHANGES" > /tmp/recent_changes.txt
           else
-            echo "has_changes=false" >> $GITHUB_OUTPUT
+            echo "has_changes=false" >> "$GITHUB_OUTPUT"
           fi
 
       - name: Comment with recent changes
@@ -115,20 +118,23 @@ See \`BACKLOG.md\` - "Documentation Source Truth and Cascade System" (P2) for pl
             const fs = require('fs');
             const recentChanges = fs.readFileSync('/tmp/recent_changes.txt', 'utf-8');
 
+            const commentLines = [
+              '### Recent Changes to Source Files',
+              '',
+              'The following commits may affect documentation:',
+              '',
+              '```',
+              recentChanges.trim(),
+              '```',
+              '',
+              'Please ensure the documentation agent incorporates these changes.'
+            ];
+
             await github.rest.issues.createComment({
               owner: context.repo.owner,
               repo: context.repo.repo,
               issue_number: ${{ steps.create-issue.outputs.issue_number }},
-              body: `### Recent Changes to Source Files
-
-The following commits may affect documentation:
-
-\`\`\`
-${recentChanges}
-\`\`\`
-
-Please ensure the documentation agent incorporates these changes.
-              `
+              body: commentLines.join('\n')
             });
 
       - name: Summary

diff --git a/.markdown-link-check.json b/.markdown-link-check.json
@@ -17,6 +17,9 @@
     },
     {
       "pattern": "^\\{\\{.*\\}\\}$"
+    },
+    {
+      "pattern": "^/agentready/"
     }
   ],
   "timeout": "20s",

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -12,6 +12,11 @@ repos:
       - id: check-json
       - id: detect-private-key
 
+  - repo: https://github.com/rhysd/actionlint
+    rev: v1.7.4
+    hooks:
+      - id: actionlint
+
   - repo: https://github.com/psf/black
     rev: 24.1.1
     hooks:

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,17 @@
+## [2.14.1](https://github.com/ambient-code/agentready/compare/v2.14.0...v2.14.1) (2025-12-05)
+
+
+### Bug Fixes
+
+* resolve YAML syntax error in continuous-learning workflow ([#172](https://github.com/ambient-code/agentready/issues/172)) ([3d40fcc](https://github.com/ambient-code/agentready/commit/3d40fcccd4e8d722303d322716454869ca7db9d0))
+
+# [2.14.0](https://github.com/ambient-code/agentready/compare/v2.13.0...v2.14.0) (2025-12-05)
+
+
+### Features
+
+* container support ([#171](https://github.com/ambient-code/agentready/issues/171)) ([c6874ea](https://github.com/ambient-code/agentready/commit/c6874ea035775ac86ef5012bbfdf52e7b96f556f))
+
 # [2.13.0](https://github.com/ambient-code/agentready/compare/v2.12.3...v2.13.0) (2025-12-04)
 
 

diff --git a/CLAUDE.md b/CLAUDE.md
@@ -2,15 +2,15 @@
 
 **Purpose**: Assess repositories against agent-ready best practices and generate actionable reports.
 
-**Last Updated**: 2025-12-04
+**Last Updated**: 2025-12-05
 
 ---
 
 ## Overview
 
 AgentReady is a Python CLI tool that evaluates repositories against a comprehensive set of carefully researched attributes that make codebases more effective for AI-assisted development. It generates interactive HTML reports, version-control friendly Markdown reports, and machine-readable JSON output.
 
-**Current Status**: v2.13.0 - Core assessment engine complete, most essential assessors implemented, LLM-powered learning, research report management
+**Current Status**: v2.14.1 - Core assessment engine complete, most essential assessors implemented, LLM-powered learning, research report management
 
 **Self-Assessment Score**: 80.0/100 (Gold) - See `examples/self-assessment/`
 
@@ -192,6 +192,133 @@ class MyAssessor(BaseAssessor):
 
 ---
 
+## Terminal-Bench Eval Harness
+
+**Purpose**: Empirically measure the impact of AgentReady assessors on Terminal-Bench performance through systematic A/B testing.
+
+### Overview
+
+The eval harness tests each assessor independently to measure its specific impact on agentic development benchmarks. This provides evidence-based validation of AgentReady's recommendations.
+
+**Architecture**:
+1. **Baseline**: Run Terminal-Bench on unmodified repository (5 iterations)
+2. **Per-Assessor Test**: Apply single assessor remediation → measure delta
+3. **Aggregate**: Rank assessors by impact, calculate tier statistics
+4. **Dashboard**: Generate interactive visualization for GitHub Pages
+
+**Components**:
+- `src/agentready/services/eval_harness/` - Core services (TbenchRunner, BaselineEstablisher, AssessorTester, ResultsAggregator, DashboardGenerator)
+- `src/agentready/models/eval_harness.py` - Data models (TbenchResult, BaselineMetrics, AssessorImpact, EvalSummary)
+- `src/agentready/cli/eval_harness.py` - CLI commands (baseline, test-assessor, run-tier, summarize, dashboard)
+- `docs/tbench.md` - Interactive dashboard with Chart.js
+- `docs/tbench/methodology.md` - Detailed statistical methodology
+
+### Running Evaluations
+
+```bash
+# 1. Establish baseline (run Terminal-Bench 5 times on unmodified repo)
+agentready eval-harness baseline --repo . --iterations 5
+
+# 2. Test single assessor
+agentready eval-harness test-assessor \
+  --assessor-id claude_md_file \
+  --iterations 5
+
+# 3. Test all Tier 1 assessors
+agentready eval-harness run-tier --tier 1 --iterations 5
+
+# 4. Aggregate results (rank by impact, calculate statistics)
+agentready eval-harness summarize --verbose
+
+# 5. Generate dashboard data files for GitHub Pages
+agentready eval-harness dashboard --verbose
+```
+
+### File Structure
+
+```
+.agentready/eval_harness/          # Results storage (gitignored)
+├── baseline/
+│   ├── run_001.json              # Individual tbench runs
+│   ├── run_002.json
+│   ├── ...
+│   └── summary.json              # BaselineMetrics
+├── assessors/
+│   ├── claude_md_file/
+│   │   ├── finding.json          # Assessment result
+│   │   ├── fixes_applied.log     # Remediation log
+│   │   ├── run_001.json          # Post-remediation runs
+│   │   ├── ...
+│   │   └── impact.json           # AssessorImpact metrics
+│   └── ...
+└── summary.json                   # EvalSummary (ranked impacts)
+
+docs/_data/tbench/                 # Dashboard data (committed)
+├── summary.json
+├── ranked_assessors.json
+├── tier_impacts.json
+├── baseline.json
+└── stats.json
+```
+
+### Statistical Methods
+
+**Significance Criteria** (both required):
+- **P-value < 0.05**: Statistically significant at the 5% level (two-sample t-test)
+- **|Cohen's d| > 0.2**: Meaningful effect size
+
+**Effect Size Interpretation**:
+- **0.2 ≤ |d| < 0.5**: Small effect
+- **0.5 ≤ |d| < 0.8**: Medium effect
+- **|d| ≥ 0.8**: Large effect
+
+### Current Status
+
+**Phase 1 (MVP)**: Mocked Terminal-Bench integration ✅
+- All core services implemented and tested
+- CLI commands functional
+- Dashboard with Chart.js visualizations
+- 6 CLI unit tests + 5 integration tests passing
+
+**Phase 2 (Planned)**: Real Terminal-Bench integration
+- Harbor framework client
+- Actual benchmark submissions
+- Leaderboard integration
+
+### Testing
+
+```bash
+# Run eval harness tests
+pytest tests/unit/test_eval_harness*.py -v
+pytest tests/integration/test_eval_harness_e2e.py -v
+```
+
+**Test Coverage**:
+- Models: 90-95%
+- Services: 85-90%
+- CLI: 100% (help commands validated)
+- Integration: End-to-end workflow tested
+
+### Troubleshooting
+
+**Issue**: `FileNotFoundError: Baseline directory not found`
+**Solution**: Run `agentready eval-harness baseline` first
+
+**Issue**: `No assessor results found`
+**Solution**: Run `agentready eval-harness test-assessor` or `run-tier` first
+
+**Issue**: Mocked scores seem unrealistic
+**Solution**: This is expected in Phase 1 (mocked mode); real Terminal-Bench integration is planned for Phase 2
+
+### Documentation
+
+- **User Guide**: `docs/eval-harness-guide.md` - Step-by-step tutorials
+- **Methodology**: `docs/tbench/methodology.md` - Statistical methods explained
+- **Dashboard**: `docs/tbench.md` - Interactive results visualization
+- **Plan**: `.claude/plans/quirky-squishing-plum.md` - Implementation roadmap
+
+---
+
 ## Project Structure
 
 ```
@@ -390,6 +517,6 @@ Use the @agent-github-pages-docs to [action] based on:
 
 ---
 
-**Last Updated**: 2025-12-04 by Jeremy Eder
-**AgentReady Version**: 2.13.0
+**Last Updated**: 2025-12-05 by Jeremy Eder
+**AgentReady Version**: 2.14.1
 **Self-Assessment**: 80.0/100 (Gold) ✨
diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock
@@ -226,8 +226,8 @@ GEM
       jekyll-feed (~> 0.9)
       jekyll-seo-tag (~> 2.1)
     minitest (5.25.4)
-    nokogiri (1.13.10)
-      mini_portile2 (~> 2.8.0)
+    nokogiri (1.18.9)
+      mini_portile2 (~> 2.8.2)
       racc (~> 1.4)
     octokit (4.25.1)
       faraday (>= 1, < 3)
@@ -278,4 +278,4 @@ DEPENDENCIES
   webrick
 
 BUNDLED WITH
-   1.17.2
+   2.5.23
diff --git a/docs/_config.yml b/docs/_config.yml
@@ -41,6 +41,8 @@ navigation:
     url: /roadmaps
   - title: Attributes
     url: /attributes
+  - title: Terminal-Bench
+    url: /tbench
   - title: API Reference
     url: /api-reference
   - title: Examples
-Original file line number
+Diff line change
@@ -17,6 +17,9 @@
         },
         {
           "pattern": "^\\{\\{.*\\}\\}$"
+        },
+        {
+          "pattern": "^/agentready/"
         }
       ],
       "timeout": "20s",
@@ Expand Down @@