310 changes: 310 additions & 0 deletions .github/workflows/performance.yml
@@ -0,0 +1,310 @@
name: Performance Benchmarks

on:
  push:
    branches: [ main, develop ]
    paths:
      - 'brokle/auto_instrumentation/**'
      - 'benchmark_instrumentation.py'
      - 'scripts/benchmark_ci.py'
  pull_request:
    branches: [ main ]
    paths:
      - 'brokle/auto_instrumentation/**'
      - 'benchmark_instrumentation.py'
      - 'scripts/benchmark_ci.py'
  schedule:
    # Run performance tests daily at 2 AM UTC
    - cron: '0 2 * * *'
  workflow_dispatch:
    inputs:
      benchmark_type:
        description: 'Benchmark type to run'
        required: false
        default: 'full'
        type: choice
        options:
          - full
          - quick
          - memory

jobs:
  performance-benchmarks:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8", "3.9", "3.10", "3.11"]
      fail-fast: false

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Cache pip dependencies
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements*.txt', '**/pyproject.toml') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .
          pip install pytest pytest-asyncio pytest-benchmark
          # Install optional dependencies for comprehensive testing
          pip install ulid-py psycopg2-binary || true

      - name: Create benchmark results directory
        run: mkdir -p benchmark_results

      - name: Run performance benchmarks
        id: benchmarks
        run: |
          echo "Running performance benchmarks..."
          # Capture the exit code explicitly: the default shell runs with -e,
          # so a plain `$?` after a failing command would never be reached.
          status=0
          python scripts/benchmark_ci.py benchmark_results/ || status=$?
          echo "benchmark_status=$status" >> "$GITHUB_OUTPUT"
        continue-on-error: true

      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: benchmark-results-py${{ matrix.python-version }}
          path: benchmark_results/
          retention-days: 30

      - name: Display benchmark summary
        if: always()
        run: |
          echo "📊 Benchmark Results Summary"
          echo "============================"

          # Use compgen for the existence check: `[ -f glob ]` breaks as soon
          # as more than one report file matches.
          if compgen -G "benchmark_results/benchmark_report_*.txt" > /dev/null; then
            echo "Latest benchmark report:"
            head -n 50 benchmark_results/benchmark_report_*.txt
          else
            echo "No benchmark report found"
          fi

      - name: Check performance thresholds
        if: steps.benchmarks.outputs.benchmark_status != '0'
        run: |
          echo "❌ Performance benchmarks failed!"
          echo "Some operations exceeded performance thresholds."
          echo "Please review the benchmark results and optimize if necessary."
          exit 1

      - name: Performance regression check
        if: github.event_name == 'pull_request'
        run: |
          echo "🔍 Checking for performance regressions..."
          # In a real implementation, this would compare current results
          # with baseline results from the main branch
          echo "Performance regression check would run here"

  performance-comparison:
    runs-on: ubuntu-latest
    needs: performance-benchmarks
    if: github.event_name == 'pull_request'

    steps:
      - name: Download benchmark results
        uses: actions/download-artifact@v4
        with:
          name: benchmark-results-py3.10
          path: ./current-results

      - name: Compare with baseline
        run: |
          echo "📈 Performance Comparison"
          echo "========================"
          echo "This step would compare current performance with baseline"
          echo "and flag any significant regressions."

          # In a real implementation, this would:
          # 1. Download baseline results from main branch
          # 2. Compare key metrics
          # 3. Generate comparison report
          # 4. Comment on PR if regressions found
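          # A rough sketch of steps 2-3, commented out because the baseline
          # download (step 1) is not implemented here. It assumes a baseline
          # artifact has been unpacked into ./baseline-results and that both
          # directories hold JSON files mapping operation name -> mean duration
          # in ms; that layout is an assumption, not an API defined by this PR.
          #
          # python - <<'EOF'
          # import glob
          # import json
          #
          # def load_results(directory):
          #     merged = {}
          #     for path in glob.glob(f"{directory}/*.json"):
          #         merged.update(json.load(open(path)))
          #     return merged
          #
          # baseline = load_results("./baseline-results")
          # current = load_results("./current-results")
          #
          # # Emit a markdown table that a later step could post on the PR.
          # print("| operation | baseline (ms) | current (ms) | change |")
          # print("|---|---|---|---|")
          # for op in sorted(baseline.keys() & current.keys()):
          #     base_ms, cur_ms = baseline[op], current[op]
          #     change = (cur_ms - base_ms) / base_ms * 100 if base_ms else 0.0
          #     print(f"| {op} | {base_ms:.3f} | {cur_ms:.3f} | {change:+.1f}% |")
          # EOF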

  performance-report:
    runs-on: ubuntu-latest
    needs: performance-benchmarks
    if: always() && github.event_name == 'schedule'

    steps:
      - name: Download all benchmark results
        uses: actions/download-artifact@v4

      - name: Generate performance trend report
        run: |
          echo "📊 Performance Trend Report"
          echo "============================"
          echo "Generating daily performance trend report..."

          # In a real implementation, this would:
          # 1. Collect results from all Python versions
          # 2. Generate trend charts
          # 3. Send report to team

          ls -la
          find . -name "*.json" -exec echo "Found: {}" \;
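          # A minimal sketch of step 1, commented out for now. It assumes each
          # downloaded artifact directory (benchmark-results-py3.8, -py3.9, ...)
          # contains JSON files mapping operation name -> mean duration in ms;
          # that layout is an assumption about benchmark_ci.py's output.
          #
          # python - <<'EOF'
          # import datetime
          # import glob
          # import json
          # import os
          #
          # snapshot = {"date": datetime.date.today().isoformat(), "runs": {}}
          # for path in glob.glob("benchmark-results-py*/*.json"):
          #     py_version = os.path.dirname(path).replace("benchmark-results-py", "")
          #     snapshot["runs"].setdefault(py_version, {}).update(json.load(open(path)))
          #
          # # One snapshot per scheduled run; charts would be built from these.
          # with open("trend_snapshot.json", "w") as fh:
          #     json.dump(snapshot, fh, indent=2)
          # print("Wrote trend_snapshot.json for", sorted(snapshot["runs"]))
          # EOF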

      - name: Archive trend data
        run: |
          # In a real implementation, this would store trend data
          # in a database or data warehouse for long-term analysis
          echo "Archiving performance trend data..."

  memory-profiling:
    runs-on: ubuntu-latest
    if: github.event.inputs.benchmark_type == 'memory' || github.event_name == 'schedule'

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python 3.10
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"

      - name: Install memory profiling tools
        run: |
          pip install memory-profiler pympler
          pip install -e .

      - name: Run memory profiling
        run: |
          echo "🧠 Memory Profiling"
          echo "=================="
          python -c "
          import brokle.auto_instrumentation as brokle_ai
          from pympler import tracker

          # Track memory usage
          tr = tracker.SummaryTracker()

          # Perform operations
          for i in range(100):
              brokle_ai.get_status()
              brokle_ai.get_health_report()
              if i % 10 == 0:
                  brokle_ai.reset_all_errors()

          print('Memory usage after 100 operations:')
          tr.print_diff()
          "

  load-testing:
    runs-on: ubuntu-latest
    if: github.event.inputs.benchmark_type == 'full' || github.event_name == 'schedule'

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python 3.10
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"

      - name: Install load testing dependencies
        run: |
          pip install -e .

      - name: Run high-load simulation
        run: |
          echo "🚛 High Load Simulation"
          echo "======================"
          python -c "
          import sys
          import time
          from concurrent.futures import ThreadPoolExecutor
          import brokle.auto_instrumentation as brokle_ai

          # Reset state
          brokle_ai.reset_all_errors()

          # Simulate high load: each worker issues 1,000 status calls
          def worker():
              for i in range(1000):
                  brokle_ai.get_status()
                  if i % 100 == 0:
                      brokle_ai.get_health_report()

          start_time = time.time()

          # Run with multiple threads
          with ThreadPoolExecutor(max_workers=10) as executor:
              futures = [executor.submit(worker) for _ in range(5)]
              for future in futures:
                  future.result()

          end_time = time.time()

          print(f'Completed high-load test in {end_time - start_time:.2f} seconds')
          print('Total operations: 5,000 (5 workers × 1,000 get_status calls each)')
          print(f'Average ops/sec: {5000 / (end_time - start_time):.0f}')

          # Check final health
          health = brokle_ai.get_health_report()
          print(f'Final health score: {health[\"overall_health\"][\"score\"]}%')

          if health['overall_health']['score'] < 80:
              print('❌ Health degraded under high load!')
              sys.exit(1)
          else:
              print('✅ Maintained good health under high load')
          "

  benchmark-comment:
    runs-on: ubuntu-latest
    needs: performance-benchmarks
    if: github.event_name == 'pull_request' && always()

    steps:
      - name: Download benchmark results
        uses: actions/download-artifact@v4
        with:
          name: benchmark-results-py3.10
          path: ./results

      - name: Comment PR with results
        uses: actions/github-script@v6
        with:
          script: |
            const fs = require('fs');
            const path = require('path');

            // Find the latest benchmark report
            const resultsDir = './results';
            const files = fs.readdirSync(resultsDir);
            const reportFile = files.find(f => f.startsWith('benchmark_report_'));

            if (reportFile) {
              const reportPath = path.join(resultsDir, reportFile);
              const report = fs.readFileSync(reportPath, 'utf8');

              const body = `## 📊 Performance Benchmark Results

            \`\`\`
            ${report}
            \`\`\`

            *This comment was automatically generated by the performance benchmark workflow.*
            `;

              await github.rest.issues.createComment({
                issue_number: context.issue.number,
                owner: context.repo.owner,
                repo: context.repo.repo,
                body: body
              });
            } else {
              console.log('No benchmark report found');
            }