hex-inc · jkillian · Jul 21, 2025 · May 29, 2025 · May 29, 2025 · May 29, 2025
diff --git a/.github/scripts/format_benchmark.py b/.github/scripts/format_benchmark.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+"""Format benchmark comparison output with visual indicators for GitHub markdown."""
+
+import re
+import sys
+
+
+def _speed_indicator(factor, direction):
+    """Return the emoji marker for a change of ``factor`` in ``direction``.
+
+    Changes below 1.1x get a neutral white marker regardless of direction;
+    otherwise faster results are green and slower ones red, doubled at 2x+.
+    """
+    if factor < 1.1:
+        return "⚪"
+    dot = "🟢" if direction == "faster" else "🔴"
+    return dot * 2 if factor >= 2.0 else dot
+
+
+def _format_speed(match):
+    """Return the matched ``<factor>x faster|slower`` text in bold, prefixed by its marker."""
+    indicator = _speed_indicator(float(match.group(1)), match.group(2))
+    return f"{indicator} **{match.group(0)}**"
+
+
+def format_benchmark_output(content):
+    """Add visual formatting to benchmark comparison output.
+
+    Args:
+        content: Comparison text, one result per line (markdown table rows
+            use ``|`` as the cell separator).
+
+    Returns:
+        The same text with:
+          * every ``<factor>x faster`` / ``<factor>x slower`` phrase inside a
+            table row bolded and prefixed with a colored marker, and
+          * every ``not significant`` phrase prefixed with a neutral marker,
+            whether it appears in a table cell or in a plain summary line.
+        Blank lines and table separator rows are returned unchanged.
+    """
+    formatted_lines = []
+
+    for line in content.split("\n"):
+        # Blank lines and table separator rows (e.g. "|---|---|") pass through.
+        if not line.strip() or (line.startswith("|") and "---" in line):
+            formatted_lines.append(line)
+            continue
+
+        if "|" in line:
+            # Table row: decorate each speed factor found in it.
+            line = re.sub(r"(\d+\.\d+)x\s+(faster|slower)", _format_speed, line)
+
+        # Handled outside the speed-factor branch: lines reporting
+        # "not significant" normally contain neither "faster" nor "slower".
+        line = line.replace("not significant", "⚪ not significant")
+
+        formatted_lines.append(line)
+
+    return "\n".join(formatted_lines)
+
+
+def main():
+    """Format the benchmark comparison file named on the command line.
+
+    Expects exactly one argument, the path of the file to format, and prints
+    the formatted text to stdout. Usage and error messages go to stderr so
+    they are not captured when stdout is redirected into a results file.
+    Exits with status 1 on a usage error or any failure.
+    """
+    if len(sys.argv) != 2:
+        print("Usage: python format_benchmark.py <input_file>", file=sys.stderr)
+        sys.exit(1)
+
+    input_file = sys.argv[1]
+
+    try:
+        # Read as UTF-8 explicitly rather than relying on the locale default.
+        with open(input_file, "r", encoding="utf-8") as f:
+            content = f.read()
+
+        formatted = format_benchmark_output(content)
+        print(formatted)
+
+    except FileNotFoundError:
+        print(f"Error: File '{input_file}' not found", file=sys.stderr)
+        sys.exit(1)
+    except Exception as e:
+        # Top-level boundary: report any other failure and exit non-zero.
+        print(f"Error: {e}", file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/workflows/benchmark-sqlglot.yml b/.github/workflows/benchmark-sqlglot.yml
@@ -0,0 +1,91 @@
+name: Benchmark pull requests
+
+on:
+  # Comment-triggered runs. "deleted" is intentionally not listed: removing a
+  # comment that contained "/benchmark" should not start another benchmark run.
+  issue_comment:
+    types: [created, edited]
+  pull_request:
+    types: [opened, synchronize, reopened]
+
+jobs:
+  # Benchmarks the PR against main with pyperf and posts the comparison as a
+  # PR comment.
+  run-benchmark:
+    name: run benchmark
+    runs-on: ubuntu-latest
+    # Run only when "/benchmark" appears in a comment on a pull request, or in
+    # the pull request description itself.
+    if: |
+      (github.event_name == 'issue_comment' && 
+       contains(github.event.comment.body, '/benchmark') &&
+       github.event.issue.pull_request) ||
+      (github.event_name == 'pull_request' &&
+       contains(github.event.pull_request.body, '/benchmark'))
+    steps:
+      # NOTE(review): no `ref` is passed here. For issue_comment events the
+      # checkout action presumably falls back to the repository's default
+      # branch rather than the PR head; confirm that comment-triggered runs
+      # really benchmark the PR code.
+      - name: Checkout PR branch
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # Needed to fetch main branch too
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: 3.13
+      # Fresh virtual environment holding pip and pyperf for the PR run.
+      - name: Create a virtual environment
+        run: |
+          python -m venv .venv
+          source ./.venv/bin/activate
+          python -m pip install --upgrade pip
+          pip install pyperf
+      # Installs the checked-out tree, then writes the parse and optimize
+      # benchmark results to bench_*_pr.json in the workspace root.
+      - name: Run benchmark on PR branch
+        run: |
+          source ./.venv/bin/activate
+          make install-dev
+          make install-dev-rs-release
+          python benchmarks/parse.py --quiet --output bench_parse_pr.json
+          python benchmarks/optimize.py --quiet --fast --output bench_optimize_pr.json
+      # origin/main is checked out as a git worktree in ./main-branch so both
+      # trees exist side by side.
+      - name: Checkout main branch into subdir
+        run: |
+          git fetch origin main
+          git worktree add main-branch origin/main
+      # The venv is deleted and recreated before the main-branch run,
+      # presumably so the main run does not reuse the PR's installed packages.
+      - name: Reset virtual environment
+        run: |
+          rm -rf .venv
+          python -m venv .venv
+          source ./.venv/bin/activate
+          python -m pip install --upgrade pip
+          pip install pyperf
+      # Same benchmarks as the PR run, executed from ./main-branch; results
+      # are written one level up as bench_*_main.json.
+      - name: Run benchmark on main branch
+        run: |
+          source ./.venv/bin/activate
+          cd main-branch
+          make install-dev
+          make install-dev-rs-release
+          python benchmarks/parse.py --quiet --output ../bench_parse_main.json
+          python benchmarks/optimize.py --quiet --fast --output ../bench_optimize_main.json
+          cd ..
+      # The main results are passed first and the PR results second; the raw
+      # markdown tables are then post-processed by format_benchmark.py.
+      - name: Compare benchmarks and save results
+        run: |
+          source ./.venv/bin/activate
+          python -m pyperf compare_to bench_parse_main.json bench_parse_pr.json --table --table-format=md > bench_parse_comparison_raw.txt
+          python -m pyperf compare_to bench_optimize_main.json bench_optimize_pr.json --table --table-format=md > bench_optimize_comparison_raw.txt
+
+          # Format with colors
+          python .github/scripts/format_benchmark.py bench_parse_comparison_raw.txt > bench_parse_comparison.txt
+          python .github/scripts/format_benchmark.py bench_optimize_comparison_raw.txt > bench_optimize_comparison.txt
+      # Builds combined_benchmarks.md: a legend followed by both formatted
+      # tables. The legend thresholds (1.1x, 2x) mirror the ones hard-coded in
+      # .github/scripts/format_benchmark.py and must be kept in sync with it.
+      - name: Combine benchmark outputs
+        run: |
+          echo "## Benchmark Results" > combined_benchmarks.md
+          echo "" >> combined_benchmarks.md
+          echo "**Legend:**" >> combined_benchmarks.md
+          echo "- 🟢🟢 = 2x+ faster" >> combined_benchmarks.md
+          echo "- 🟢 = 1.1x - 2x faster" >> combined_benchmarks.md
+          echo "- ⚪ = No significant change (< 1.1x)" >> combined_benchmarks.md
+          echo "- 🔴 = 1.1x - 2x slower" >> combined_benchmarks.md
+          echo "- 🔴🔴 = 2x+ slower" >> combined_benchmarks.md
+          echo "" >> combined_benchmarks.md
+          echo "### Parsing Benchmark" >> combined_benchmarks.md
+          cat bench_parse_comparison.txt >> combined_benchmarks.md
+          echo -e "\n---\n" >> combined_benchmarks.md
+          echo "### Optimization Benchmark" >> combined_benchmarks.md
+          cat bench_optimize_comparison.txt >> combined_benchmarks.md
+      # Despite the step name, the posted body is the combined file, i.e. both
+      # the parsing and the optimization results. The issue number comes from
+      # whichever event (comment or pull request) triggered the run.
+      - name: Comment on PR for parse benchmark results
+        uses: peter-evans/create-or-update-comment@v4
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          issue-number: ${{ github.event.issue.number || github.event.pull_request.number }}
+          body-file: combined_benchmarks.md
@@ -9,6 +9,10 @@
 jobs:
  run-benchmark:
    name: run benchmark
+    permissions:
+      contents: read
+      issues: write
+      pull-requests: write
    runs-on: ubuntu-latest
    if: |
      (github.event_name == 'issue_comment' && 
@@ -9,6 +9,10 @@
 jobs:
  run-benchmark:
    name: run benchmark
+    permissions:
+      contents: read
+      issues: write
+      pull-requests: write
    runs-on: ubuntu-latest
    if: |
      (github.event_name == 'issue_comment' && 
diff --git a/.github/workflows/rust-bench.yml → .github/workflows/benchmark-sqlglotrs.yml b/.github/workflows/rust-bench.yml → .github/workflows/benchmark-sqlglotrs.yml
@@ -1,8 +1,10 @@
+name: Benchmark Rust tokenizer changes
+
 on:
   pull_request:
     paths:
       - 'sqlglotrs/**'
-name: benchmark pull requests
+
 jobs:
   run-benchmark:
     name: run benchmark

diff --git a/.github/workflows/python-publish.yml → .github/workflows/package-publish.yml b/.github/workflows/python-publish.yml → .github/workflows/package-publish.yml
@@ -1,4 +1,4 @@
-name: Publish Python Release to PyPI
+name: Publish sqlglot and sqlglotrs to PyPI
 
 on:
   push:
@@ -55,13 +55,13 @@ jobs:
     - uses: actions/setup-python@v5
       if: matrix.os == 'windows'
       with:
-        python-version: '3.7'
+        python-version: '3.9'
         architecture: ${{ matrix.python-architecture || 'x64' }}
     - name: Build wheels
       uses: PyO3/maturin-action@v1
       with:
         target: ${{ matrix.target }}
-        args: --release --out dist --interpreter 3.7 3.8 3.9 3.10 3.11 3.12 3.13
+        args: --release --out dist --interpreter 3.9 3.10 3.11 3.12 3.13
         sccache: 'true'
         manylinux: auto
         working-directory: ./sqlglotrs
@@ -122,15 +122,15 @@ jobs:
         python -m venv .venv
         source ./.venv/bin/activate
         python -m pip install --upgrade pip
-        pip install setuptools wheel twine
+        pip install build twine
         make install-dev
     - name: Build and publish
       env:
         TWINE_USERNAME: __token__
         TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
       run: |
         source ./.venv/bin/activate
-        python setup.py sdist bdist_wheel
+        python -m build
         twine upload dist/*
     - name: Update API docs
       run: |

diff --git a/.github/workflows/python-package.yml → .github/workflows/package-test.yml b/.github/workflows/python-package.yml → .github/workflows/package-test.yml
@@ -1,22 +1,24 @@
-name: Test and Lint Python Package
+name: Run tests and linter checks
 
 on:
   push:
     branches: [ main ]
   pull_request:
     branches: [ main ]
+
 jobs:
-  build:
+  run-checks:
     runs-on: ubuntu-22.04
     strategy:
       matrix:
-        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
+        cache: pip
     - name: Create a virtual environment
       run: |
         python -m venv .venv
@@ -25,11 +27,7 @@ jobs:
         source ./.venv/bin/activate
         python -m pip install --upgrade pip
         make install-dev
-    - name: Run checks (linter, code style, tests)
+    - name: Run tests and linter checks
       run: |
         source ./.venv/bin/activate
-        if [[ ${{ matrix.python-version }} == "3.7" ]]; then
-          make test test-rs
-        else
-          make check
-        fi
+        make check
diff --git a/.gitignore b/.gitignore
@@ -106,6 +106,7 @@ celerybeat.pid
 .venv
 env/
 venv/
+venv*/
 ENV/
 env.bak/
 venv.bak/
-Original file line number
+Diff line change
@@ Expand Up / @@ -106,6 +106,7 @@ celerybeat.pid @@
     .venv
     env/
     venv/
+    venv*/
     ENV/
     env.bak/
     venv.bak/
@@ Expand Down @@