ci: Set up CodSpeed (#15537)

pola-rs · Apr 8, 2024 · 44f1097 · 44f1097
1 parent 0af4164
commit 44f1097
Show file tree

Hide file tree

Showing 16 changed files with 275 additions and 364 deletions.
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
@@ -52,18 +52,18 @@ jobs:
         id: cache-data
         uses: actions/cache/restore@v4
         with:
-          path: py-polars/tests/benchmark/G1_1e7_1e2_5_0.csv
+          path: py-polars/tests/benchmark/data/G1_1e7_1e2_5_0.csv
           key: benchmark-data
 
       - name: Set up R
         if: steps.cache-data.outputs.cache-hit != 'true'
         uses: r-lib/actions/setup-r@v2
         with:
-          r-version: '3.5.3'
+          r-version: '4.3.3'
 
       - name: Generate data
         if: steps.cache-data.outputs.cache-hit != 'true'
-        working-directory: py-polars/tests/benchmark
+        working-directory: py-polars/tests/benchmark/data
         run: |
           Rscript -e 'install.packages("data.table", repos="https://cloud.r-project.org")'
           Rscript groupby-datagen.R 1e7 1e2 5 0
@@ -72,7 +72,7 @@ jobs:
         if: github.ref_name == 'main'
         uses: actions/cache/save@v4
         with:
-          path: py-polars/tests/benchmark/G1_1e7_1e2_5_0.csv
+          path: py-polars/tests/benchmark/data/G1_1e7_1e2_5_0.csv
           key: ${{ steps.cache-data.outputs.cache-primary-key }}
 
       - name: Set up Rust
@@ -93,18 +93,12 @@ jobs:
         working-directory: py-polars
         run: maturin develop --release -- -C codegen-units=8 -C lto=thin -C target-cpu=native
 
-      - name: Run H2O AI database benchmark - on strings
-        working-directory: py-polars/tests/benchmark
-        run: python run_h2oai_benchmark.py on_strings
-
-      - name: Run H2O AI database benchmark - on categoricals
-        working-directory: py-polars/tests/benchmark
-        run: python run_h2oai_benchmark.py
-
-      - name: Run various benchmark tests
-        working-directory: py-polars
-        run: pytest -m release --durations 0 -v
+      - name: Run benchmark tests
+        uses: CodSpeedHQ/action@v2
+        with:
+          working-directory: py-polars
+          run: pytest -m benchmark --codspeed -v
 
       - name: Run non-benchmark tests
         working-directory: py-polars
-        run: pytest -m 'not release and not debug' -n auto --dist loadgroup
+        run: pytest -m 'not benchmark and not debug' -n auto --dist loadgroup
diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml
@@ -86,13 +86,13 @@ jobs:
         run: maturin develop
 
       - name: Run Python tests
-        run: pytest --cov -n auto --dist loadgroup -m "not release and not docs" --cov-report xml:main.xml
+        run: pytest --cov -n auto --dist loadgroup -m "not benchmark and not docs" --cov-report xml:main.xml
         continue-on-error: true
 
       - name: Run Python tests - async reader
         env:
           POLARS_FORCE_ASYNC: 1
-        run: pytest --cov -m "not release and not docs" tests/unit/io/ --cov-report xml:async.xml
+        run: pytest --cov -m "not benchmark and not docs" tests/unit/io/ --cov-report xml:async.xml
         continue-on-error: true
 
       - name: Report coverage

diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml
@@ -89,13 +89,13 @@ jobs:
           # Currently skipped due to performance issues in coverage:
           # https://github.com/nedbat/coveragepy/issues/1665
           COV: ${{ !(matrix.os == 'ubuntu-latest' && matrix.python-version == '3.12') && '--cov' || '--no-cov' }}
-        run: pytest $COV -n auto --dist loadgroup -m "not release and not docs"
+        run: pytest $COV -n auto --dist loadgroup -m "not benchmark and not docs"
 
       - name: Run tests async reader tests
         if: github.ref_name != 'main' && matrix.os != 'windows-latest'
         env:
           POLARS_FORCE_ASYNC: 1
-        run: pytest -m "not release and not docs" tests/unit/io/
+        run: pytest -m "not benchmark and not docs" tests/unit/io/
 
       - name: Check import without optional dependencies
         if: github.ref_name != 'main' && matrix.python-version == '3.12' && matrix.os == 'ubuntu-latest'

diff --git a/docs/development/contributing/test.md b/docs/development/contributing/test.md
@@ -95,26 +95,27 @@ In addition to the [regular options](https://docs.python.org/3/library/doctest.h
 
 ## Benchmark tests
 
-The `benchmark` folder contains code for running the [H2O AI database benchmark](https://github.com/h2oai/db-benchmark).
-It also contains various other benchmark tests.
-
+The `benchmark` folder contains code for running various benchmark tests.
 The aim of this part of the test suite is to spot performance regressions in the code, and to verify that Polars functionality works as expected when run on a release build or at a larger scale.
 
-### Running the H2O AI database benchmark
+Polars uses [CodSpeed](https://codspeed.io/pola-rs/polars) for tracking the performance of the benchmark tests.
 
-The benchmark is somewhat cumbersome to run locally. You must first generate the dataset using the R script provided in the `benchmark` folder. Afterwards, you can simply run the Python script to run the benchmark.
+### Generating data
 
-Make sure to install a release build of Polars before running the benchmark to guarantee the best results.
+For many tests, a relatively large dataset must be generated first.
+We use an [R](https://www.r-project.org/) script to generate this data.
+The script was taken from the [H2O AI database benchmark](https://github.com/h2oai/db-benchmark), which is the foundation for many of the benchmark tests.
 
-Refer to the [benchmark workflow](https://github.com/pola-rs/polars/blob/main/.github/workflows/benchmark.yml) for detailed steps.
+For the exact steps to generate the data, please refer to the [benchmark workflow](https://github.com/pola-rs/polars/blob/main/.github/workflows/benchmark.yml).
+It involves [installing R](https://cran.r-project.org/), installing the [data.table](https://cran.r-project.org/web/packages/data.table/) dependency, and executing a data generation script.
 
-### Running other benchmark tests
+### Running the benchmark tests
 
-The other benchmark tests are run using pytest.
-Run `pytest -m release --durations 0 -v` to run these tests and report run duration.
+The benchmark tests can be run using pytest.
+Run `pytest -m benchmark --durations 0 -v` to run these tests and report run duration.
 
 Note that benchmark tests are excluded by default when running `pytest`.
-You must explicitly specify `-m release` to run them.
+You must explicitly specify `-m benchmark` to run them.
 They will also be excluded when calculating test coverage.
 
 These tests _will_ be run as part of the `make test-all` command.
diff --git a/py-polars/Makefile b/py-polars/Makefile
@@ -100,7 +100,7 @@ test-all: .venv build  ## Run all tests
 
 .PHONY: coverage
 coverage: .venv build  ## Run tests and report coverage
-	$(VENV_BIN)/pytest --cov -n auto --dist loadgroup -m "not release"
+	$(VENV_BIN)/pytest --cov -n auto --dist loadgroup -m "not benchmark"
 
 .PHONY: clean
 clean:  ## Clean up caches and build artifacts

diff --git a/py-polars/pyproject.toml b/py-polars/pyproject.toml
@@ -207,12 +207,11 @@ addopts = [
   "--strict-markers",
   "--import-mode=importlib",
   # Default to running fast tests only. To run ALL tests, run: pytest -m ""
-  "-m not slow and not hypothesis and not release and not write_disk and not docs",
+  "-m not slow and not write_disk and not benchmark and not hypothesis and not docs",
 ]
 markers = [
-  "write_disk: Tests that write to disk",
   "slow: Tests with a longer than average runtime.",
-  "release: Tests that should be run on a Polars release build.",
+  "write_disk: Tests that write to disk",
   "debug: Tests that should be run on a Polars debug build.",
   "docs: Documentation code snippets",
 ]

diff --git a/py-polars/requirements-dev.txt b/py-polars/requirements-dev.txt
@@ -60,6 +60,7 @@ nest_asyncio
 
 hypothesis==6.97.4
 pytest==8.1.1
+pytest-codspeed==2.2.1
 pytest-cov==5.0.0
 pytest-xdist==3.5.0
 

diff --git a/py-polars/tests/benchmark/__init__.py b/py-polars/tests/benchmark/__init__.py
@@ -0,0 +1,8 @@
+"""
+Benchmark tests.
+
+These tests are skipped by default as a large dataset must be generated first.
+
+See the documentation on how to run these tests:
+https://docs.pola.rs/development/contributing/test/#benchmark-tests
+"""
diff --git a/py-polars/tests/benchmark/conftest.py b/py-polars/tests/benchmark/conftest.py
@@ -0,0 +1,34 @@
+from pathlib import Path
+
+import pytest
+
+import polars as pl
+
+
+@pytest.fixture(scope="module")
+def data_path() -> Path:
+    """Return the `data` directory located next to this conftest file."""
+    return Path(__file__).parent / "data"
+
+
+@pytest.fixture(scope="module")
+def h2aoi_groupby_data_path(data_path: Path) -> Path:
+    """Return the path of the `G1_1e7_1e2_5_0.csv` file inside `data_path`."""
+    # NOTE(review): "h2aoi" looks like a transposition of "h2oai" (compare
+    # the `h2oai_groupby_data` fixture). Renaming would require updating every
+    # user of this fixture, so the name is left unchanged here.
+    return data_path / "G1_1e7_1e2_5_0.csv"
+
+
+@pytest.fixture(scope="module")
+def h2oai_groupby_data(h2aoi_groupby_data_path: Path) -> pl.DataFrame:
+    """
+    Load the group-by benchmark CSV into a DataFrame.
+
+    The requesting test is skipped when the CSV file does not exist.
+    The fixture is declared with `scope="module"`.
+    """
+    # Skip rather than fail: the dataset is not guaranteed to be present.
+    if not h2aoi_groupby_data_path.is_file():
+        pytest.skip("Dataset must be generated before running this test.")
+
+    # Only the columns listed in `dtypes` get an explicit dtype override.
+    df = pl.read_csv(
+        h2aoi_groupby_data_path,
+        dtypes={
+            "id4": pl.Int32,
+            "id5": pl.Int32,
+            "id6": pl.Int32,
+            "v1": pl.Int32,
+            "v2": pl.Int32,
+            "v3": pl.Float64,
+        },
+    )
+    return df
diff --git a/py-polars/tests/benchmark/groupby-datagen.R → ...rs/tests/benchmark/data/groupby-datagen.R b/py-polars/tests/benchmark/groupby-datagen.R → ...rs/tests/benchmark/data/groupby-datagen.R