Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into row_wise_udfs
Browse files Browse the repository at this point in the history
  • Loading branch information
dchigarev committed Jul 14, 2023
2 parents 3f3cc5b + d6a6cf3 commit 52eeecc
Show file tree
Hide file tree
Showing 98 changed files with 2,510 additions and 1,709 deletions.
44 changes: 44 additions & 0 deletions .github/actions/mamba-env/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Composite action: restores a weekly-rotated conda/pip package cache and then
# creates and activates a conda environment through setup-miniconda (Mambaforge + mamba).
name: "Install environment using Mamba"
description: "Prepare the environment to run Modin"
inputs:
  python-version:
    description: "Python version to install"
    default: "3.8"
  environment-file:
    description: "Conda environment yml"
    required: true
  activate-environment:
    description: "Conda environment to activate"
    default: "modin"

runs:
  using: "composite"
  steps:
    - name: Get current week
      id: get-week
      # use current week as cache key to periodically refresh the cache,
      # as cache is based on requirements, but dependencies push
      # updated versions at some irregular pace
      #
      # "$GITHUB_OUTPUT" is quoted so the redirect target is never word-split
      # if the runner's temp path contains spaces.
      run: echo "thisweek=$(/bin/date -u '+%Y.w%W')" >> "$GITHUB_OUTPUT"
      shell: bash
    - name: Cache conda
      id: cache-conda
      uses: actions/cache@v3
      with:
        path: |
          ~/conda_pkgs_dir
          ~/.cache/pip
        # key = OS + week stamp from the previous step + hash of the environment file
        key:
          ${{ runner.os }}-conda-${{ steps.get-week.outputs.thisweek }}-${{ hashFiles(inputs.environment-file) }}
    - uses: conda-incubator/setup-miniconda@v2
      with:
        miniforge-variant: Mambaforge
        miniforge-version: latest
        use-mamba: true
        activate-environment: ${{ inputs.activate-environment }}
        environment-file: ${{ inputs.environment-file }}
        python-version: ${{ inputs.python-version }}
        channel-priority: strict
        # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
        # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
        use-only-tar-bz2: false
15 changes: 15 additions & 0 deletions .github/actions/python-only/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Composite action with a single step: install a Python interpreter through
# actions/setup-python, without any conda environment.
name: 'Install Python only'
description: 'Prepare the environment to run simple tasks'
inputs:
  python-version:
    description: 'Python version to install'
    default: '3.8.x'

runs:
  using: 'composite'
  steps:
    - uses: actions/setup-python@v4
      with:
        # forwarded from the action input above
        python-version: ${{ inputs.python-version }}
        architecture: 'x64'
        # use setup-python's built-in pip cache
        cache: 'pip'
29 changes: 29 additions & 0 deletions .github/actions/run-core-tests/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Composite action that fans out to the four test-group actions in order,
# forwarding the same `runner` and `parallel` inputs to each of them.
name: 'Run core Modin tests'
description: 'Run core Modin tests like dataframe or groupby'
inputs:
  runner:
    description: 'Runner for tests'
    default: 'python -m pytest'
  parallel:
    description: 'How to run tests in parallel'
    default: '-n 2'

runs:
  using: 'composite'
  steps:
    # group 1
    - uses: ./.github/actions/run-core-tests/group_1
      with:
        runner: ${{ inputs.runner }}
        parallel: ${{ inputs.parallel }}
    # group 2
    - uses: ./.github/actions/run-core-tests/group_2
      with:
        runner: ${{ inputs.runner }}
        parallel: ${{ inputs.parallel }}
    # group 3
    - uses: ./.github/actions/run-core-tests/group_3
      with:
        runner: ${{ inputs.runner }}
        parallel: ${{ inputs.parallel }}
    # group 4
    - uses: ./.github/actions/run-core-tests/group_4
      with:
        runner: ${{ inputs.runner }}
        parallel: ${{ inputs.parallel }}
21 changes: 21 additions & 0 deletions .github/actions/run-core-tests/group_1/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Composite action: runs the first batch of dataframe test modules with the
# configured test runner and parallelism flags.
name: "Run core Modin tests - group 1"
description: "Run core Modin tests like dataframe or groupby"
inputs:
  runner:
    description: "Runner for tests"
    default: "python -m pytest"
  parallel:
    description: "How to run tests in parallel"
    default: "-n 2"

runs:
  using: "composite"
  steps:
    # `bash -l {0}` is a custom shell template, so the runner does not add `-e`.
    # Without `set -e` the step's exit status would be that of the trailing
    # `echo "::endgroup::"` (always 0) and a failing test run would go unnoticed.
    - run: |
        set -e
        echo "::group::Running dataframe tests (group 1)..."
        ${{ inputs.runner }} ${{ inputs.parallel }} \
          modin/pandas/test/dataframe/test_binary.py \
          modin/pandas/test/dataframe/test_default.py \
          modin/pandas/test/dataframe/test_indexing.py \
          modin/pandas/test/dataframe/test_iter.py
        echo "::endgroup::"
      shell: bash -l {0}
22 changes: 22 additions & 0 deletions .github/actions/run-core-tests/group_2/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Composite action: runs the second batch of dataframe test modules with the
# configured test runner and parallelism flags.
name: "Run core Modin tests - group 2"
description: "Run core Modin tests like dataframe or groupby"
inputs:
  runner:
    description: "Runner for tests"
    default: "python -m pytest"
  parallel:
    description: "How to run tests in parallel"
    default: "-n 2"

runs:
  using: "composite"
  steps:
    # `bash -l {0}` is a custom shell template, so the runner does not add `-e`.
    # Without `set -e` the step's exit status would be that of the trailing
    # `echo "::endgroup::"` (always 0) and a failing test run would go unnoticed.
    - run: |
        set -e
        echo "::group::Running dataframe tests (group 2)..."
        ${{ inputs.runner }} ${{ inputs.parallel }} \
          modin/pandas/test/dataframe/test_join_sort.py \
          modin/pandas/test/dataframe/test_reduce.py \
          modin/pandas/test/dataframe/test_udf.py \
          modin/pandas/test/dataframe/test_window.py \
          modin/pandas/test/dataframe/test_pickle.py
        echo "::endgroup::"
      shell: bash -l {0}
24 changes: 24 additions & 0 deletions .github/actions/run-core-tests/group_3/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Composite action: runs the series and map/metadata test modules, then the
# groupby test module with MODIN_EXPERIMENTAL_GROUPBY=1 set for that command.
name: "Run core Modin tests - group 3"
description: "Run core Modin tests like dataframe or groupby"
inputs:
  runner:
    description: "Runner for tests"
    default: "python -m pytest"
  parallel:
    description: "How to run tests in parallel"
    default: "-n 2"

runs:
  using: "composite"
  steps:
    # `bash -l {0}` is a custom shell template, so the runner does not add `-e`.
    # Without `set -e` each step's exit status would be that of its trailing
    # `echo "::endgroup::"` (always 0) and a failing test run would go unnoticed.
    - run: |
        set -e
        echo "::group::Running tests (group 3)..."
        ${{ inputs.runner }} ${{ inputs.parallel }} \
          modin/pandas/test/test_series.py \
          modin/pandas/test/dataframe/test_map_metadata.py
        echo "::endgroup::"
      shell: bash -l {0}
    - run: |
        set -e
        echo "::group::Running experimental groupby tests (group 3)..."
        MODIN_EXPERIMENTAL_GROUPBY=1 ${{ inputs.runner }} ${{ inputs.parallel }} modin/pandas/test/test_groupby.py
        echo "::endgroup::"
      shell: bash -l {0}
27 changes: 27 additions & 0 deletions .github/actions/run-core-tests/group_4/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Composite action: runs the rolling/expanding/groupby/reshape/general test
# modules in parallel, then the concat test module without the parallel flags.
name: "Run core Modin tests - group 4"
description: "Run core Modin tests like dataframe or groupby"
inputs:
  runner:
    description: "Runner for tests"
    default: "python -m pytest"
  parallel:
    description: "How to run tests in parallel"
    default: "-n 2"

runs:
  using: "composite"
  steps:
    # `bash -l {0}` is a custom shell template, so the runner does not add `-e`.
    # Without `set -e` each step's exit status would be that of its trailing
    # `echo "::endgroup::"` (always 0) and a failing test run would go unnoticed.
    - run: |
        set -e
        echo "::group::Running tests (group 4)..."
        ${{ inputs.runner }} ${{ inputs.parallel }} \
          modin/pandas/test/test_rolling.py \
          modin/pandas/test/test_expanding.py \
          modin/pandas/test/test_groupby.py \
          modin/pandas/test/test_reshape.py \
          modin/pandas/test/test_general.py
        echo "::endgroup::"
      shell: bash -l {0}
    - run: |
        set -e
        echo "::group::Running concat tests (group 4)..."
        # Ray and Dask versions fail with -n 2, so `parallel` is intentionally not passed here
        ${{ inputs.runner }} modin/pandas/test/test_concat.py
        echo "::endgroup::"
      shell: bash -l {0}
File renamed without changes.
31 changes: 5 additions & 26 deletions .github/workflows/ci-notebooks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ on:
- .github/workflows/ci-notebooks.yml
- setup.cfg
- setup.py
- requirements/env_hdk.yml
concurrency:
# Cancel other jobs in the same branch. We don't care whether CI passes
# on old commits.
Expand All @@ -26,41 +27,19 @@ jobs:
execution: [pandas_on_ray, pandas_on_dask, pandas_on_unidist, hdk_on_native]
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 1
- uses: actions/setup-python@v4
with:
python-version: "3.8.x"
architecture: "x64"
- uses: ./.github/actions/python-only
if: matrix.execution != 'hdk_on_native'
- name: Cache conda
uses: actions/cache@v3
with:
path: |
~/conda_pkgs_dir
~/.cache/pip
key:
${{ runner.os }}-conda-${{ hashFiles('requirements/env_hdk.yml') }}
if: matrix.execution == 'hdk_on_native'
- uses: conda-incubator/setup-miniconda@v2
- uses: ./.github/actions/mamba-env
with:
miniforge-variant: Mambaforge
miniforge-version: latest
use-mamba: true
activate-environment: modin_on_hdk
environment-file: requirements/env_hdk.yml
python-version: 3.8
channel-priority: strict
# we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
# for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
use-only-tar-bz2: false
activate-environment: modin_on_hdk
if: matrix.execution == 'hdk_on_native'
- name: Cache datasets
uses: actions/cache@v2
with:
path: taxi.csv
# update cache only if notebooks require it to be changed
key: hashFiles("examples/tutorial/jupyter/**")
key: taxi-csv-dataset-${{ hashFiles('examples/tutorial/jupyter/**') }}
# replace modin with . in the tutorial requirements file for `pandas_on_ray` and
# `pandas_on_dask` since we need Modin built from sources
- run: sed -i 's/modin/./g' examples/tutorial/jupyter/execution/${{ matrix.execution }}/requirements.txt
Expand Down
81 changes: 79 additions & 2 deletions .github/workflows/ci-required.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@ concurrency:
# on old commits.
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
env:
MODIN_GITHUB_CI: true

jobs:

check-pr-title:
runs-on: ubuntu-latest
steps:
Expand All @@ -14,6 +18,7 @@ jobs:
# NOTE: If you change the allowed prefixes here, update
# the documentation about them in /docs/development/contributing.rst
regexp: '^(?:FEAT|DOCS|FIX|REFACTOR|TEST|PERF)-#\d+:'

build-docs:
name: build docs
runs-on: ubuntu-latest
Expand All @@ -29,5 +34,77 @@ jobs:
cache-dependency-path: '**/requirements-doc.txt'
- run: pip install -r docs/requirements-doc.txt
- run: cd docs && sphinx-build -T -E -W -b html . build
env:
MODIN_GITHUB_CI: true

# Docstring lint job: installs plain Python, runs the tests under scripts/test,
# installs Modin in editable mode, then runs scripts/doc_checker.py over the
# listed packages and modules, one `run` step per group of paths.
lint-pydocstyle:
  name: lint (pydocstyle)
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v3
    - uses: ./.github/actions/python-only
    # The `numpydoc` version here MUST match the versions in the dev requirements files.
    - run: pip install pytest pytest-cov pydocstyle numpydoc==1.1.0 xgboost
    - run: python -m pytest scripts/test
    - run: pip install -e ".[all]"
    # These modules are checked with numpydoc validation disabled and with
    # pydocstyle codes D101, D102, D103 and D105 ignored.
    # NOTE: the last path of a multi-line command must not end with `\`,
    # otherwise any line appended later is silently glued onto it.
    - run: |
        python scripts/doc_checker.py --add-ignore=D101,D102,D103,D105 --disable-numpydoc \
          modin/pandas/dataframe.py modin/pandas/series.py \
          modin/pandas/groupby.py \
          modin/pandas/series_utils.py modin/pandas/general.py \
          modin/pandas/plotting.py modin/pandas/utils.py \
          modin/pandas/iterator.py modin/pandas/indexing.py
    - run: python scripts/doc_checker.py modin/core/dataframe
    - run: python scripts/doc_checker.py modin/core/execution/dask
    - run: |
        python scripts/doc_checker.py \
          modin/pandas/accessor.py modin/pandas/general.py \
          modin/pandas/groupby.py modin/pandas/indexing.py \
          modin/pandas/iterator.py modin/pandas/plotting.py \
          modin/pandas/series_utils.py modin/pandas/utils.py \
          modin/pandas/base.py \
          modin/pandas/io.py \
          asv_bench/benchmarks/utils \
          asv_bench/benchmarks/__init__.py asv_bench/benchmarks/io/__init__.py \
          asv_bench/benchmarks/scalability/__init__.py \
          modin/core/io \
          modin/experimental/core/execution/ray/implementations/pandas_on_ray \
          modin/experimental/core/execution/ray/implementations/pyarrow_on_ray \
          modin/pandas/series.py \
          modin/core/execution/python \
          modin/pandas/dataframe.py \
          modin/config/__init__.py \
          modin/config/__main__.py \
          modin/config/envvars.py \
          modin/config/pubsub.py
    - run: python scripts/doc_checker.py modin/distributed
    - run: python scripts/doc_checker.py modin/utils.py
    - run: python scripts/doc_checker.py modin/experimental/sklearn
    - run: |
        python scripts/doc_checker.py modin/experimental/xgboost/__init__.py \
          modin/experimental/xgboost/utils.py modin/experimental/xgboost/xgboost.py \
          modin/experimental/xgboost/xgboost_ray.py
    - run: python scripts/doc_checker.py modin/core/execution/ray
    - run: |
        python scripts/doc_checker.py modin/core/execution/dispatching/factories/factories.py \
          modin/core/execution/dispatching/factories/dispatcher.py
    # the checker is also run on its own source file
    - run: python scripts/doc_checker.py scripts/doc_checker.py
    - run: |
        python scripts/doc_checker.py modin/experimental/pandas/io.py \
          modin/experimental/pandas/numpy_wrap.py modin/experimental/pandas/__init__.py
    - run: python scripts/doc_checker.py modin/core/storage_formats/base
    - run: python scripts/doc_checker.py modin/experimental/core/storage_formats/pyarrow
    - run: python scripts/doc_checker.py modin/core/storage_formats/pandas
    - run: |
        python scripts/doc_checker.py \
          modin/experimental/core/execution/native/implementations/hdk_on_native/dataframe \
          modin/experimental/core/execution/native/implementations/hdk_on_native/io \
          modin/experimental/core/execution/native/implementations/hdk_on_native/partitioning \
          modin/experimental/core/execution/native/implementations/hdk_on_native/calcite_algebra.py \
          modin/experimental/core/execution/native/implementations/hdk_on_native/calcite_builder.py \
          modin/experimental/core/execution/native/implementations/hdk_on_native/calcite_serializer.py \
          modin/experimental/core/execution/native/implementations/hdk_on_native/df_algebra.py \
          modin/experimental/core/execution/native/implementations/hdk_on_native/expr.py \
          modin/experimental/core/execution/native/implementations/hdk_on_native/hdk_worker.py
    - run: python scripts/doc_checker.py modin/experimental/core/storage_formats/hdk
    - run: python scripts/doc_checker.py modin/experimental/core/execution/native/implementations/hdk_on_native/interchange/dataframe_protocol
    - run: python scripts/doc_checker.py modin/experimental/batch/pipeline.py
    - run: python scripts/doc_checker.py modin/logging
Loading

0 comments on commit 52eeecc

Please sign in to comment.