Skip to content

Commit

Permalink
[ci] use pre-commit, update actions
Browse files Browse the repository at this point in the history
  • Loading branch information
jameslamb committed Jun 21, 2024
1 parent 6967528 commit 58a33db
Show file tree
Hide file tree
Showing 11 changed files with 129 additions and 71 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,17 @@ jobs:
- task: linting
steps:
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v4
- name: Set up Python
uses: conda-incubator/setup-miniconda@v2
uses: conda-incubator/setup-miniconda@v3
with:
python-version: 3.11
- name: linting
if: matrix.task == 'linting'
shell: bash
run: |
pip install --upgrade black flake8 isort nbqa
make lint
pip install --upgrade pre-commit
pre-commit run --all-files
all-tests-successful:
if: always()
runs-on: ubuntu-latest
Expand Down
49 changes: 49 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
---
# pre-commit configuration. CI runs every hook listed under `repos` with
# `pre-commit run --all-files`.
#
# Top-level `exclude` is a regex of paths that all hooks skip. `(?x)` enables
# verbose mode, so the line breaks inside the pattern are not significant.
# NOTE(review): the pattern is anchored with ^...$, so it matches only the
# path `LightGBM` itself, not paths beneath it - confirm that is enough to
# keep hooks away from the LightGBM checkout.
exclude: |
(?x)^(
LightGBM
)$
repos:
# Generic file checks: TOML syntax, final newline, trailing whitespace.
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: check-toml
- id: end-of-file-fixer
- id: trailing-whitespace
# Import sorting; isort is told to read its settings from pyproject.toml.
- repo: https://github.com/pycqa/isort
rev: 5.13.2
hooks:
- id: isort
name: isort (python)
args: ["--settings-path", "pyproject.toml"]
# Static type checking, configured from pyproject.toml. Paths matching the
# regex "tests" are skipped by this hook only. types-requests (type stubs for
# `requests`) is installed into the hook's environment alongside mypy.
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.10.0
hooks:
- id: mypy
args: ["--config-file", "pyproject.toml"]
exclude: "tests"
additional_dependencies:
- types-requests
# Ruff lints and formats; `types_or` limits both hooks to Python files and
# Jupyter notebooks.
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.4.10
hooks:
# Run the linter.
- id: ruff
args: ["--config", "pyproject.toml"]
types_or: [jupyter, python]
# Run the formatter.
- id: ruff-format
args: ["--config", "pyproject.toml"]
types_or: [python, jupyter]
# Shell formatting: 4-space indentation, a space after redirect operators,
# and files rewritten in place (--write).
- repo: https://github.com/maxwinterstein/shfmt-py
rev: v3.7.0.1
hooks:
- id: shfmt
args: ["--indent=4", "--space-redirects", "--write"]
# Shell linting, with check SC2002 ("useless cat") turned off.
- repo: https://github.com/shellcheck-py/shellcheck-py
rev: v0.10.0.1
hooks:
- id: shellcheck
args: ["--exclude=SC2002"]
16 changes: 0 additions & 16 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,6 @@ ecr-details.json:
--repository-name ${CLUSTER_IMAGE_NAME} \
> ./ecr-details.json

.PHONY: format
format:
black .
isort .
nbqa isort .
nbqa black .

$(LIGHTGBM_REPO):
git clone --recursive https://github.com/microsoft/LightGBM.git

Expand All @@ -100,15 +93,6 @@ lightgbm-unit-tests:
/bin/bash -cex \
"sh ./build-python.sh install --precompile && pip install pytest && pytest -vv -rA tests/python_package_test/test_dask.py"

.PHONY: lint
lint: lint-dockerfiles
isort --check .
black --check --diff .
flake8 --count .
nbqa black --check --diff .
nbqa flake8 .
nbqa isort --check .

.PHONY: lint-dockerfiles
lint-dockerfiles:
for dockerfile in $$(ls | grep -E '^Dockerfile'); do \
Expand Down
14 changes: 8 additions & 6 deletions bin/profile-example-memory-usage.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,19 @@ set -e -u -o pipefail

echo "profiling examples"
mkdir -p "${PROFILING_OUTPUT_DIR}/bin"

# shellcheck disable=SC2044
for py_script in $(find "${LIGHTGBM_HOME}/examples/python-guide" -name '*.py'); do
base_filename=$(basename "${py_script}")
prof_file=$(echo "${base_filename}" | sed -e 's/\.py/\.bin/g')
table_file=$(echo "${base_filename}" | sed -e 's/\.py/-table\.html/g')
leak_table_file=$(echo "${base_filename}" | sed -e 's/\.py/-leak-table\.html/g')
flamegraph_file=$(echo "${base_filename}" | sed -e 's/\.py/-flamegraph\.html/g')
prof_file="${base_filename/.py/.bin}"
table_file="${base_filename/.py/-table.html}"
leak_table_file="${base_filename/.py/-leak-table.html}"
flamegraph_file="${base_filename/.py/-flamegraph.html}"
echo " - ${base_filename}"
memray run \
-o "${PROFILING_OUTPUT_DIR}/bin/${prof_file}" \
"${py_script}" 2>&1 > /dev/null \
|| true
"${py_script}" > /dev/null 2>&1 ||
true
memray table \
-o "${PROFILING_OUTPUT_DIR}/${table_file}" \
--force \
Expand Down
7 changes: 4 additions & 3 deletions bin/profile-examples.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,16 @@
set -e -u -o pipefail

echo "profiling examples"
# shellcheck disable=SC2044
for py_script in $(find "${LIGHTGBM_HOME}/examples/python-guide" -name '*.py'); do
base_filename=$(basename "${py_script}")
prof_file=$(echo "${base_filename}" | sed -e 's/\.py/\.prof/g')
prof_file="${base_filename/.py/.prof}"
echo " - ${base_filename}"
python \
-Wignore \
-m cProfile \
-o "${PROFILING_OUTPUT_DIR}/${prof_file}" \
"${py_script}" 2>&1 > /dev/null \
|| true
"${py_script}" > /dev/null 2>&1 ||
true
done
echo "Done profiling examples. See '${PROFILING_OUTPUT_DIR}' for results."
1 change: 1 addition & 0 deletions jupyter_notebook_config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# mypy: disable-error-code="name-defined"
c.ServerApp.token = ""
c.ServerApp.password = ""
c.ServerApp.open_browser = False
Expand Down
8 changes: 4 additions & 4 deletions notebooks/_img/dask-horizontal.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 3 additions & 1 deletion notebooks/demo-aws.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@
"with open(\"../ecr-details.json\", \"r\") as f:\n",
" ecr_details = json.loads(f.read())\n",
"\n",
"CONTAINER_IMAGE = ecr_details[\"repository\"][\"repositoryUri\"] + \":\" + os.environ[\"DASK_VERSION\"]\n",
"CONTAINER_IMAGE = (\n",
" ecr_details[\"repository\"][\"repositoryUri\"] + \":\" + os.environ[\"DASK_VERSION\"]\n",
")\n",
"print(f\"scheduler and worker image: {CONTAINER_IMAGE}\")"
]
},
Expand Down
35 changes: 24 additions & 11 deletions notebooks/testing/ranker-local.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,12 @@
" avg_gs=10,\n",
" random_state=0,\n",
"):\n",
" \"\"\"Generate a learning-to-rank dataset - feature vectors grouped together with\n",
" integer-valued graded relevance scores. Replace this with a sklearn.datasets function\n",
" if ranking objective becomes supported in sklearn.datasets module.\"\"\"\n",
" \"\"\"\n",
" Generate a learning-to-rank dataset - feature vectors grouped\n",
" together with integer-valued graded relevance scores. Replace this\n",
" with a sklearn.datasets function if ranking objective becomes\n",
" supported in sklearn.datasets module.\n",
" \"\"\"\n",
" rnd_generator = check_random_state(random_state)\n",
"\n",
" y_vec, group_vec = np.empty((0,), dtype=int), np.empty((0,), dtype=int)\n",
Expand All @@ -84,7 +87,8 @@
" x_grid = np.linspace(0, stop=1, num=gmax + 2)\n",
" X = rnd_generator.uniform(size=(n_samples, n_features))\n",
"\n",
" # make first n_informative features values bucketed according to relevance scores.\n",
" # make first n_informative features values\n",
" # bucketed according to relevance scores.\n",
" def bucket_fn(z):\n",
" return rnd_generator.uniform(x_grid[z], high=x_grid[z + 1])\n",
"\n",
Expand All @@ -102,12 +106,14 @@
" g_rle = np.array([sum([1 for _ in grp]) for _, grp in itertools.groupby(g)])\n",
"\n",
" if output == \"dataframe\":\n",
" # add target, weight, and group to DataFrame so that partitions abide by group boundaries.\n",
" # add target, weight, and group to DataFrame so that\n",
" # partitions abide by group boundaries.\n",
" X_df = pd.DataFrame(X, columns=[f\"feature_{i}\" for i in range(X.shape[1])])\n",
" X = X_df.copy()\n",
" X_df = X_df.assign(y=y, g=g, w=w)\n",
"\n",
" # set_index ensures partitions are based on group id. See https://bit.ly/3pAWyNw.\n",
" # set_index ensures partitions are based on group id.\n",
" # See https://bit.ly/3pAWyNw.\n",
" X_df.set_index(\"g\", inplace=True)\n",
" dX = dd.from_pandas(X_df, chunksize=chunk_size)\n",
"\n",
Expand All @@ -117,12 +123,16 @@
" dX = dX.drop(columns=[\"y\", \"w\"])\n",
" dg = dX.index.to_series()\n",
"\n",
" # encode group identifiers into run-length encoding, the format LightGBMRanker is expecting\n",
" # encode group identifiers into run-length encoding,\n",
" # the format LightGBMRanker is expecting\n",
" # so that within each partition, sum(g) = n_samples.\n",
" dg = dg.map_partitions(lambda p: p.groupby(\"g\", sort=False).apply(lambda z: z.shape[0]))\n",
" dg = dg.map_partitions(\n",
" lambda p: p.groupby(\"g\", sort=False).apply(lambda z: z.shape[0])\n",
" )\n",
"\n",
" elif output == \"array\":\n",
" # ranking arrays: one chunk per group. Each chunk must include all columns.\n",
" # ranking arrays: one chunk per group.\n",
" # Each chunk must include all columns.\n",
" p = X.shape[1]\n",
" dX, dy, dw, dg = list(), list(), list(), list()\n",
" for g_idx, rhs in enumerate(np.cumsum(g_rle)):\n",
Expand All @@ -138,7 +148,9 @@
" dg = da.concatenate(dg, axis=0)\n",
"\n",
" else:\n",
" raise ValueError(\"ranking data creation only supported for Dask arrays and dataframes\")\n",
" raise ValueError(\n",
" \"ranking data creation only supported for Dask arrays and dataframes\"\n",
" )\n",
"\n",
" return X, y, w, g_rle, dX, dy, dw, dg"
]
Expand Down Expand Up @@ -219,7 +231,8 @@
"metadata": {},
"outputs": [],
"source": [
"# relative difference between distributed ranker and local ranker spearman corr should be small.\n",
"# relative difference between distributed ranker\n",
"# and local ranker spearman corr should be small.\n",
"lcor = spearmanr(rnkvec_local, y).correlation\n",
"print(np.abs(dcor - lcor))\n",
"assert np.abs(dcor - lcor) < 0.003"
Expand Down
43 changes: 32 additions & 11 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,12 +1,33 @@
[tool.black]
line-length = 100
exclude = '''
/(
| LightGBM
)/
'''
[tool.ruff.lint]
select = [
# flake8-bugbear
"B",
# flake8-comprehensions
"C4",
# pycodestyle
"E",
# pyflakes
"F",
# NumPy-specific rules
"NPY",
# pylint
"PL",
# flake8-return: unnecessary assignment before return
"RET504",
# flake8-simplify: use dict.get() instead of an if-else block
"SIM401",
]

[tool.nbqa.exclude]
black = "LightGBM/"
flake8 = "LightGBM/"
isort = "LightGBM/"
[tool.ruff.lint.per-file-ignores]
"*.ipynb" = [
# (flake8-comprehensions) Unnecessary dict(), list(), or tuple() call
"C408",
# (pylint) too many arguments in function definition
"PLR0913",
# (pylint) Magic value used in comparison
"PLR2004",
]
"jupyter_notebook_config.py" = [
# (pyflakes) undefined name
"F821",
]
15 changes: 0 additions & 15 deletions setup.cfg

This file was deleted.

0 comments on commit 58a33db

Please sign in to comment.