Skip to content

Commit

Permalink
Remove all datasets related extras_require and move requirements to `pyproject.toml` (#3078)
Browse files Browse the repository at this point in the history

* Remove some requirements from setup.py

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

* Comment out dependencies

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

* Try fixing tests

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

* Add s3fs back

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

* Add pyarrow back

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

* Fix test

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

* Try fixing test

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

* Fix e2e test

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

* Fix unit test?

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

* Move requirements from setup.py to pyproject.toml

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

* Revert cloud version test

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

* Add package data to pyproject.toml

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

* Revert to setup.py

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

* Try fixing unit test

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

* Fix unit test

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

* Revert changes from individual e2e test and change environment.py

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

* Remove jupyter upper bounds

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

* Pandas version

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

* Pin fsspec version in test requirements

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

* lint

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

---------

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
  • Loading branch information
ankatiyar authored Oct 3, 2023
1 parent e9a07bb commit 145f91f
Show file tree
Hide file tree
Showing 8 changed files with 57 additions and 198 deletions.
2 changes: 1 addition & 1 deletion features/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,6 @@ def _install_project_requirements(context):
.splitlines()
)
install_reqs = [req for req in install_reqs if "{" not in req and "#" not in req]
install_reqs.append(".[pandas.CSVDataSet]")
install_reqs.append("kedro-datasets[pandas.CSVDataSet]")
call([context.pip, "install", *install_reqs], env=context.env)
return context
2 changes: 1 addition & 1 deletion features/load_context.feature
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Feature: Custom Kedro project
Background:
Given I have prepared a config file
And I have run a non-interactive kedro new with starter "default"
And I have run a non-interactive kedro new with starter "default"

Scenario: Update the source directory to be nested
When I move the package to "src/nested"
Expand Down
52 changes: 51 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,57 @@ classifiers = [
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
]
dynamic = ["readme", "version", "optional-dependencies"]
dynamic = ["readme", "version"]

# Optional dependency groups ("extras"). Each key below is installable as
# `kedro[<key>]`, e.g. `pip install "kedro[test]"`.
[project.optional-dependencies]
# Tooling for the test suite and code-quality checks (pytest, behave, black,
# bandit, ...) plus the third-party libraries the tests import directly.
test = [
"bandit>=1.6.2, <2.0",
"behave==1.2.6",
"blacken-docs==1.9.2",
"black~=22.0",
"coverage[toml]",
"fsspec<2023.9", # Temporary pin: newer versions cause "test_no_versions_with_cloud_protocol" to fail
"import-linter[toml]==1.8.0",
# Environment markers pick a different ipython range per interpreter version.
"ipython>=7.31.1, <8.0; python_version < '3.8'",
"ipython~=8.10; python_version >= '3.8'",
"Jinja2<3.1.0",
"jupyterlab_server>=2.11.1",
"jupyterlab~=3.0",
"jupyter~=1.0",
"memory_profiler>=0.50.0, <1.0",
# moto is pinned to a different exact version depending on the interpreter version.
"moto==1.3.7; python_version < '3.10'",
"moto==4.1.12; python_version >= '3.10'",
"pandas~=2.0",
"pre-commit>=2.9.2, <3.0", # The hook `mypy` requires pre-commit version 2.9.2.
"pyarrow>=1.0; python_version < '3.11'",
"pyarrow>=7.0; python_version >= '3.11'", # Adding to avoid numpy build errors
"pyproj~=3.0",
"pytest-cov~=3.0",
"pytest-mock>=1.7.1, <2.0",
"pytest-xdist[psutil]~=2.2.1",
"pytest~=7.2",
"s3fs>=0.3.0, <0.5", # Needs to be at least 0.3.0 to make use of `cachable` attribute on S3FileSystem.
"semver",
"trufflehog~=2.1",
]
# Sphinx toolchain and extensions used to build the documentation.
docs = [
# docutils>=0.17 changed the HTML output, hence the exact pin below;
# see https://github.com/readthedocs/sphinx_rtd_theme/issues/1115
"docutils==0.16",
"sphinx~=5.3.0",
"sphinx_rtd_theme==1.2.0",
# Regression on sphinx-autodoc-typehints 1.21
# that creates some problematic docstrings
"sphinx-autodoc-typehints==1.20.2",
"sphinx_copybutton==0.3.1",
"sphinx-notfound-page",
"ipykernel>=5.3, <7.0",
"sphinxcontrib-mermaid~=0.7.1",
"myst-parser~=1.0.0",
# Same Jinja2 upper bound as in the `test` extra.
"Jinja2<3.1.0",
"kedro-datasets[all]~=1.7.0",
]
# Aggregate extra: a self-reference to this package that pulls in both the
# `test` and `docs` extras defined above.
all = [ "kedro[test,docs]" ]

[project.urls]
Homepage = "https://kedro.org"
Expand Down
191 changes: 0 additions & 191 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,7 @@
from glob import glob
from itertools import chain

from setuptools import setup

# at least 1.3 to be able to use XMLDataSet and pandas integration with fsspec
PANDAS = "pandas~=1.3"
SPARK = "pyspark>=2.2, <3.4"
HDFS = "hdfs>=2.5.8, <3.0"
S3FS = "s3fs>=0.3.0, <0.5"

template_files = []
for pattern in ["**/*", "**/.*", "**/.*/**", "**/.*/.**"]:
template_files.extend(
Expand All @@ -18,192 +11,8 @@
]
)


def _collect_requirements(requires):
return sorted(set(chain.from_iterable(requires.values())))


api_require = {"api.APIDataSet": ["requests~=2.20"]}
biosequence_require = {"biosequence.BioSequenceDataSet": ["biopython~=1.73"]}
dask_require = {"dask.ParquetDataSet": ["dask[complete]~=2021.10", "triad>=0.6.7, <1.0"]}
geopandas_require = {
"geopandas.GeoJSONDataSet": ["geopandas>=0.6.0, <1.0", "pyproj~=3.0"]
}
matplotlib_require = {"matplotlib.MatplotlibWriter": ["matplotlib>=3.0.3, <4.0"]}
holoviews_require = {"holoviews.HoloviewsWriter": ["holoviews>=1.13.0"]}
networkx_require = {"networkx.NetworkXDataSet": ["networkx~=2.4"]}
pandas_require = {
"pandas.CSVDataSet": [PANDAS],
"pandas.ExcelDataSet": [PANDAS, "openpyxl>=3.0.6, <4.0"],
"pandas.FeatherDataSet": [PANDAS],
"pandas.GBQTableDataSet": [PANDAS, "pandas-gbq>=0.12.0, <0.18.0"],
"pandas.GBQQueryDataSet": [PANDAS, "pandas-gbq>=0.12.0, <0.18.0"],
"pandas.HDFDataSet": [
PANDAS,
"tables~=3.6.0; platform_system == 'Windows'",
"tables~=3.6; platform_system != 'Windows'",
],
"pandas.JSONDataSet": [PANDAS],
"pandas.ParquetDataSet": [PANDAS, "pyarrow>=1.0, <7.0"],
"pandas.SQLTableDataSet": [PANDAS, "SQLAlchemy~=1.2"],
"pandas.SQLQueryDataSet": [PANDAS, "SQLAlchemy~=1.2"],
"pandas.XMLDataSet": [PANDAS, "lxml~=4.6"],
"pandas.GenericDataSet": [PANDAS],
}
pickle_require = {"pickle.PickleDataSet": ["compress-pickle[lz4]~=2.1.0"]}
pillow_require = {"pillow.ImageDataSet": ["Pillow~=9.0"]}
video_require = {
"video.VideoDataSet": ["opencv-python~=4.5.5.64"]
}
plotly_require = {
"plotly.PlotlyDataSet": [PANDAS, "plotly>=4.8.0, <6.0"],
"plotly.JSONDataSet": ["plotly>=4.8.0, <6.0"],
}
redis_require = {"redis.PickleDataSet": ["redis~=4.1"]}
spark_require = {
"spark.SparkDataSet": [SPARK, HDFS, S3FS],
"spark.SparkHiveDataSet": [SPARK, HDFS, S3FS],
"spark.SparkJDBCDataSet": [SPARK, HDFS, S3FS],
"spark.DeltaTableDataSet": [SPARK, HDFS, S3FS, "delta-spark>=1.0, <3.0"],
}
svmlight_require = {"svmlight.SVMLightDataSet": ["scikit-learn~=1.0.2", "scipy~=1.7.3"]}
tensorflow_required = {
"tensorflow.TensorflowModelDataset": [
# currently only TensorFlow V2 supported for saving and loading.
# V1 requires HDF5 and serialises differently
"tensorflow~=2.0; platform_system != 'Darwin' or platform_machine != 'arm64'",
# https://developer.apple.com/metal/tensorflow-plugin/
"tensorflow-macos~=2.0; platform_system == 'Darwin' and platform_machine == 'arm64'",
]
}
yaml_require = {"yaml.YAMLDataSet": [PANDAS, "PyYAML>=4.2, <7.0"]}

extras_require = {
"api": _collect_requirements(api_require),
"biosequence": _collect_requirements(biosequence_require),
"dask": _collect_requirements(dask_require),
"docs": [
# docutils>=0.17 changed the HTML
# see https://github.com/readthedocs/sphinx_rtd_theme/issues/1115
"docutils==0.16",
"sphinx~=5.3.0",
"sphinx_rtd_theme==1.2.0",
# Regression on sphinx-autodoc-typehints 1.21
# that creates some problematic docstrings
"sphinx-autodoc-typehints==1.20.2",
"sphinx_copybutton==0.3.1",
"sphinx-notfound-page",
"ipykernel>=5.3, <7.0",
"sphinxcontrib-mermaid~=0.7.1",
"myst-parser~=1.0.0",
"Jinja2<3.1.0",
"kedro-datasets[all]~=1.7.0",
],
"geopandas": _collect_requirements(geopandas_require),
"matplotlib": _collect_requirements(matplotlib_require),
"holoviews": _collect_requirements(holoviews_require),
"networkx": _collect_requirements(networkx_require),
"pandas": _collect_requirements(pandas_require),
"pickle": _collect_requirements(pickle_require),
"pillow": _collect_requirements(pillow_require),
"video": _collect_requirements(video_require),
"plotly": _collect_requirements(plotly_require),
"redis": _collect_requirements(redis_require),
"spark": _collect_requirements(spark_require),
"svmlight": _collect_requirements(svmlight_require),
"tensorflow": _collect_requirements(tensorflow_required),
"yaml": _collect_requirements(yaml_require),
**api_require,
**biosequence_require,
**dask_require,
**geopandas_require,
**matplotlib_require,
**holoviews_require,
**networkx_require,
**pandas_require,
**pickle_require,
**pillow_require,
**video_require,
**plotly_require,
**spark_require,
**svmlight_require,
**tensorflow_required,
**yaml_require,
}

extras_require["all"] = _collect_requirements(extras_require)
extras_require["test"] = [
"adlfs~=2023.1; python_version >= '3.8'",
"bandit>=1.6.2, <2.0",
"behave==1.2.6",
"biopython~=1.73",
"blacken-docs==1.9.2",
"black~=22.0",
"compress-pickle[lz4]~=2.1.0",
"coverage[toml]",
"dask[complete]~=2021.10", # pinned by Snyk to avoid a vulnerability
"delta-spark>=1.2.1; python_version >= '3.11'", # 1.2.0 has a bug that breaks some of our tests: https://github.com/delta-io/delta/issues/1070
"delta-spark~=1.2.1; python_version < '3.11'",
"dill~=0.3.1",
"filelock>=3.4.0, <4.0",
"gcsfs>=2023.1, <2023.3; python_version >= '3.8'",
"geopandas>=0.6.0, <1.0",
"hdfs>=2.5.8, <3.0",
"holoviews>=1.13.0",
"import-linter[toml]==1.8.0",
"ipython>=7.31.1, <8.0; python_version < '3.8'",
"ipython~=8.10; python_version >= '3.8'",
"Jinja2<3.1.0",
"joblib>=0.14",
"jupyterlab_server>=2.11.1",
"jupyterlab~=3.0",
"jupyter~=1.0",
"lxml~=4.6",
"matplotlib>=3.0.3, <3.4; python_version < '3.10'", # 3.4.0 breaks holoviews
"matplotlib>=3.5, <3.6; python_version >= '3.10'",
"memory_profiler>=0.50.0, <1.0",
"moto==1.3.7; python_version < '3.10'",
"moto==4.1.12; python_version >= '3.10'",
"networkx~=2.4",
"opencv-python~=4.5.5.64",
"openpyxl>=3.0.3, <4.0",
"pandas-gbq>=0.12.0, <0.18.0; python_version < '3.11'",
"pandas-gbq>=0.18.0; python_version >= '3.11'",
"pandas~=1.3 # 1.3 for read_xml/to_xml",
"Pillow~=9.0",
"plotly>=4.8.0, <6.0",
"pre-commit>=2.9.2, <3.0", # The hook `mypy` requires pre-commit version 2.9.2.
"pyarrow>=1.0; python_version < '3.11'",
"pyarrow>=7.0; python_version >= '3.11'", # Adding to avoid numpy build errors
"pyproj~=3.0",
"pyspark>=2.2, <3.4; python_version < '3.11'",
"pyspark>=3.4; python_version >= '3.11'",
"pytest-cov~=3.0",
"pytest-mock>=1.7.1, <2.0",
"pytest-xdist[psutil]~=2.2.1",
"pytest~=7.2",
"redis~=4.1",
"requests-mock~=1.6",
"requests~=2.20",
"s3fs>=0.3.0, <0.5", # Needs to be at least 0.3.0 to make use of `cachable` attribute on S3FileSystem.
"scikit-learn>=1.0.2,<2",
"scipy>=1.7.3",
"semver",
"SQLAlchemy~=1.2",
"tables~=3.6.0; platform_system == 'Windows' and python_version<'3.8'",
"tables~=3.8.0; platform_system == 'Windows' and python_version>='3.8'", # Import issues with python 3.8 with pytables pinning to 3.8.0 fixes this https://github.com/PyTables/PyTables/issues/933#issuecomment-1555917593
"tables~=3.6; platform_system != 'Windows'",
"tensorflow~=2.0; platform_system != 'Darwin' or platform_machine != 'arm64'",
# https://developer.apple.com/metal/tensorflow-plugin/
"tensorflow-macos~=2.0; platform_system == 'Darwin' and platform_machine == 'arm64'",
"triad>=0.6.7, <1.0",
"trufflehog~=2.1",
"xlsxwriter~=1.0",
]

setup(
package_data={
"kedro": ["py.typed"] + template_files
},
extras_require=extras_require,
)
2 changes: 1 addition & 1 deletion tests/framework/context/test_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import toml
import yaml
from attrs.exceptions import FrozenInstanceError
from pandas.util.testing import assert_frame_equal
from pandas.testing import assert_frame_equal

from kedro import __version__ as kedro_version
from kedro.config import ConfigLoader, MissingConfigException
Expand Down
2 changes: 1 addition & 1 deletion tests/io/test_data_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pandas as pd
import pytest
from kedro_datasets.pandas import CSVDataSet, ParquetDataSet
from pandas.util.testing import assert_frame_equal
from pandas.testing import assert_frame_equal

from kedro.io import (
AbstractDataset,
Expand Down
2 changes: 1 addition & 1 deletion tests/io/test_incremental_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from kedro_datasets.pickle import PickleDataSet
from kedro_datasets.text import TextDataSet
from moto import mock_s3
from pandas.util.testing import assert_frame_equal
from pandas.testing import assert_frame_equal

from kedro.io import AbstractDataset, DatasetError, IncrementalDataset
from kedro.io.data_catalog import CREDENTIALS_KEY
Expand Down
2 changes: 1 addition & 1 deletion tests/io/test_partitioned_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import s3fs
from kedro_datasets.pandas import CSVDataSet, ParquetDataSet
from moto import mock_s3
from pandas.util.testing import assert_frame_equal
from pandas.testing import assert_frame_equal

from kedro.io import DatasetError, PartitionedDataset
from kedro.io.data_catalog import CREDENTIALS_KEY
Expand Down

0 comments on commit 145f91f

Please sign in to comment.