Remove all datasets related extras_require and move requirements to `pyproject.toml` (#3078)

* Remove some requirements from setup.py
  Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
* Comment out dependencies
  Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
* Try fixing tests
  Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
* Add s3fs back
  Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
* Add pyarrow back
  Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
* Fix test
  Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
* Try fixing test
  Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
* Fix e2e test
  Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
* Fix unit test?
  Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
* Move requirements from setup.py to pyproject.toml
  Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
* Revert cloud version test
  Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
* Add package data to pyproject.toml
  Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
* Revert to setup.py
  Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
* Try fixing unit test
  Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
* Fix unit test
  Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
* Revert changes from individual e2e test and change environment.py
  Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
* Remove jupyter upper bounds
  Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
* Pandas version
  Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
* Pin fsspec version in test requirements
  Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
* lint
  Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

---------

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
kedro-org · Oct 3, 2023 · 145f91f · 145f91f
1 parent e9a07bb
commit 145f91f
Show file tree

Hide file tree

Showing 8 changed files with 57 additions and 198 deletions.
diff --git a/features/environment.py b/features/environment.py
@@ -120,6 +120,6 @@ def _install_project_requirements(context):
         .splitlines()
     )
     install_reqs = [req for req in install_reqs if "{" not in req and "#" not in req]
-    install_reqs.append(".[pandas.CSVDataSet]")
+    install_reqs.append("kedro-datasets[pandas.CSVDataSet]")
     call([context.pip, "install", *install_reqs], env=context.env)
     return context
diff --git a/features/load_context.feature b/features/load_context.feature
@@ -1,7 +1,7 @@
 Feature: Custom Kedro project
     Background:
         Given I have prepared a config file
-    And I have run a non-interactive kedro new with starter "default"
+        And I have run a non-interactive kedro new with starter "default"
 
     Scenario: Update the source directory to be nested
         When I move the package to "src/nested"

diff --git a/pyproject.toml b/pyproject.toml
@@ -52,7 +52,57 @@ classifiers = [
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
 ]
-dynamic = ["readme", "version", "optional-dependencies"]
+dynamic = ["readme", "version"]
+
+[project.optional-dependencies]
+test = [
+    "bandit>=1.6.2, <2.0",
+    "behave==1.2.6",
+    "blacken-docs==1.9.2",
+    "black~=22.0",
+    "coverage[toml]",
+    "fsspec<2023.9", # Temporary, newer version causing "test_no_versions_with_cloud_protocol" to fail
+    "import-linter[toml]==1.8.0",
+    "ipython>=7.31.1, <8.0; python_version < '3.8'",
+    "ipython~=8.10; python_version >= '3.8'",
+    "Jinja2<3.1.0",
+    "jupyterlab_server>=2.11.1",
+    "jupyterlab~=3.0",
+    "jupyter~=1.0",
+    "memory_profiler>=0.50.0, <1.0",
+    "moto==1.3.7; python_version < '3.10'",
+    "moto==4.1.12; python_version >= '3.10'",
+    "pandas~=2.0",
+    "pre-commit>=2.9.2, <3.0",  # The hook `mypy` requires pre-commit version 2.9.2.
+    "pyarrow>=1.0; python_version < '3.11'",
+    "pyarrow>=7.0; python_version >= '3.11'",  # Adding to avoid numpy build errors
+    "pyproj~=3.0",
+    "pytest-cov~=3.0",
+    "pytest-mock>=1.7.1, <2.0",
+    "pytest-xdist[psutil]~=2.2.1",
+    "pytest~=7.2",
+    "s3fs>=0.3.0, <0.5",  # Needs to be at least 0.3.0 to make use of `cachable` attribute on S3FileSystem.
+    "semver",
+    "trufflehog~=2.1",
+]
+docs = [
+    # docutils>=0.17 changed the HTML
+    # see https://github.com/readthedocs/sphinx_rtd_theme/issues/1115
+    "docutils==0.16",
+    "sphinx~=5.3.0",
+    "sphinx_rtd_theme==1.2.0",
+    # Regression on sphinx-autodoc-typehints 1.21
+    # that creates some problematic docstrings
+    "sphinx-autodoc-typehints==1.20.2",
+    "sphinx_copybutton==0.3.1",
+    "sphinx-notfound-page",
+    "ipykernel>=5.3, <7.0",
+    "sphinxcontrib-mermaid~=0.7.1",
+    "myst-parser~=1.0.0",
+    "Jinja2<3.1.0",
+    "kedro-datasets[all]~=1.7.0",
+]
+all = [ "kedro[test,docs]" ]
 
 [project.urls]
 Homepage = "https://kedro.org"

diff --git a/setup.py b/setup.py
@@ -1,14 +1,7 @@
 from glob import glob
-from itertools import chain
 
 from setuptools import setup
 
-# at least 1.3 to be able to use XMLDataSet and pandas integration with fsspec
-PANDAS = "pandas~=1.3"
-SPARK = "pyspark>=2.2, <3.4"
-HDFS = "hdfs>=2.5.8, <3.0"
-S3FS = "s3fs>=0.3.0, <0.5"
-
 template_files = []
 for pattern in ["**/*", "**/.*", "**/.*/**", "**/.*/.**"]:
     template_files.extend(
@@ -18,192 +11,8 @@
         ]
     )
 
-
-def _collect_requirements(requires):
-    return sorted(set(chain.from_iterable(requires.values())))
-
-
-api_require = {"api.APIDataSet": ["requests~=2.20"]}
-biosequence_require = {"biosequence.BioSequenceDataSet": ["biopython~=1.73"]}
-dask_require = {"dask.ParquetDataSet": ["dask[complete]~=2021.10", "triad>=0.6.7, <1.0"]}
-geopandas_require = {
-    "geopandas.GeoJSONDataSet": ["geopandas>=0.6.0, <1.0", "pyproj~=3.0"]
-}
-matplotlib_require = {"matplotlib.MatplotlibWriter": ["matplotlib>=3.0.3, <4.0"]}
-holoviews_require = {"holoviews.HoloviewsWriter": ["holoviews>=1.13.0"]}
-networkx_require = {"networkx.NetworkXDataSet": ["networkx~=2.4"]}
-pandas_require = {
-    "pandas.CSVDataSet": [PANDAS],
-    "pandas.ExcelDataSet": [PANDAS, "openpyxl>=3.0.6, <4.0"],
-    "pandas.FeatherDataSet": [PANDAS],
-    "pandas.GBQTableDataSet": [PANDAS, "pandas-gbq>=0.12.0, <0.18.0"],
-    "pandas.GBQQueryDataSet": [PANDAS, "pandas-gbq>=0.12.0, <0.18.0"],
-    "pandas.HDFDataSet": [
-        PANDAS,
-        "tables~=3.6.0; platform_system == 'Windows'",
-        "tables~=3.6; platform_system != 'Windows'",
-    ],
-    "pandas.JSONDataSet": [PANDAS],
-    "pandas.ParquetDataSet": [PANDAS, "pyarrow>=1.0, <7.0"],
-    "pandas.SQLTableDataSet": [PANDAS, "SQLAlchemy~=1.2"],
-    "pandas.SQLQueryDataSet": [PANDAS, "SQLAlchemy~=1.2"],
-    "pandas.XMLDataSet": [PANDAS, "lxml~=4.6"],
-    "pandas.GenericDataSet": [PANDAS],
-}
-pickle_require = {"pickle.PickleDataSet": ["compress-pickle[lz4]~=2.1.0"]}
-pillow_require = {"pillow.ImageDataSet": ["Pillow~=9.0"]}
-video_require = {
-    "video.VideoDataSet": ["opencv-python~=4.5.5.64"]
-}
-plotly_require = {
-    "plotly.PlotlyDataSet": [PANDAS, "plotly>=4.8.0, <6.0"],
-    "plotly.JSONDataSet": ["plotly>=4.8.0, <6.0"],
-}
-redis_require = {"redis.PickleDataSet": ["redis~=4.1"]}
-spark_require = {
-    "spark.SparkDataSet": [SPARK, HDFS, S3FS],
-    "spark.SparkHiveDataSet": [SPARK, HDFS, S3FS],
-    "spark.SparkJDBCDataSet": [SPARK, HDFS, S3FS],
-    "spark.DeltaTableDataSet": [SPARK, HDFS, S3FS, "delta-spark>=1.0, <3.0"],
-}
-svmlight_require = {"svmlight.SVMLightDataSet": ["scikit-learn~=1.0.2", "scipy~=1.7.3"]}
-tensorflow_required = {
-    "tensorflow.TensorflowModelDataset": [
-        # currently only TensorFlow V2 supported for saving and loading.
-        # V1 requires HDF5 and serialises differently
-        "tensorflow~=2.0; platform_system != 'Darwin' or platform_machine != 'arm64'",
-        # https://developer.apple.com/metal/tensorflow-plugin/
-        "tensorflow-macos~=2.0; platform_system == 'Darwin' and platform_machine == 'arm64'",
-    ]
-}
-yaml_require = {"yaml.YAMLDataSet": [PANDAS, "PyYAML>=4.2, <7.0"]}
-
-extras_require = {
-    "api": _collect_requirements(api_require),
-    "biosequence": _collect_requirements(biosequence_require),
-    "dask": _collect_requirements(dask_require),
-    "docs": [
-        # docutils>=0.17 changed the HTML
-        # see https://github.com/readthedocs/sphinx_rtd_theme/issues/1115
-        "docutils==0.16",
-        "sphinx~=5.3.0",
-        "sphinx_rtd_theme==1.2.0",
-        # Regression on sphinx-autodoc-typehints 1.21
-        # that creates some problematic docstrings
-        "sphinx-autodoc-typehints==1.20.2",
-        "sphinx_copybutton==0.3.1",
-        "sphinx-notfound-page",
-        "ipykernel>=5.3, <7.0",
-        "sphinxcontrib-mermaid~=0.7.1",
-        "myst-parser~=1.0.0",
-        "Jinja2<3.1.0",
-        "kedro-datasets[all]~=1.7.0",
-    ],
-    "geopandas": _collect_requirements(geopandas_require),
-    "matplotlib": _collect_requirements(matplotlib_require),
-    "holoviews": _collect_requirements(holoviews_require),
-    "networkx": _collect_requirements(networkx_require),
-    "pandas": _collect_requirements(pandas_require),
-    "pickle": _collect_requirements(pickle_require),
-    "pillow": _collect_requirements(pillow_require),
-    "video": _collect_requirements(video_require),
-    "plotly": _collect_requirements(plotly_require),
-    "redis": _collect_requirements(redis_require),
-    "spark": _collect_requirements(spark_require),
-    "svmlight": _collect_requirements(svmlight_require),
-    "tensorflow": _collect_requirements(tensorflow_required),
-    "yaml": _collect_requirements(yaml_require),
-    **api_require,
-    **biosequence_require,
-    **dask_require,
-    **geopandas_require,
-    **matplotlib_require,
-    **holoviews_require,
-    **networkx_require,
-    **pandas_require,
-    **pickle_require,
-    **pillow_require,
-    **video_require,
-    **plotly_require,
-    **spark_require,
-    **svmlight_require,
-    **tensorflow_required,
-    **yaml_require,
-}
-
-extras_require["all"] = _collect_requirements(extras_require)
-extras_require["test"] = [
-    "adlfs~=2023.1; python_version >= '3.8'",
-    "bandit>=1.6.2, <2.0",
-    "behave==1.2.6",
-    "biopython~=1.73",
-    "blacken-docs==1.9.2",
-    "black~=22.0",
-    "compress-pickle[lz4]~=2.1.0",
-    "coverage[toml]",
-    "dask[complete]~=2021.10",  # pinned by Snyk to avoid a vulnerability
-    "delta-spark>=1.2.1; python_version >= '3.11'",  # 1.2.0 has a bug that breaks some of our tests: https://github.com/delta-io/delta/issues/1070
-    "delta-spark~=1.2.1; python_version < '3.11'",
-    "dill~=0.3.1",
-    "filelock>=3.4.0, <4.0",
-    "gcsfs>=2023.1, <2023.3; python_version >= '3.8'",
-    "geopandas>=0.6.0, <1.0",
-    "hdfs>=2.5.8, <3.0",
-    "holoviews>=1.13.0",
-    "import-linter[toml]==1.8.0",
-    "ipython>=7.31.1, <8.0; python_version < '3.8'",
-    "ipython~=8.10; python_version >= '3.8'",
-    "Jinja2<3.1.0",
-    "joblib>=0.14",
-    "jupyterlab_server>=2.11.1",
-    "jupyterlab~=3.0",
-    "jupyter~=1.0",
-    "lxml~=4.6",
-    "matplotlib>=3.0.3, <3.4; python_version < '3.10'",  # 3.4.0 breaks holoviews
-    "matplotlib>=3.5, <3.6; python_version >= '3.10'",
-    "memory_profiler>=0.50.0, <1.0",
-    "moto==1.3.7; python_version < '3.10'",
-    "moto==4.1.12; python_version >= '3.10'",
-    "networkx~=2.4",
-    "opencv-python~=4.5.5.64",
-    "openpyxl>=3.0.3, <4.0",
-    "pandas-gbq>=0.12.0, <0.18.0; python_version < '3.11'",
-    "pandas-gbq>=0.18.0; python_version >= '3.11'",
-    "pandas~=1.3  # 1.3 for read_xml/to_xml",
-    "Pillow~=9.0",
-    "plotly>=4.8.0, <6.0",
-    "pre-commit>=2.9.2, <3.0",  # The hook `mypy` requires pre-commit version 2.9.2.
-    "pyarrow>=1.0; python_version < '3.11'",
-    "pyarrow>=7.0; python_version >= '3.11'",  # Adding to avoid numpy build errors
-    "pyproj~=3.0",
-    "pyspark>=2.2, <3.4; python_version < '3.11'",
-    "pyspark>=3.4; python_version >= '3.11'",
-    "pytest-cov~=3.0",
-    "pytest-mock>=1.7.1, <2.0",
-    "pytest-xdist[psutil]~=2.2.1",
-    "pytest~=7.2",
-    "redis~=4.1",
-    "requests-mock~=1.6",
-    "requests~=2.20",
-    "s3fs>=0.3.0, <0.5",  # Needs to be at least 0.3.0 to make use of `cachable` attribute on S3FileSystem.
-    "scikit-learn>=1.0.2,<2",
-    "scipy>=1.7.3",
-    "semver",
-    "SQLAlchemy~=1.2",
-    "tables~=3.6.0; platform_system == 'Windows' and python_version<'3.8'",
-    "tables~=3.8.0; platform_system == 'Windows' and python_version>='3.8'",  # Import issues with python 3.8 with pytables pinning to 3.8.0 fixes this https://github.com/PyTables/PyTables/issues/933#issuecomment-1555917593
-    "tables~=3.6; platform_system != 'Windows'",
-    "tensorflow~=2.0; platform_system != 'Darwin' or platform_machine != 'arm64'",
-    # https://developer.apple.com/metal/tensorflow-plugin/
-    "tensorflow-macos~=2.0; platform_system == 'Darwin' and platform_machine == 'arm64'",
-    "triad>=0.6.7, <1.0",
-    "trufflehog~=2.1",
-    "xlsxwriter~=1.0",
-]
-
 setup(
     package_data={
         "kedro": ["py.typed"] + template_files
     },
-    extras_require=extras_require,
 )
diff --git a/tests/framework/context/test_context.py b/tests/framework/context/test_context.py
@@ -13,7 +13,7 @@
 import toml
 import yaml
 from attrs.exceptions import FrozenInstanceError
-from pandas.util.testing import assert_frame_equal
+from pandas.testing import assert_frame_equal
 
 from kedro import __version__ as kedro_version
 from kedro.config import ConfigLoader, MissingConfigException

diff --git a/tests/io/test_data_catalog.py b/tests/io/test_data_catalog.py
@@ -8,7 +8,7 @@
 import pandas as pd
 import pytest
 from kedro_datasets.pandas import CSVDataSet, ParquetDataSet
-from pandas.util.testing import assert_frame_equal
+from pandas.testing import assert_frame_equal
 
 from kedro.io import (
     AbstractDataset,

diff --git a/tests/io/test_incremental_dataset.py b/tests/io/test_incremental_dataset.py
@@ -11,7 +11,7 @@
 from kedro_datasets.pickle import PickleDataSet
 from kedro_datasets.text import TextDataSet
 from moto import mock_s3
-from pandas.util.testing import assert_frame_equal
+from pandas.testing import assert_frame_equal
 
 from kedro.io import AbstractDataset, DatasetError, IncrementalDataset
 from kedro.io.data_catalog import CREDENTIALS_KEY

diff --git a/tests/io/test_partitioned_dataset.py b/tests/io/test_partitioned_dataset.py
@@ -9,7 +9,7 @@
 import s3fs
 from kedro_datasets.pandas import CSVDataSet, ParquetDataSet
 from moto import mock_s3
-from pandas.util.testing import assert_frame_equal
+from pandas.testing import assert_frame_equal
 
 from kedro.io import DatasetError, PartitionedDataset
 from kedro.io.data_catalog import CREDENTIALS_KEY