diff --git a/.circleci/setup_env.sh b/.circleci/setup_env.sh
index 52a8cab1cd2de..dc0ba20c190f5 100755
--- a/.circleci/setup_env.sh
+++ b/.circleci/setup_env.sh
@@ -54,11 +54,7 @@ if pip list | grep -q ^pandas; then
   pip uninstall -y pandas || true
 fi
 
-echo "Build extensions"
-# GH 47305: Parallel build can causes flaky ImportError from pandas/_libs/tslibs
-python setup.py build_ext -q -j1
-
 echo "Install pandas"
-python -m pip install --no-build-isolation --no-use-pep517 -e .
+python -m pip install --no-build-isolation -v .
 
 echo "done"
diff --git a/.github/actions/build_pandas/action.yml b/.github/actions/build_pandas/action.yml
index 23bb988ef4d73..5acc5ca572128 100644
--- a/.github/actions/build_pandas/action.yml
+++ b/.github/actions/build_pandas/action.yml
@@ -11,12 +11,15 @@ runs:
       shell: bash -el {0}
 
     - name: Build Pandas
+      if: ${{ runner.os != 'Windows' }}
       run: |
-        python setup.py build_ext -j $N_JOBS
-        python -m pip install -e . --no-build-isolation --no-use-pep517 --no-index
+        pip install . --no-build-isolation -v
       shell: bash -el {0}
-      env:
-        # Cannot use parallel compilation on Windows, see https://github.com/pandas-dev/pandas/issues/30873
-        # GH 47305: Parallel build causes flaky ImportError: /home/runner/work/pandas/pandas/pandas/_libs/tslibs/timestamps.cpython-38-x86_64-linux-gnu.so: undefined symbol: pandas_datetime_to_datetimestruct
-        N_JOBS: 1
-        #N_JOBS: ${{ runner.os == 'Windows' && 1 || 2 }}
+
+    - name: Build Pandas (Windows)
+      if: ${{ runner.os == 'Windows' }}
+      run: |
+        call micromamba activate test
+        call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
+        python -m pip install . --no-build-isolation -v
+      shell: cmd /C call {0}
diff --git a/.github/workflows/32-bit-linux.yml b/.github/workflows/32-bit-linux.yml
index 8c9f0b594f321..073776e3158df 100644
--- a/.github/workflows/32-bit-linux.yml
+++ b/.github/workflows/32-bit-linux.yml
@@ -5,12 +5,10 @@ on:
     branches:
       - main
       - 1.5.x
-      - 1.4.x
   pull_request:
     branches:
       - main
       - 1.5.x
-      - 1.4.x
     paths-ignore:
       - "doc/**"
 
@@ -41,11 +39,12 @@ jobs:
           . ~/virtualenvs/pandas-dev/bin/activate && \
           python -m pip install --no-deps -U pip wheel 'setuptools<60.0.0' && \
           pip install cython numpy python-dateutil pytz pytest pytest-xdist pytest-asyncio>=0.17 hypothesis && \
-          python setup.py build_ext -q -j1 && \
-          python -m pip install --no-build-isolation --no-use-pep517 -e . && \
-          python -m pip list && \
+          pip install "git+https://github.com/mesonbuild/meson.git@master" && \
+          pip install "git+https://github.com/mesonbuild/meson-python.git@main" && \
           export PANDAS_CI=1 && \
-          pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml"
+          python -m pip install --no-build-isolation -v . && \
+          python -m pip list && \
+          pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml --import-mode=importlib"
 
     - name: Publish test results for Python 3.8-32 bit full Linux
       uses: actions/upload-artifact@v3
diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml
index 6aff77c708378..583619826265c 100644
--- a/.github/workflows/code-checks.yml
+++ b/.github/workflows/code-checks.yml
@@ -5,12 +5,10 @@ on:
     branches:
       - main
       - 1.5.x
-      - 1.4.x
   pull_request:
     branches:
       - main
       - 1.5.x
-      - 1.4.x
 
 env:
   ENV_FILE: environment.yml
@@ -88,7 +86,7 @@ jobs:
         echo "PYTHONPATH=$PYTHONPATH" >> $GITHUB_ENV
       if: ${{ steps.build.outcome == 'success' && always() }}
 
    - name: Typing + pylint
      uses: pre-commit/action@v2.0.3
      with:
        extra_args: --hook-stage manual --all-files
diff --git a/.github/workflows/docbuild-and-upload.yml b/.github/workflows/docbuild-and-upload.yml
index cfb4966847721..45306c65c36ee 100644
--- a/.github/workflows/docbuild-and-upload.yml
+++ b/.github/workflows/docbuild-and-upload.yml
@@ -5,14 +5,12 @@ on:
    branches:
      - main
      - 1.5.x
-      - 1.4.x
    tags:
      - '*'
  pull_request:
    branches:
      - main
      - 1.5.x
-      - 1.4.x
 
 env:
   ENV_FILE: environment.yml
@@ -51,10 +49,10 @@ jobs:
      run: python web/pandas_web.py web/pandas --target-path=web/build
 
    - name: Build documentation
-      run: doc/make.py --warnings-are-errors
+      run: cd doc && python make.py --warnings-are-errors
 
    - name: Build documentation zip
-      run: doc/make.py zip_html
+      run: cd doc && python make.py zip_html
 
    - name: Build the interactive terminal
      run: |
diff --git a/.github/workflows/macos-windows.yml b/.github/workflows/macos-windows.yml
index 63c065fa3d701..81c798c9d86c3 100644
--- a/.github/workflows/macos-windows.yml
+++ b/.github/workflows/macos-windows.yml
@@ -5,12 +5,10 @@ on:
    branches:
      - main
      - 1.5.x
-      - 1.4.x
  pull_request:
    branches:
      - main
      - 1.5.x
-      - 1.4.x
    paths-ignore:
      - "doc/**"
 
diff --git a/.github/workflows/python-dev.yml b/.github/workflows/python-dev.yml
index b725f6812bc3b..145e5e00d57ec 100644
--- a/.github/workflows/python-dev.yml
+++ b/.github/workflows/python-dev.yml
@@ -25,12 +25,10 @@ on:
    branches:
      - main
      - 1.5.x
-      - 1.4.x
  pull_request:
    branches:
      - main
      - 1.5.x
-      - 1.4.x
    paths-ignore:
      - "doc/**"
 
@@ -77,18 +75,34 @@ jobs:
          python -m pip install --upgrade pip setuptools wheel
          python -m pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy
          python -m pip install git+https://github.com/nedbat/coveragepy.git
-          python -m pip install python-dateutil pytz cython hypothesis==6.52.1 pytest>=6.2.5 pytest-xdist pytest-cov pytest-asyncio>=0.17
+          python -m pip install python-dateutil pytz cython
+          # TODO: update when upstream releases fixes
+          python -m pip install "git+https://github.com/mesonbuild/meson.git@master"
+          python -m pip install "git+https://github.com/FFY00/meson-python.git@main"
+          python -m pip install hypothesis==6.52.1 pytest>=6.2.5 pytest-xdist pytest-cov pytest-asyncio>=0.17
          python -m pip list
 
-      # GH 47305: Parallel build can cause flaky ImportError from pandas/_libs/tslibs
+      # Sigh, someone (numpy?) is depending on mingw, which pandas doesn't compile with.
+      # Also, meson doesn't detect visual c++ unless cl.exe is in path.
+      # TODO: File a bug with meson about this.
      - name: Build Pandas
+        if: ${{ runner.os != 'Windows' }}
        run: |
-          python setup.py build_ext -q -j1
-          python -m pip install -e . --no-build-isolation --no-use-pep517 --no-index
+          python3 -m pip install . --no-build-isolation -v
+        shell: bash -el {0}
+
+      - name: Build Pandas (Windows)
+        if: ${{ runner.os == 'Windows' }}
+        run: |
+          call micromamba activate test
+          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
+          python -m pip install . --no-build-isolation -v
+        shell: cmd /C call {0}
 
      - name: Build Version
        run: |
-          python -c "import pandas; pandas.show_versions();"
+          # Can't import pandas from the source directory
+          cd .. && python -c "import pandas; pandas.show_versions();"
 
      - name: Test
        uses: ./.github/actions/run-tests
diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml
index 73cab7ff909fc..1eedfe5b8ab51 100644
--- a/.github/workflows/scorecards.yml
+++ b/.github/workflows/scorecards.yml
@@ -29,7 +29,7 @@ jobs:
          persist-credentials: false
 
      - name: "Run analysis"
-        uses: ossf/scorecard-action@v2.0.3
+        uses: ossf/scorecard-action@v2.0.6
        with:
          results_file: results.sarif
          results_format: sarif
diff --git a/.github/workflows/sdist.yml b/.github/workflows/sdist.yml
index 14cede7bc1a39..9957fc72e9f51 100644
--- a/.github/workflows/sdist.yml
+++ b/.github/workflows/sdist.yml
@@ -5,12 +5,10 @@ on:
    branches:
      - main
      - 1.5.x
-      - 1.4.x
  pull_request:
    branches:
      - main
      - 1.5.x
-      - 1.4.x
    types: [labeled, opened, synchronize, reopened]
    paths-ignore:
      - "doc/**"
@@ -30,7 +28,7 @@ jobs:
  strategy:
    fail-fast: false
    matrix:
-      python-version: ["3.8", "3.9", "3.10"]
+      python-version: ["3.8", "3.9", "3.10", "3.11"]
  concurrency:
    # https://github.community/t/concurrecy-not-work-for-push/183068/7
    group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{matrix.python-version}}-sdist
@@ -42,7 +40,7 @@ jobs:
        fetch-depth: 0
 
    - name: Set up Python
-      uses: actions/setup-python@v3
+      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}
@@ -86,6 +84,8 @@ jobs:
            pip install numpy==1.20.3 ;;
          3.10)
            pip install numpy==1.21.2 ;;
+          3.11)
+            pip install numpy==1.23.2 ;;
        esac
 
    - name: Import pandas
diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml
index 072e2523c9727..69bde1d812e07 100644
--- a/.github/workflows/ubuntu.yml
+++ b/.github/workflows/ubuntu.yml
@@ -5,12 +5,10 @@ on:
    branches:
      - main
      - 1.5.x
-      - 1.4.x
  pull_request:
    branches:
      - main
      - 1.5.x
-      - 1.4.x
    paths-ignore:
      - "doc/**"
 
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index 218d95229e93a..b0ff6a3110f6a 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -52,7 +52,7 @@ jobs:
        - [windows-2019, win_amd64]
        - [windows-2019, win32]
        # TODO: support PyPy?
-        python: [["cp38", "3.8"], ["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11-dev"]]# "pp38", "pp39"]
+        python: [["cp38", "3.8"], ["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11"]]# "pp38", "pp39"]
    env:
      IS_PUSH: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') }}
      IS_SCHEDULE_DISPATCH: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
@@ -72,14 +72,22 @@ jobs:
        env:
          CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}
 
-      # Used to test the built wheels
-      - uses: actions/setup-python@v3
+      # Used to test (Windows-only) and push the built wheels
+      # You might need to use setup-python separately
+      # if the new Python-dev version
+      # is unavailable on conda-forge.
+      - uses: conda-incubator/setup-miniconda@v2
        with:
+          auto-update-conda: true
          python-version: ${{ matrix.python[1] }}
+          activate-environment: test
+          channels: conda-forge, anaconda
+          channel-priority: true
+          mamba-version: "*"
 
      - name: Test wheels (Windows 64-bit only)
        if: ${{ matrix.buildplat[1] == 'win_amd64' }}
-        shell: cmd
+        shell: cmd /C CALL {0}
        run: |
          python ci/test_wheels.py wheelhouse
 
@@ -88,26 +96,15 @@ jobs:
          name: ${{ matrix.python[0] }}-${{ startsWith(matrix.buildplat[1], 'macosx') && 'macosx' || matrix.buildplat[1] }}
          path: ./wheelhouse/*.whl
 
-      # Used to push the built wheels
-      # TODO: once Python 3.11 is available on conda, de-dup with
-      # setup python above
-      - uses: conda-incubator/setup-miniconda@v2
-        with:
-          auto-update-conda: true
-          # Really doesn't matter what version we upload with
-          # just the version we test with
-          python-version: '3.8'
-          channels: conda-forge
-          channel-priority: true
-          mamba-version: "*"
-
      - name: Install anaconda client
        if: ${{ success() && (env.IS_SCHEDULE_DISPATCH == 'true' || env.IS_PUSH == 'true') }}
+        shell: bash -el {0}
        run: conda install -q -y anaconda-client
 
      - name: Upload wheels
-        if: success()
+        if: ${{ success() && (env.IS_SCHEDULE_DISPATCH == 'true' || env.IS_PUSH == 'true') }}
        shell: bash -el {0}
        env:
          PANDAS_STAGING_UPLOAD_TOKEN: ${{ secrets.PANDAS_STAGING_UPLOAD_TOKEN }}
@@ -180,11 +177,12 @@ jobs:
    - name: Install anaconda client
      if: ${{ success() && (env.IS_SCHEDULE_DISPATCH == 'true' || env.IS_PUSH == 'true') }}
+      shell: bash -el {0}
      run: |
        conda install -q -y anaconda-client
 
    - name: Upload sdist
-      if: success()
+      if: ${{ success() && (env.IS_SCHEDULE_DISPATCH == 'true' || env.IS_PUSH == 'true') }}
      shell: bash -el {0}
      env:
        PANDAS_STAGING_UPLOAD_TOKEN: ${{ secrets.PANDAS_STAGING_UPLOAD_TOKEN }}
diff --git a/.gitignore b/.gitignore
index 07b1f056d511b..324e978e01e19 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,6 +36,7 @@
 *.py[ocd]
 *.so
 .build_cache_dir
+.mesonpy-native-file.ini
 MANIFEST
 
 # Python files #
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index d41d091d03bd7..6aa1f5659365f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -18,16 +18,16 @@ repos:
        pass_filenames: true
        require_serial: false
 -   repo: https://github.com/python/black
-    rev: 22.8.0
+    rev: 22.10.0
    hooks:
    -   id: black
 -   repo: https://github.com/codespell-project/codespell
-    rev: v2.2.1
+    rev: v2.2.2
    hooks:
    -   id: codespell
        types_or: [python, rst, markdown]
 -   repo: https://github.com/MarcoGorelli/cython-lint
-    rev: v0.1.8
+    rev: v0.2.1
    hooks:
    -   id: cython-lint
 -   repo: https://github.com/pre-commit/pre-commit-hooks
@@ -60,15 +60,16 @@ repos:
        - flake8-bugbear==22.7.1
        - pandas-dev-flaker==0.5.0
 -   repo: https://github.com/pycqa/pylint
-    rev: v2.15.3
+    rev: v2.15.5
    hooks:
    -   id: pylint
+        stages: [manual]
 -   repo: https://github.com/PyCQA/isort
    rev: 5.10.1
    hooks:
    -   id: isort
 -   repo: https://github.com/asottile/pyupgrade
-    rev: v2.38.2
+    rev: v3.2.0
    hooks:
    -   id: pyupgrade
        args: [--py38-plus]
@@ -83,7 +84,7 @@ repos:
        types: [text]  # overwrite types: [rst]
        types_or: [python, rst]
 -   repo: https://github.com/sphinx-contrib/sphinx-lint
-    rev: v0.6.1
+    rev: v0.6.7
    hooks:
    -   id: sphinx-lint
 -   repo: https://github.com/asottile/yesqa
@@ -102,7 +103,7 @@ repos:
        types: [python]
        stages: [manual]
        additional_dependencies: &pyright_dependencies
-        - pyright@1.1.264
+        - pyright@1.1.276
    -   id: pyright_reportGeneralTypeIssues
        # note: assumes python env is setup and activated
        name: pyright reportGeneralTypeIssues
diff --git a/README.md b/README.md
index 69f2395920c66..61cc74da93eb8 100644
--- a/README.md
+++ b/README.md
@@ -166,6 +166,6 @@ You can also triage issues which may include reproducing bug reports, or asking
 
 Or maybe through using pandas you have an idea of your own or are looking for something in the documentation and thinking ‘this can be improved’...you can do something about it!
 
-Feel free to ask questions on the [mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata) or on [Gitter](https://gitter.im/pydata/pandas).
+Feel free to ask questions on the [mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata) or on [Slack](https://pandas.pydata.org/docs/dev/development/community.html?highlight=slack#community-slack).
 
 As contributors and maintainers to this project, you are expected to abide by pandas' code of conduct. More information can be found at: [Contributor Code of Conduct](https://github.com/pandas-dev/.github/blob/master/CODE_OF_CONDUCT.md)
diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json
index b1ea2682b7ea7..4a0c882640eb6 100644
--- a/asv_bench/asv.conf.json
+++ b/asv_bench/asv.conf.json
@@ -54,7 +54,6 @@
        "openpyxl": [],
        "xlsxwriter": [],
        "xlrd": [],
-        "xlwt": [],
        "odfpy": [],
        "jinja2": [],
    },
diff --git a/asv_bench/benchmarks/attrs_caching.py b/asv_bench/benchmarks/attrs_caching.py
index d4366c42f96aa..d515743ea4431 100644
--- a/asv_bench/benchmarks/attrs_caching.py
+++ b/asv_bench/benchmarks/attrs_caching.py
@@ -15,7 +15,7 @@ def setup(self):
        self.cur_index = self.df.index
 
    def time_get_index(self):
-        self.foo = self.df.index
+        self.df.index
 
    def time_set_index(self):
        self.df.index = self.cur_index
diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
index a28e20a636ce2..5bb87b8bb2663 100644
--- a/asv_bench/benchmarks/frame_methods.py
+++ b/asv_bench/benchmarks/frame_methods.py
@@ -28,26 +28,6 @@ def time_frame_get_numeric_data(self):
        self.df._get_numeric_data()
 
 
-class Lookup:
-    def setup(self):
-        self.df = DataFrame(np.random.randn(10000, 8), columns=list("abcdefgh"))
-        self.df["foo"] = "bar"
-        self.row_labels = list(self.df.index[::10])[:900]
-        self.col_labels = list(self.df.columns) * 100
-        self.row_labels_all = np.array(
-            list(self.df.index) * len(self.df.columns), dtype="object"
-        )
-        self.col_labels_all = np.array(
-            list(self.df.columns) * len(self.df.index), dtype="object"
-        )
-
-    def time_frame_fancy_lookup(self):
-        self.df.lookup(self.row_labels, self.col_labels)
-
-    def time_frame_fancy_lookup_all(self):
-        self.df.lookup(self.row_labels_all, self.col_labels_all)
-
-
 class Reindex:
    def setup(self):
        N = 10**3
diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
index 2225cbd74d718..6dff4a017e2a9 100644
--- a/asv_bench/benchmarks/groupby.py
+++ b/asv_bench/benchmarks/groupby.py
@@ -310,7 +310,7 @@ def time_different_python_functions_multicol(self, df):
        df.groupby(["key1", "key2"]).agg([sum, min, max])
 
    def time_different_python_functions_singlecol(self, df):
-        df.groupby("key1").agg([sum, min, max])
+        df.groupby("key1")[["value1", "value2", "value3"]].agg([sum, min, max])
 
 
 class GroupStrings:
@@ -683,7 +683,7 @@ class String:
    def setup(self, dtype, method):
        cols = list("abcdefghjkl")
        self.df = DataFrame(
-            np.random.randint(0, 100, size=(1_000_000, len(cols))),
+            np.random.randint(0, 100, size=(10_000, len(cols))),
            columns=cols,
            dtype=dtype,
        )
diff --git a/asv_bench/benchmarks/index_cached_properties.py b/asv_bench/benchmarks/index_cached_properties.py
index 1a88bb7eef37a..349841f695416 100644
--- a/asv_bench/benchmarks/index_cached_properties.py
+++ b/asv_bench/benchmarks/index_cached_properties.py
@@ -70,6 +70,3 @@ def time_engine(self, index_type):
 
    def time_inferred_type(self, index_type):
        self.idx.inferred_type
-
-    def time_is_all_dates(self, index_type):
-        self.idx.is_all_dates
diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py
index 0bbb599f2b045..5de3bcda46424 100644
--- a/asv_bench/benchmarks/inference.py
+++ b/asv_bench/benchmarks/inference.py
@@ -223,7 +223,7 @@ class ToDatetimeFormat:
    def setup(self):
        N = 100000
        self.s = Series(["19MAY11", "19MAY11:00:00:00"] * N)
-        self.s2 = self.s.str.replace(":\\S+$", "")
+        self.s2 = self.s.str.replace(":\\S+$", "", regex=True)
 
        self.same_offset = ["10/11/2018 00:00:00.045-07:00"] * N
        self.diff_offset = [
diff --git a/asv_bench/benchmarks/io/excel.py b/asv_bench/benchmarks/io/excel.py
index a88c4374b7030..5bd4d832f3dde 100644
--- a/asv_bench/benchmarks/io/excel.py
+++ b/asv_bench/benchmarks/io/excel.py
@@ -33,7 +33,7 @@ def _generate_dataframe():
 
 
 class WriteExcel:
-    params = ["openpyxl", "xlsxwriter", "xlwt"]
+    params = ["openpyxl", "xlsxwriter"]
    param_names = ["engine"]
 
    def setup(self, engine):
@@ -68,10 +68,9 @@ def time_write_excel_style(self, engine):
 
 
 class ReadExcel:
-    params = ["xlrd", "openpyxl", "odf"]
+    params = ["openpyxl", "odf"]
    param_names = ["engine"]
    fname_excel = "spreadsheet.xlsx"
-    fname_excel_xls = "spreadsheet.xls"
    fname_odf = "spreadsheet.ods"
 
    def _create_odf(self):
@@ -92,13 +91,10 @@ def setup_cache(self):
        self.df = _generate_dataframe()
 
        self.df.to_excel(self.fname_excel, sheet_name="Sheet1")
-        self.df.to_excel(self.fname_excel_xls, sheet_name="Sheet1")
        self._create_odf()
 
    def time_read_excel(self, engine):
-        if engine == "xlrd":
-            fname = self.fname_excel_xls
-        elif engine == "odf":
+        if engine == "odf":
            fname = self.fname_odf
        else:
            fname = self.fname_excel
@@ -107,9 +103,7 @@ def time_read_excel(self, engine):
 
 class ReadExcelNRows(ReadExcel):
    def time_read_excel(self, engine):
-        if engine == "xlrd":
-            fname = self.fname_excel_xls
-        elif engine == "odf":
+        if engine == "odf":
            fname = self.fname_odf
        else:
            fname = self.fname_excel
diff --git a/asv_bench/benchmarks/io/hdf.py b/asv_bench/benchmarks/io/hdf.py
index 4a2c1c872e6eb..12bc65f9e7bf5 100644
--- a/asv_bench/benchmarks/io/hdf.py
+++ b/asv_bench/benchmarks/io/hdf.py
@@ -43,7 +43,7 @@ def setup(self):
            np.random.randn(N, 100), index=date_range("1/1/2000", periods=N)
        )
        self.df_dc = DataFrame(
-            np.random.randn(N, 10), columns=["C%03d" % i for i in range(10)]
+            np.random.randn(N, 10), columns=[f"C{i:03d}" for i in range(10)]
        )
 
        self.fname = "__test__.h5"
diff --git a/asv_bench/benchmarks/io/sql.py b/asv_bench/benchmarks/io/sql.py
index fb8b7dafa0ade..c1f378e8075e9 100644
--- a/asv_bench/benchmarks/io/sql.py
+++ b/asv_bench/benchmarks/io/sql.py
@@ -38,7 +38,7 @@ def setup(self, connection):
            },
            index=tm.makeStringIndex(N),
        )
-        self.df.loc[1000:3000, "float_with_nan"] = np.nan
+        self.df.iloc[1000:3000, 1] = np.nan
        self.df["date"] = self.df["datetime"].dt.date
        self.df["time"] = self.df["datetime"].dt.time
        self.df["datetime_string"] = self.df["datetime"].astype(str)
@@ -88,7 +88,7 @@ def setup(self, connection, dtype):
            },
            index=tm.makeStringIndex(N),
        )
-        self.df.loc[1000:3000, "float_with_nan"] = np.nan
+        self.df.iloc[1000:3000, 1] = np.nan
        self.df["date"] = self.df["datetime"].dt.date
        self.df["time"] = self.df["datetime"].dt.time
        self.df["datetime_string"] = self.df["datetime"].astype(str)
@@ -117,7 +117,7 @@ def setup(self):
            },
            index=tm.makeStringIndex(N),
        )
-        self.df.loc[1000:3000, "float_with_nan"] = np.nan
+        self.df.iloc[1000:3000, 1] = np.nan
        self.df["date"] = self.df["datetime"].dt.date
        self.df["time"] = self.df["datetime"].dt.time
        self.df["datetime_string"] = self.df["datetime"].astype(str)
@@ -164,7 +164,7 @@ def setup(self, dtype):
            },
            index=tm.makeStringIndex(N),
        )
-        self.df.loc[1000:3000, "float_with_nan"] = np.nan
+        self.df.iloc[1000:3000, 1] = np.nan
        self.df["date"] = self.df["datetime"].dt.date
        self.df["time"] = self.df["datetime"].dt.time
        self.df["datetime_string"] = self.df["datetime"].astype(str)
diff --git a/asv_bench/benchmarks/io/style.py b/asv_bench/benchmarks/io/style.py
index f0902c9c2c328..1ebdb08e8c727 100644
--- a/asv_bench/benchmarks/io/style.py
+++ b/asv_bench/benchmarks/io/style.py
@@ -83,11 +83,11 @@ def _style_format(self):
    def _style_apply_format_hide(self):
        self.st = self.df.style.applymap(lambda v: "color: red;")
        self.st.format("{:.3f}")
-        self.st.hide_index(self.st.index[1:])
-        self.st.hide_columns(self.st.columns[1:])
+        self.st.hide(self.st.index[1:], axis=0)
+        self.st.hide(self.st.columns[1:], axis=1)
 
    def _style_tooltips(self):
        ttips = DataFrame("abc", index=self.df.index[::2], columns=self.df.columns[::2])
        self.st = self.df.style.set_tooltips(ttips)
-        self.st.hide_index(self.st.index[12:])
-        self.st.hide_columns(self.st.columns[12:])
+        self.st.hide(self.st.index[12:], axis=0)
+        self.st.hide(self.st.columns[12:], axis=1)
diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py
index 2309347ac96d8..d9fb3c8a8ff89 100644
--- a/asv_bench/benchmarks/join_merge.py
+++ b/asv_bench/benchmarks/join_merge.py
@@ -22,26 +22,6 @@
    from pandas import ordered_merge as merge_ordered
 
 
-class Append:
-    def setup(self):
-        self.df1 = DataFrame(np.random.randn(10000, 4), columns=["A", "B", "C", "D"])
-        self.df2 = self.df1.copy()
-        self.df2.index = np.arange(10000, 20000)
-        self.mdf1 = self.df1.copy()
-        self.mdf1["obj1"] = "bar"
-        self.mdf1["obj2"] = "bar"
-        self.mdf1["int1"] = 5
-        self.mdf1 = self.mdf1._consolidate()
-        self.mdf2 = self.mdf1.copy()
-        self.mdf2.index = self.df2.index
-
-    def time_append_homogenous(self):
-        self.df1.append(self.df2)
-
-    def time_append_mixed(self):
-        self.mdf1.append(self.mdf2)
-
-
 class Concat:
 
    params = [0, 1]
diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py
index 05e12630d7540..171e4feb290cf 100644
--- a/asv_bench/benchmarks/reshape.py
+++ b/asv_bench/benchmarks/reshape.py
@@ -36,7 +36,7 @@ def setup(self):
        self.df = DataFrame(data)
 
    def time_reshape_pivot_time_series(self):
-        self.df.pivot("date", "variable", "value")
+        self.df.pivot(index="date", columns="variable", values="value")
 
 
 class SimpleReshape:
diff --git a/asv_bench/benchmarks/tslibs/timestamp.py b/asv_bench/benchmarks/tslibs/timestamp.py
index eda9bce89188c..d7706a39dfae5 100644
--- a/asv_bench/benchmarks/tslibs/timestamp.py
+++ b/asv_bench/benchmarks/tslibs/timestamp.py
@@ -50,62 +50,58 @@ def time_from_pd_timestamp(self):
 
 
 class TimestampProperties:
-    _freqs = [None, "B"]
-    params = [_tzs, _freqs]
-    param_names = ["tz", "freq"]
+    params = [_tzs]
+    param_names = ["tz"]
 
-    def setup(self, tz, freq):
-        self.ts = Timestamp("2017-08-25 08:16:14", tzinfo=tz, freq=freq)
+    def setup(self, tz):
+        self.ts = Timestamp("2017-08-25 08:16:14", tzinfo=tz)
 
-    def time_tz(self, tz, freq):
+    def time_tz(self, tz):
        self.ts.tz
 
-    def time_dayofweek(self, tz, freq):
+    def time_dayofweek(self, tz):
        self.ts.dayofweek
 
-    def time_dayofyear(self, tz, freq):
+    def time_dayofyear(self, tz):
        self.ts.dayofyear
 
-    def time_week(self, tz, freq):
+    def time_week(self, tz):
        self.ts.week
 
-    def time_quarter(self, tz, freq):
+    def time_quarter(self, tz):
        self.ts.quarter
 
-    def time_days_in_month(self, tz, freq):
+    def time_days_in_month(self, tz):
        self.ts.days_in_month
 
-    def time_freqstr(self, tz, freq):
-        self.ts.freqstr
-
-    def time_is_month_start(self, tz, freq):
+    def time_is_month_start(self, tz):
        self.ts.is_month_start
 
-    def time_is_month_end(self, tz, freq):
+    def time_is_month_end(self, tz):
        self.ts.is_month_end
 
-    def time_is_quarter_start(self, tz, freq):
+    def time_is_quarter_start(self, tz):
        self.ts.is_quarter_start
 
-    def time_is_quarter_end(self, tz, freq):
+    def time_is_quarter_end(self, tz):
        self.ts.is_quarter_end
 
-    def time_is_year_start(self, tz, freq):
+    def time_is_year_start(self, tz):
        self.ts.is_year_start
 
-    def time_is_year_end(self, tz, freq):
+    def time_is_year_end(self, tz):
        self.ts.is_year_end
 
-    def time_is_leap_year(self, tz, freq):
+    def time_is_leap_year(self, tz):
        self.ts.is_leap_year
 
-    def time_microsecond(self, tz, freq):
+    def time_microsecond(self, tz):
        self.ts.microsecond
 
-    def time_month_name(self, tz, freq):
+    def time_month_name(self, tz):
        self.ts.month_name()
 
-    def time_weekday_name(self, tz, freq):
+    def time_weekday_name(self, tz):
        self.ts.day_name()
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 113186c746157..c6067faf92d37 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -47,7 +47,7 @@ import pandas
 blocklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis',
             'lxml', 'matplotlib', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy',
-             'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'}
+             'tables', 'urllib.request', 'xlrd', 'xlsxwriter'}
 
 # GH#28227 for some of these check for top-level modules, while others are
 # more specific (e.g. urllib.request)
diff --git a/ci/deps/actions-310-numpydev.yaml b/ci/deps/actions-310-numpydev.yaml
index ef20c2aa889b9..4de419b6d961c 100644
--- a/ci/deps/actions-310-numpydev.yaml
+++ b/ci/deps/actions-310-numpydev.yaml
@@ -17,6 +17,8 @@ dependencies:
  - pip
  - pip:
      - "cython"
+      - "git+https://github.com/mesonbuild/meson.git@master"
+      - "git+https://github.com/mesonbuild/meson-python.git@main"
      - "--extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple"
      - "--pre"
      - "numpy"
diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml
index da3578e7191eb..15afa7b340e18 100644
--- a/ci/deps/actions-310.yaml
+++ b/ci/deps/actions-310.yaml
@@ -51,5 +51,7 @@ dependencies:
  - xarray
  - xlrd
  - xlsxwriter
-  - xlwt
  - zstandard
+  - pip:
+      - "git+https://github.com/mesonbuild/meson.git@master"
+      - "git+https://github.com/mesonbuild/meson-python.git@main"
diff --git a/ci/deps/actions-38-downstream_compat.yaml b/ci/deps/actions-38-downstream_compat.yaml
index 29ad2669afbd2..7ae162ec1216d 100644
--- a/ci/deps/actions-38-downstream_compat.yaml
+++ b/ci/deps/actions-38-downstream_compat.yaml
@@ -51,7 +51,6 @@ dependencies:
  - xarray
  - xlrd
  - xlsxwriter
-  - xlwt
  - zstandard
 
 # downstream packages
@@ -69,3 +68,6 @@ dependencies:
  - pyyaml
  - py
  - pytorch
+  - pip:
+      - "git+https://github.com/mesonbuild/meson.git@master"
+      - "git+https://github.com/mesonbuild/meson-python.git@main"
diff --git a/ci/deps/actions-38-minimum_versions.yaml b/ci/deps/actions-38-minimum_versions.yaml
index f92d9958a6248..0ac11296cace8 100644
--- a/ci/deps/actions-38-minimum_versions.yaml
+++ b/ci/deps/actions-38-minimum_versions.yaml
@@ -25,14 +25,14 @@ dependencies:
  - blosc=1.21.0
  - bottleneck=1.3.2
  - brotlipy=0.7.0
-  - fastparquet=0.4.0
+  - fastparquet=0.6.3
  - fsspec=2021.07.0
  - html5lib=1.1
  - hypothesis=6.13.0
  - gcsfs=2021.07.0
  - jinja2=3.0.0
  - lxml=4.6.3
-  - matplotlib=3.3.2
+  - matplotlib=3.6.1
  - numba=0.53.1
  - numexpr=2.7.3
  - odfpy=1.4.1
@@ -53,5 +53,7 @@ dependencies:
  - xarray=0.19.0
  - xlrd=2.0.1
  - xlsxwriter=1.4.3
-  - xlwt=1.3.0
  - zstandard=0.15.2
+  - pip:
+      - "git+https://github.com/mesonbuild/meson.git@master"
+      - "git+https://github.com/mesonbuild/meson-python.git@main"
diff --git a/ci/deps/actions-38.yaml b/ci/deps/actions-38.yaml
index b478b7c900425..1b64641beb3db 100644
--- a/ci/deps/actions-38.yaml
+++ b/ci/deps/actions-38.yaml
@@ -50,5 +50,7 @@ dependencies:
  - xarray
  - xlrd
  - xlsxwriter
-  - xlwt
  - zstandard
+  - pip:
+      - "git+https://github.com/mesonbuild/meson.git@master"
+      - "git+https://github.com/mesonbuild/meson-python.git@main"
diff --git a/ci/deps/actions-39.yaml b/ci/deps/actions-39.yaml
index a12f36ba84cca..f8c1c8dd82bb6 100644
--- a/ci/deps/actions-39.yaml
+++ b/ci/deps/actions-39.yaml
@@ -51,5 +51,7 @@ dependencies:
  - xarray
  - xlrd
  - xlsxwriter
-  - xlwt
  - zstandard
+  - pip:
+      - "git+https://github.com/mesonbuild/meson.git@master"
+      - "git+https://github.com/mesonbuild/meson-python.git@main"
diff --git a/ci/deps/actions-pypy-38.yaml b/ci/deps/actions-pypy-38.yaml
index e06b992acc191..f9c51bca85b95 100644
--- a/ci/deps/actions-pypy-38.yaml
+++ b/ci/deps/actions-pypy-38.yaml
@@ -19,3 +19,6 @@ dependencies:
  - numpy
  - python-dateutil
  - pytz
+  - pip:
+      - "git+https://github.com/mesonbuild/meson.git@master"
+      - "git+https://github.com/mesonbuild/meson-python.git@main"
diff --git a/ci/deps/circle-38-arm64.yaml b/ci/deps/circle-38-arm64.yaml
index 2b65ece881df7..3e8f89e6e2417 100644
--- a/ci/deps/circle-38-arm64.yaml
+++ b/ci/deps/circle-38-arm64.yaml
@@ -51,5 +51,7 @@ dependencies:
  - xarray
  - xlrd
  - xlsxwriter
-  - xlwt
  - zstandard
+  - pip:
+      - "git+https://github.com/mesonbuild/meson.git@master"
+      - "git+https://github.com/mesonbuild/meson-python.git@main"
diff --git a/ci/fix_wheels.py b/ci/fix_wheels.py
index ed7957fac643b..525aacf572cd4 100644
--- a/ci/fix_wheels.py
+++ b/ci/fix_wheels.py
@@ -4,7 +4,12 @@
 import zipfile
 
 try:
-    _, wheel_path, dest_dir = sys.argv
+    if len(sys.argv) != 3:
+        raise ValueError(
+            "User must pass the path to the wheel and the destination directory."
+        )
+    wheel_path = sys.argv[1]
+    dest_dir = sys.argv[2]
    # Figure out whether we are building on 32 or 64 bit python
    is_32 = sys.maxsize <= 2**32
    PYTHON_ARCH = "x86" if is_32 else "x64"
@@ -50,5 +55,4 @@
 if not success:
    os.remove(repaired_wheel_path)
    raise exception
-else:
-    print(f"Successfully repaired wheel was written to {repaired_wheel_path}")
+print(f"Successfully repaired wheel was written to {repaired_wheel_path}")
diff --git a/ci/run_tests.sh b/ci/run_tests.sh
index e6de5caf955fc..e6c9be39c17f9 100755
--- a/ci/run_tests.sh
+++ b/ci/run_tests.sh
@@ -1,5 +1,6 @@
 #!/bin/bash -e
 
+
 # Workaround for pytest-xdist (it collects different tests in the workers if PYTHONHASHSEED is not set)
 # https://github.com/pytest-dev/pytest/issues/920
 # https://github.com/pytest-dev/pytest/issues/1075
@@ -13,7 +14,7 @@ if [[ "not network" == *"$PATTERN"* ]]; then
 fi
 
 if [[ "$COVERAGE" == "true" ]]; then
-    COVERAGE="-s --cov=pandas --cov-report=xml --cov-append"
+    COVERAGE="-s --cov=pandas --cov-report=xml --cov-append --cov-config=setup.cfg"
 else
    COVERAGE=""  # We need to reset this for COVERAGE="false" case
 fi
@@ -24,7 +25,7 @@ if [[ $(uname) == "Linux" && -z $DISPLAY ]]; then
    XVFB="xvfb-run "
 fi
 
-PYTEST_CMD="${XVFB}pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET"
+PYTEST_CMD="${XVFB}pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile --import-mode=importlib $TEST_ARGS $COVERAGE $PYTEST_TARGET"
 
 if [[ "$PATTERN" ]]; then
    PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\""
@@ -33,7 +34,7 @@ fi
 echo $PYTEST_CMD
 sh -c "$PYTEST_CMD"
 
-if [[ "$PANDAS_DATA_MANAGER" != "array" && "$PYTEST_TARGET" == "pandas" ]]; then
+if [[ "$PANDAS_DATA_MANAGER" != "array" && "$PYTEST_TARGET" == "pandas/pandas" ]]; then
    # The ArrayManager tests should have already been run by PYTEST_CMD if PANDAS_DATA_MANAGER was already set to array
    # If we're targeting specific files, e.g. test_downstream.py, don't run.
    PYTEST_AM_CMD="PANDAS_DATA_MANAGER=array pytest -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE pandas"
diff --git a/ci/test_wheels.py b/ci/test_wheels.py
index 9a9a6890d8ecb..c9258422baefd 100644
--- a/ci/test_wheels.py
+++ b/ci/test_wheels.py
@@ -1,12 +1,11 @@
 import glob
 import os
-import platform
 import shutil
 import subprocess
 import sys
 
 if os.name == "nt":
-    py_ver = platform.python_version()
+    py_ver = f"{sys.version_info.major}.{sys.version_info.minor}"
    is_32_bit = os.getenv("IS_32_BIT") == "true"
    try:
        wheel_dir = sys.argv[1]
diff --git a/doc/make.py b/doc/make.py
index c758c7fc84bbb..5b1365dffbbcd 100755
--- a/doc/make.py
+++ b/doc/make.py
@@ -259,8 +259,7 @@ def latex(self, force=False):
                    "You should check the file "
                    '"build/latex/pandas.pdf" for problems.'
                )
-            else:
-                self._run_os("make")
+            self._run_os("make")
        return ret_code
 
    def latex_forced(self):
@@ -354,8 +353,8 @@ def main():
    # external libraries (namely Sphinx) to compile this module and resolve
    # the import of `python_path` correctly. The latter is used to resolve
    # the import within the module, injecting it into the global namespace
-    os.environ["PYTHONPATH"] = args.python_path
-    sys.path.insert(0, args.python_path)
+    # os.environ["PYTHONPATH"] = args.python_path
+    # sys.path.insert(0, args.python_path)
    globals()["pandas"] = importlib.import_module("pandas")
 
    # Set the matplotlib backend to the non-interactive Agg backend for all
diff --git a/doc/redirects.csv b/doc/redirects.csv
index d0f4ae331f7e3..97cd20b295e65 100644
--- a/doc/redirects.csv
+++ b/doc/redirects.csv
@@ -315,7 +315,6 @@ generated/pandas.DataFrame.aggregate,../reference/api/pandas.DataFrame.aggregate
 generated/pandas.DataFrame.align,../reference/api/pandas.DataFrame.align
 generated/pandas.DataFrame.all,../reference/api/pandas.DataFrame.all
 generated/pandas.DataFrame.any,../reference/api/pandas.DataFrame.any
-generated/pandas.DataFrame.append,../reference/api/pandas.DataFrame.append
 generated/pandas.DataFrame.apply,../reference/api/pandas.DataFrame.apply
 generated/pandas.DataFrame.applymap,../reference/api/pandas.DataFrame.applymap
 generated/pandas.DataFrame.as_blocks,../reference/api/pandas.DataFrame.as_blocks
@@ -408,7 +407,6 @@ generated/pandas.DataFrame.last,../reference/api/pandas.DataFrame.last
 generated/pandas.DataFrame.last_valid_index,../reference/api/pandas.DataFrame.last_valid_index
 generated/pandas.DataFrame.le,../reference/api/pandas.DataFrame.le
 generated/pandas.DataFrame.loc,../reference/api/pandas.DataFrame.loc
-generated/pandas.DataFrame.lookup,../reference/api/pandas.DataFrame.lookup
 generated/pandas.DataFrame.lt,../reference/api/pandas.DataFrame.lt
 generated/pandas.DataFrame.mask,../reference/api/pandas.DataFrame.mask
 generated/pandas.DataFrame.max,../reference/api/pandas.DataFrame.max
@@ -661,7 +659,6 @@ generated/pandas.Index.identical,../reference/api/pandas.Index.identical
 generated/pandas.Index.inferred_type,../reference/api/pandas.Index.inferred_type
 generated/pandas.Index.insert,../reference/api/pandas.Index.insert
 generated/pandas.Index.intersection,../reference/api/pandas.Index.intersection
-generated/pandas.Index.is_all_dates,../reference/api/pandas.Index.is_all_dates
 generated/pandas.Index.is_boolean,../reference/api/pandas.Index.is_boolean
 generated/pandas.Index.is_categorical,../reference/api/pandas.Index.is_categorical
 generated/pandas.Index.is_floating,../reference/api/pandas.Index.is_floating
@@ -671,7 +668,6 @@ generated/pandas.Index.is_integer,../reference/api/pandas.Index.is_integer
 generated/pandas.Index.is_interval,../reference/api/pandas.Index.is_interval
 generated/pandas.Index.is_lexsorted_for_tuple,../reference/api/pandas.Index.is_lexsorted_for_tuple
 generated/pandas.Index.is_monotonic_decreasing,../reference/api/pandas.Index.is_monotonic_decreasing
-generated/pandas.Index.is_monotonic,../reference/api/pandas.Index.is_monotonic
 generated/pandas.Index.is_monotonic_increasing,../reference/api/pandas.Index.is_monotonic_increasing
 generated/pandas.Index.isna,../reference/api/pandas.Index.isna
 generated/pandas.Index.isnull,../reference/api/pandas.Index.isnull
@@ -919,7 +915,6 @@ generated/pandas.Series.aggregate,../reference/api/pandas.Series.aggregate
 generated/pandas.Series.align,../reference/api/pandas.Series.align
 generated/pandas.Series.all,../reference/api/pandas.Series.all
 generated/pandas.Series.any,../reference/api/pandas.Series.any
-generated/pandas.Series.append,../reference/api/pandas.Series.append
 generated/pandas.Series.apply,../reference/api/pandas.Series.apply
 generated/pandas.Series.argmax,../reference/api/pandas.Series.argmax
 generated/pandas.Series.argmin,../reference/api/pandas.Series.argmin
@@ -1068,7 +1063,6 @@ generated/pandas.Series.interpolate,../reference/api/pandas.Series.interpolate
 generated/pandas.Series.is_copy,../reference/api/pandas.Series.is_copy
 generated/pandas.Series.isin,../reference/api/pandas.Series.isin
 generated/pandas.Series.is_monotonic_decreasing,../reference/api/pandas.Series.is_monotonic_decreasing
-generated/pandas.Series.is_monotonic,../reference/api/pandas.Series.is_monotonic
 generated/pandas.Series.is_monotonic_increasing,../reference/api/pandas.Series.is_monotonic_increasing
 generated/pandas.Series.isna,../reference/api/pandas.Series.isna
 generated/pandas.Series.isnull,../reference/api/pandas.Series.isnull
@@ -1324,8 +1318,6 @@ generated/pandas.Timestamp.daysinmonth,../reference/api/pandas.Timestamp.daysinmonth
 generated/pandas.Timestamp.dst,../reference/api/pandas.Timestamp.dst
 generated/pandas.Timestamp.floor,../reference/api/pandas.Timestamp.floor
 generated/pandas.Timestamp.fold,../reference/api/pandas.Timestamp.fold
-generated/pandas.Timestamp.freq,../reference/api/pandas.Timestamp.freq
-generated/pandas.Timestamp.freqstr,../reference/api/pandas.Timestamp.freqstr
 generated/pandas.Timestamp.fromisoformat,../reference/api/pandas.Timestamp.fromisoformat
 generated/pandas.Timestamp.fromordinal,../reference/api/pandas.Timestamp.fromordinal
 generated/pandas.Timestamp.fromtimestamp,../reference/api/pandas.Timestamp.fromtimestamp
diff --git a/doc/scripts/eval_performance.py b/doc/scripts/eval_performance.py
new file mode 100644
index 0000000000000..85d9ce4ad01e9
--- /dev/null
+++ b/doc/scripts/eval_performance.py
@@ -0,0 +1,108 @@
+from timeit import repeat as timeit
+
+import numpy as np
+import seaborn as sns
+
+from pandas import DataFrame
+
+setup_common = """from pandas import DataFrame
+from numpy.random import randn
+df = DataFrame(randn(%d, 3), columns=list('abc'))
+%s"""
+
+setup_with = "s = 'a + b * (c ** 2 + b ** 2 - a) / (a * c) ** 3'"
+
+
+def bench_with(n, times=10, repeat=3, engine="numexpr"):
+    return (
+        np.array(
+            timeit(
+                f"df.eval(s, engine={repr(engine)})",
+                setup=setup_common % (n, setup_with),
+                repeat=repeat,
+                number=times,
+            )
+        )
+        / times
+    )
+
+
+setup_subset = "s = 'a <= b <= c ** 2 + b ** 2 - a and b > c'"
+
+
+def bench_subset(n, times=20, repeat=3, engine="numexpr"):
+    return (
+        np.array(
+            timeit(
+                f"df.query(s, engine={repr(engine)})",
+                setup=setup_common % (n, setup_subset),
+                repeat=repeat,
+                number=times,
+            )
+        )
+        / times
+    )
+
+
+def bench(mn=3, mx=7, num=100, engines=("python", "numexpr"), verbose=False):
+    r = np.logspace(mn, mx, num=num).round().astype(int)
+
+    ev = DataFrame(np.empty((num, len(engines))), columns=engines)
+    qu = ev.copy(deep=True)
+
+    ev["size"] = qu["size"] = r
+
+    for engine in engines:
+        for i, n in enumerate(r):
+            if verbose & (i % 10 == 0):
+                print(f"engine: {repr(engine)}, i == {i:d}")
+            ev_times = bench_with(n, times=1, repeat=1, engine=engine)
+            ev.loc[i, engine] = np.mean(ev_times)
+            qu_times = bench_subset(n, times=1, repeat=1, engine=engine)
+            qu.loc[i, engine] = np.mean(qu_times)
+
+    return ev, qu
+
+
+def plot_perf(df, engines, title, filename=None):
+    from matplotlib.pyplot import figure
+
+    sns.set()
+    sns.set_palette("Set2")
+
+    fig = figure(figsize=(4, 3), dpi=120)
+    ax = fig.add_subplot(111)
+
+    for engine in engines:
+        ax.loglog(df["size"], df[engine], label=engine, lw=2)
+
+    ax.set_xlabel("Number of Rows")
+    ax.set_ylabel("Time (s)")
+    ax.set_title(title)
+    ax.legend(loc="best")
+    ax.tick_params(top=False, right=False)
+
+    fig.tight_layout()
+
+    if filename is not None:
+        fig.savefig(filename)
+
+
+if __name__ == "__main__":
+    import os
+
+    pandas_dir = os.path.dirname(
+        os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
+    )
+    static_path = os.path.join(pandas_dir, "doc", "source", "_static")
+
+    join = lambda p: os.path.join(static_path, p)
+
+    fn = join("eval-query-perf-data.h5")
+
+    engines = "python", "numexpr"
+
+    ev, qu = bench(verbose=True)  # only this one
+
+    plot_perf(ev, engines, "DataFrame.eval()", filename=join("eval-perf.png"))
+    plot_perf(qu, engines, "DataFrame.query()", filename=join("query-perf.png"))
diff --git a/doc/source/_static/eval-perf-small.png b/doc/source/_static/eval-perf-small.png
deleted file mode 100644
index d86018363ffdc..0000000000000
Binary files a/doc/source/_static/eval-perf-small.png and /dev/null differ
diff --git a/doc/source/_static/eval-perf.png b/doc/source/_static/eval-perf.png
index 14c69c1b85d9e..ed92337c1d995 100644
Binary files a/doc/source/_static/eval-perf.png and b/doc/source/_static/eval-perf.png differ
diff --git a/doc/source/_static/query-perf-small.png b/doc/source/_static/query-perf-small.png
deleted file mode 100644
index e14fa69db7fe8..0000000000000
Binary files a/doc/source/_static/query-perf-small.png and /dev/null differ
diff --git a/doc/source/_static/query-perf.png b/doc/source/_static/query-perf.png
index d96318df94357..c52849a0edd53 100644
Binary files a/doc/source/_static/query-perf.png and b/doc/source/_static/query-perf.png differ
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 252670565ebff..af93a979789c1 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -105,7 +105,7 @@
 
    if fname == "index.rst" and os.path.abspath(dirname) == source_path:
        continue
-    elif pattern == "-api" and reldir.startswith("reference"):
+    if pattern == "-api" and reldir.startswith("reference"):
        exclude_patterns.append(fname)
    elif (
        pattern == "whatsnew"
@@ -236,7 +236,7 @@
 if ".dev" in version:
    switcher_version = "dev"
 elif "rc" in version:
-    switcher_version = version.split("rc")[0] + " (rc)"
+    switcher_version = version.split("rc", maxsplit=1)[0] + " (rc)"
 
 html_theme_options = {
    "external_links": [],
diff --git a/doc/source/development/contributing_environment.rst b/doc/source/development/contributing_environment.rst
index 4a70057cf18e3..afa0d0306f1af 100644
--- a/doc/source/development/contributing_environment.rst
+++ b/doc/source/development/contributing_environment.rst
@@ -10,7 +10,7 @@ To test out code changes, you'll need to build pandas from source, which
 requires a C/C++ compiler and Python environment. If you're making documentation
 changes, you can skip to :ref:`contributing to the documentation <contributing_documentation>` but
 if you skip creating the development environment you won't be able to build the documentation
-locally before pushing your changes.
+locally before pushing your changes. It's recommended to also install the :ref:`pre-commit hooks <contributing.pre-commit>`.
 
 .. contents:: Table of contents:
   :local:
diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst
index 5f258973b3db9..11c419c399877 100644
--- a/doc/source/getting_started/install.rst
+++ b/doc/source/getting_started/install.rst
@@ -20,7 +20,7 @@ Instructions for installing from source,
 Python version support
 ----------------------
 
-Officially Python 3.8, 3.9 and 3.10.
+Officially Python 3.8, 3.9, 3.10 and 3.11.
 
 Installing pandas
 -----------------
@@ -310,7 +310,7 @@ Can be managed as optional_extra with ``pandas[plot, output_formatting]``, depen
 ========================= ================== ================== =============================================================
 Dependency                Minimum Version    optional_extra     Notes
 ========================= ================== ================== =============================================================
-matplotlib                3.3.2              plot               Plotting library
+matplotlib                3.6.1              plot               Plotting library
 Jinja2                    3.0.0              output_formatting  Conditional formatting with DataFrame.style
 tabulate                  0.8.9              output_formatting  Printing in Markdown-friendly format (see `tabulate`_)
 ========================= ================== ================== =============================================================
@@ -336,7 +336,6 @@ Can be managed as optional_extra with ``pandas[excel]``.
 Dependency                Minimum Version    optional_extra  Notes
 ========================= ================== =============== =============================================================
 xlrd                      2.0.1              excel           Reading Excel
-xlwt                      1.3.0              excel           Writing Excel
 xlsxwriter                1.4.3              excel           Writing Excel
 openpyxl                  3.0.7              excel           Reading / writing for xlsx files
 pyxlsb                    1.0.8              excel           Reading for xlsb files
@@ -418,7 +417,7 @@ Dependency                Minimum Version    optional_extra   Notes
 PyTables                  3.6.1              hdf5             HDF5-based reading / writing
 blosc                     1.21.0             hdf5             Compression for HDF5
 zlib                                         hdf5             Compression for HDF5
-fastparquet               0.4.0              -                Parquet reading / writing (pyarrow is default)
+fastparquet               0.6.3              -                Parquet reading / writing (pyarrow is default)
 pyarrow                   6.0.0              parquet, feather Parquet, ORC, and feather reading / writing
 pyreadstat                1.1.2              spss             SPSS files (.sav) reading
 odfpy                     1.4.1              excel            Open document format (.odf, .ods, .odt) reading / writing
diff --git a/doc/source/getting_started/intro_tutorials/09_timeseries.rst b/doc/source/getting_started/intro_tutorials/09_timeseries.rst
index 3c256081d7955..373470913c293 100644
--- a/doc/source/getting_started/intro_tutorials/09_timeseries.rst
+++ b/doc/source/getting_started/intro_tutorials/09_timeseries.rst
@@ -144,7 +144,7 @@ I want to add a new column to the ``DataFrame`` containing only the month of the
 
 By using ``Timestamp`` objects for dates, a lot of time-related
 properties are provided by pandas. For example the ``month``, but also
-``year``, ``weekofyear``, ``quarter``,… All of these properties are
+``year``, ``quarter``,… All of these properties are
 accessible by the ``dt`` accessor.
 
 .. raw:: html
diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst
index 17510a0b7d479..33a611b15675d 100644
--- a/doc/source/reference/arrays.rst
+++ b/doc/source/reference/arrays.rst
@@ -157,8 +157,6 @@ Methods
   Timestamp.day_name
   Timestamp.dst
   Timestamp.floor
-   Timestamp.freq
-   Timestamp.freqstr
   Timestamp.fromordinal
   Timestamp.fromtimestamp
   Timestamp.isocalendar
diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst
index 6c0b6a4752875..ea19bb6d85aed 100644
--- a/doc/source/reference/frame.rst
+++ b/doc/source/reference/frame.rst
@@ -66,7 +66,6 @@ Indexing, iteration
   DataFrame.keys
   DataFrame.iterrows
   DataFrame.itertuples
-   DataFrame.lookup
   DataFrame.pop
   DataFrame.tail
   DataFrame.xs
@@ -250,7 +249,6 @@ Combining / comparing / joining / merging
 .. autosummary::
   :toctree: api/
 
-   DataFrame.append
   DataFrame.assign
   DataFrame.compare
   DataFrame.join
diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst
index 93897723d5d71..81148b4a29df5 100644
--- a/doc/source/reference/indexing.rst
+++ b/doc/source/reference/indexing.rst
@@ -25,7 +25,6 @@ Properties
   :toctree: api/
 
   Index.values
-   Index.is_monotonic
   Index.is_monotonic_increasing
   Index.is_monotonic_decreasing
   Index.is_unique
@@ -33,7 +32,6 @@ Properties
   Index.hasnans
   Index.dtype
   Index.inferred_type
-   Index.is_all_dates
   Index.shape
   Index.name
   Index.names
@@ -343,8 +341,6 @@ Time/date components
   DatetimeIndex.timetz
   DatetimeIndex.dayofyear
   DatetimeIndex.day_of_year
-   DatetimeIndex.weekofyear
-   DatetimeIndex.week
   DatetimeIndex.dayofweek
   DatetimeIndex.day_of_week
   DatetimeIndex.weekday
diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst
index 3fda5db3a0199..659385c611ff0 100644
--- a/doc/source/reference/series.rst
+++ b/doc/source/reference/series.rst
@@ -167,7 +167,6 @@ Computations / descriptive stats
   Series.unique
   Series.nunique
   Series.is_unique
-   Series.is_monotonic
   Series.is_monotonic_increasing
   Series.is_monotonic_decreasing
   Series.value_counts
@@ -248,7 +247,6 @@ Combining / comparing / joining / merging
 .. autosummary::
   :toctree: api/
 
-   Series.append
   Series.compare
   Series.update
 
@@ -311,8 +309,6 @@ Datetime properties
   Series.dt.second
   Series.dt.microsecond
   Series.dt.nanosecond
-   Series.dt.week
-   Series.dt.weekofyear
   Series.dt.dayofweek
   Series.dt.day_of_week
   Series.dt.weekday
diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst
index 0883113474f54..2204c8b04e438 100644
--- a/doc/source/user_guide/basics.rst
+++ b/doc/source/user_guide/basics.rst
@@ -1039,34 +1039,6 @@ not noted for a particular column will be ``NaN``:
 
   tsdf.agg({"A": ["mean", "min"], "B": "sum"})
 
-.. _basics.aggregation.mixed_string:
-
-Mixed dtypes
-++++++++++++
-
-.. deprecated:: 1.4.0
-   Attempting to determine which columns cannot be aggregated and silently dropping them from the results is deprecated and will be removed in a future version. If any porition of the columns or operations provided fail, the call to ``.agg`` will raise.
-
-When presented with mixed dtypes that cannot aggregate, ``.agg`` will only take the valid
-aggregations. This is similar to how ``.groupby.agg`` works.
-
-.. ipython:: python
-
-   mdf = pd.DataFrame(
-       {
-           "A": [1, 2, 3],
-           "B": [1.0, 2.0, 3.0],
-           "C": ["foo", "bar", "baz"],
-           "D": pd.date_range("20130101", periods=3),
-       }
-   )
-   mdf.dtypes
-
-.. ipython:: python
-   :okwarning:
-
-   mdf.agg(["min", "sum"])
-
 .. _basics.aggregation.custom_describe:
 
 Custom describe
diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst
index b5cb1d83a9f52..f3d68f4c471c1 100644
--- a/doc/source/user_guide/categorical.rst
+++ b/doc/source/user_guide/categorical.rst
@@ -353,11 +353,6 @@ Renaming categories is done by using the
 In contrast to R's ``factor``, categorical data can have categories of
 other types than string.
 
-.. note::
-
-    Be aware that assigning new categories is an inplace operation, while most other operations
-    under ``Series.cat`` per default return a new ``Series`` of dtype ``category``.
-
 Categories must be unique or a ``ValueError`` is raised:
 
 .. ipython:: python
@@ -952,7 +947,6 @@ categorical (categories and ordering). So if you read back the CSV file you have
 relevant columns back to ``category`` and assign the right categories and categories ordering.
 
 .. ipython:: python
-   :okwarning:
 
   import io
 
@@ -969,8 +963,8 @@ relevant columns back to ``category`` and assign the right categories and catego
   df2["cats"]
   # Redo the category
   df2["cats"] = df2["cats"].astype("category")
-   df2["cats"].cat.set_categories(
-       ["very bad", "bad", "medium", "good", "very good"], inplace=True
+   df2["cats"] = df2["cats"].cat.set_categories(
+       ["very bad", "bad", "medium", "good", "very good"]
   )
   df2.dtypes
   df2["cats"]
@@ -1162,16 +1156,12 @@ Constructing a ``Series`` from a ``Categorical`` will not copy the input
 change the original ``Categorical``:
 
 .. ipython:: python
-   :okwarning:
 
   cat = pd.Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10])
   s = pd.Series(cat, name="cat")
   cat
   s.iloc[0:2] = 10
   cat
-   df = pd.DataFrame(s)
-   df["cat"].cat.categories = [1, 2, 3, 4, 5]
-   cat
 
 Use ``copy=True`` to prevent such a behaviour or simply don't reuse
 ``Categoricals``:
diff --git a/doc/source/user_guide/enhancingperf.rst b/doc/source/user_guide/enhancingperf.rst
index 1a1229f95523b..9375bb066781b 100644
--- a/doc/source/user_guide/enhancingperf.rst
+++ b/doc/source/user_guide/enhancingperf.rst
@@ -690,21 +690,12 @@ The equivalent in standard Python would be
   df["a"] = 1
   df
 
-The :class:`DataFrame.query` method has a ``inplace`` keyword which determines
-whether the query modifies the original frame.
-
-.. ipython:: python
-
-   df = pd.DataFrame(dict(a=range(5), b=range(5, 10)))
-   df.query("a > 2")
-   df.query("a > 2", inplace=True)
-   df
-
 Local variables
 ~~~~~~~~~~~~~~~
 
 You must *explicitly reference* any local variable that you want to use in an
-expression by placing the ``@`` character in front of the name. For example,
+expression by placing the ``@`` character in front of the name. This mechanism is
+the same for both :meth:`DataFrame.query` and :meth:`DataFrame.eval`. For example,
 
 .. ipython:: python
 
@@ -820,17 +811,12 @@ significant performance benefit. Here is a plot showing the running time of
 :func:`pandas.eval` as function of the size of the frame involved in the
 computation. The two lines are two different engines.
 
+..
+   The eval-perf.png figure below was generated with /doc/scripts/eval_performance.py
 
 .. image:: ../_static/eval-perf.png
 
-.. note::
-
-   Operations with smallish objects (around 15k-20k rows) are faster using
-   plain Python:
-
-   .. image:: ../_static/eval-perf-small.png
-
+You will only see the performance benefits of using the ``numexpr`` engine with :func:`pandas.eval` if your frame has more than approximately 100,000 rows.
 
 This plot was created using a :class:`DataFrame` with 3 columns each containing
 floating point values generated using ``numpy.random.randn()``.
diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst
index f9b8b793bfde8..dae42dd4f1118 100644
--- a/doc/source/user_guide/groupby.rst
+++ b/doc/source/user_guide/groupby.rst
@@ -1007,7 +1007,7 @@ functions:
 .. ipython:: python
   :okwarning:
 
-   grouped = df.groupby("A")
+   grouped = df.groupby("A")[["C", "D"]]
   grouped.agg(lambda x: x.std())
 
 But, it's rather verbose and can be untidy if you need to pass additional
diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst
index f939945fc6cda..6566a1d67d1c9 100644
--- a/doc/source/user_guide/indexing.rst
+++ b/doc/source/user_guide/indexing.rst
@@ -1240,6 +1240,17 @@ If instead you don't want to or cannot name your index, you can use the name
 renaming your columns to something less ambiguous.
 
+The :class:`DataFrame.query` method has an ``inplace`` keyword which determines
+whether the query modifies the original frame.
+
+.. ipython:: python
+
+   df = pd.DataFrame(dict(a=range(5), b=range(5, 10)))
+   df.query("a > 2")
+   df.query("a > 2", inplace=True)
+   df
+
+
 :class:`~pandas.MultiIndex` :meth:`~pandas.DataFrame.query` Syntax
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1438,15 +1449,18 @@ Performance of :meth:`~pandas.DataFrame.query`
 ``DataFrame.query()`` using ``numexpr`` is slightly faster than Python for
 large frames.
 
+..
+   The query-perf.png figure below was generated with /doc/scripts/eval_performance.py
+
 .. image:: ../_static/query-perf.png
 
-.. note::
-
-   You will only see the performance benefits of using the ``numexpr`` engine
-   with ``DataFrame.query()`` if your frame has more than approximately 200,000
-   rows.
-
-   .. image:: ../_static/query-perf-small.png
+You will only see the performance benefits of using the ``numexpr`` engine
+with ``DataFrame.query()`` if your frame has more than approximately 100,000
+rows.
+
+
 
 This plot was created using a ``DataFrame`` with 3 columns each containing
 floating point values generated using ``numpy.random.randn()``.
@@ -1545,7 +1559,7 @@ For instance:
    df.reindex(cols, axis=1).to_numpy()[np.arange(len(df)), idx]
 
 Formerly this could be achieved with the dedicated ``DataFrame.lookup`` method
-which was deprecated in version 1.2.0.
+which was deprecated in version 1.2.0 and removed in version 2.0.0.
 
 .. _indexing.class:
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index cf4221d055a27..b4bf3ef024d4c 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -154,25 +154,6 @@ usecols : list-like or callable, default ``None``
    Using this parameter results in much faster parsing time and lower memory
    usage when using the c engine. The Python engine loads the data first
    before deciding which columns to drop.
-squeeze : boolean, default ``False``
-    If the parsed data only contains one column then return a ``Series``.
-
-    .. deprecated:: 1.4.0
-        Append ``.squeeze("columns")`` to the call to ``{func_name}`` to squeeze
-        the data.
-prefix : str, default ``None``
-    Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...
-
-    .. deprecated:: 1.4.0
-        Use a list comprehension on the DataFrame's columns after calling ``read_csv``.
-
-    .. ipython:: python
-
-        data = "col1,col2,col3\na,b,1"
-
-        df = pd.read_csv(StringIO(data))
-        df.columns = [f"pre_{col}" for col in df.columns]
-        df
-
 mangle_dupe_cols : boolean, default ``True``
    Duplicate columns will be specified as 'X', 'X.1'...'X.N', rather than 'X'...'X'.
@@ -395,23 +376,6 @@ dialect : str or :class:`python:csv.Dialect` instance, default ``None``
 
 Error handling
 ++++++++++++++
 
-error_bad_lines : boolean, optional, default ``None``
-    Lines with too many fields (e.g. a csv line with too many commas) will by
-    default cause an exception to be raised, and no ``DataFrame`` will be
-    returned. If ``False``, then these "bad lines" will dropped from the
-    ``DataFrame`` that is returned. See :ref:`bad lines <io.bad_lines>`
-    below.
-
-    .. deprecated:: 1.3.0
-       The ``on_bad_lines`` parameter should be used instead to specify behavior upon
-       encountering a bad line instead.
-warn_bad_lines : boolean, optional, default ``None``
-    If error_bad_lines is ``False``, and warn_bad_lines is ``True``, a warning for
-    each "bad line" will be output.
-
-    .. deprecated:: 1.3.0
-       The ``on_bad_lines`` parameter should be used instead to specify behavior upon
-       encountering a bad line instead.
 on_bad_lines : {{'error', 'warn', 'skip'}}, default 'error'
    Specifies what to do upon encountering a bad line (a line with too many fields).
    Allowed values are :
@@ -1221,37 +1185,6 @@ Infinity
 ``inf`` like values will be parsed as ``np.inf`` (positive infinity), and ``-inf`` as ``-np.inf``
 (negative infinity). These will ignore the case of the value, meaning ``Inf``,
 will also be parsed as ``np.inf``.
-
-
-Returning Series
-''''''''''''''''
-
-Using the ``squeeze`` keyword, the parser will return output with a single column
-as a ``Series``:
-
-.. deprecated:: 1.4.0
-   Users should append ``.squeeze("columns")`` to the DataFrame returned by
-   ``read_csv`` instead.
-
-.. ipython:: python
-   :okwarning:
-
-   data = "level\nPatient1,123000\nPatient2,23000\nPatient3,1234018"
-
-   with open("tmp.csv", "w") as fh:
-       fh.write(data)
-
-   print(open("tmp.csv").read())
-
-   output = pd.read_csv("tmp.csv", squeeze=True)
-   output
-
-   type(output)
-
-.. ipython:: python
-   :suppress:
-
-   os.remove("tmp.csv")
-
 .. _io.boolean:
 
 Boolean values
@@ -1708,8 +1641,6 @@ Options that are unsupported by the pyarrow engine which are not covered by the
 * ``thousands``
 * ``memory_map``
 * ``dialect``
-* ``warn_bad_lines``
-* ``error_bad_lines``
 * ``on_bad_lines``
 * ``delim_whitespace``
 * ``quoting``
@@ -3466,8 +3397,6 @@ See the :ref:`cookbook<cookbook.excel>` for some advanced strategies.
 
 .. warning::
 
-   The `xlwt <https://xlwt.readthedocs.io/en/latest/>`__ package for writing old-style ``.xls``
-   excel files is no longer maintained.
   The `xlrd <https://xlrd.readthedocs.io/en/latest/>`__ package is now only for reading
   old-style ``.xls`` files.
@@ -3481,12 +3410,6 @@ See the :ref:`cookbook<cookbook.excel>` for some advanced strategies.
 
   **Please do not report issues when using ``xlrd`` to read ``.xlsx`` files.**
   This is no longer supported, switch to using ``openpyxl`` instead.
 
-   Attempting to use the ``xlwt`` engine will raise a ``FutureWarning``
-   unless the option :attr:`io.excel.xls.writer` is set to ``"xlwt"``.
-   While this option is now deprecated and will also raise a ``FutureWarning``,
-   it can be globally set and the warning suppressed. Users are recommended to
-   write ``.xlsx`` files using the ``openpyxl`` engine instead.
-
 .. _io.excel_reader:
 
 Reading Excel files
@@ -3788,7 +3711,7 @@ written. For example:
 
    df.to_excel("path_to_file.xlsx", sheet_name="Sheet1")
 
-Files with a ``.xls`` extension will be written using ``xlwt`` and those with a
+Files with a ``.xlsx`` extension will be written using ``xlsxwriter`` (if available) or
 ``openpyxl``.
@@ -3849,20 +3772,13 @@ pandas supports writing Excel files to buffer-like objects such as ``StringIO``
 Excel writer engines
 ''''''''''''''''''''
 
-.. deprecated:: 1.2.0
-
-   As the `xlwt <https://xlwt.readthedocs.io/en/latest/>`__ package is no longer
-   maintained, the ``xlwt`` engine will be removed from a future version
-   of pandas. This is the only engine in pandas that supports writing to
-   ``.xls`` files.
-
 pandas chooses an Excel writer via two methods:
 
 1. the ``engine`` keyword argument
 2. the filename extension (via the default specified in config options)
 
 By default, pandas uses the `XlsxWriter`_ for ``.xlsx``, `openpyxl`_
-for ``.xlsm``, and `xlwt`_ for ``.xls`` files. If you have multiple
+for ``.xlsm``. If you have multiple
 engines installed, you can set the default engine through :ref:`setting the
 config options <options.available>` ``io.excel.xlsx.writer`` and
 ``io.excel.xls.writer``. pandas will fall back on `openpyxl`_ for ``.xlsx``
@@ -3870,14 +3786,12 @@ files if `Xlsxwriter`_ is not available.
 ..
_XlsxWriter: https://xlsxwriter.readthedocs.io .. _openpyxl: https://openpyxl.readthedocs.io/ -.. _xlwt: http://www.python-excel.org To specify which writer you want to use, you can pass an engine keyword argument to ``to_excel`` and to ``ExcelWriter``. The built-in engines are: * ``openpyxl``: version 2.4 or higher is required * ``xlsxwriter`` -* ``xlwt`` .. code-block:: python diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst index 3052ee3001681..aefb5f0d3d2df 100644 --- a/doc/source/user_guide/missing_data.rst +++ b/doc/source/user_guide/missing_data.rst @@ -472,7 +472,7 @@ at the new values. .. _scipy: https://scipy.org/ .. _documentation: https://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation -.. _guide: https://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html +.. _guide: https://docs.scipy.org/doc/scipy/tutorial/interpolate.html .. _missing_data.interp_limits: diff --git a/doc/source/user_guide/options.rst b/doc/source/user_guide/options.rst index c7f5d3ddf66d3..ce805f98ca528 100644 --- a/doc/source/user_guide/options.rst +++ b/doc/source/user_guide/options.rst @@ -249,7 +249,7 @@ displayed when calling :meth:`~pandas.DataFrame.info`. ``display.max_info_rows``: :meth:`~pandas.DataFrame.info` will usually show null-counts for each column. For a large :class:`DataFrame`, this can be quite slow. ``max_info_rows`` and ``max_info_cols`` limit this null check to the specified rows and columns respectively. The :meth:`~pandas.DataFrame.info` -keyword argument ``null_counts=True`` will override this. +keyword argument ``show_counts=True`` will override this. .. ipython:: python diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst index d350351075cb6..b4ae1d27df2b5 100644 --- a/doc/source/user_guide/text.rst +++ b/doc/source/user_guide/text.rst @@ -267,14 +267,16 @@ i.e., from the end of the string to the beginning of the string: s3 s3.str.replace("^.a|dog", "XX-XX ", case=False, regex=True) -.. warning:: - Some caution must be taken when dealing with regular expressions! The current behavior - is to treat single character patterns as literal strings, even when ``regex`` is set - to ``True``. This behavior is deprecated and will be removed in a future version so - that the ``regex`` keyword is always respected. +.. versionchanged:: 2.0 + +A single character pattern with ``regex=True`` will also be treated as a regular expression: + +.. ipython:: python -.. versionchanged:: 1.2.0 + s4 = pd.Series(["a.b", ".", "b", np.nan, ""], dtype="string") + s4 + s4.str.replace(".", "a", regex=True) If you want literal replacement of a string (equivalent to :meth:`str.replace`), you can set the optional ``regex`` parameter to ``False``, rather than escaping each diff --git a/doc/source/user_guide/timedeltas.rst b/doc/source/user_guide/timedeltas.rst index 180de1df53f9e..318ca045847f4 100644 --- a/doc/source/user_guide/timedeltas.rst +++ b/doc/source/user_guide/timedeltas.rst @@ -236,9 +236,7 @@ Numeric reduction operation for ``timedelta64[ns]`` will return ``Timedelta`` ob Frequency conversion -------------------- -Timedelta Series, ``TimedeltaIndex``, and ``Timedelta`` scalars can be converted to other 'frequencies' by dividing by another timedelta, -or by astyping to a specific timedelta type. These operations yield Series and propagate ``NaT`` -> ``nan``. -Note that division by the NumPy scalar is true division, while astyping is equivalent of floor division.
+Timedelta :class:`Series`, ``TimedeltaIndex``, and ``Timedelta`` scalars can be converted to other frequencies by astyping to a specific timedelta dtype. .. ipython:: python @@ -250,14 +248,17 @@ Note that division by the NumPy scalar is true division, while astyping is equiv td[3] = np.nan td - # to days - td / np.timedelta64(1, "D") - td.astype("timedelta64[D]") - # to seconds - td / np.timedelta64(1, "s") td.astype("timedelta64[s]") +For timedelta64 resolutions other than the supported "s", "ms", "us", "ns", +an alternative is to divide by another timedelta object. Note that division by the NumPy scalar is true division, while astyping is equivalent to floor division. + +.. ipython:: python + + # to days + td / np.timedelta64(1, "D") + + # to months (these are constant months) td / np.timedelta64(1, "M") diff --git a/doc/source/whatsnew/v0.11.0.rst b/doc/source/whatsnew/v0.11.0.rst index 0fba784e36661..33f83c272b23d 100644 --- a/doc/source/whatsnew/v0.11.0.rst +++ b/doc/source/whatsnew/v0.11.0.rst @@ -368,7 +368,7 @@ Enhancements - You can now select with a string from a DataFrame with a datelike index, in a similar way to a Series (:issue:`3070`) .. ipython:: python - :okwarning: + :okexcept: idx = pd.date_range("2001-10-1", periods=5, freq='M') ts = pd.Series(np.random.rand(len(idx)), index=idx) diff --git a/doc/source/whatsnew/v0.13.0.rst b/doc/source/whatsnew/v0.13.0.rst index 8265ad58f7ea3..df9f0a953ffab 100644 --- a/doc/source/whatsnew/v0.13.0.rst +++ b/doc/source/whatsnew/v0.13.0.rst @@ -532,6 +532,7 @@ Enhancements is frequency conversion. See :ref:`the docs` for the docs. .. ipython:: python + :okexcept: import datetime td = pd.Series(pd.date_range('20130101', periods=4)) - pd.Series( @@ -733,7 +734,7 @@ Enhancements .. _scipy: http://www.scipy.org .. _documentation: http://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation -.. _guide: http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html +.. _guide: https://docs.scipy.org/doc/scipy/tutorial/interpolate.html - ``to_csv`` now takes a ``date_format`` keyword argument that specifies how output datetime objects should be formatted. Datetimes encountered in the diff --git a/doc/source/whatsnew/v0.15.0.rst b/doc/source/whatsnew/v0.15.0.rst index 04506f1655c7d..f52253687ecfd 100644 --- a/doc/source/whatsnew/v0.15.0.rst +++ b/doc/source/whatsnew/v0.15.0.rst @@ -70,7 +70,6 @@ For full docs, see the :ref:`categorical introduction ` and the :ref:`API documentation `. .. ipython:: python - :okwarning: df = pd.DataFrame({"id": [1, 2, 3, 4, 5, 6], "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']}) @@ -79,7 +78,7 @@ For full docs, see the :ref:`categorical introduction ` and the df["grade"] # Rename the categories - df["grade"].cat.categories = ["very good", "good", "very bad"] + df["grade"] = df["grade"].cat.rename_categories(["very good", "good", "very bad"]) # Reorder the categories and simultaneously add the missing categories df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst index 0c992cf3cc462..feeb7b5ee30ce 100644 --- a/doc/source/whatsnew/v0.19.0.rst +++ b/doc/source/whatsnew/v0.19.0.rst @@ -271,12 +271,12 @@ Individual columns can be parsed as a ``Categorical`` using a dict specification such as :func:`to_datetime`. ..
ipython:: python - :okwarning: df = pd.read_csv(StringIO(data), dtype="category") df.dtypes df["col3"] - df["col3"].cat.categories = pd.to_numeric(df["col3"].cat.categories) + new_categories = pd.to_numeric(df["col3"].cat.categories) + df["col3"] = df["col3"].cat.rename_categories(new_categories) df["col3"] .. _whatsnew_0190.enhancements.union_categoricals: diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst index faf4b1ac44d5b..b41a469fe0c1f 100644 --- a/doc/source/whatsnew/v0.20.0.rst +++ b/doc/source/whatsnew/v0.20.0.rst @@ -104,10 +104,13 @@ aggregations. This is similar to how groupby ``.agg()`` works. (:issue:`15015`) 'D': pd.date_range('20130101', periods=3)}) df.dtypes -.. ipython:: python - :okwarning: +.. code-block:: python - df.agg(['min', 'sum']) + In [10]: df.agg(['min', 'sum']) + Out[10]: + A B C D + min 1 1.0 bar 2013-01-01 + sum 6 6.0 foobarbaz NaT .. _whatsnew_0200.enhancements.dataio_dtype: diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index f5175283cce4e..06356c8b02e84 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1357,6 +1357,7 @@ the object's ``freq`` attribute (:issue:`21939`, :issue:`23878`). *New behavior*: .. ipython:: python + :okexcept: :okwarning: ts = pd.Timestamp('1994-05-06 12:15:16', freq=pd.offsets.Hour()) diff --git a/doc/source/whatsnew/v1.5.2.rst b/doc/source/whatsnew/v1.5.2.rst index aaf00804262bb..e65be3bcecd76 100644 --- a/doc/source/whatsnew/v1.5.2.rst +++ b/doc/source/whatsnew/v1.5.2.rst @@ -13,7 +13,9 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ -- +- Fixed regression in :meth:`Series.replace` raising ``RecursionError`` with numeric dtype and when specifying ``value=None`` (:issue:`45725`) +- Fixed regression in :meth:`DataFrame.plot` preventing :class:`~matplotlib.colors.Colormap` instance + from being passed using the ``colormap`` argument if Matplotlib 3.6+ is used (:issue:`49374`) - .. --------------------------------------------------------------------------- @@ -21,7 +23,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- Bug in the Copy-on-Write implementation losing track of views in certain chained indexing cases (:issue:`48996`) - .. --------------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index a3b6d1dc90fee..c76555f9ef417 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -28,10 +28,24 @@ Available optional dependencies (listed in order of appearance at `install guide ``[all, performance, computation, timezone, fss, aws, gcp, excel, parquet, feather, hdf5, spss, postgresql, mysql, sql-other, html, xml, plot, output_formatting, clipboard, compression, test]`` (:issue:`39164`). -.. _whatsnew_200.enhancements.enhancement2: +.. _whatsnew_200.enhancements.io_readers_nullable_pyarrow: -enhancement2 -^^^^^^^^^^^^ +Configuration option, ``io.nullable_backend``, to return pyarrow-backed dtypes from IO functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A new global configuration, ``io.nullable_backend`` can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in :func:`read_parquet` and :func:`read_csv` (with ``engine="pyarrow"``) +to return pyarrow-backed dtypes when set to ``"pyarrow"`` (:issue:`48957`). + +.. 
ipython:: python + + import io + data = io.StringIO("""a,b,c,d,e,f,g,h,i + 1,2.5,True,a,,,,, + 3,4.5,False,b,6,7.5,True,a, + """) + with pd.option_context("io.nullable_backend", "pyarrow"): + df = pd.read_csv(data, use_nullable_dtypes=True, engine="pyarrow") + df .. _whatsnew_200.enhancements.other: @@ -42,13 +56,13 @@ Other enhancements - :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support an ``axis`` argument. If ``axis`` is set, the default behaviour of which axis to consider can be overwritten (:issue:`47819`) - :func:`assert_frame_equal` now shows the first element where the DataFrames differ, analogously to ``pytest``'s output (:issue:`47910`) - Added new argument ``use_nullable_dtypes`` to :func:`read_csv` and :func:`read_excel` to enable automatic conversion to nullable dtypes (:issue:`36712`) -- Added new global configuration, ``io.nullable_backend`` to allow ``use_nullable_dtypes=True`` to return pyarrow-backed dtypes when set to ``"pyarrow"`` in :func:`read_parquet` (:issue:`48957`) - Added ``index`` parameter to :meth:`DataFrame.to_dict` (:issue:`46398`) - Added metadata propagation for binary operators on :class:`DataFrame` (:issue:`28283`) - :class:`.CategoricalConversionWarning`, :class:`.InvalidComparison`, :class:`.InvalidVersion`, :class:`.LossySetitemError`, and :class:`.NoBufferPresent` are now exposed in ``pandas.errors`` (:issue:`27656`) - Fix ``test`` optional_extra by adding missing test package ``pytest-asyncio`` (:issue:`48361`) - :func:`DataFrame.astype` exception message thrown improved to include column name when type conversion is not possible. (:issue:`47571`) - :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. Defaulting to 'w', 'a' can be used when lines=True and orient='records' to append record oriented json lines to an existing json file. (:issue:`35849`) +- Added ``name`` parameter to :meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_arrays` and :meth:`IntervalIndex.from_tuples` (:issue:`48911`) .. --------------------------------------------------------------------------- .. _whatsnew_200.notable_bug_fixes: @@ -99,6 +113,91 @@ notable_bug_fix2 Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_200.api_breaking.astype_to_unsupported_datetimelike: + +Disallow astype conversion to non-supported datetime64/timedelta64 dtypes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions, converting a :class:`Series` or :class:`DataFrame` +from ``datetime64[ns]`` to a different ``datetime64[X]`` dtype would return +with ``datetime64[ns]`` dtype instead of the requested dtype. In pandas 2.0, +support is added for "datetime64[s]", "datetime64[ms]", and "datetime64[us]" dtypes, +so converting to those dtypes gives exactly the requested dtype: + +.. ipython:: python + + idx = pd.date_range("2016-01-01", periods=3) + ser = pd.Series(idx) + +*Previous behavior*: + +.. code-block:: ipython + + In [4]: ser.astype("datetime64[s]") + Out[4]: + 0 2016-01-01 + 1 2016-01-02 + 2 2016-01-03 + dtype: datetime64[ns] + +With the new behavior, we get exactly the requested dtype: + +*New behavior*: + +.. ipython:: python + + ser.astype("datetime64[s]") + +For non-supported resolutions e.g. "datetime64[D]", we raise instead of silently +ignoring the requested dtype: + +*New behavior*: + +..
ipython:: python + :okexcept: + + ser.astype("datetime64[D]") + +For conversion from ``timedelta64[ns]`` dtypes, the old behavior converted +to a floating point format. + +.. ipython:: python + + idx = pd.timedelta_range("1 Day", periods=3) + ser = pd.Series(idx) + +*Previous behavior*: + +.. code-block:: ipython + + In [7]: ser.astype("timedelta64[s]") + Out[7]: + 0 86400.0 + 1 172800.0 + 2 259200.0 + dtype: float64 + + In [8]: ser.astype("timedelta64[D]") + Out[8]: + 0 1.0 + 1 2.0 + 2 3.0 + dtype: float64 + +The new behavior, as for datetime64, either gives exactly the requested dtype or raises: + +*New behavior*: + +.. ipython:: python + :okexcept: + + ser.astype("timedelta64[s]") + ser.astype("timedelta64[D]") + .. _whatsnew_200.api_breaking.deps: Increased minimum versions for dependencies @@ -123,6 +222,10 @@ Optional libraries below the lowest tested version may still work, but are not c +=================+=================+=========+ | pyarrow | 6.0.0 | X | +-----------------+-----------------+---------+ +| matplotlib | 3.6.1 | X | ++-----------------+-----------------+---------+ +| fastparquet | 0.6.3 | X | ++-----------------+-----------------+---------+ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. @@ -130,6 +233,7 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor Other API changes ^^^^^^^^^^^^^^^^^ +- The ``freq``, ``tz``, ``nanosecond``, and ``unit`` keywords in the :class:`Timestamp` constructor are now keyword-only (:issue:`45307`) - Passing ``nanoseconds`` greater than 999 or less than 0 in :class:`Timestamp` now raises a ``ValueError`` (:issue:`48538`, :issue:`48255`) - :func:`read_csv`: specifying an incorrect number of columns with ``index_col`` now raises ``ParserError`` instead of ``IndexError`` when using the c parser. - Default value of ``dtype`` in :func:`get_dummies` is changed to ``bool`` from ``uint8`` (:issue:`45848`) @@ -141,6 +245,10 @@ Other API changes - The ``other`` argument in :meth:`DataFrame.mask` and :meth:`Series.mask` now defaults to ``no_default`` instead of ``np.nan`` consistent with :meth:`DataFrame.where` and :meth:`Series.where`. Entries will be filled with the corresponding NULL value (``np.nan`` for numpy dtypes, ``pd.NA`` for extension dtypes).
(:issue:`49111`) - When creating a :class:`Series` with an object-dtype :class:`Index` of datetime objects, pandas no longer silently converts the index to a :class:`DatetimeIndex` (:issue:`39307`, :issue:`23598`) - :meth:`Series.unique` with dtype "timedelta64[ns]" or "datetime64[ns]" now returns :class:`TimedeltaArray` or :class:`DatetimeArray` instead of ``numpy.ndarray`` (:issue:`49176`) +- :func:`pandas.api.types.is_string_dtype` now only returns ``True`` for array-likes with ``dtype=object`` when the elements are inferred to be strings (:issue:`15585`) +- Passing a sequence containing ``datetime`` objects and ``date`` objects to the :class:`Series` constructor will return with ``object`` dtype instead of ``datetime64[ns]`` dtype, consistent with :class:`Index` behavior (:issue:`49341`) +- Passing strings that cannot be parsed as datetimes to :class:`Series` or :class:`DataFrame` with ``dtype="datetime64[ns]"`` will raise instead of silently ignoring the keyword and returning ``object`` dtype (:issue:`24435`) +- Passing a sequence containing a type that cannot be converted to :class:`Timedelta` to :func:`to_timedelta` or to the :class:`Series` or :class:`DataFrame` constructor with ``dtype="timedelta64[ns]"`` or to :class:`TimedeltaIndex` now raises ``TypeError`` instead of ``ValueError`` (:issue:`49525`) - .. --------------------------------------------------------------------------- @@ -156,6 +264,10 @@ Deprecations Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- Removed deprecated :attr:`Timestamp.freq`, :attr:`Timestamp.freqstr` and argument ``freq`` from the :class:`Timestamp` constructor and :meth:`Timestamp.fromordinal` (:issue:`14146`) +- Removed deprecated :class:`CategoricalBlock`, :meth:`Block.is_categorical`, require datetime64 and timedelta64 values to be wrapped in :class:`DatetimeArray` or :class:`TimedeltaArray` before passing to :meth:`Block.make_block_same_class`, require ``DatetimeTZBlock.values`` to have the correct ndim when passing to the :class:`BlockManager` constructor, and removed the "fastpath" keyword from the :class:`SingleBlockManager` constructor (:issue:`40226`, :issue:`40571`) +- Removed deprecated module ``pandas.core.index`` (:issue:`30193`) +- Removed deprecated alias ``pandas.core.tools.datetimes.to_time``, import the function directly from ``pandas.core.tools.times`` instead (:issue:`34145`) - Removed deprecated :meth:`Categorical.to_dense`, use ``np.asarray(cat)`` instead (:issue:`32639`) - Removed deprecated :meth:`Categorical.take_nd` (:issue:`27745`) - Removed deprecated :meth:`Categorical.mode`, use ``Series(cat).mode()`` instead (:issue:`45033`) @@ -165,16 +277,32 @@ Removal of prior version deprecations/changes - Removed deprecated :meth:`Index.is_mixed`, check ``index.inferred_type`` directly instead (:issue:`32922`) - Removed deprecated :func:`pandas.api.types.is_categorical`; use :func:`pandas.api.types.is_categorical_dtype` instead (:issue:`33385`) - Removed deprecated :meth:`Index.asi8` (:issue:`37877`) +- Enforced deprecation changing behavior when passing ``datetime64[ns]`` dtype data and timezone-aware dtype to :class:`Series`, interpreting the values as wall-times instead of UTC times, matching :class:`DatetimeIndex` behavior (:issue:`41662`) - Removed deprecated :meth:`DataFrame._AXIS_NUMBERS`, :meth:`DataFrame._AXIS_NAMES`, :meth:`Series._AXIS_NUMBERS`, :meth:`Series._AXIS_NAMES` (:issue:`33637`) - Removed deprecated :meth:`Index.to_native_types`, use ``obj.astype(str)`` instead
(:issue:`36418`) - Removed deprecated :meth:`Series.iteritems`, :meth:`DataFrame.iteritems`, use ``obj.items`` instead (:issue:`45321`) +- Removed deprecated :meth:`DataFrame.lookup` (:issue:`35224`) +- Removed deprecated :meth:`Series.append`, :meth:`DataFrame.append`, use :func:`concat` instead (:issue:`35407`) +- Removed deprecated :meth:`Series.iteritems`, :meth:`DataFrame.iteritems`, and :meth:`HDFStore.iteritems`; use ``obj.items`` instead (:issue:`45321`) - Removed deprecated :meth:`DatetimeIndex.union_many` (:issue:`45018`) +- Removed deprecated ``weekofyear`` and ``week`` attributes of :class:`DatetimeArray`, :class:`DatetimeIndex` and ``dt`` accessor in favor of ``isocalendar().week`` (:issue:`33595`) - Removed deprecated :meth:`RangeIndex._start`, :meth:`RangeIndex._stop`, :meth:`RangeIndex._step`, use ``start``, ``stop``, ``step`` instead (:issue:`30482`) - Removed deprecated :meth:`DatetimeIndex.to_perioddelta`, Use ``dtindex - dtindex.to_period(freq).to_timestamp()`` instead (:issue:`34853`) +- Removed deprecated :meth:`.Styler.hide_index` and :meth:`.Styler.hide_columns` (:issue:`49397`) +- Removed deprecated :meth:`.Styler.set_na_rep` and :meth:`.Styler.set_precision` (:issue:`49397`) +- Removed deprecated :meth:`.Styler.where` (:issue:`49397`) +- Removed deprecated :meth:`.Styler.render` (:issue:`49397`) +- Removed deprecated argument ``null_color`` in :meth:`.Styler.highlight_null` (:issue:`49397`) +- Removed deprecated argument ``check_less_precise`` in :meth:`.testing.assert_frame_equal`, :meth:`.testing.assert_extension_array_equal`, :meth:`.testing.assert_series_equal`, :meth:`.testing.assert_index_equal` (:issue:`30562`) +- Removed deprecated ``null_counts`` argument in :meth:`DataFrame.info`. Use ``show_counts`` instead (:issue:`37999`) +- Removed deprecated :meth:`Index.is_monotonic`, and :meth:`Series.is_monotonic`; use ``obj.is_monotonic_increasing`` instead (:issue:`45422`) +- Removed deprecated :meth:`Index.is_all_dates` (:issue:`36697`) - Enforced deprecation disallowing passing a timezone-aware :class:`Timestamp` and ``dtype="datetime64[ns]"`` to :class:`Series` or :class:`DataFrame` constructors (:issue:`41555`) - Enforced deprecation disallowing passing a sequence of timezone-aware values and ``dtype="datetime64[ns]"`` to :class:`Series` or :class:`DataFrame` constructors (:issue:`41555`) +- Enforced deprecation disallowing unit-less "datetime64" dtype in :meth:`Series.astype` and :meth:`DataFrame.astype` (:issue:`47844`) - Enforced deprecation disallowing using ``.astype`` to convert a ``datetime64[ns]`` :class:`Series`, :class:`DataFrame`, or :class:`DatetimeIndex` to timezone-aware dtype, use ``obj.tz_localize`` or ``ser.dt.tz_localize`` instead (:issue:`39258`) - Enforced deprecation disallowing using ``.astype`` to convert a timezone-aware :class:`Series`, :class:`DataFrame`, or :class:`DatetimeIndex` to timezone-naive ``datetime64[ns]`` dtype, use ``obj.tz_localize(None)`` or ``obj.tz_convert("UTC").tz_localize(None)`` instead (:issue:`39258`) +- Enforced deprecation disallowing passing a non-boolean argument to ``sort`` in :func:`concat` (:issue:`44629`) - Removed Date parser functions :func:`~pandas.io.date_converters.parse_date_time`, :func:`~pandas.io.date_converters.parse_date_fields`, :func:`~pandas.io.date_converters.parse_all_fields` and :func:`~pandas.io.date_converters.generic_parser` (:issue:`24518`) @@ -183,37 +311,82 @@ Removal of prior version deprecations/changes - Removed ``keep_tz`` argument in :meth:`DatetimeIndex.to_series`
(:issue:`29731`) - Remove arguments ``names`` and ``dtype`` from :meth:`Index.copy` and ``levels`` and ``codes`` from :meth:`MultiIndex.copy` (:issue:`35853`, :issue:`36685`) - Remove argument ``inplace`` from :meth:`MultiIndex.set_levels` and :meth:`MultiIndex.set_codes` (:issue:`35626`) +- Removed arguments ``verbose`` and ``encoding`` from :meth:`DataFrame.to_excel` and :meth:`Series.to_excel` (:issue:`47912`) +- Removed argument ``line_terminator`` from :meth:`DataFrame.to_csv` and :meth:`Series.to_csv`, use ``lineterminator`` instead (:issue:`45302`) +- Removed argument ``inplace`` from :meth:`DataFrame.set_axis` and :meth:`Series.set_axis`, use ``obj = obj.set_axis(..., copy=False)`` instead (:issue:`48130`) - Disallow passing positional arguments to :meth:`MultiIndex.set_levels` and :meth:`MultiIndex.set_codes` (:issue:`41485`) - Removed :meth:`MultiIndex.is_lexsorted` and :meth:`MultiIndex.lexsort_depth` (:issue:`38701`) - Removed argument ``how`` from :meth:`PeriodIndex.astype`, use :meth:`PeriodIndex.to_timestamp` instead (:issue:`37982`) - Removed argument ``try_cast`` from :meth:`DataFrame.mask`, :meth:`DataFrame.where`, :meth:`Series.mask` and :meth:`Series.where` (:issue:`38836`) - Removed argument ``tz`` from :meth:`Period.to_timestamp`, use ``obj.to_timestamp(...).tz_localize(tz)`` instead (:issue:`34522`) +- Removed argument ``sort_columns`` in :meth:`DataFrame.plot` and :meth:`Series.plot` (:issue:`47563`) - Removed argument ``is_copy`` from :meth:`DataFrame.take` and :meth:`Series.take` (:issue:`30615`) - Removed argument ``kind`` from :meth:`Index.get_slice_bound`, :meth:`Index.slice_indexer` and :meth:`Index.slice_locs` (:issue:`41378`) +- Removed arguments ``prefix``, ``squeeze``, ``error_bad_lines`` and ``warn_bad_lines`` from :func:`read_csv` (:issue:`40413`, :issue:`43427`) +- Removed argument ``datetime_is_numeric`` from :meth:`DataFrame.describe` and :meth:`Series.describe` as datetime data will always be summarized as numeric data (:issue:`34798`) +- Disallow passing list ``key`` to :meth:`Series.xs` and :meth:`DataFrame.xs`, pass a tuple instead (:issue:`41789`) +- Disallow subclass-specific keywords (e.g. 
"freq", "tz", "names", "closed") in the :class:`Index` constructor (:issue:`38597`) +- Removed argument ``inplace`` from :meth:`Categorical.remove_unused_categories` (:issue:`37918`) - Disallow passing non-round floats to :class:`Timestamp` with ``unit="M"`` or ``unit="Y"`` (:issue:`47266`) - Remove keywords ``convert_float`` and ``mangle_dupe_cols`` from :func:`read_excel` (:issue:`41176`) +- Removed ``errors`` keyword from :meth:`DataFrame.where`, :meth:`Series.where`, :meth:`DataFrame.mask` and :meth:`Series.mask` (:issue:`47728`) - Disallow passing non-keyword arguments to :func:`read_excel` except ``io`` and ``sheet_name`` (:issue:`34418`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.drop` and :meth:`Series.drop` except ``labels`` (:issue:`41486`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.fillna` and :meth:`Series.fillna` except ``value`` (:issue:`41485`) +- Disallow passing non-keyword arguments to :meth:`StringMethods.split` and :meth:`StringMethods.rsplit` except for ``pat`` (:issue:`47448`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.set_index` except ``keys`` (:issue:`41495`) +- Disallow passing non-keyword arguments to :meth:`Resampler.interpolate` except ``method`` (:issue:`41699`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.reset_index` and :meth:`Series.reset_index` except ``level`` (:issue:`41496`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.dropna` and :meth:`Series.dropna` (:issue:`41504`) +- Disallow passing non-keyword arguments to :meth:`ExtensionArray.argsort` (:issue:`46134`) +- Disallow passing non-keyword arguments to :meth:`Categorical.sort_values` (:issue:`47618`) +- Disallow passing non-keyword arguments to :meth:`Index.drop_duplicates` and :meth:`Series.drop_duplicates` (:issue:`41485`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.drop_duplicates` except for ``subset`` (:issue:`41485`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` (:issue:`41506`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.interpolate` and :meth:`Series.interpolate` except for ``method`` (:issue:`41510`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.any` and :meth:`Series.any` (:issue:`44896`) +- Disallow passing non-keyword arguments to :meth:`Index.set_names` except for ``names`` (:issue:`41551`) +- Disallow passing non-keyword arguments to :meth:`Index.join` except for ``other`` (:issue:`46518`) +- Disallow passing non-keyword arguments to :func:`concat` except for ``objs`` (:issue:`41485`) +- Disallow passing non-keyword arguments to :func:`pivot` except for ``data`` (:issue:`48301`) +- Disallow passing non-keyword arguments to :meth:`DataFrame.pivot` (:issue:`48301`) +- Disallow passing non-keyword arguments to :func:`read_html` except for ``io`` (:issue:`27573`) +- Disallow passing non-keyword arguments to :func:`read_json` except for ``path_or_buf`` (:issue:`27573`) +- Disallow passing non-keyword arguments to :func:`read_sas` except for ``filepath_or_buffer`` (:issue:`47154`) +- Disallow passing non-keyword arguments to :func:`read_stata` except for ``filepath_or_buffer`` (:issue:`48128`) +- Disallow passing non-keyword arguments to :func:`read_csv` except ``filepath_or_buffer`` (:issue:`41485`) +- Disallow passing non-keyword arguments to :func:`read_table` except ``filepath_or_buffer`` (:issue:`41485`) +- Disallow passing non-keyword arguments to :func:`read_fwf` except 
``filepath_or_buffer`` (:issue:`44710`) +- Disallow passing non-keyword arguments to :func:`read_xml` except for ``path_or_buffer`` (:issue:`45133`) - Disallow passing non-keyword arguments to :meth:`Series.mask` and :meth:`DataFrame.mask` except ``cond`` and ``other`` (:issue:`41580`) - Disallow passing non-keyword arguments to :meth:`DataFrame.to_stata` except for ``path`` (:issue:`48128`) - Disallow passing non-keyword arguments to :meth:`DataFrame.where` and :meth:`Series.where` except for ``cond`` and ``other`` (:issue:`41523`) - Disallow passing non-keyword arguments to :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` except for ``labels`` (:issue:`41491`) - Disallow passing non-keyword arguments to :meth:`Series.rename_axis` and :meth:`DataFrame.rename_axis` except for ``mapper`` (:issue:`47587`) +- Disallow :meth:`Index.reindex` with non-unique :class:`Index` objects (:issue:`42568`) - Disallow passing non-keyword arguments to :meth:`Series.clip` and :meth:`DataFrame.clip` (:issue:`41511`) - Disallow passing non-keyword arguments to :meth:`Series.bfill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill` and :meth:`DataFrame.ffill` (:issue:`41508`) - Disallow passing non-keyword arguments to :meth:`DataFrame.replace`, :meth:`Series.replace` except for ``to_replace`` and ``value`` (:issue:`47587`) - Disallow passing non-keyword arguments to :meth:`DataFrame.sort_values` except for ``by`` (:issue:`41505`) - Disallow passing non-keyword arguments to :meth:`Series.sort_values` (:issue:`41505`) +- Disallowed constructing :class:`Categorical` with scalar ``data`` (:issue:`38433`) +- Disallowed constructing :class:`CategoricalIndex` without passing ``data`` (:issue:`38944`) - Removed :meth:`.Rolling.validate`, :meth:`.Expanding.validate`, and :meth:`.ExponentialMovingWindow.validate` (:issue:`43665`) - Removed :attr:`Rolling.win_type` returning ``"freq"`` (:issue:`38963`) - Removed :attr:`Rolling.is_datetimelike` (:issue:`38963`) - Removed deprecated :meth:`Timedelta.delta`, :meth:`Timedelta.is_populated`, and :attr:`Timedelta.freq` (:issue:`46430`, :issue:`46476`) - Removed deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`) - Removed the ``numeric_only`` keyword from :meth:`Categorical.min` and :meth:`Categorical.max` in favor of ``skipna`` (:issue:`48821`) +- Changed behavior of :meth:`DataFrame.median` and :meth:`DataFrame.mean` with ``numeric_only=None`` to not exclude datetime-like columns THIS NOTE WILL BE IRRELEVANT ONCE ``numeric_only=None`` DEPRECATION IS ENFORCED (:issue:`29941`) - Removed :func:`is_extension_type` in favor of :func:`is_extension_array_dtype` (:issue:`29457`) +- Removed ``.ExponentialMovingWindow.vol`` (:issue:`39220`) - Removed :meth:`Index.get_value` and :meth:`Index.set_value` (:issue:`33907`, :issue:`28621`) - Removed :meth:`Series.slice_shift` and :meth:`DataFrame.slice_shift` (:issue:`37601`) - Remove :meth:`DataFrameGroupBy.pad` and :meth:`DataFrameGroupBy.backfill` (:issue:`45076`) - Remove ``numpy`` argument from :func:`read_json` (:issue:`30636`) +- Disallow passing abbreviations for ``orient`` in :meth:`DataFrame.to_dict` (:issue:`32516`) +- Removed ``get_offset`` in favor of :func:`to_offset` (:issue:`30340`) +- Removed the ``warn`` keyword in :func:`infer_freq` (:issue:`45947`) - Removed the ``center`` keyword in :meth:`DataFrame.expanding` (:issue:`20647`) - Removed the ``truediv`` keyword from :func:`eval` (:issue:`29812`) - Removed the ``pandas.datetime`` submodule (:issue:`30489`) @@ -221,14 +394,54 @@ 
Removal of prior version deprecations/changes - Removed ``pandas.util.testing`` in favor of ``pandas.testing`` (:issue:`30745`) - Removed :meth:`Series.str.__iter__` (:issue:`28277`) - Removed ``pandas.SparseArray`` in favor of :class:`arrays.SparseArray` (:issue:`30642`) -- Removed ``pandas.SparseSeries`` and ``pandas.SparseDataFrame`` (:issue:`30642`) +- Removed ``pandas.SparseSeries`` and ``pandas.SparseDataFrame``, including pickle support (:issue:`30642`) +- Enforced disallowing passing an integer ``fill_value`` to :meth:`DataFrame.shift` and :meth:`Series.shift` with datetime64, timedelta64, or period dtypes (:issue:`32591`) - Enforced disallowing a string column label into ``times`` in :meth:`DataFrame.ewm` (:issue:`43265`) +- Enforced disallowing using ``usecols`` with out-of-bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`) +- Enforced disallowing the use of ``**kwargs`` in :class:`.ExcelWriter`; use the keyword argument ``engine_kwargs`` instead (:issue:`40430`) +- Enforced disallowing a tuple of column labels into :meth:`.DataFrameGroupBy.__getitem__` (:issue:`30546`) +- Enforced disallowing setting values with ``.loc`` using a positional slice. Use ``.loc`` with labels or ``.iloc`` with positions instead (:issue:`31840`) +- Enforced disallowing positional indexing with a ``float`` key even if that key is a round number, manually cast to integer instead (:issue:`34193`) +- Enforced disallowing using a :class:`DataFrame` indexer with ``.iloc``, use ``.loc`` instead for automatic alignment (:issue:`39022`) +- Enforced disallowing ``set`` or ``dict`` indexers in ``__getitem__`` and ``__setitem__`` methods (:issue:`42825`) +- Enforced disallowing indexing on a :class:`Index` or positional indexing on a :class:`Series` producing multi-dimensional objects e.g. ``obj[:, None]``, convert to numpy before indexing instead (:issue:`35141`) +- Enforced disallowing ``dict`` or ``set`` objects in ``suffixes`` in :func:`merge` (:issue:`34810`) +- Enforced disallowing :func:`merge` to produce duplicated columns through the ``suffixes`` keyword and already existing columns (:issue:`22818`) +- Enforced disallowing using :func:`merge` or :func:`join` on a different number of levels (:issue:`34862`) +- Enforced disallowing ``value_name`` argument in :func:`DataFrame.melt` to match an element in the :class:`DataFrame` columns (:issue:`35003`) - Removed setting Categorical._codes directly (:issue:`41429`) +- Removed setting Categorical.categories directly (:issue:`47834`) +- Removed argument ``inplace`` from :meth:`Categorical.add_categories`, :meth:`Categorical.remove_categories`, :meth:`Categorical.set_categories`, :meth:`Categorical.rename_categories`, :meth:`Categorical.reorder_categories`, :meth:`Categorical.set_ordered`, :meth:`Categorical.as_ordered`, :meth:`Categorical.as_unordered` (:issue:`37981`, :issue:`41118`, :issue:`41133`, :issue:`47834`) - Enforced :meth:`Rolling.count` with ``min_periods=None`` to default to the size of the window (:issue:`31302`) +- Renamed ``fname`` to ``path`` in :meth:`DataFrame.to_parquet`, :meth:`DataFrame.to_stata` and :meth:`DataFrame.to_feather` (:issue:`30338`) +- Enforced disallowing indexing a :class:`Series` with a single-item list containing a slice (e.g. ``ser[[slice(0, 2)]]``).
Either convert the list to tuple, or pass the slice directly instead (:issue:`31333`) +- Changed behavior indexing on a :class:`DataFrame` with a :class:`DatetimeIndex` index using a string indexer, previously this operated as a slice on rows, now it operates like any other column key; use ``frame.loc[key]`` for the old behavior (:issue:`36179`) - Enforced the ``display.max_colwidth`` option to not accept negative integers (:issue:`31569`) - Removed the ``display.column_space`` option in favor of ``df.to_string(col_space=...)`` (:issue:`47280`) - Removed the deprecated method ``mad`` from pandas classes (:issue:`11787`) - Removed the deprecated method ``tshift`` from pandas classes (:issue:`11631`) +- Changed behavior of empty data passed into :class:`Series`; the default dtype will be ``object`` instead of ``float64`` (:issue:`29405`) +- Changed the behavior of :meth:`DatetimeIndex.union`, :meth:`DatetimeIndex.intersection`, and :meth:`DatetimeIndex.symmetric_difference` with mismatched timezones to convert to UTC instead of casting to object dtype (:issue:`39328`) +- Changed the behavior of :func:`to_datetime` with argument "now" with ``utc=False`` to match ``Timestamp("now")`` (:issue:`18705`) +- Changed behavior of :meth:`SparseArray.astype` when given a dtype that is not explicitly ``SparseDtype``, cast to the exact requested dtype rather than silently using a ``SparseDtype`` instead (:issue:`34457`) +- Changed behavior of :meth:`Index.ravel` to return a view on the original :class:`Index` instead of a ``np.ndarray`` (:issue:`36900`) +- Changed behavior of :meth:`Index.to_frame` with explicit ``name=None`` to use ``None`` for the column name instead of the index's name or default ``0`` (:issue:`45523`) +- Changed behavior of :class:`DataFrame` constructor given floating-point ``data`` and an integer ``dtype``, when the data cannot be cast losslessly, the floating point dtype is retained, matching :class:`Series` behavior (:issue:`41170`) +- Changed behavior of :class:`Index` constructor when given a ``np.ndarray`` with object-dtype containing numeric entries; this now retains object dtype rather than inferring a numeric dtype, consistent with :class:`Series` behavior (:issue:`42870`) +- Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`) +- Changed the behavior of :class:`Series` constructor, it will no longer infer a datetime64 or timedelta64 dtype from string entries (:issue:`41731`) +- Changed behavior of :class:`Timestamp` constructor with a ``np.datetime64`` object and a ``tz`` passed to interpret the input as a wall-time as opposed to a UTC time (:issue:`42288`) +- Changed behavior of :class:`Index` constructor when passed a ``SparseArray`` or ``SparseDtype`` to retain that dtype instead of casting to ``numpy.ndarray`` (:issue:`43930`) +- Changed behavior of setitem-like operations (``__setitem__``, ``fillna``, ``where``, ``mask``, ``replace``, ``insert``, fill_value for ``shift``) on an object with :class:`DatetimeTZDtype` when using a value with a non-matching timezone, the value will be cast to the object's timezone instead of casting both to object-dtype (:issue:`44243`) +- Changed behavior of :class:`Index`, :class:`Series`, :class:`DataFrame` constructors with floating-dtype data and a :class:`DatetimeTZDtype`, the data are now interpreted as UTC-times instead of wall-times, consistent with how integer-dtype data are treated 
(:issue:`45573`) +- Removed the deprecated ``base`` and ``loffset`` arguments from :meth:`pandas.DataFrame.resample`, :meth:`pandas.Series.resample` and :class:`pandas.Grouper`. Use ``offset`` or ``origin`` instead (:issue:`31809`) +- Changed behavior of :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype and an incompatible ``fill_value``; this now casts to ``object`` dtype instead of raising, consistent with the behavior with other dtypes (:issue:`45746`) +- Changed the default argument of ``regex`` for :meth:`Series.str.replace` from ``True`` to ``False``. Additionally, a single character ``pat`` with ``regex=True`` is now treated as a regular expression instead of a string literal. (:issue:`36695`, :issue:`24804`) +- Changed behavior of :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``; object-dtype columns with all-bool values will no longer be included, manually cast to ``bool`` dtype first (:issue:`46188`) +- Changed behavior of comparison of a :class:`Timestamp` with a ``datetime.date`` object; these now compare as unequal and raise on inequality comparisons, matching the ``datetime.datetime`` behavior (:issue:`36131`) +- Enforced deprecation of silently dropping columns that raised a ``TypeError`` in :meth:`Series.transform` and :meth:`DataFrame.transform` when used with a list or dictionary (:issue:`43740`) +- Changed behavior of :meth:`DataFrame.apply` with list-like so that any partial failure will raise an error (:issue:`43740`) +- .. --------------------------------------------------------------------------- .. _whatsnew_200.performance: @@ -260,6 +473,7 @@ Performance improvements - Performance improvements to :func:`read_sas` (:issue:`47403`, :issue:`47405`, :issue:`47656`, :issue:`48502`) - Memory improvement in :meth:`RangeIndex.sort_values` (:issue:`48801`) - Performance improvement in :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` when ``by`` is a categorical type and ``sort=False`` (:issue:`48976`) +- Performance improvement in :func:`merge` when not merging on the index - the new index will now be :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`49478`) .. --------------------------------------------------------------------------- ..
_whatsnew_200.bug_fixes: @@ -278,7 +492,7 @@ Datetimelike - Bug in :func:`to_datetime` was raising on invalid offsets with ``errors='coerce'`` and ``infer_datetime_format=True`` (:issue:`48633`) - Bug in :class:`DatetimeIndex` constructor failing to raise when ``tz=None`` is explicitly specified in conjunction with timezone-aware ``dtype`` or data (:issue:`48659`) - Bug in subtracting a ``datetime`` scalar from :class:`DatetimeIndex` failing to retain the original ``freq`` attribute (:issue:`48818`) -- +- Bug in ``pandas.tseries.holiday.Holiday`` where a half-open date interval caused inconsistent return types from :meth:`USFederalHolidayCalendar.holidays` (:issue:`49075`) Timedelta ^^^^^^^^^ @@ -289,7 +503,7 @@ Timedelta Timezones ^^^^^^^^^ -- +- Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` with object-dtype containing multiple timezone-aware ``datetime`` objects with heterogeneous timezones to a :class:`DatetimeTZDtype` incorrectly raising (:issue:`32581`) - Numeric @@ -307,7 +521,7 @@ Conversion Strings ^^^^^^^ -- +- Bug in :func:`pandas.api.types.is_string_dtype` that would not return ``True`` for :class:`StringDtype` (:issue:`15585`) - Interval @@ -376,7 +590,8 @@ Reshaping - Bug in :meth:`DataFrame.unstack` and :meth:`Series.unstack` unstacking wrong level of :class:`MultiIndex` when :class:`MultiIndex` has mixed names (:issue:`48763`) - Bug in :meth:`DataFrame.pivot` not respecting ``None`` as column name (:issue:`48293`) - Bug in :func:`join` when ``left_on`` or ``right_on`` is or includes a :class:`CategoricalIndex` incorrectly raising ``AttributeError`` (:issue:`48464`) -- +- Bug in :meth:`DataFrame.pivot_table` raising ``ValueError`` with parameter ``margins=True`` when result is an empty :class:`DataFrame` (:issue:`49240`) +- Clarified error message in :func:`merge` when passing invalid ``validate`` option (:issue:`49417`) Sparse ^^^^^^ diff --git a/environment.yml b/environment.yml index 391d7bc779af9..d002cb16d59f5 100644 --- a/environment.yml +++ b/environment.yml @@ -53,7 +53,6 @@ dependencies: - xarray - xlrd - xlsxwriter - - xlwt - zstandard # downstream packages @@ -131,3 +130,5 @@ dependencies: - pip: - jupyterlite==0.1.0b12 - sphinx-toggleprompt + - "git+https://github.com/mesonbuild/meson.git@master" + - "git+https://github.com/mesonbuild/meson-python.git@main" diff --git a/generate_pxi.py b/generate_pxi.py new file mode 100644 index 0000000000000..3462b97aefcbf --- /dev/null +++ b/generate_pxi.py @@ -0,0 +1,33 @@ +import argparse +import os + +from Cython import Tempita + + +def process_tempita(pxifile, outfile): + with open(pxifile) as f: + tmpl = f.read() + pyxcontent = Tempita.sub(tmpl) + + with open(outfile, "w") as f: + f.write(pyxcontent) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("infile", type=str, help="Path to the input file") + parser.add_argument("-o", "--outdir", type=str, help="Path to the output directory") + args = parser.parse_args() + + if not args.infile.endswith(".in"): + raise ValueError(f"Unexpected extension: {args.infile}") + + outdir_abs = os.path.join(os.getcwd(), args.outdir) + outfile = os.path.join( + outdir_abs, os.path.splitext(os.path.split(args.infile)[1])[0] + ) + + process_tempita(args.infile, outfile) + + +main() diff --git a/meson.build b/meson.build index bc5bcf5b845f6..43b6621410c7e 100644 --- a/meson.build +++ b/meson.build @@ -2,17 +2,27 @@ project( 'pandas', 'c', 'cpp', 'cython', - version: '1.6.0.dev0', + version: '2.0.0.dev0', license: 'BSD-3', # TODO: bump when meson 0.64.0
comes out, # we are relying on 0.64.0 features meson_version: '>=0.63', default_options: [ + # TODO: investigate, does meson try to compile against debug Python + # when buildtype = debug, this seems to be causing problems on CI + # where provided Python is not compiled in debug mode 'buildtype=release', - 'c_std=c99' + # TODO: turn on werror when ready + #'werror=true', + # TODO: We are using POSIX functions(strdup in ujson), so we can't compile + # with strict C99 :( + #'c_std=c99' ] ) +add_global_arguments('-DNPY_NO_DEPRECATED_API=0', language : 'c') +add_global_arguments('-DNPY_NO_DEPRECATED_API=0', language : 'cpp') + py_mod = import('python') fs = import('fs') py = py_mod.find_installation('python') diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 96c47471aaf90..7b9fe6422544c 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -81,26 +81,48 @@ class Infinity: """ Provide a positive Infinity comparison method for ranking. """ - __lt__ = lambda self, other: False - __le__ = lambda self, other: isinstance(other, Infinity) - __eq__ = lambda self, other: isinstance(other, Infinity) - __ne__ = lambda self, other: not isinstance(other, Infinity) - __gt__ = lambda self, other: (not isinstance(other, Infinity) and - not missing.checknull(other)) - __ge__ = lambda self, other: not missing.checknull(other) + def __lt__(self, other): + return False + + def __le__(self, other): + return isinstance(other, Infinity) + + def __eq__(self, other): + return isinstance(other, Infinity) + + def __ne__(self, other): + return not isinstance(other, Infinity) + + def __gt__(self, other): + return (not isinstance(other, Infinity) and + not missing.checknull(other)) + + def __ge__(self, other): + return not missing.checknull(other) class NegInfinity: """ Provide a negative Infinity comparison method for ranking. 
""" - __lt__ = lambda self, other: (not isinstance(other, NegInfinity) and - not missing.checknull(other)) - __le__ = lambda self, other: not missing.checknull(other) - __eq__ = lambda self, other: isinstance(other, NegInfinity) - __ne__ = lambda self, other: not isinstance(other, NegInfinity) - __gt__ = lambda self, other: False - __ge__ = lambda self, other: isinstance(other, NegInfinity) + def __lt__(self, other): + return (not isinstance(other, NegInfinity) and + not missing.checknull(other)) + + def __le__(self, other): + return not missing.checknull(other) + + def __eq__(self, other): + return isinstance(other, NegInfinity) + + def __ne__(self, other): + return not isinstance(other, NegInfinity) + + def __gt__(self, other): + return False + + def __ge__(self, other): + return isinstance(other, NegInfinity) @cython.wraparound(False) @@ -321,7 +343,7 @@ def kth_smallest(numeric_t[::1] arr, Py_ssize_t k) -> numeric_t: @cython.cdivision(True) def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None): cdef: - Py_ssize_t i, j, xi, yi, N, K + Py_ssize_t i, xi, yi, N, K bint minpv float64_t[:, ::1] result ndarray[uint8_t, ndim=2] mask @@ -377,7 +399,7 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None): @cython.wraparound(False) def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarray: cdef: - Py_ssize_t i, j, xi, yi, N, K + Py_ssize_t i, xi, yi, N, K ndarray[float64_t, ndim=2] result ndarray[float64_t, ndim=2] ranked_mat ndarray[float64_t, ndim=1] rankedx, rankedy @@ -746,7 +768,8 @@ def is_monotonic(ndarray[numeric_object_t, ndim=1] arr, bint timelike): n = len(arr) if n == 1: - if arr[0] != arr[0] or (numeric_object_t is int64_t and timelike and arr[0] == NPY_NAT): + if arr[0] != arr[0] or (numeric_object_t is int64_t and timelike and + arr[0] == NPY_NAT): # single value is NaN return False, False, True else: diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index f798655e9d922..a351ad6e461f3 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -265,7 +265,7 @@ def group_cumprod( This method modifies the `out` parameter, rather than returning an object. """ cdef: - Py_ssize_t i, j, N, K, size + Py_ssize_t i, j, N, K int64float_t val, na_val int64float_t[:, ::1] accum intp_t lab @@ -356,7 +356,7 @@ def group_cumsum( This method modifies the `out` parameter, rather than returning an object. """ cdef: - Py_ssize_t i, j, N, K, size + Py_ssize_t i, j, N, K int64float_t val, y, t, na_val int64float_t[:, ::1] accum, compensation uint8_t[:, ::1] accum_mask @@ -441,7 +441,7 @@ def group_shift_indexer( int periods, ) -> None: cdef: - Py_ssize_t N, i, j, ii, lab + Py_ssize_t N, i, ii, lab int offset = 0, sign int64_t idxer, idxer_slot int64_t[::1] label_seen = np.zeros(ngroups, dtype=np.int64) @@ -743,8 +743,11 @@ def group_sum( # is otherwise the same as in _treat_as_na if uses_mask: isna_entry = mask[i, j] - elif (sum_t is float32_t or sum_t is float64_t - or sum_t is complex64_t or sum_t is complex64_t): + elif ( + sum_t is float32_t + or sum_t is float64_t + or sum_t is complex64_t + ): # avoid warnings because of equality comparison isna_entry = not val == val elif sum_t is int64_t and is_datetimelike and val == NPY_NAT: @@ -770,8 +773,11 @@ def group_sum( # set a placeholder value in out[i, j]. 
if uses_mask: result_mask[i, j] = True - elif (sum_t is float32_t or sum_t is float64_t - or sum_t is complex64_t or sum_t is complex64_t): + elif ( + sum_t is float32_t + or sum_t is float64_t + or sum_t is complex64_t + ): out[i, j] = NAN elif sum_t is int64_t: out[i, j] = NPY_NAT @@ -799,7 +805,7 @@ def group_prod( """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - int64float_t val, count + int64float_t val int64float_t[:, ::1] prodx int64_t[:, ::1] nobs Py_ssize_t len_values = len(values), len_labels = len(labels) @@ -872,7 +878,7 @@ def group_var( floating[:, ::1] mean int64_t[:, ::1] nobs Py_ssize_t len_values = len(values), len_labels = len(labels) - bint isna_entry, uses_mask = not mask is None + bint isna_entry, uses_mask = mask is not None assert min_count == -1, "'min_count' only used in sum and prod" @@ -969,7 +975,7 @@ def group_mean( mean_t[:, ::1] sumx, compensation int64_t[:, ::1] nobs Py_ssize_t len_values = len(values), len_labels = len(labels) - bint isna_entry, uses_mask = not mask is None + bint isna_entry, uses_mask = mask is not None assert min_count == -1, "'min_count' only used in sum and prod" @@ -1042,10 +1048,10 @@ def group_ohlc( Only aggregates on axis=0 """ cdef: - Py_ssize_t i, j, N, K, lab + Py_ssize_t i, N, K, lab int64float_t val uint8_t[::1] first_element_set - bint isna_entry, uses_mask = not mask is None + bint isna_entry, uses_mask = mask is not None assert min_count == -1, "'min_count' only used in sum and prod" @@ -1240,7 +1246,11 @@ cdef inline bint _treat_as_na(numeric_object_t val, bint is_datetimelike) nogil: return False -cdef numeric_object_t _get_min_or_max(numeric_object_t val, bint compute_max, bint is_datetimelike): +cdef numeric_object_t _get_min_or_max( + numeric_object_t val, + bint compute_max, + bint is_datetimelike, +): """ Find either the min or the max supported by numeric_object_t; 'val' is a placeholder to effectively make numeric_object_t an argument. @@ -1366,7 +1376,10 @@ def group_last( # set a placeholder value in out[i, j]. if uses_mask: result_mask[i, j] = True - elif numeric_object_t is float32_t or numeric_object_t is float64_t: + elif ( + numeric_object_t is float32_t + or numeric_object_t is float64_t + ): out[i, j] = NAN elif numeric_object_t is int64_t: # Per above, this is a placeholder in @@ -1486,7 +1499,10 @@ def group_nth( # it was initialized with np.empty. Also ensures # we can downcast out if appropriate. 
out[i, j] = 0 - elif numeric_object_t is float32_t or numeric_object_t is float64_t: + elif ( + numeric_object_t is float32_t + or numeric_object_t is float64_t + ): out[i, j] = NAN elif numeric_object_t is int64_t: # Per above, this is a placeholder in diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index 7aaeee043c72b..4f86f63718f2a 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -90,7 +90,7 @@ cdef class ObjectFactorizer(Factorizer): self.uniques = ObjectVector() def factorize( - self, ndarray[object] values, sort=False, na_sentinel=-1, na_value=None + self, ndarray[object] values, na_sentinel=-1, na_value=None ) -> np.ndarray: """ @@ -115,14 +115,6 @@ cdef class ObjectFactorizer(Factorizer): self.uniques = uniques labels = self.table.get_labels(values, self.uniques, self.count, na_sentinel, na_value) - mask = (labels == na_sentinel) - # sort on - if sort: - sorter = self.uniques.to_array().argsort() - reverse_indexer = np.empty(len(sorter), dtype=np.intp) - reverse_indexer.put(sorter, np.arange(len(sorter))) - labels = reverse_indexer.take(labels, mode='clip') - labels[mask] = na_sentinel self.count = len(self.uniques) return labels @@ -136,7 +128,7 @@ cdef class Int64Factorizer(Factorizer): self.table = Int64HashTable(size_hint) self.uniques = Int64Vector() - def factorize(self, const int64_t[:] values, sort=False, + def factorize(self, const int64_t[:] values, na_sentinel=-1, na_value=None) -> np.ndarray: """ Returns @@ -161,14 +153,5 @@ cdef class Int64Factorizer(Factorizer): labels = self.table.get_labels(values, self.uniques, self.count, na_sentinel, na_value=na_value) - - # sort on - if sort: - sorter = self.uniques.to_array().argsort() - reverse_indexer = np.empty(len(sorter), dtype=np.intp) - reverse_indexer.put(sorter, np.arange(len(sorter))) - - labels = reverse_indexer.take(labels) - self.count = len(self.uniques) return labels diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 1a98633908a49..43e33ef3e7d7e 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -133,7 +133,7 @@ cdef class BlockPlacement: @property def as_array(self) -> np.ndarray: cdef: - Py_ssize_t start, stop, end, _ + Py_ssize_t start, stop, _ if not self._has_array: start, stop, step, _ = slice_get_indices_ex(self._as_slice) @@ -259,7 +259,6 @@ cdef class BlockPlacement: """ cdef: slice slc = self._ensure_has_slice() - slice new_slice ndarray[intp_t, ndim=1] new_placement if slc is not None and slc.step == 1: @@ -676,8 +675,16 @@ cdef class BlockManager: public bint _known_consolidated, _is_consolidated public ndarray _blknos, _blklocs public list refs - - def __cinit__(self, blocks=None, axes=None, refs=None, verify_integrity=True): + public object parent + + def __cinit__( + self, + blocks=None, + axes=None, + refs=None, + parent=None, + verify_integrity=True, + ): # None as defaults for unpickling GH#42345 if blocks is None: # This adds 1-2 microseconds to DataFrame(np.array([])) @@ -690,6 +697,7 @@ cdef class BlockManager: self.blocks = blocks self.axes = axes.copy() # copy to make sure we are not remotely-mutable self.refs = refs + self.parent = parent # Populate known_consolidate, blknos, and blklocs lazily self._known_consolidated = False @@ -805,7 +813,9 @@ cdef class BlockManager: nrefs.append(weakref.ref(blk)) new_axes = [self.axes[0], self.axes[1]._getitem_slice(slobj)] - mgr = type(self)(tuple(nbs), new_axes, nrefs, verify_integrity=False) + mgr = type(self)( + tuple(nbs), new_axes, nrefs, 
parent=self, verify_integrity=False + ) # We can avoid having to rebuild blklocs/blknos blklocs = self._blklocs @@ -827,4 +837,6 @@ cdef class BlockManager: new_axes = list(self.axes) new_axes[axis] = new_axes[axis]._getitem_slice(slobj) - return type(self)(tuple(new_blocks), new_axes, new_refs, verify_integrity=False) + return type(self)( + tuple(new_blocks), new_axes, new_refs, parent=self, verify_integrity=False + ) diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index e574aa10f6b57..667eda1b1f1da 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -275,7 +275,7 @@ def left_join_indexer_unique( cdef: Py_ssize_t i, j, nleft, nright ndarray[intp_t] indexer - numeric_object_t lval, rval + numeric_object_t rval i = 0 j = 0 @@ -324,7 +324,7 @@ def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] is non-unique (if both were unique we'd use left_join_indexer_unique). """ cdef: - Py_ssize_t i, j, k, nright, nleft, count + Py_ssize_t i, j, nright, nleft, count numeric_object_t lval, rval ndarray[intp_t] lindexer, rindexer ndarray[numeric_object_t] result @@ -434,7 +434,7 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] Both left and right are monotonic increasing but not necessarily unique. """ cdef: - Py_ssize_t i, j, k, nright, nleft, count + Py_ssize_t i, j, nright, nleft, count numeric_object_t lval, rval ndarray[intp_t] lindexer, rindexer ndarray[numeric_object_t] result diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index 244d1dbe4730e..6d7f895f7f730 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -158,7 +158,7 @@ def ensure_string_array( ) -> npt.NDArray[np.object_]: ... def infer_datetimelike_array( arr: npt.NDArray[np.object_], -) -> tuple[str, bool]: ... +) -> str: ... def convert_nans_to_NA( arr: npt.NDArray[np.object_], ) -> npt.NDArray[np.object_]: ... diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 4151ba927adf0..1b871bf0b745f 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -95,7 +95,6 @@ from pandas._libs.util cimport ( is_nan, ) -from pandas._libs.tslib import array_to_datetime from pandas._libs.tslibs import ( OutOfBoundsDatetime, OutOfBoundsTimedelta, @@ -622,6 +621,8 @@ ctypedef fused ndarr_object: # TODO: get rid of this in StringArray and modify # and go through ensure_string_array instead + + @cython.wraparound(False) @cython.boundscheck(False) def convert_nans_to_NA(ndarr_object arr) -> ndarray: @@ -766,9 +767,9 @@ def generate_bins_dt64(ndarray[int64_t, ndim=1] values, const int64_t[:] binner, Int64 (datetime64) version of generic python version in ``groupby.py``. 
""" cdef: - Py_ssize_t lenidx, lenbin, i, j, bc, vc + Py_ssize_t lenidx, lenbin, i, j, bc ndarray[int64_t, ndim=1] bins - int64_t l_bin, r_bin, nat_count + int64_t r_bin, nat_count bint right_closed = closed == 'right' nat_count = 0 @@ -1583,25 +1584,19 @@ def infer_datetimelike_array(arr: ndarray[object]) -> tuple[str, bool]: Returns ------- str: {datetime, timedelta, date, nat, mixed} - bool """ cdef: Py_ssize_t i, n = len(arr) bint seen_timedelta = False, seen_date = False, seen_datetime = False bint seen_tz_aware = False, seen_tz_naive = False - bint seen_nat = False, seen_str = False + bint seen_nat = False bint seen_period = False, seen_interval = False - list objs = [] object v for i in range(n): v = arr[i] if isinstance(v, str): - objs.append(v) - seen_str = True - - if len(objs) == 3: - break + return "mixed" elif v is None or util.is_nan(v): # nan or None @@ -1619,7 +1614,7 @@ def infer_datetimelike_array(arr: ndarray[object]) -> tuple[str, bool]: seen_tz_aware = True if seen_tz_naive and seen_tz_aware: - return "mixed", seen_str + return "mixed" elif util.is_datetime64_object(v): # np.datetime64 seen_datetime = True @@ -1635,43 +1630,33 @@ def infer_datetimelike_array(arr: ndarray[object]) -> tuple[str, bool]: seen_interval = True break else: - return "mixed", seen_str + return "mixed" if seen_period: if is_period_array(arr): - return "period", seen_str - return "mixed", seen_str + return "period" + return "mixed" if seen_interval: if is_interval_array(arr): - return "interval", seen_str - return "mixed", seen_str + return "interval" + return "mixed" + + if seen_date: + if not seen_datetime and not seen_timedelta: + return "date" + return "mixed" - if seen_date and not (seen_datetime or seen_timedelta): - return "date", seen_str elif seen_datetime and not seen_timedelta: - return "datetime", seen_str + return "datetime" elif seen_timedelta and not seen_datetime: - return "timedelta", seen_str + return "timedelta" + elif seen_datetime and seen_timedelta: + return "mixed" elif seen_nat: - return "nat", seen_str - - # short-circuit by trying to - # actually convert these strings - # this is for performance as we don't need to try - # convert *every* string array - if len(objs): - try: - # require_iso8601 as in maybe_infer_to_datetimelike - array_to_datetime(objs, errors="raise", require_iso8601=True) - return "datetime", seen_str - except (ValueError, TypeError): - pass + return "nat" - # we are *not* going to infer from strings - # for timedelta as too much ambiguity - - return "mixed", seen_str + return "mixed" cdef inline bint is_timedelta(object o): @@ -2232,14 +2217,24 @@ def maybe_convert_numeric( # Otherwise, iterate and do full inference. 
cdef: - int status, maybe_int + int maybe_int Py_ssize_t i, n = values.size Seen seen = Seen(coerce_numeric) - ndarray[float64_t, ndim=1] floats = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_FLOAT64, 0) - ndarray[complex128_t, ndim=1] complexes = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_COMPLEX128, 0) - ndarray[int64_t, ndim=1] ints = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_INT64, 0) - ndarray[uint64_t, ndim=1] uints = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_UINT64, 0) - ndarray[uint8_t, ndim=1] bools = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_UINT8, 0) + ndarray[float64_t, ndim=1] floats = cnp.PyArray_EMPTY( + 1, values.shape, cnp.NPY_FLOAT64, 0 + ) + ndarray[complex128_t, ndim=1] complexes = cnp.PyArray_EMPTY( + 1, values.shape, cnp.NPY_COMPLEX128, 0 + ) + ndarray[int64_t, ndim=1] ints = cnp.PyArray_EMPTY( + 1, values.shape, cnp.NPY_INT64, 0 + ) + ndarray[uint64_t, ndim=1] uints = cnp.PyArray_EMPTY( + 1, values.shape, cnp.NPY_UINT64, 0 + ) + ndarray[uint8_t, ndim=1] bools = cnp.PyArray_EMPTY( + 1, values.shape, cnp.NPY_UINT8, 0 + ) ndarray[uint8_t, ndim=1] mask = np.zeros(n, dtype="u1") float64_t fval bint allow_null_in_int = convert_to_masked_nullable @@ -2318,7 +2313,7 @@ def maybe_convert_numeric( seen.float_ = True else: try: - status = floatify(val, &fval, &maybe_int) + floatify(val, &fval, &maybe_int) if fval in na_values: seen.saw_null() @@ -2457,7 +2452,7 @@ def maybe_convert_objects(ndarray[object] objects, int64_t[::1] itimedeltas Seen seen = Seen() object val - float64_t fval, fnan = np.nan + float64_t fnan = np.nan n = len(objects) @@ -2590,10 +2585,15 @@ def maybe_convert_objects(ndarray[object] objects, if seen.datetimetz_: if is_datetime_with_singletz_array(objects): from pandas import DatetimeIndex - dti = DatetimeIndex(objects) - # unbox to DatetimeArray - return dti._data + try: + dti = DatetimeIndex(objects) + except OutOfBoundsDatetime: + # e.g. test_to_datetime_cache_coerce_50_lines_outofbounds + pass + else: + # unbox to DatetimeArray + return dti._data seen.object_ = True elif seen.datetime_: @@ -2937,7 +2937,7 @@ def to_object_array(rows: object, min_width: int = 0) -> ndarray: def tuples_to_object_array(ndarray[object] tuples): cdef: - Py_ssize_t i, j, n, k, tmp + Py_ssize_t i, j, n, k ndarray[object, ndim=2] result tuple tup @@ -3065,7 +3065,9 @@ cpdef ndarray eq_NA_compat(ndarray[object] arr, object key): key is assumed to have `not isna(key)` """ cdef: - ndarray[uint8_t, cast=True] result = cnp.PyArray_EMPTY(arr.ndim, arr.shape, cnp.NPY_BOOL, 0) + ndarray[uint8_t, cast=True] result = cnp.PyArray_EMPTY( + arr.ndim, arr.shape, cnp.NPY_BOOL, 0 + ) Py_ssize_t i object item diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build index b0012fb76e57c..75d507a398b93 100644 --- a/pandas/_libs/meson.build +++ b/pandas/_libs/meson.build @@ -3,14 +3,7 @@ _algos_take_helper = custom_target('algos_take_helper_pxi', input: 'algos_take_helper.pxi.in', command: [ py, tempita, '@INPUT@', '-o', '@OUTDIR@' - ], - # TODO: remove these two below lines - # Weird bug in meson that only repros on my potato computer - # (possibly b/c of low number of threads)? - # The first custom_target is never built for some reason - # so algos.pyx will error out later in the build. 
- build_by_default: true, - build_always_stale: true + ] ) _algos_common_helper = custom_target('algos_common_helper_pxi', output: 'algos_common_helper.pxi', @@ -91,10 +84,12 @@ cython_sources_list = [ 'util.pxd', ] cython_sources = {} +cython_sources_tgts = [] foreach source: cython_sources_list source_pyx = fs.copyfile(source) cython_sources += {source: source_pyx} + cython_sources_tgts += source_pyx endforeach subdir('tslibs') diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index b30e0ff8b099e..a5b07d46bfeef 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -13,6 +13,7 @@ import sys import time import warnings +from pandas.errors import ParserError from pandas.util._exceptions import find_stack_level from pandas import StringDtype @@ -74,7 +75,7 @@ from pandas._libs.util cimport ( UINT64_MAX, ) -import pandas._libs.lib as lib +from pandas._libs import lib from pandas._libs.khash cimport ( kh_destroy_float64, @@ -971,11 +972,9 @@ cdef class TextReader: all(isinstance(u, int) for u in self.usecols)): missing_usecols = [col for col in self.usecols if col >= num_cols] if missing_usecols: - warnings.warn( - "Defining usecols with out of bounds indices is deprecated " - "and will raise a ParserError in a future version.", - FutureWarning, - stacklevel=find_stack_level(), + raise ParserError( + "Defining usecols with out-of-bounds indices is not allowed. " + f"{missing_usecols} are out of bounds.", ) results = {} diff --git a/pandas/_libs/src/klib/khash.h b/pandas/_libs/src/klib/khash.h index e17d82d51f0fb..6bc560c5ce576 100644 --- a/pandas/_libs/src/klib/khash.h +++ b/pandas/_libs/src/klib/khash.h @@ -47,6 +47,16 @@ int main() { */ /* + 2013-05-02 (0.2.8): + * Use quadratic probing. When the capacity is power of 2, stepping function + i*(i+1)/2 guarantees to traverse each bucket. It is better than double + hashing on cache performance and is more robust than linear probing. + In theory, double hashing should be more robust than quadratic probing. + However, my implementation is probably not for large hash tables, because + the second hash function is closely tied to the first hash function, + which reduce the effectiveness of double hashing. + Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php + 2011-09-16 (0.2.6): * The capacity is a power of 2. This seems to dramatically improve the @@ -107,7 +117,7 @@ int main() { Generic hash table library. */ -#define AC_VERSION_KHASH_H "0.2.6" +#define AC_VERSION_KHASH_H "0.2.8" #include <stdlib.h> #include <string.h> @@ -177,7 +187,6 @@ typedef khuint_t khiter_t; #define __ac_set_isboth_false(flag, i) __ac_set_isempty_false(flag, i) #define __ac_set_isdel_true(flag, i) ((void)0) - // specializations of https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp khuint32_t PANDAS_INLINE murmur2_32to32(khuint32_t k){ const khuint32_t SEED = 0xc70f6907UL; @@ -252,13 +261,6 @@ khuint32_t PANDAS_INLINE murmur2_64to32(khuint64_t k){ return murmur2_32_32to32(k1, k2); } - -#ifdef KHASH_LINEAR -#define __ac_inc(k, m) 1 -#else -#define __ac_inc(k, m) (murmur2_32to32(k) | 1) & (m) -#endif - #define __ac_fsize(m) ((m) < 32?
1 : (m)>>5) #ifndef kroundup32 @@ -310,12 +312,12 @@ static const double __ac_HASH_UPPER = 0.77; SCOPE khuint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ { \ if (h->n_buckets) { \ - khuint_t inc, k, i, last, mask; \ + khuint_t k, i, last, mask, step=0;\ mask = h->n_buckets - 1; \ k = __hash_func(key); i = k & mask; \ - inc = __ac_inc(k, mask); last = i; /* inc==1 for linear probing */ \ + last = i; \ while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ - i = (i + inc) & mask; \ + i = (i + ++step) & mask; \ if (i == last) return h->n_buckets; \ } \ return __ac_iseither(h->flags, i)? h->n_buckets : i; \ @@ -348,11 +350,10 @@ static const double __ac_HASH_UPPER = 0.77; if (kh_is_map) val = h->vals[j]; \ __ac_set_isempty_true(h->flags, j); \ while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ - khuint_t inc, k, i; \ + khuint_t k, i, step=0;\ k = __hash_func(key); \ i = k & new_mask; \ - inc = __ac_inc(k, new_mask); \ - while (!__ac_isempty(new_flags, i)) i = (i + inc) & new_mask; \ + while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \ __ac_set_isempty_false(new_flags, i); \ if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \ { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ @@ -385,14 +386,14 @@ static const double __ac_HASH_UPPER = 0.77; else kh_resize_##name(h, h->n_buckets + 1); /* expand the hash table */ \ } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ { \ - khuint_t inc, k, i, site, last, mask = h->n_buckets - 1; \ + khuint_t k, i, site, last, mask = h->n_buckets - 1, step=0;\ x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \ if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \ else { \ - inc = __ac_inc(k, mask); last = i; \ + last = i ; \ while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ if (__ac_isdel(h->flags, i)) site = i; \ - i = (i + inc) & mask; \ + i = (i + (++step)) & mask; \ if (i == last) { x = site; break; } \ } \ if (x == h->n_buckets) { \ diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index 679cde9932a7a..b7457f94f3447 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -161,13 +161,17 @@ cpdef assert_almost_equal(a, b, is_unequal = True diff += 1 if not first_diff: - first_diff = f"At positional index {i}, first diff: {a[i]} != {b[i]}" + first_diff = ( + f"At positional index {i}, first diff: {a[i]} != {b[i]}" + ) if is_unequal: from pandas._testing import raise_assert_detail msg = (f"{obj} values are different " f"({np.round(diff * 100.0 / na, 5)} %)") - raise_assert_detail(obj, msg, lobj, robj, first_diff=first_diff, index_values=index_values) + raise_assert_detail( + obj, msg, lobj, robj, first_diff=first_diff, index_values=index_values + ) return True diff --git a/pandas/_libs/tslib.pyi b/pandas/_libs/tslib.pyi index 8fec9ecf27f30..ac8d5bac7c6e7 100644 --- a/pandas/_libs/tslib.pyi +++ b/pandas/_libs/tslib.pyi @@ -28,3 +28,7 @@ def array_to_datetime( ) -> tuple[np.ndarray, tzinfo | None]: ... # returned ndarray may be object dtype or datetime64[ns] + +def array_to_datetime_with_tz( + values: npt.NDArray[np.object_], tz: tzinfo +) -> npt.NDArray[np.int64]: ... 
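The khash.h hunks above replace murmur2-based double hashing (the removed __ac_inc macro) with triangular-number probing: the probe offsets 0, 1, 3, 6, 10, ... are the partial sums i*(i+1)/2, and when the bucket count is a power of two that sequence visits every bucket exactly once before repeating, as the 0.2.8 changelog entry claims. A minimal Python sketch of the `(i + (++step)) & mask` update used in kh_get/kh_put/kh_resize (illustrative only, not pandas or khash code):

    def probe_sequence(start: int, n_buckets: int) -> list[int]:
        # n_buckets must be a power of two, as khash guarantees.
        i, step, visited = start, 0, []
        for _ in range(n_buckets):
            visited.append(i)
            step += 1
            i = (i + step) & (n_buckets - 1)  # same update as (i + (++step)) & mask
        return visited

    # Each of the 16 buckets is probed exactly once per cycle.
    assert sorted(probe_sequence(5, 16)) == list(range(16))

Unlike the old `(murmur2_32to32(k) | 1) & m` increment, consecutive probes now stay near the home slot, which is the cache-performance benefit the comment refers to.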
diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 03331f54db892..6d6e90673f030 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1,5 +1,3 @@ -import warnings - cimport cython from cpython.datetime cimport ( PyDate_Check, @@ -8,8 +6,7 @@ from cpython.datetime cimport ( import_datetime, tzinfo, ) - -from pandas.util._exceptions import find_stack_level +from cpython.object cimport PyObject # import datetime C API import_datetime() @@ -264,7 +261,7 @@ def array_with_unit_to_datetime( tz : parsed timezone offset or None """ cdef: - Py_ssize_t i, j, n=len(values) + Py_ssize_t i, n=len(values) int64_t mult int prec = 0 ndarray[float64_t] fvalues @@ -421,6 +418,7 @@ def array_with_unit_to_datetime( return oresult, tz + @cython.wraparound(False) @cython.boundscheck(False) def first_non_null(values: ndarray) -> int: @@ -428,7 +426,6 @@ def first_non_null(values: ndarray) -> int: cdef: Py_ssize_t n = len(values) Py_ssize_t i - int result for i in range(n): val = values[i] if checknull_with_nat_and_na(val): @@ -439,6 +436,7 @@ def first_non_null(values: ndarray) -> int: else: return -1 + @cython.wraparound(False) @cython.boundscheck(False) cpdef array_to_datetime( @@ -613,7 +611,8 @@ cpdef array_to_datetime( continue elif is_raise: raise ValueError( - f"time data \"{val}\" at position {i} doesn't match format specified" + f"time data \"{val}\" at position {i} doesn't " + "match format specified" ) return values, tz_out @@ -629,7 +628,10 @@ cpdef array_to_datetime( if is_coerce: iresult[i] = NPY_NAT continue - raise TypeError(f"invalid string coercion to datetime for \"{val}\" at position {i}") + raise TypeError( + f"invalid string coercion to datetime for \"{val}\" " + f"at position {i}" + ) if tz is not None: seen_datetime_offset = True @@ -855,19 +857,61 @@ cdef inline bint _parse_today_now(str val, int64_t* iresult, bint utc): # We delay this check for as long as possible # because it catches relatively rare cases if val == "now": - iresult[0] = Timestamp.utcnow().value - if not utc: + if utc: + iresult[0] = Timestamp.utcnow().value + else: # GH#18705 make sure to_datetime("now") matches Timestamp("now") - warnings.warn( - "The parsing of 'now' in pd.to_datetime without `utc=True` is " - "deprecated. In a future version, this will match Timestamp('now') " - "and Timestamp.now()", - FutureWarning, - stacklevel=find_stack_level(), - ) - + # Note using Timestamp.now() is faster than Timestamp("now") + iresult[0] = Timestamp.now().value return True elif val == "today": iresult[0] = Timestamp.today().value return True return False + + +def array_to_datetime_with_tz(ndarray values, tzinfo tz): + """ + Vectorized analogue to pd.Timestamp(value, tz=tz) + + values has object-dtype, unrestricted ndim. + + Major differences between this and array_to_datetime with utc=True + - np.datetime64 objects are treated as _wall_ times. + - tznaive datetimes are treated as _wall_ times. 
+ """ + cdef: + ndarray result = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_INT64, 0) + cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, values) + Py_ssize_t i, n = values.size + object item + int64_t ival + datetime ts + + for i in range(n): + # Analogous to `item = values[i]` + item = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + + if checknull_with_nat_and_na(item): + # this catches pd.NA which would raise in the Timestamp constructor + ival = NPY_NAT + + else: + ts = Timestamp(item) + if ts is NaT: + ival = NPY_NAT + else: + if ts.tz is not None: + ts = ts.tz_convert(tz) + else: + # datetime64, tznaive pydatetime, int, float + ts = ts.tz_localize(tz) + ts = ts._as_unit("ns") + ival = ts.value + + # Analogous to: result[i] = ival + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival + + cnp.PyArray_MultiIter_NEXT(mi) + + return result diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index 94781374296fa..357227de2fc2c 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -396,7 +396,9 @@ cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil: # TODO: use in _matplotlib.converter? -cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns) except? -1: +cpdef int64_t periods_per_day( + NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns +) except? -1: """ How many of the given time units fit into a single day? """ diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 3c7406d231241..dda26ad3bebc6 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -325,7 +325,11 @@ def get_start_end_field( @cython.wraparound(False) @cython.boundscheck(False) -def get_date_field(const int64_t[:] dtindex, str field, NPY_DATETIMEUNIT reso=NPY_FR_ns): +def get_date_field( + const int64_t[:] dtindex, + str field, + NPY_DATETIMEUNIT reso=NPY_FR_ns, +): """ Given a int64-based datetime index, extract the year, month, etc., field and return an array of these values. diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 79299ec38e19c..e2a291dfe632f 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -204,9 +204,10 @@ cdef class _NaT(datetime): return result # __rsub__ logic here - # TODO(cython3): remove this, move above code out of ``if not is_rsub`` block + # TODO(cython3): remove this, move above code out of + # ``if not is_rsub`` block # timedelta64 - NaT we have to treat NaT as timedelta64 - # for this to be meaningful, and the result is timedelta64 + # for this to be meaningful, and the result is timedelta64 result = np.empty(other.shape, dtype="timedelta64[ns]") result.fill("NaT") return result @@ -240,7 +241,8 @@ cdef class _NaT(datetime): result = np.empty(other.shape, dtype="timedelta64[ns]") result.fill("NaT") return result - # other cases are same, swap operands is allowed even though we subtract because this is NaT + # other cases are same, swap operands is allowed even though we subtract + # because this is NaT return self.__sub__(other) def __pos__(self): @@ -683,8 +685,6 @@ class NaTType(_NaT): ---------- ordinal : int Date corresponding to a proleptic Gregorian ordinal. - freq : str, DateOffset - Offset to apply to the Timestamp. tz : str, pytz.timezone, dateutil.tz.tzfile or None Time zone for the Timestamp. 
@@ -1201,6 +1201,7 @@ default 'raise' NaT """, ) + @property def tz(self) -> None: return None diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 07872050dc822..b1ff456c84a70 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -46,7 +46,7 @@ cdef extern from "src/datetime/np_datetime.h": npy_datetimestruct _S_MIN_DTS, _S_MAX_DTS npy_datetimestruct _M_MIN_DTS, _M_MAX_DTS - PyArray_DatetimeMetaData get_datetime_metadata_from_dtype(cnp.PyArray_Descr *dtype); + PyArray_DatetimeMetaData get_datetime_metadata_from_dtype(cnp.PyArray_Descr *dtype) cdef extern from "src/datetime/np_datetime_strings.h": int parse_iso_8601_datetime(const char *str, int len, int want_exc, @@ -171,7 +171,11 @@ class OutOfBoundsTimedelta(ValueError): pass -cdef get_implementation_bounds(NPY_DATETIMEUNIT reso, npy_datetimestruct *lower, npy_datetimestruct *upper): +cdef get_implementation_bounds( + NPY_DATETIMEUNIT reso, + npy_datetimestruct *lower, + npy_datetimestruct *upper, +): if reso == NPY_FR_ns: upper[0] = _NS_MAX_DTS lower[0] = _NS_MIN_DTS @@ -420,7 +424,6 @@ def compare_mismatched_resolutions(ndarray left, ndarray right, op): Py_ssize_t i, N = left.size npy_datetimestruct ldts, rdts - for i in range(N): # Analogous to: lval = lvalues[i] lval = (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 1))[0] @@ -511,7 +514,10 @@ cdef ndarray astype_round_check( @cython.overflowcheck(True) -cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit) except? -1: +cdef int64_t get_conversion_factor( + NPY_DATETIMEUNIT from_unit, + NPY_DATETIMEUNIT to_unit +) except? -1: """ Find the factor by which we need to multiply to convert from from_unit to to_unit. """ diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 37b87f92971cc..50d6a0a02b0cf 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1058,12 +1058,6 @@ cdef class Tick(SingleConstructorOffset): if util.is_timedelta64_object(other) or PyDelta_Check(other): return other + self.delta - elif isinstance(other, type(self)): - # TODO(2.0): remove once apply deprecation is enforced. - # This is reached in tests that specifically call apply, - # but should not be reached "naturally" because __add__ should - # catch this case first.
- return type(self)(self.n + other.n) raise ApplyTypeError(f"Unhandled type: {type(other).__name__}") @@ -2268,7 +2262,12 @@ cdef class QuarterOffset(SingleConstructorOffset): def _apply_array(self, dtarr): reso = get_unit_from_dtype(dtarr.dtype) shifted = shift_quarters( - dtarr.view("i8"), self.n, self.startingMonth, self._day_opt, modby=3, reso=reso + dtarr.view("i8"), + self.n, + self.startingMonth, + self._day_opt, + modby=3, + reso=reso, ) return shifted @@ -2548,7 +2547,9 @@ cdef class SemiMonthOffset(SingleConstructorOffset): ndarray i8other = dtarr.view("i8") Py_ssize_t i, count = dtarr.size int64_t val, res_val - ndarray out = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, 0) + ndarray out = cnp.PyArray_EMPTY( + i8other.ndim, i8other.shape, cnp.NPY_INT64, 0 + ) npy_datetimestruct dts int months, to_day, nadj, n = self.n int days_in_month, day, anchor_dom = self.day_of_month @@ -2756,7 +2757,9 @@ cdef class Week(SingleConstructorOffset): cdef: Py_ssize_t i, count = i8other.size int64_t val, res_val - ndarray out = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, 0) + ndarray out = cnp.PyArray_EMPTY( + i8other.ndim, i8other.shape, cnp.NPY_INT64, 0 + ) npy_datetimestruct dts int wday, days, weeks, n = self.n int anchor_weekday = self.weekday @@ -3328,7 +3331,9 @@ cdef class FY5253Quarter(FY5253Mixin): for qlen in qtr_lens: if qlen * 7 <= tdelta.days: num_qtrs += 1 - tdelta -= (<_Timedelta>Timedelta(days=qlen * 7))._as_creso(norm._creso) + tdelta -= ( + <_Timedelta>Timedelta(days=qlen * 7) + )._as_creso(norm._creso) else: break else: @@ -4145,7 +4150,9 @@ cdef ndarray _shift_bdays( """ cdef: Py_ssize_t i, n = i8other.size - ndarray result = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, 0) + ndarray result = cnp.PyArray_EMPTY( + i8other.ndim, i8other.shape, cnp.NPY_INT64, 0 + ) int64_t val, res_val int wday, nadj, days npy_datetimestruct dts diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 469e0721f1207..6f5b1e5b4e799 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -418,7 +418,9 @@ cdef parse_datetime_string_with_reso( from pandas import Timestamp parsed = Timestamp(date_string) else: - parsed = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us) + parsed = datetime( + dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us + ) reso = { NPY_DATETIMEUNIT.NPY_FR_Y: "year", NPY_DATETIMEUNIT.NPY_FR_M: "month", @@ -717,7 +719,8 @@ def try_parse_dates( date = datetime.now() default = datetime(date.year, date.month, 1) - parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default) + def parse_date(x): + return du_parse(x, dayfirst=dayfirst, default=default) # EAFP here try: @@ -1011,10 +1014,11 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: break # Only consider it a valid guess if we have a year, month and day, - # unless it's %Y which is both common and unambiguous. + # unless it's %Y or %Y-%m which conform with ISO8601. Note that we don't + # make an exception for %Y%m because it's explicitly not considered ISO8601. if ( len({'year', 'month', 'day'} & found_attrs) != 3 - and format_guess != ['%Y'] + and format_guess not in (['%Y'], ['%Y', None, '%m']) ): return None @@ -1049,6 +1053,7 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: else: return None + cdef str _fill_token(token: str, padding: int): cdef str token_filled if '.' 
not in token: @@ -1063,6 +1068,7 @@ cdef str _fill_token(token: str, padding: int): token_filled = f'{seconds}.{nanoseconds}' return token_filled + @cython.wraparound(False) @cython.boundscheck(False) cdef inline object convert_to_unicode(object item, bint keep_trivial_numbers): diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index be6f87791284e..0e7cfa4dd9670 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1053,7 +1053,9 @@ def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end): cdef: Py_ssize_t n = len(arr) Py_ssize_t increment = arr.strides[0] // 8 - ndarray[int64_t] result = cnp.PyArray_EMPTY(arr.ndim, arr.shape, cnp.NPY_INT64, 0) + ndarray[int64_t] result = cnp.PyArray_EMPTY( + arr.ndim, arr.shape, cnp.NPY_INT64, 0 + ) _period_asfreq( <int64_t*>cnp.PyArray_DATA(arr), @@ -1362,7 +1364,6 @@ def get_period_field_arr(str field, const int64_t[:] arr, int freq): cdef: Py_ssize_t i, sz int64_t[::1] out - accessor f func = _get_accessor_func(field) if func is NULL: @@ -1438,7 +1439,9 @@ def extract_ordinals(ndarray values, freq) -> np.ndarray: cdef: Py_ssize_t i, n = values.size int64_t ordinal - ndarray ordinals = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_INT64, 0) + ndarray ordinals = cnp.PyArray_EMPTY( + values.ndim, values.shape, cnp.NPY_INT64, 0 + ) cnp.broadcast mi = cnp.PyArray_MultiIterNew2(ordinals, values) object p @@ -1684,7 +1687,10 @@ cdef class _Period(PeriodMixin): raise IncompatibleFrequency("Input cannot be converted to " f"Period(freq={self.freqstr})") - if util.is_timedelta64_object(other) and get_timedelta64_value(other) == NPY_NAT: + if ( + util.is_timedelta64_object(other) and + get_timedelta64_value(other) == NPY_NAT + ): # i.e. np.timedelta64("nat") return NaT @@ -2478,7 +2484,8 @@ class Period(_Period): the start or the end of the period, but rather the entire period itself. freq : str, default None One of pandas period strings or corresponding objects. Accepted - strings are listed in the :ref:`offset alias section <timeseries.offset_aliases>` in the user docs. + strings are listed in the + :ref:`offset alias section <timeseries.offset_aliases>` in the user docs. ordinal : int, default None The period offset from the proleptic Gregorian epoch.
year : int, default None @@ -2511,7 +2518,6 @@ class Period(_Period): # ('T', 5) but may be passed in as a string like '5T' # ordinal is the period offset from the gregorian proleptic epoch - cdef _Period self if freq is not None: freq = cls._maybe_convert_freq(freq) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 6287c2fbc5d34..f540ad19c48d2 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -75,7 +75,6 @@ def array_strptime(ndarray[object] values, str fmt, bint exact=True, errors='rai int iso_week, iso_year int64_t us, ns object val, group_key, ampm, found, timezone - dict found_key bint is_raise = errors=='raise' bint is_ignore = errors=='ignore' bint is_coerce = errors=='coerce' diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index f3de67b705d4d..bed3ad6243e20 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -176,7 +176,9 @@ def ints_to_pytimedelta(ndarray m8values, box=False): # `it` iterates C-order as well, so the iteration matches # See discussion at # github.com/pandas-dev/pandas/pull/46886#discussion_r860261305 - ndarray result = cnp.PyArray_EMPTY(m8values.ndim, m8values.shape, cnp.NPY_OBJECT, 0) + ndarray result = cnp.PyArray_EMPTY( + m8values.ndim, m8values.shape, cnp.NPY_OBJECT, 0 + ) object[::1] res_flat = result.ravel() # should NOT be a copy ndarray arr = m8values.view("i8") @@ -364,7 +366,7 @@ cdef convert_to_timedelta64(object ts, str unit): if PyDelta_Check(ts): ts = np.timedelta64(delta_to_nanoseconds(ts), "ns") elif not is_timedelta64_object(ts): - raise ValueError(f"Invalid type for timedelta scalar: {type(ts)}") + raise TypeError(f"Invalid type for timedelta scalar: {type(ts)}") return ts.astype("timedelta64[ns]") @@ -468,7 +470,11 @@ cdef inline int64_t _item_to_timedelta64_fastpath(object item) except? -1: return parse_timedelta_string(item) -cdef inline int64_t _item_to_timedelta64(object item, str parsed_unit, str errors) except? -1: +cdef inline int64_t _item_to_timedelta64( + object item, + str parsed_unit, + str errors +) except? -1: """ See array_to_timedelta64. """ @@ -967,7 +973,6 @@ cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso): "Only resolutions 's', 'ms', 'us', 'ns' are supported." 
) - td_base.value = value td_base._is_populated = 0 td_base._creso = reso @@ -1570,8 +1575,6 @@ class Timedelta(_Timedelta): "milliseconds", "microseconds", "nanoseconds"} def __new__(cls, object value=_no_input, unit=None, **kwargs): - cdef _Timedelta td_base - if value is _no_input: if not len(kwargs): raise ValueError("cannot construct a Timedelta without a " @@ -1625,7 +1628,8 @@ class Timedelta(_Timedelta): if len(kwargs): # GH#48898 raise ValueError( - "Cannot pass both a Timedelta input and timedelta keyword arguments, got " + "Cannot pass both a Timedelta input and timedelta keyword " + "arguments, got " f"{list(kwargs.keys())}" ) return value @@ -1712,7 +1716,7 @@ class Timedelta(_Timedelta): @cython.cdivision(True) def _round(self, freq, mode): cdef: - int64_t result, unit, remainder + int64_t result, unit ndarray[int64_t] arr from pandas._libs.tslibs.offsets import to_offset @@ -1801,9 +1805,6 @@ class Timedelta(_Timedelta): __rmul__ = __mul__ def __truediv__(self, other): - cdef: - int64_t new_value - if _should_cast_to_timedelta(other): # We interpret NaT as timedelta64("NaT") other = Timedelta(other) diff --git a/pandas/_libs/tslibs/timestamps.pxd b/pandas/_libs/tslibs/timestamps.pxd index 397df11144d60..fc62e04961dcb 100644 --- a/pandas/_libs/tslibs/timestamps.pxd +++ b/pandas/_libs/tslibs/timestamps.pxd @@ -15,7 +15,6 @@ from pandas._libs.tslibs.offsets cimport BaseOffset cdef _Timestamp create_timestamp_from_ts(int64_t value, npy_datetimestruct dts, tzinfo tz, - BaseOffset freq, bint fold, NPY_DATETIMEUNIT reso=*) @@ -23,7 +22,6 @@ cdef _Timestamp create_timestamp_from_ts(int64_t value, cdef class _Timestamp(ABCTimestamp): cdef readonly: int64_t value, nanosecond, year - BaseOffset _freq NPY_DATETIMEUNIT _creso cdef bint _get_start_end_field(self, str field, freq) @@ -34,7 +32,5 @@ cdef class _Timestamp(ABCTimestamp): cpdef datetime to_pydatetime(_Timestamp self, bint warn=*) cdef bint _compare_outside_nanorange(_Timestamp self, datetime other, int op) except -1 - cpdef void _set_freq(self, freq) - cdef _warn_on_field_deprecation(_Timestamp self, freq, str field) cdef bint _compare_mismatched_resos(_Timestamp self, _Timestamp other, int op) cdef _Timestamp _as_creso(_Timestamp self, NPY_DATETIMEUNIT reso, bint round_ok=*) diff --git a/pandas/_libs/tslibs/timestamps.pyi b/pandas/_libs/tslibs/timestamps.pyi index e916d7eb12dbf..77f02741aae48 100644 --- a/pandas/_libs/tslibs/timestamps.pyi +++ b/pandas/_libs/tslibs/timestamps.pyi @@ -37,9 +37,6 @@ class Timestamp(datetime): def __new__( # type: ignore[misc] cls: type[_DatetimeT], ts_input: np.integer | float | str | _date | datetime | np.datetime64 = ..., - freq: int | None | str | BaseOffset = ..., - tz: str | _tzinfo | None | int = ..., - unit: str | int | None = ..., year: int | None = ..., month: int | None = ..., day: int | None = ..., @@ -47,12 +44,13 @@ class Timestamp(datetime): minute: int | None = ..., second: int | None = ..., microsecond: int | None = ..., - nanosecond: int | None = ..., tzinfo: _tzinfo | None = ..., *, + nanosecond: int | None = ..., + tz: str | _tzinfo | None | int = ..., + unit: str | int | None = ..., fold: int | None = ..., ) -> _DatetimeT | NaTType: ... - def _set_freq(self, freq: BaseOffset | None) -> None: ... @classmethod def _from_value_and_reso( cls, value: int, reso: int, tz: _tzinfo | None @@ -89,7 +87,6 @@ class Timestamp(datetime): def fromordinal( cls: type[_DatetimeT], ordinal: int, - freq: str | BaseOffset | None = ..., tz: _tzinfo | str | None = ..., ) -> _DatetimeT: ... 
@classmethod @@ -176,7 +173,7 @@ class Timestamp(datetime): def is_year_end(self) -> bool: ... def to_pydatetime(self, warn: bool = ...) -> datetime: ... def to_datetime64(self) -> np.datetime64: ... - def to_period(self, freq: BaseOffset | str | None = ...) -> Period: ... + def to_period(self, freq: BaseOffset | str = ...) -> Period: ... def to_julian_date(self) -> np.float64: ... @property def asm8(self) -> np.datetime64: ... diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 3c3bb8496aa6e..62607df5b2aa8 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -97,11 +97,7 @@ from pandas._libs.tslibs.np_datetime import ( OutOfBoundsTimedelta, ) -from pandas._libs.tslibs.offsets cimport ( - BaseOffset, - is_offset_object, - to_offset, -) +from pandas._libs.tslibs.offsets cimport to_offset from pandas._libs.tslibs.timedeltas cimport ( _Timedelta, delta_to_nanoseconds, @@ -134,7 +130,6 @@ cdef inline _Timestamp create_timestamp_from_ts( int64_t value, npy_datetimestruct dts, tzinfo tz, - BaseOffset freq, bint fold, NPY_DATETIMEUNIT reso=NPY_FR_ns, ): @@ -160,7 +155,6 @@ cdef inline _Timestamp create_timestamp_from_ts( dts.sec, dts.us, tz, fold=fold) ts_base.value = value - ts_base._freq = freq ts_base.year = dts.year ts_base.nanosecond = dts.ps // 1000 ts_base._creso = reso @@ -171,7 +165,6 @@ cdef inline _Timestamp create_timestamp_from_ts( def _unpickle_timestamp(value, freq, tz, reso=NPY_FR_ns): # GH#41949 dont warn on unpickle if we have a freq ts = Timestamp._from_value_and_reso(value, reso, tz) - ts._set_freq(freq) return ts @@ -239,21 +232,6 @@ cdef class _Timestamp(ABCTimestamp): max = MinMaxReso("max") resolution = MinMaxReso("resolution") # GH#21336, GH#21365 - cpdef void _set_freq(self, freq): - # set the ._freq attribute without going through the constructor, - # which would issue a warning - # Caller is responsible for validation - self._freq = freq - - @property - def freq(self): - warnings.warn( - "Timestamp.freq is deprecated and will be removed in a future version.", - FutureWarning, - stacklevel=find_stack_level(), - ) - return self._freq - @property def _unit(self) -> str: """ @@ -267,7 +245,6 @@ cdef class _Timestamp(ABCTimestamp): @classmethod def _from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso, tzinfo tz): cdef: - npy_datetimestruct dts _TSObject obj = _TSObject() if value == NPY_NAT: @@ -284,7 +261,7 @@ cdef class _Timestamp(ABCTimestamp): maybe_localize_tso(obj, tz, reso) return create_timestamp_from_ts( - value, obj.dts, tz=obj.tzinfo, freq=None, fold=obj.fold, reso=reso + value, obj.dts, tz=obj.tzinfo, fold=obj.fold, reso=reso ) @classmethod @@ -294,7 +271,6 @@ cdef class _Timestamp(ABCTimestamp): # This is herely mainly so we can incrementally implement non-nano # (e.g. only tznaive at first) cdef: - npy_datetimestruct dts int64_t value NPY_DATETIMEUNIT reso @@ -317,7 +293,6 @@ cdef class _Timestamp(ABCTimestamp): def __richcmp__(_Timestamp self, object other, int op): cdef: _Timestamp ots - int ndim if isinstance(other, _Timestamp): ots = other @@ -364,15 +339,14 @@ cdef class _Timestamp(ABCTimestamp): # which incorrectly drops tz and normalizes to midnight # before comparing # We follow the stdlib datetime behavior of never being equal - warnings.warn( - "Comparison of Timestamp with datetime.date is deprecated in " - "order to match the standard library behavior. " - "In a future version these will be considered non-comparable. 
" - "Use 'ts == pd.Timestamp(date)' or 'ts.date() == date' instead.", - FutureWarning, - stacklevel=find_stack_level(), + if op == Py_EQ: + return False + elif op == Py_NE: + return True + raise TypeError( + "Cannot compare Timestamp with datetime.date. " + "Use ts == pd.Timestamp(date) or ts.date() == date instead." ) - return NotImplemented else: return NotImplemented @@ -454,8 +428,6 @@ cdef class _Timestamp(ABCTimestamp): f"Out of bounds nanosecond timestamp: {new_value}" ) from err - if result is not NaT: - result._set_freq(self._freq) # avoid warning in constructor return result elif is_integer_object(other): @@ -592,7 +564,7 @@ cdef class _Timestamp(ABCTimestamp): if freq: kwds = freq.kwds month_kw = kwds.get('startingMonth', kwds.get('month', 12)) - freqstr = self._freqstr + freqstr = freq.freqstr else: month_kw = 12 freqstr = None @@ -603,31 +575,6 @@ cdef class _Timestamp(ABCTimestamp): field, freqstr, month_kw, self._creso) return out[0] - cdef _warn_on_field_deprecation(self, freq, str field): - """ - Warn if the removal of .freq change the value of start/end properties. - """ - cdef: - bint needs = False - - if freq is not None: - kwds = freq.kwds - month_kw = kwds.get("startingMonth", kwds.get("month", 12)) - freqstr = self._freqstr - if month_kw != 12: - needs = True - if freqstr.startswith("B"): - needs = True - - if needs: - warnings.warn( - "Timestamp.freq is deprecated and will be removed in a future " - "version. When you have a freq, use " - f"freq.{field}(timestamp) instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - @property def is_month_start(self) -> bool: """ @@ -643,11 +590,7 @@ cdef class _Timestamp(ABCTimestamp): >>> ts.is_month_start True """ - if self._freq is None: - # fast-path for non-business frequencies - return self.day == 1 - self._warn_on_field_deprecation(self._freq, "is_month_start") - return self._get_start_end_field("is_month_start", self._freq) + return self.day == 1 @property def is_month_end(self) -> bool: @@ -664,11 +607,7 @@ cdef class _Timestamp(ABCTimestamp): >>> ts.is_month_end True """ - if self._freq is None: - # fast-path for non-business frequencies - return self.day == self.days_in_month - self._warn_on_field_deprecation(self._freq, "is_month_end") - return self._get_start_end_field("is_month_end", self._freq) + return self.day == self.days_in_month @property def is_quarter_start(self) -> bool: @@ -685,11 +624,7 @@ cdef class _Timestamp(ABCTimestamp): >>> ts.is_quarter_start True """ - if self._freq is None: - # fast-path for non-business frequencies - return self.day == 1 and self.month % 3 == 1 - self._warn_on_field_deprecation(self._freq, "is_quarter_start") - return self._get_start_end_field("is_quarter_start", self._freq) + return self.day == 1 and self.month % 3 == 1 @property def is_quarter_end(self) -> bool: @@ -706,11 +641,7 @@ cdef class _Timestamp(ABCTimestamp): >>> ts.is_quarter_end True """ - if self._freq is None: - # fast-path for non-business frequencies - return (self.month % 3) == 0 and self.day == self.days_in_month - self._warn_on_field_deprecation(self._freq, "is_quarter_end") - return self._get_start_end_field("is_quarter_end", self._freq) + return (self.month % 3) == 0 and self.day == self.days_in_month @property def is_year_start(self) -> bool: @@ -727,11 +658,7 @@ cdef class _Timestamp(ABCTimestamp): >>> ts.is_year_start True """ - if self._freq is None: - # fast-path for non-business frequencies - return self.day == self.month == 1 - self._warn_on_field_deprecation(self._freq, "is_year_start") 
- return self._get_start_end_field("is_year_start", self._freq) + return self.day == self.month == 1 @property def is_year_end(self) -> bool: @@ -748,11 +675,7 @@ cdef class _Timestamp(ABCTimestamp): >>> ts.is_year_end True """ - if self._freq is None: - # fast-path for non-business frequencies - return self.month == 12 and self.day == 31 - self._warn_on_field_deprecation(self._freq, "is_year_end") - return self._get_start_end_field("is_year_end", self._freq) + return self.month == 12 and self.day == 31 @cython.boundscheck(False) cdef _get_date_name_field(self, str field, object locale): @@ -930,7 +853,6 @@ cdef class _Timestamp(ABCTimestamp): def __setstate__(self, state): self.value = state[0] - self._freq = state[1] self.tzinfo = state[2] if len(state) == 3: @@ -942,7 +864,7 @@ cdef class _Timestamp(ABCTimestamp): self._creso = reso def __reduce__(self): - object_state = self.value, self._freq, self.tzinfo, self._creso + object_state = self.value, None, self.tzinfo, self._creso return (_unpickle_timestamp, object_state) # ----------------------------------------------------------------- @@ -1021,9 +943,8 @@ cdef class _Timestamp(ABCTimestamp): pass tz = f", tz='{zone}'" if zone is not None else "" - freq = "" if self._freq is None else f", freq='{self._freqstr}'" - return f"Timestamp('{stamp}'{tz}{freq})" + return f"Timestamp('{stamp}'{tz})" @property def _repr_base(self) -> str: @@ -1213,15 +1134,6 @@ cdef class _Timestamp(ABCTimestamp): stacklevel=find_stack_level(), ) - if freq is None: - freq = self._freq - warnings.warn( - "In a future version, calling 'Timestamp.to_period()' without " - "passing a 'freq' will raise an exception.", - FutureWarning, - stacklevel=find_stack_level(), - ) - return Period(self, freq=freq) @@ -1244,18 +1156,16 @@ class Timestamp(_Timestamp): ---------- ts_input : datetime-like, str, int, float Value to be converted to Timestamp. - freq : str, DateOffset - Offset which Timestamp will have. + year, month, day : int + hour, minute, second, microsecond : int, optional, default 0 + tzinfo : datetime.tzinfo, optional, default None + nanosecond : int, optional, default 0 tz : str, pytz.timezone, dateutil.tz.tzfile or None Time zone for time which Timestamp will have. unit : str Unit used for conversion if ts_input is of type int or float. The valid values are 'D', 'h', 'm', 's', 'ms', 'us', and 'ns'. For example, 's' means seconds and 'ms' means milliseconds. - year, month, day : int - hour, minute, second, microsecond : int, optional, default 0 - nanosecond : int, optional, default 0 - tzinfo : datetime.tzinfo, optional, default None fold : {0, 1}, default None, keyword-only Due to daylight saving time, one wall clock time can occur twice when shifting from summer to winter time; fold describes whether the @@ -1303,7 +1213,7 @@ class Timestamp(_Timestamp): """ @classmethod - def fromordinal(cls, ordinal, freq=None, tz=None): + def fromordinal(cls, ordinal, tz=None): """ Construct a timestamp from a a proleptic Gregorian ordinal. @@ -1311,8 +1221,6 @@ class Timestamp(_Timestamp): ---------- ordinal : int Date corresponding to a proleptic Gregorian ordinal. - freq : str, DateOffset - Offset to apply to the Timestamp. tz : str, pytz.timezone, dateutil.tz.tzfile or None Time zone for the Timestamp. 
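With Timestamp._freq gone, the is_month_start/is_month_end/is_quarter_*/is_year_* properties above collapse to plain calendar checks; the business-frequency special case and its deprecation warning are removed entirely. A short sketch of the resulting behavior, assuming the post-change API:

    import pandas as pd

    ts = pd.Timestamp("2022-12-31")
    assert ts.is_month_end        # day == days_in_month
    assert ts.is_quarter_end      # month % 3 == 0 and day == days_in_month
    assert ts.is_year_end         # month == 12 and day == 31
    assert not ts.is_month_start  # day != 1

Code that wants frequency-aware answers is expected to ask the offset object instead, e.g. freq.is_month_start(ts), as the removed warning already suggested.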
@@ -1325,8 +1233,7 @@ class Timestamp(_Timestamp): >>> pd.Timestamp.fromordinal(737425) Timestamp('2020-01-01 00:00:00') """ - return cls(datetime.fromordinal(ordinal), - freq=freq, tz=tz) + return cls(datetime.fromordinal(ordinal), tz=tz) @classmethod def now(cls, tz=None): @@ -1482,9 +1389,6 @@ class Timestamp(_Timestamp): def __new__( cls, object ts_input=_no_input, - object freq=None, - tz=None, - unit=None, year=None, month=None, day=None, @@ -1492,9 +1396,11 @@ class Timestamp(_Timestamp): minute=None, second=None, microsecond=None, - nanosecond=None, tzinfo_type tzinfo=None, *, + nanosecond=None, + tz=None, + unit=None, fold=None, ): # The parameter list folds together legacy parameter names (the first @@ -1529,27 +1435,6 @@ class Timestamp(_Timestamp): # GH#17690 tzinfo must be a datetime.tzinfo object, ensured # by the cython annotation. if tz is not None: - if (is_integer_object(tz) - and is_integer_object(ts_input) - and is_integer_object(freq) - ): - # GH#31929 e.g. Timestamp(2019, 3, 4, 5, 6, tzinfo=foo) - # TODO(GH#45307): this will still be fragile to - # mixed-and-matched positional/keyword arguments - ts_input = datetime( - ts_input, - freq, - tz, - unit or 0, - year or 0, - month or 0, - day or 0, - fold=fold or 0, - ) - nanosecond = hour - tz = tzinfo - return cls(ts_input, nanosecond=nanosecond, tz=tz) - raise ValueError('Can provide at most one of tz, tzinfo') # User passed tzinfo instead of tz; avoid silently ignoring @@ -1585,7 +1470,7 @@ class Timestamp(_Timestamp): # check that only ts_input is passed # checking verbosely, because cython doesn't optimize # list comprehensions (as of cython 0.29.x) - if (isinstance(ts_input, _Timestamp) and freq is None and + if (isinstance(ts_input, _Timestamp) and tz is None and unit is None and year is None and month is None and day is None and hour is None and minute is None and second is None and @@ -1598,7 +1483,7 @@ class Timestamp(_Timestamp): if any(arg is not None for arg in _date_attributes): raise ValueError( "Cannot pass a date attribute keyword " - "argument when passing a date string" + "argument when passing a date string; 'tz' is keyword-only" ) elif ts_input is _no_input: @@ -1622,35 +1507,28 @@ class Timestamp(_Timestamp): ts_input = datetime(**datetime_kwargs) - elif is_integer_object(freq): + elif is_integer_object(year): # User passed positional arguments: # Timestamp(year, month, day[, hour[, minute[, second[, # microsecond[, nanosecond[, tzinfo]]]]]]) - ts_input = datetime(ts_input, freq, tz, unit or 0, - year or 0, month or 0, day or 0, fold=fold or 0) - nanosecond = hour - tz = minute - freq = None + ts_input = datetime(ts_input, year, month, day or 0, + hour or 0, minute or 0, second or 0, fold=fold or 0) unit = None + if nanosecond is None: + # nanosecond was not passed as a keyword, but may have been + # passed positionally see test_constructor_nanosecond + nanosecond = microsecond + if getattr(ts_input, 'tzinfo', None) is not None and tz is not None: raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with " "the tz parameter. Use tz_convert instead.") tzobj = maybe_get_tz(tz) if tzobj is not None and is_datetime64_object(ts_input): - # GH#24559, GH#42288 In the future we will treat datetime64 as + # GH#24559, GH#42288 As of 2.0 we treat datetime64 as # wall-time (consistent with DatetimeIndex) - warnings.warn( - "In a future version, when passing a np.datetime64 object and " - "a timezone to Timestamp, the datetime64 will be interpreted " - "as a wall time, not a UTC time. 
To interpret as a UTC time, " - "use `Timestamp(dt64).tz_localize('UTC').tz_convert(tz)`", - FutureWarning, - stacklevel=find_stack_level(), - ) - # Once this deprecation is enforced, we can do - # return Timestamp(ts_input).tz_localize(tzobj) + return cls(ts_input).tz_localize(tzobj) if nanosecond is None: nanosecond = 0 @@ -1662,20 +1540,7 @@ class Timestamp(_Timestamp): if ts.value == NPY_NAT: return NaT - if freq is None: - # GH 22311: Try to extract the frequency of a given Timestamp input - freq = getattr(ts_input, '_freq', None) - else: - warnings.warn( - "The 'freq' argument in Timestamp is deprecated and will be " - "removed in a future version.", - FutureWarning, - stacklevel=find_stack_level(), - ) - if not is_offset_object(freq): - freq = to_offset(freq) - - return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold, ts.creso) + return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, ts.fold, ts.creso) def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'): cdef: @@ -1994,22 +1859,6 @@ timedelta}, default 'raise' "Use tz_localize() or tz_convert() as appropriate" ) - @property - def _freqstr(self): - return getattr(self._freq, "freqstr", self._freq) - - @property - def freqstr(self): - """ - Return the total number of days in the month. - """ - warnings.warn( - "Timestamp.freqstr is deprecated and will be removed in a future version.", - FutureWarning, - stacklevel=find_stack_level(), - ) - return self._freqstr - def tz_localize(self, tz, ambiguous='raise', nonexistent='raise'): """ Localize the Timestamp to a timezone. @@ -2110,8 +1959,6 @@ default 'raise' ) out = type(self)._from_value_and_reso(value, self._creso, tz=tz) - if out is not NaT: - out._set_freq(self._freq) # avoid warning in constructor return out def tz_convert(self, tz): @@ -2165,8 +2012,6 @@ default 'raise' # Same UTC timestamp, different time zone tz = maybe_get_tz(tz) out = type(self)._from_value_and_reso(self.value, reso=self._creso, tz=tz) - if out is not NaT: - out._set_freq(self._freq) # avoid warning in constructor return out astimezone = tz_convert @@ -2299,7 +2144,7 @@ default 'raise' ts_input, tzobj, nanos=dts.ps // 1000, reso=self._creso ) return create_timestamp_from_ts( - ts.value, dts, tzobj, self._freq, fold, reso=self._creso + ts.value, dts, tzobj, fold, reso=self._creso ) def to_julian_date(self) -> np.float64: diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index e2812178a2b43..28259c9db26e5 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -224,14 +224,13 @@ timedelta-like} """ cdef: ndarray[uint8_t, cast=True] ambiguous_array - Py_ssize_t i, idx, pos, n = vals.shape[0] - Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right + Py_ssize_t i, n = vals.shape[0] + Py_ssize_t delta_idx_offset, delta_idx int64_t v, left, right, val, new_local, remaining_mins int64_t first_delta, delta int64_t shift_delta = 0 ndarray[int64_t] result_a, result_b, dst_hours int64_t[::1] result - npy_datetimestruct dts bint infer_dst = False, is_dst = False, fill = False bint shift_forward = False, shift_backward = False bint fill_nonexist = False diff --git a/pandas/_libs/tslibs/vectorized.pyi b/pandas/_libs/tslibs/vectorized.pyi index d24541aede8d8..22f457b9ddc0b 100644 --- a/pandas/_libs/tslibs/vectorized.pyi +++ b/pandas/_libs/tslibs/vectorized.pyi @@ -7,7 +7,6 @@ from datetime import tzinfo import numpy as np from pandas._libs.tslibs.dtypes import Resolution -from 
pandas._libs.tslibs.offsets import BaseOffset from pandas._typing import npt def dt64arr_to_periodarr( @@ -34,7 +33,6 @@ def get_resolution( def ints_to_pydatetime( arr: npt.NDArray[np.int64], tz: tzinfo | None = ..., - freq: BaseOffset | None = ..., fold: bool = ..., box: str = ..., reso: int = ..., # NPY_DATETIMEUNIT diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index 6a6b156af3dc4..8661ba4b9b2f1 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -32,7 +32,6 @@ from .np_datetime cimport ( npy_datetimestruct, pandas_datetime_to_datetimestruct, ) -from .offsets cimport BaseOffset from .period cimport get_period_ordinal from .timestamps cimport create_timestamp_from_ts from .timezones cimport is_utc @@ -95,7 +94,6 @@ def tz_convert_from_utc(ndarray stamps, tzinfo tz, NPY_DATETIMEUNIT reso=NPY_FR_ def ints_to_pydatetime( ndarray stamps, tzinfo tz=None, - BaseOffset freq=None, bint fold=False, str box="datetime", NPY_DATETIMEUNIT reso=NPY_FR_ns, @@ -109,8 +107,6 @@ def ints_to_pydatetime( stamps : array of i8 tz : str, optional convert to this timezone - freq : BaseOffset, optional - freq to convert fold : bint, default is 0 Due to daylight saving time, one wall clock time can occur twice when shifting from summer to winter time; fold describes whether the @@ -138,7 +134,7 @@ def ints_to_pydatetime( npy_datetimestruct dts tzinfo new_tz - bint use_date = False, use_time = False, use_ts = False, use_pydt = False + bint use_date = False, use_ts = False, use_pydt = False object res_val # Note that `result` (and thus `result_flat`) is C-order and @@ -154,11 +150,9 @@ def ints_to_pydatetime( use_date = True elif box == "timestamp": use_ts = True - elif box == "time": - use_time = True elif box == "datetime": use_pydt = True - else: + elif box != "time": raise ValueError( "box must be one of 'datetime', 'date', 'time' or 'timestamp'" ) @@ -183,7 +177,7 @@ def ints_to_pydatetime( if use_ts: res_val = create_timestamp_from_ts( - utc_val, dts, new_tz, freq, fold, reso=reso + utc_val, dts, new_tz, fold, reso=reso ) elif use_pydt: res_val = datetime( diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 68c05f2bb2c98..702706f00455b 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -172,7 +172,9 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start, add_sum(values[j], &nobs, &sum_x, &compensation_add, &num_consecutive_same_value, &prev_value) - output[i] = calc_sum(minp, nobs, sum_x, num_consecutive_same_value, prev_value) + output[i] = calc_sum( + minp, nobs, sum_x, num_consecutive_same_value, prev_value + ) if not is_monotonic_increasing_bounds: nobs = 0 @@ -209,9 +211,15 @@ cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs, Py_ssize_t neg_ct return result -cdef inline void add_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, - Py_ssize_t *neg_ct, float64_t *compensation, - int64_t *num_consecutive_same_value, float64_t *prev_value) nogil: +cdef inline void add_mean( + float64_t val, + Py_ssize_t *nobs, + float64_t *sum_x, + Py_ssize_t *neg_ct, + float64_t *compensation, + int64_t *num_consecutive_same_value, + float64_t *prev_value +) nogil: """ add a value from the mean calc using Kahan summation """ cdef: float64_t y, t @@ -296,7 +304,9 @@ def roll_mean(const float64_t[:] values, ndarray[int64_t] start, add_mean(val, &nobs, &sum_x, &neg_ct, &compensation_add, &num_consecutive_same_value, &prev_value) - 
output[i] = calc_mean(minp, nobs, neg_ct, sum_x, num_consecutive_same_value, prev_value) + output[i] = calc_mean( + minp, nobs, neg_ct, sum_x, num_consecutive_same_value, prev_value + ) if not is_monotonic_increasing_bounds: nobs = 0 @@ -309,8 +319,13 @@ def roll_mean(const float64_t[:] values, ndarray[int64_t] start, # Rolling variance -cdef inline float64_t calc_var(int64_t minp, int ddof, float64_t nobs, - float64_t ssqdm_x, int64_t num_consecutive_same_value) nogil: +cdef inline float64_t calc_var( + int64_t minp, + int ddof, + float64_t nobs, + float64_t ssqdm_x, + int64_t num_consecutive_same_value +) nogil: cdef: float64_t result @@ -328,9 +343,15 @@ cdef inline float64_t calc_var(int64_t minp, int ddof, float64_t nobs, return result -cdef inline void add_var(float64_t val, float64_t *nobs, float64_t *mean_x, - float64_t *ssqdm_x, float64_t *compensation, - int64_t *num_consecutive_same_value, float64_t *prev_value) nogil: +cdef inline void add_var( + float64_t val, + float64_t *nobs, + float64_t *mean_x, + float64_t *ssqdm_x, + float64_t *compensation, + int64_t *num_consecutive_same_value, + float64_t *prev_value, +) nogil: """ add a value from the var calc """ cdef: float64_t delta, prev_mean, y, t @@ -364,8 +385,13 @@ cdef inline void add_var(float64_t val, float64_t *nobs, float64_t *mean_x, ssqdm_x[0] = ssqdm_x[0] + (val - prev_mean) * (val - mean_x[0]) -cdef inline void remove_var(float64_t val, float64_t *nobs, float64_t *mean_x, - float64_t *ssqdm_x, float64_t *compensation) nogil: +cdef inline void remove_var( + float64_t val, + float64_t *nobs, + float64_t *mean_x, + float64_t *ssqdm_x, + float64_t *compensation +) nogil: """ remove a value from the var calc """ cdef: float64_t delta, prev_mean, y, t @@ -566,7 +592,7 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: cdef: Py_ssize_t i, j - float64_t val, prev, min_val, mean_val, sum_val = 0 + float64_t val, min_val, mean_val, sum_val = 0 float64_t compensation_xxx_add, compensation_xxx_remove float64_t compensation_xx_add, compensation_xx_remove float64_t compensation_x_add, compensation_x_remove @@ -574,7 +600,7 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start, float64_t prev_value int64_t nobs = 0, N = len(start), V = len(values), nobs_mean = 0 int64_t s, e, num_consecutive_same_value - ndarray[float64_t] output, mean_array, values_copy + ndarray[float64_t] output, values_copy bint is_monotonic_increasing_bounds minp = max(minp, 3) @@ -779,7 +805,7 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: cdef: Py_ssize_t i, j - float64_t val, prev, mean_val, min_val, sum_val = 0 + float64_t val, mean_val, min_val, sum_val = 0 float64_t compensation_xxxx_add, compensation_xxxx_remove float64_t compensation_xxx_remove, compensation_xxx_add float64_t compensation_xx_remove, compensation_xx_add @@ -853,7 +879,8 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start, &compensation_xxx_add, &compensation_xxxx_add, &num_consecutive_same_value, &prev_value) - output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx, num_consecutive_same_value) + output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx, + num_consecutive_same_value) if not is_monotonic_increasing_bounds: nobs = 0 @@ -876,7 +903,7 @@ def roll_median_c(const float64_t[:] values, ndarray[int64_t] start, bint err = False, is_monotonic_increasing_bounds int midpoint, ret = 0 int64_t nobs = 0, N = len(start), s, e, win - 
@@ -876,7 +903,7 @@ def roll_median_c(const float64_t[:] values, ndarray[int64_t] start,
         bint err = False, is_monotonic_increasing_bounds
         int midpoint, ret = 0
         int64_t nobs = 0, N = len(start), s, e, win
-        float64_t val, res, prev
+        float64_t val, res
         skiplist_t *sl
         ndarray[float64_t] output
@@ -1149,7 +1176,7 @@ def roll_quantile(const float64_t[:] values, ndarray[int64_t] start,
         Py_ssize_t i, j, s, e, N = len(start), idx
         int ret = 0
         int64_t nobs = 0, win
-        float64_t val, prev, midpoint, idx_with_fraction
+        float64_t val, idx_with_fraction
         float64_t vlow, vhigh
         skiplist_t *skiplist
         InterpolationType interpolation_type
@@ -1275,7 +1302,7 @@ def roll_rank(const float64_t[:] values, ndarray[int64_t] start,
     derived from roll_quantile
     """
     cdef:
-        Py_ssize_t i, j, s, e, N = len(start), idx
+        Py_ssize_t i, j, s, e, N = len(start)
         float64_t rank_min = 0, rank = 0
         int64_t nobs = 0, win
         float64_t val
diff --git a/pandas/_testing/_io.py b/pandas/_testing/_io.py
index 9ad0d13e7317e..527e8c1d0d090 100644
--- a/pandas/_testing/_io.py
+++ b/pandas/_testing/_io.py
@@ -225,7 +225,7 @@ def wrapper(*args, **kwargs):
             )
         try:
             return t(*args, **kwargs)
-        except Exception as err:
+        except Exception as err:  # pylint: disable=broad-except
             errno = getattr(err, "errno", None)
             if not errno and hasattr(errno, "reason"):
                 # error: "Exception" has no attribute "reason"
@@ -243,10 +243,7 @@ def wrapper(*args, **kwargs):
             if not isinstance(err, error_classes) or raise_on_error:
                 raise
-            else:
-                pytest.skip(
-                    f"Skipping test due to lack of connectivity and error {err}"
-                )
+            pytest.skip(f"Skipping test due to lack of connectivity and error {err}")
 
     return wrapper
diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py
index 1f690b39e6fb8..d0a95e764472d 100644
--- a/pandas/_testing/asserters.py
+++ b/pandas/_testing/asserters.py
@@ -4,18 +4,12 @@
     Literal,
     cast,
 )
-import warnings
 
 import numpy as np
 
-from pandas._libs.lib import (
-    NoDefault,
-    no_default,
-)
 from pandas._libs.missing import is_matching_na
 from pandas._libs.sparse import SparseIndex
 import pandas._libs.testing as _testing
-from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.common import (
     is_bool,
@@ -64,7 +58,6 @@ def assert_almost_equal(
     left,
     right,
     check_dtype: bool | Literal["equiv"] = "equiv",
-    check_less_precise: bool | int | NoDefault = no_default,
     rtol: float = 1.0e-5,
     atol: float = 1.0e-8,
     **kwargs,
@@ -83,20 +76,6 @@ def assert_almost_equal(
         Check dtype if both a and b are the same type. If 'equiv' is passed in,
         then `RangeIndex` and `Int64Index` are also considered equivalent
         when doing type checking.
-    check_less_precise : bool or int, default False
-        Specify comparison precision. 5 digits (False) or 3 digits (True)
-        after decimal points are compared. If int, then specify the number
-        of digits to compare.
-
-        When comparing two numbers, if the first number has magnitude less
-        than 1e-5, we compare the two numbers directly and check whether
-        they are equivalent within the specified precision. Otherwise, we
-        compare the **ratio** of the second number to the first number and
-        check whether it is equivalent to 1 within the specified precision.
-
-        .. deprecated:: 1.1.0
-           Use `rtol` and `atol` instead to define relative/absolute
-           tolerance, respectively. Similar to :func:`math.isclose`.
     rtol : float, default 1e-5
         Relative tolerance.
@@ -106,16 +85,6 @@ def assert_almost_equal(
 
         .. versionadded:: 1.1.0
     """
-    if check_less_precise is not no_default:
-        warnings.warn(
-            "The 'check_less_precise' keyword in testing.assert_*_equal "
-            "is deprecated and will be removed in a future version. 
" - "You can stop passing 'check_less_precise' to silence this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) - rtol = atol = _get_tol_from_less_precise(check_less_precise) - if isinstance(left, Index): assert_index_equal( left, @@ -171,46 +140,6 @@ def assert_almost_equal( ) -def _get_tol_from_less_precise(check_less_precise: bool | int) -> float: - """ - Return the tolerance equivalent to the deprecated `check_less_precise` - parameter. - - Parameters - ---------- - check_less_precise : bool or int - - Returns - ------- - float - Tolerance to be used as relative/absolute tolerance. - - Examples - -------- - >>> # Using check_less_precise as a bool: - >>> _get_tol_from_less_precise(False) - 5e-06 - >>> _get_tol_from_less_precise(True) - 0.0005 - >>> # Using check_less_precise as an int representing the decimal - >>> # tolerance intended: - >>> _get_tol_from_less_precise(2) - 0.005 - >>> _get_tol_from_less_precise(8) - 5e-09 - """ - if isinstance(check_less_precise, bool): - if check_less_precise: - # 3-digit tolerance - return 0.5e-3 - else: - # 5-digit tolerance - return 0.5e-5 - else: - # Equivalent to setting checking_less_precise= - return 0.5 * 10**-check_less_precise - - def _check_isinstance(left, right, cls): """ Helper method for our assert_* methods that ensures that @@ -250,7 +179,6 @@ def assert_index_equal( right: Index, exact: bool | str = "equiv", check_names: bool = True, - check_less_precise: bool | int | NoDefault = no_default, check_exact: bool = True, check_categorical: bool = True, check_order: bool = True, @@ -271,14 +199,6 @@ def assert_index_equal( Int64Index as well. check_names : bool, default True Whether to check the names attribute. - check_less_precise : bool or int, default False - Specify comparison precision. Only used when check_exact is False. - 5 digits (False) or 3 digits (True) after decimal points are compared. - If int, then specify the digits to compare. - - .. deprecated:: 1.1.0 - Use `rtol` and `atol` instead to define relative/absolute - tolerance, respectively. Similar to :func:`math.isclose`. check_exact : bool, default True Whether to compare number exactly. check_categorical : bool, default True @@ -333,16 +253,6 @@ def _get_ilevel_values(index, level): filled = take_nd(unique._values, level_codes, fill_value=unique._na_value) return unique._shallow_copy(filled, name=index.names[level]) - if check_less_precise is not no_default: - warnings.warn( - "The 'check_less_precise' keyword in testing.assert_*_equal " - "is deprecated and will be removed in a future version. " - "You can stop passing 'check_less_precise' to silence this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) - rtol = atol = _get_tol_from_less_precise(check_less_precise) - # instance validation _check_isinstance(left, right, Index) @@ -775,7 +685,6 @@ def assert_extension_array_equal( right, check_dtype: bool | Literal["equiv"] = True, index_values=None, - check_less_precise=no_default, check_exact: bool = False, rtol: float = 1.0e-5, atol: float = 1.0e-8, @@ -791,14 +700,6 @@ def assert_extension_array_equal( Whether to check if the ExtensionArray dtypes are identical. index_values : numpy.ndarray, default None Optional index (shared by both left and right), used in output. - check_less_precise : bool or int, default False - Specify comparison precision. Only used when check_exact is False. - 5 digits (False) or 3 digits (True) after decimal points are compared. - If int, then specify the digits to compare. - - .. 
deprecated:: 1.1.0 - Use `rtol` and `atol` instead to define relative/absolute - tolerance, respectively. Similar to :func:`math.isclose`. check_exact : bool, default False Whether to compare number exactly. rtol : float, default 1e-5 @@ -823,16 +724,6 @@ def assert_extension_array_equal( >>> b, c = a.array, a.array >>> tm.assert_extension_array_equal(b, c) """ - if check_less_precise is not no_default: - warnings.warn( - "The 'check_less_precise' keyword in testing.assert_*_equal " - "is deprecated and will be removed in a future version. " - "You can stop passing 'check_less_precise' to silence this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) - rtol = atol = _get_tol_from_less_precise(check_less_precise) - assert isinstance(left, ExtensionArray), "left is not an ExtensionArray" assert isinstance(right, ExtensionArray), "right is not an ExtensionArray" if check_dtype: @@ -881,7 +772,6 @@ def assert_series_equal( check_dtype: bool | Literal["equiv"] = True, check_index_type: bool | Literal["equiv"] = "equiv", check_series_type: bool = True, - check_less_precise: bool | int | NoDefault = no_default, check_names: bool = True, check_exact: bool = False, check_datetimelike_compat: bool = False, @@ -910,20 +800,6 @@ def assert_series_equal( are identical. check_series_type : bool, default True Whether to check the Series class is identical. - check_less_precise : bool or int, default False - Specify comparison precision. Only used when check_exact is False. - 5 digits (False) or 3 digits (True) after decimal points are compared. - If int, then specify the digits to compare. - - When comparing two numbers, if the first number has magnitude less - than 1e-5, we compare the two numbers directly and check whether - they are equivalent within the specified precision. Otherwise, we - compare the **ratio** of the second number to the first number and - check whether it is equivalent to 1 within the specified precision. - - .. deprecated:: 1.1.0 - Use `rtol` and `atol` instead to define relative/absolute - tolerance, respectively. Similar to :func:`math.isclose`. check_names : bool, default True Whether to check the Series and Index names attribute. check_exact : bool, default False @@ -978,16 +854,6 @@ def assert_series_equal( if not check_index and check_like: raise ValueError("check_like must be False if check_index is False") - if check_less_precise is not no_default: - warnings.warn( - "The 'check_less_precise' keyword in testing.assert_*_equal " - "is deprecated and will be removed in a future version. " - "You can stop passing 'check_less_precise' to silence this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) - rtol = atol = _get_tol_from_less_precise(check_less_precise) - # instance validation _check_isinstance(left, right, Series) @@ -1150,7 +1016,6 @@ def assert_frame_equal( check_index_type: bool | Literal["equiv"] = "equiv", check_column_type: bool | Literal["equiv"] = "equiv", check_frame_type: bool = True, - check_less_precise=no_default, check_names: bool = True, by_blocks: bool = False, check_exact: bool = False, @@ -1188,20 +1053,6 @@ def assert_frame_equal( :func:`assert_index_equal`. check_frame_type : bool, default True Whether to check the DataFrame class is identical. - check_less_precise : bool or int, default False - Specify comparison precision. Only used when check_exact is False. - 5 digits (False) or 3 digits (True) after decimal points are compared. - If int, then specify the digits to compare. 
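Note: the hunks above and below strip the deprecated `check_less_precise` keyword from the whole `assert_*_equal` family. For migrating callers, the removed `_get_tol_from_less_precise` mapping gives the equivalent tolerances (True -> 0.5e-3, False -> 0.5e-5, int n -> 0.5 * 10**-n). A minimal sketch using the public `pandas.testing` API:

    import pandas as pd
    import pandas.testing as tm

    left = pd.Series([1.0001, 2.0001])
    right = pd.Series([1.0, 2.0])

    # previously: tm.assert_series_equal(left, right, check_less_precise=True)
    # now the 3-digit tolerance is spelled out via rtol/atol:
    tm.assert_series_equal(left, right, rtol=0.5e-3, atol=0.5e-3)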
- - When comparing two numbers, if the first number has magnitude less - than 1e-5, we compare the two numbers directly and check whether - they are equivalent within the specified precision. Otherwise, we - compare the **ratio** of the second number to the first number and - check whether it is equivalent to 1 within the specified precision. - - .. deprecated:: 1.1.0 - Use `rtol` and `atol` instead to define relative/absolute - tolerance, respectively. Similar to :func:`math.isclose`. check_names : bool, default True Whether to check that the `names` attribute for both the `index` and `column` attributes of the DataFrame is identical. @@ -1271,16 +1122,6 @@ def assert_frame_equal( """ __tracebackhide__ = True - if check_less_precise is not no_default: - warnings.warn( - "The 'check_less_precise' keyword in testing.assert_*_equal " - "is deprecated and will be removed in a future version. " - "You can stop passing 'check_less_precise' to silence this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) - rtol = atol = _get_tol_from_less_precise(check_less_precise) - # instance validation _check_isinstance(left, right, DataFrame) diff --git a/pandas/_typing.py b/pandas/_typing.py index 5c22baa4bd42e..dad5ffd48caa8 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -198,10 +198,6 @@ def mode(self) -> str: # for _get_filepath_or_buffer ... - def fileno(self) -> int: - # for _MMapWrapper - ... - def seek(self, __offset: int, __whence: int = ...) -> int: # with one argument: gzip.GzipFile, bz2.BZ2File # with two arguments: zip.ZipFile, read_sas @@ -217,7 +213,7 @@ def tell(self) -> int: class ReadBuffer(BaseBuffer, Protocol[AnyStr_cov]): - def read(self, __n: int | None = ...) -> AnyStr_cov: + def read(self, __n: int = ...) -> AnyStr_cov: # for BytesIOWrapper, gzip.GzipFile, bz2.BZ2File ... @@ -233,7 +229,7 @@ def flush(self) -> Any: class ReadPickleBuffer(ReadBuffer[bytes], Protocol): - def readline(self) -> AnyStr_cov: + def readline(self) -> bytes: ... @@ -247,6 +243,10 @@ def __iter__(self) -> Iterator[AnyStr_cov]: # for engine=python ... + def fileno(self) -> int: + # for _MMapWrapper + ... + def readline(self) -> AnyStr_cov: # for engine=python ... diff --git a/pandas/_version.py b/pandas/_version.py index 25142781299b4..89a3caaf64eae 100644 --- a/pandas/_version.py +++ b/pandas/_version.py @@ -1,3 +1,4 @@ +# pylint: disable=consider-using-f-string # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by GitHub's download-from-tag # feature). 
Distribution tarballs (built by setup.py sdist) and build diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index 34e3234390ba5..699d1b565fc71 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -16,14 +16,14 @@ "blosc": "1.21.0", "bottleneck": "1.3.2", "brotli": "0.7.0", - "fastparquet": "0.4.0", + "fastparquet": "0.6.3", "fsspec": "2021.07.0", "html5lib": "1.1", "hypothesis": "6.13.0", "gcsfs": "2021.07.0", "jinja2": "3.0.0", "lxml.etree": "4.6.3", - "matplotlib": "3.3.2", + "matplotlib": "3.6.1", "numba": "0.53.1", "numexpr": "2.7.3", "odfpy": "1.4.1", @@ -43,7 +43,6 @@ "tabulate": "0.8.9", "xarray": "0.19.0", "xlrd": "2.0.1", - "xlwt": "1.3.0", "xlsxwriter": "1.4.3", "zstandard": "0.15.2", "tzdata": "2022.1", @@ -142,8 +141,7 @@ def import_optional_dependency( except ImportError: if errors == "raise": raise ImportError(msg) - else: - return None + return None # Handle submodules: if we have submodule, grab parent module from sys.modules parent = name.split(".")[0] diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index 6dc4a66f34710..b02dfac1400d1 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -412,8 +412,7 @@ def validate_resampler_func(method: str, args, kwargs) -> None: "numpy operations are not valid with resample. " f"Use .resample(...).{method}() instead" ) - else: - raise TypeError("too many arguments passed in") + raise TypeError("too many arguments passed in") def validate_minmax_axis(axis: AxisInt | None, ndim: int = 1) -> None: diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index c233e3d8a4892..051aa5c337782 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -7,11 +7,7 @@ import copy import io import pickle as pkl -from typing import ( - TYPE_CHECKING, - Generator, -) -import warnings +from typing import Generator import numpy as np @@ -26,12 +22,6 @@ ) from pandas.core.internals import BlockManager -if TYPE_CHECKING: - from pandas import ( - DataFrame, - Series, - ) - def load_reduce(self): stack = self.stack @@ -68,49 +58,6 @@ def load_reduce(self): raise -_sparse_msg = """\ - -Loading a saved '{cls}' as a {new} with sparse values. -'{cls}' is now removed. You should re-save this dataset in its new format. -""" - - -class _LoadSparseSeries: - # To load a SparseSeries as a Series[Sparse] - - # https://github.com/python/mypy/issues/1020 - # error: Incompatible return type for "__new__" (returns "Series", but must return - # a subtype of "_LoadSparseSeries") - def __new__(cls) -> Series: # type: ignore[misc] - from pandas import Series - - warnings.warn( - _sparse_msg.format(cls="SparseSeries", new="Series"), - FutureWarning, - stacklevel=6, - ) - - return Series(dtype=object) - - -class _LoadSparseFrame: - # To load a SparseDataFrame as a DataFrame[Sparse] - - # https://github.com/python/mypy/issues/1020 - # error: Incompatible return type for "__new__" (returns "DataFrame", but must - # return a subtype of "_LoadSparseFrame") - def __new__(cls) -> DataFrame: # type: ignore[misc] - from pandas import DataFrame - - warnings.warn( - _sparse_msg.format(cls="SparseDataFrame", new="DataFrame"), - FutureWarning, - stacklevel=6, - ) - - return DataFrame() - - # If classes are moved, provide compat here. 
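Note: the `_class_locations_map` that follows is what pandas' pickle-compat layer consults when unpickling objects whose classes have moved; with the sparse shims deleted above, the remaining entries still redirect old module paths. Conceptually it works along these lines (a simplified sketch, not the exact pandas unpickler):

    import pickle

    _map = {
        ("pandas.indexes.base", "Index"): ("pandas.core.indexes.base", "Index"),
    }

    class CompatUnpickler(pickle.Unpickler):
        def find_class(self, module, name):
            # redirect (module, name) pairs for classes that have moved
            module, name = _map.get((module, name), (module, name))
            return super().find_class(module, name)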
_class_locations_map = { ("pandas.core.sparse.array", "SparseArray"): ("pandas.core.arrays", "SparseArray"), @@ -144,14 +91,6 @@ def __new__(cls) -> DataFrame: # type: ignore[misc] "pandas.core.arrays.sparse", "SparseArray", ), - ("pandas.sparse.series", "SparseSeries"): ( - "pandas.compat.pickle_compat", - "_LoadSparseSeries", - ), - ("pandas.sparse.frame", "SparseDataFrame"): ( - "pandas.core.sparse.frame", - "_LoadSparseFrame", - ), ("pandas.indexes.base", "_new_Index"): ("pandas.core.indexes.base", "_new_Index"), ("pandas.indexes.base", "Index"): ("pandas.core.indexes.base", "Index"), ("pandas.indexes.numeric", "Int64Index"): ( @@ -183,14 +122,6 @@ def __new__(cls) -> DataFrame: # type: ignore[misc] "pandas.core.indexes.numeric", "Float64Index", ), - ("pandas.core.sparse.series", "SparseSeries"): ( - "pandas.compat.pickle_compat", - "_LoadSparseSeries", - ), - ("pandas.core.sparse.frame", "SparseDataFrame"): ( - "pandas.compat.pickle_compat", - "_LoadSparseFrame", - ), } diff --git a/pandas/conftest.py b/pandas/conftest.py index 825aa4b51ebaa..4639799d2ee03 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1163,8 +1163,7 @@ def deco(*args): raise ValueError( f"Could not find file {path} and --strict-data-files is set." ) - else: - pytest.skip(f"Could not find {path}.") + pytest.skip(f"Could not find {path}.") return path return deco @@ -1727,7 +1726,7 @@ def any_skipna_inferred_dtype(request): Examples -------- - >>> import pandas._libs.lib as lib + >>> from pandas._libs import lib >>> >>> def test_something(any_skipna_inferred_dtype): ... inferred_dtype, values = any_skipna_inferred_dtype diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 4f9af2d0c01d6..725043616eaa7 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -4,7 +4,6 @@ from collections import defaultdict from functools import partial import inspect -import re from typing import ( TYPE_CHECKING, Any, @@ -18,7 +17,6 @@ Sequence, cast, ) -import warnings import numpy as np @@ -35,12 +33,8 @@ NDFrameT, npt, ) -from pandas.errors import ( - DataError, - SpecificationError, -) +from pandas.errors import SpecificationError from pandas.util._decorators import cache_readonly -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.cast import is_nested_object from pandas.core.dtypes.common import ( @@ -58,10 +52,7 @@ from pandas.core.algorithms import safe_sort from pandas.core.base import SelectionMixin import pandas.core.common as com -from pandas.core.construction import ( - create_series_with_explicit_dtype, - ensure_wrapped_if_datetimelike, -) +from pandas.core.construction import ensure_wrapped_if_datetimelike if TYPE_CHECKING: from pandas import ( @@ -266,34 +257,9 @@ def transform_dict_like(self, func): func = self.normalize_dictlike_arg("transform", obj, func) results: dict[Hashable, DataFrame | Series] = {} - failed_names = [] - all_type_errors = True for name, how in func.items(): colg = obj._gotitem(name, ndim=1) - try: - results[name] = colg.transform(how, 0, *args, **kwargs) - except Exception as err: - if str(err) in { - "Function did not transform", - "No transform functions were provided", - }: - raise err - else: - if not isinstance(err, TypeError): - all_type_errors = False - failed_names.append(name) - # combine results - if not results: - klass = TypeError if all_type_errors else ValueError - raise klass("Transform function failed") - if len(failed_names) > 0: - warnings.warn( - f"{failed_names} did not transform successfully. 
If any error is " - f"raised, this will raise in a future version of pandas. " - f"Drop these columns/ops to avoid this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) + results[name] = colg.transform(how, 0, *args, **kwargs) return concat(results, axis=1) def transform_str_or_callable(self, func) -> DataFrame | Series: @@ -345,88 +311,28 @@ def agg_list_like(self) -> DataFrame | Series: results = [] keys = [] - failed_names = [] - - depr_nuisance_columns_msg = ( - "{} did not aggregate successfully. If any error is " - "raised this will raise in a future version of pandas. " - "Drop these columns/ops to avoid this warning." - ) # degenerate case if selected_obj.ndim == 1: for a in arg: colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj) - try: - new_res = colg.aggregate(a) + new_res = colg.aggregate(a) + results.append(new_res) - except TypeError: - failed_names.append(com.get_callable_name(a) or a) - else: - results.append(new_res) - - # make sure we find a good name - name = com.get_callable_name(a) or a - keys.append(name) + # make sure we find a good name + name = com.get_callable_name(a) or a + keys.append(name) # multiples else: indices = [] for index, col in enumerate(selected_obj): colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index]) - try: - # Capture and suppress any warnings emitted by us in the call - # to agg below, but pass through any warnings that were - # generated otherwise. - # This is necessary because of https://bugs.python.org/issue29672 - # See GH #43741 for more details - with warnings.catch_warnings(record=True) as record: - new_res = colg.aggregate(arg) - if len(record) > 0: - match = re.compile(depr_nuisance_columns_msg.format(".*")) - for warning in record: - if re.match(match, str(warning.message)): - failed_names.append(col) - else: - warnings.warn_explicit( - message=warning.message, - category=warning.category, - filename=warning.filename, - lineno=warning.lineno, - ) - - except (TypeError, DataError): - failed_names.append(col) - except ValueError as err: - # cannot aggregate - if "Must produce aggregated value" in str(err): - # raised directly in _aggregate_named - failed_names.append(col) - elif "no results" in str(err): - # reached in test_frame_apply.test_nuiscance_columns - # where the colg.aggregate(arg) ends up going through - # the selected_obj.ndim == 1 branch above with arg == ["sum"] - # on a datetime64[ns] column - failed_names.append(col) - else: - raise - else: - results.append(new_res) - indices.append(index) - + new_res = colg.aggregate(arg) + results.append(new_res) + indices.append(index) keys = selected_obj.columns.take(indices) - # if we are empty - if not len(results): - raise ValueError("no results") - - if len(failed_names) > 0: - warnings.warn( - depr_nuisance_columns_msg.format(failed_names), - FutureWarning, - stacklevel=find_stack_level(), - ) - try: concatenated = concat(results, keys=keys, axis=1, sort=False) except TypeError as err: @@ -507,8 +413,6 @@ def agg_dict_like(self) -> DataFrame | Series: keys_to_use = ktu axis: AxisInt = 0 if isinstance(obj, ABCSeries) else 1 - # error: Key expression in dictionary comprehension has incompatible type - # "Hashable"; expected type "NDFrame" [misc] result = concat( {k: results[k] for k in keys_to_use}, # type: ignore[misc] axis=axis, @@ -560,7 +464,7 @@ def apply_str(self) -> DataFrame | Series: "axis" not in arg_names or f in ("corrwith", "skew") ): raise ValueError(f"Operation {f} does not support axis=1") - elif "axis" in arg_names: 
+ if "axis" in arg_names: self.kwargs["axis"] = self.axis return self._try_aggregate_string_function(obj, f, *self.args, **self.kwargs) @@ -858,7 +762,7 @@ def apply_broadcast(self, target: DataFrame) -> DataFrame: # must be a scalar or 1d if ares > 1: raise ValueError("too many dims to broadcast") - elif ares == 1: + if ares == 1: # must match return dim if result_compare != len(res): @@ -906,14 +810,12 @@ def wrap_results(self, results: ResType, res_index: Index) -> DataFrame | Series # dict of scalars - # the default dtype of an empty Series will be `object`, but this + # the default dtype of an empty Series is `object`, but this # code can be hit by df.mean() where the result should have dtype # float64 even if it's an empty Series. constructor_sliced = self.obj._constructor_sliced - if constructor_sliced is Series: - result = create_series_with_explicit_dtype( - results, dtype_if_empty=np.float64 - ) + if len(results) == 0 and constructor_sliced is Series: + result = constructor_sliced(results, dtype=np.float64) else: result = constructor_sliced(results) result.index = res_index @@ -1277,7 +1179,7 @@ def reconstruct_func( "Function names must be unique if there is no new column names " "assigned" ) - elif func is None: + if func is None: # nicer error message raise TypeError("Must provide 'func' or tuples of '(column, aggfunc).") @@ -1385,9 +1287,7 @@ def _make_unique_kwarg_list( [('a', '_0'), ('a', '_1'), ('b', '')] """ return [ - (pair[0], "_".join([pair[1], str(seq[:i].count(pair))])) - if seq.count(pair) > 1 - else pair + (pair[0], f"{pair[1]}_{seq[:i].count(pair)}") if seq.count(pair) > 1 else pair for i, pair in enumerate(seq) ] diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 383ee2c53f0ae..c891ac2fcfc51 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -304,8 +304,7 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any) # well. Previously this raised an internal ValueError. We might # support it someday, so raise a NotImplementedError. raise NotImplementedError( - "Cannot apply ufunc {} to mixed DataFrame and Series " - "inputs.".format(ufunc) + f"Cannot apply ufunc {ufunc} to mixed DataFrame and Series inputs." 
) axes = self.axes for obj in alignable[1:]: diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 74fc15a6fad63..35f1ace7ec351 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -252,16 +252,11 @@ def _validate_searchsorted_value( @doc(ExtensionArray.shift) def shift(self, periods: int = 1, fill_value=None, axis: AxisInt = 0): - fill_value = self._validate_shift_value(fill_value) + fill_value = self._validate_scalar(fill_value) new_values = shift(self._ndarray, periods, axis, fill_value) return self._from_backing_data(new_values) - def _validate_shift_value(self, fill_value): - # TODO(2.0): after deprecation in datetimelikearraymixin is enforced, - # we can remove this and use validate_fill_value directly - return self._validate_scalar(fill_value) - def __setitem__(self, key, value) -> None: key = check_array_indexer(self, key) value = self._validate_setitem_value(value) @@ -309,7 +304,6 @@ def _fill_mask_inplace( # (for now) when self.ndim == 2, we assume axis=0 func = missing.get_fill_func(method, ndim=self.ndim) func(self._ndarray.T, limit=limit, mask=mask.T) - return @doc(ExtensionArray.fillna) def fillna( diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index f18664915d015..945ae52c53047 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -20,10 +20,7 @@ pa_version_under6p0, pa_version_under7p0, ) -from pandas.util._decorators import ( - deprecate_nonkeyword_arguments, - doc, -) +from pandas.util._decorators import doc from pandas.core.dtypes.common import ( is_array_like, @@ -452,13 +449,12 @@ def isna(self) -> npt.NDArray[np.bool_]: """ return self._data.is_null().to_numpy() - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) def argsort( self, + *, ascending: bool = True, kind: SortKind = "quicksort", na_position: str = "last", - *args, **kwargs, ) -> np.ndarray: order = "ascending" if ascending else "descending" diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 9758ca84d236b..cc9b2ce3fed42 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -48,7 +48,6 @@ Appender, Substitution, cache_readonly, - deprecate_nonkeyword_arguments, ) from pandas.util._exceptions import find_stack_level from pandas.util._validators import ( @@ -662,13 +661,12 @@ def _values_for_argsort(self) -> np.ndarray: # Note: this is used in `ExtensionArray.argsort/argmin/argmax`. return np.array(self) - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) def argsort( self, + *, ascending: bool = True, kind: SortKind = "quicksort", na_position: str = "last", - *args, **kwargs, ) -> np.ndarray: """ @@ -699,7 +697,7 @@ def argsort( # 1. _values_for_argsort : construct the values passed to np.argsort # 2. argsort : total control over sorting. 
In case of overriding this, # it is recommended to also override argmax/argmin - ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) + ascending = nv.validate_argsort_with_ascending(ascending, (), kwargs) values = self._values_for_argsort() return nargsort( @@ -1558,7 +1556,6 @@ def _fill_mask_inplace( func(npvalues, limit=limit, mask=mask.copy()) new_values = self._from_sequence(npvalues, dtype=self.dtype) self[mask] = new_values[mask] - return def _rank( self, diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 9326b84f8e3be..4cacdb71f4175 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -15,11 +15,6 @@ cast, overload, ) -from warnings import ( - catch_warnings, - simplefilter, - warn, -) import numpy as np @@ -31,10 +26,6 @@ lib, ) from pandas._libs.arrays import NDArrayBacked -from pandas._libs.lib import ( - NoDefault, - no_default, -) from pandas._typing import ( ArrayLike, AstypeArg, @@ -48,8 +39,6 @@ type_t, ) from pandas.compat.numpy import function as nv -from pandas.util._decorators import deprecate_nonkeyword_arguments -from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( @@ -87,6 +76,7 @@ ) from pandas.core import ( + algorithms, arraylike, ops, ) @@ -94,7 +84,6 @@ PandasDelegate, delegate_names, ) -import pandas.core.algorithms as algorithms from pandas.core.algorithms import ( factorize, take_nd, @@ -133,7 +122,7 @@ def _cat_compare_op(op): opname = f"__{op.__name__}__" - fill_value = True if op is operator.ne else False + fill_value = op is operator.ne @unpack_zerodim_and_defer(opname) def func(self, other): @@ -393,13 +382,7 @@ def __init__( if not is_list_like(values): # GH#38433 - warn( - "Allowing scalars in the Categorical constructor is deprecated " - "and will raise in a future version. Use `[value]` instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - values = [values] + raise TypeError("Categorical input must be list-like") # null_mask indicates missing values we want to exclude from inference. # This means: only missing values in list-likes (not arrays/ndframes). @@ -729,8 +712,6 @@ def categories(self) -> Index: unique and the number of items in the new categories must be the same as the number of items in the old categories. - Assigning to `categories` is a inplace operation! - Raises ------ ValueError @@ -748,17 +729,6 @@ def categories(self) -> Index: """ return self.dtype.categories - @categories.setter - def categories(self, categories) -> None: - warn( - "Setting categories in-place is deprecated and will raise in a " - "future version. Use rename_categories instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - - self._set_categories(categories) - @property def ordered(self) -> Ordered: """ @@ -839,24 +809,7 @@ def _set_dtype(self, dtype: CategoricalDtype) -> Categorical: codes = recode_for_categories(self.codes, self.categories, dtype.categories) return type(self)(codes, dtype=dtype, fastpath=True) - @overload - def set_ordered( - self, value, *, inplace: NoDefault | Literal[False] = ... - ) -> Categorical: - ... - - @overload - def set_ordered(self, value, *, inplace: Literal[True]) -> None: - ... - - @overload - def set_ordered(self, value, *, inplace: bool) -> Categorical | None: - ... 
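Note: two API changes land in this region and are easy to miss in the noise: `ExtensionArray.argsort` now takes its sort options keyword-only (the `deprecate_nonkeyword_arguments` shim is gone), and the `Categorical` constructor rejects scalar input outright instead of warning. Post-change usage:

    import pandas as pd

    arr = pd.array([3, 1, 2])
    order = arr.argsort(ascending=False)  # options must be passed by keyword

    pd.Categorical(["a"])    # list-like input is required
    # pd.Categorical("a")    # now raises TypeError("Categorical input must be list-like")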
- - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "value"]) - def set_ordered( - self, value, inplace: bool | NoDefault = no_default - ) -> Categorical | None: + def set_ordered(self, value: bool) -> Categorical: """ Set the ordered attribute to the boolean value. @@ -864,98 +817,35 @@ def set_ordered( ---------- value : bool Set whether this categorical is ordered (True) or not (False). - inplace : bool, default False - Whether or not to set the ordered attribute in-place or return - a copy of this categorical with ordered set to the value. - - .. deprecated:: 1.5.0 - """ - if inplace is not no_default: - warn( - "The `inplace` parameter in pandas.Categorical." - "set_ordered is deprecated and will be removed in " - "a future version. setting ordered-ness on categories will always " - "return a new Categorical object.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - inplace = False - - inplace = validate_bool_kwarg(inplace, "inplace") new_dtype = CategoricalDtype(self.categories, ordered=value) - cat = self if inplace else self.copy() + cat = self.copy() NDArrayBacked.__init__(cat, cat._ndarray, new_dtype) - if not inplace: - return cat - return None - - @overload - def as_ordered(self, *, inplace: NoDefault | Literal[False] = ...) -> Categorical: - ... - - @overload - def as_ordered(self, *, inplace: Literal[True]) -> None: - ... + return cat - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) - def as_ordered(self, inplace: bool | NoDefault = no_default) -> Categorical | None: + def as_ordered(self) -> Categorical: """ Set the Categorical to be ordered. - Parameters - ---------- - inplace : bool, default False - Whether or not to set the ordered attribute in-place or return - a copy of this categorical with ordered set to True. - - .. deprecated:: 1.5.0 - Returns ------- - Categorical or None - Ordered Categorical or None if ``inplace=True``. + Categorical + Ordered Categorical. """ - if inplace is not no_default: - inplace = validate_bool_kwarg(inplace, "inplace") - return self.set_ordered(True, inplace=inplace) - - @overload - def as_unordered(self, *, inplace: NoDefault | Literal[False] = ...) -> Categorical: - ... + return self.set_ordered(True) - @overload - def as_unordered(self, *, inplace: Literal[True]) -> None: - ... - - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) - def as_unordered( - self, inplace: bool | NoDefault = no_default - ) -> Categorical | None: + def as_unordered(self) -> Categorical: """ Set the Categorical to be unordered. - Parameters - ---------- - inplace : bool, default False - Whether or not to set the ordered attribute in-place or return - a copy of this categorical with ordered set to False. - - .. deprecated:: 1.5.0 - Returns ------- - Categorical or None - Unordered Categorical or None if ``inplace=True``. + Categorical + Unordered Categorical. """ - if inplace is not no_default: - inplace = validate_bool_kwarg(inplace, "inplace") - return self.set_ordered(False, inplace=inplace) + return self.set_ordered(False) - def set_categories( - self, new_categories, ordered=None, rename: bool = False, inplace=no_default - ): + def set_categories(self, new_categories, ordered=None, rename: bool = False): """ Set the categories to the specified new_categories. @@ -985,15 +875,10 @@ def set_categories( rename : bool, default False Whether or not the new_categories should be considered as a rename of the old categories or as reordered categories. 
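Note: with `inplace` removed from `set_ordered`/`as_ordered`/`as_unordered` above, these methods always return a new Categorical, so callers keep the result by reassigning:

    import pandas as pd

    cat = pd.Categorical(["a", "b", "a"])
    cat = cat.as_ordered()    # returns a new, ordered Categorical
    assert cat.ordered
    cat = cat.as_unordered()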
- inplace : bool, default False - Whether or not to reorder the categories in-place or return a copy - of this categorical with reordered categories. - - .. deprecated:: 1.3.0 Returns ------- - Categorical with reordered categories or None if inplace. + Categorical with reordered categories. Raises ------ @@ -1008,24 +893,12 @@ def set_categories( remove_categories : Remove the specified categories. remove_unused_categories : Remove categories which are not used. """ - if inplace is not no_default: - warn( - "The `inplace` parameter in pandas.Categorical." - "set_categories is deprecated and will be removed in " - "a future version. Removing unused categories will always " - "return a new Categorical object.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - inplace = False - inplace = validate_bool_kwarg(inplace, "inplace") if ordered is None: ordered = self.dtype.ordered new_dtype = CategoricalDtype(new_categories, ordered=ordered) - cat = self if inplace else self.copy() + cat = self.copy() if rename: if cat.dtype.categories is not None and len(new_dtype.categories) < len( cat.dtype.categories @@ -1038,26 +911,9 @@ def set_categories( cat.codes, cat.categories, new_dtype.categories ) NDArrayBacked.__init__(cat, codes, new_dtype) + return cat - if not inplace: - return cat - - @overload - def rename_categories( - self, new_categories, *, inplace: Literal[False] | NoDefault = ... - ) -> Categorical: - ... - - @overload - def rename_categories(self, new_categories, *, inplace: Literal[True]) -> None: - ... - - @deprecate_nonkeyword_arguments( - version=None, allowed_args=["self", "new_categories"] - ) - def rename_categories( - self, new_categories, inplace: bool | NoDefault = no_default - ) -> Categorical | None: + def rename_categories(self, new_categories) -> Categorical: """ Rename categories. @@ -1078,16 +934,10 @@ def rename_categories( * callable : a callable that is called on all items in the old categories and whose return values comprise the new categories. - inplace : bool, default False - Whether or not to rename the categories inplace or return a copy of - this categorical with renamed categories. - - .. deprecated:: 1.3.0 - Returns ------- - cat : Categorical or None - Categorical with removed categories or None if ``inplace=True``. + cat : Categorical + Categorical with renamed categories. Raises ------ @@ -1123,32 +973,19 @@ def rename_categories( ['A', 'A', 'B'] Categories (2, object): ['A', 'B'] """ - if inplace is not no_default: - warn( - "The `inplace` parameter in pandas.Categorical." - "rename_categories is deprecated and will be removed in " - "a future version. 
Removing unused categories will always " - "return a new Categorical object.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - inplace = False - - inplace = validate_bool_kwarg(inplace, "inplace") - cat = self if inplace else self.copy() if is_dict_like(new_categories): - new_categories = [new_categories.get(item, item) for item in cat.categories] + new_categories = [ + new_categories.get(item, item) for item in self.categories + ] elif callable(new_categories): - new_categories = [new_categories(item) for item in cat.categories] + new_categories = [new_categories(item) for item in self.categories] + cat = self.copy() cat._set_categories(new_categories) - if not inplace: - return cat - return None + return cat - def reorder_categories(self, new_categories, ordered=None, inplace=no_default): + def reorder_categories(self, new_categories, ordered=None): """ Reorder categories as specified in new_categories. @@ -1162,16 +999,11 @@ def reorder_categories(self, new_categories, ordered=None, inplace=no_default): ordered : bool, optional Whether or not the categorical is treated as a ordered categorical. If not given, do not change the ordered information. - inplace : bool, default False - Whether or not to reorder the categories inplace or return a copy of - this categorical with reordered categories. - - .. deprecated:: 1.3.0 Returns ------- - cat : Categorical or None - Categorical with removed categories or None if ``inplace=True``. + cat : Categorical + Categorical with reordered categories. Raises ------ @@ -1187,44 +1019,13 @@ def reorder_categories(self, new_categories, ordered=None, inplace=no_default): remove_unused_categories : Remove categories which are not used. set_categories : Set the categories to the specified ones. """ - if inplace is not no_default: - warn( - "The `inplace` parameter in pandas.Categorical." - "reorder_categories is deprecated and will be removed in " - "a future version. Reordering categories will always " - "return a new Categorical object.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - inplace = False - - inplace = validate_bool_kwarg(inplace, "inplace") if set(self.dtype.categories) != set(new_categories): raise ValueError( "items in new_categories are not the same as in old categories" ) + return self.set_categories(new_categories, ordered=ordered) - with catch_warnings(): - simplefilter("ignore") - return self.set_categories(new_categories, ordered=ordered, inplace=inplace) - - @overload - def add_categories( - self, new_categories, *, inplace: Literal[False] | NoDefault = ... - ) -> Categorical: - ... - - @overload - def add_categories(self, new_categories, *, inplace: Literal[True]) -> None: - ... - - @deprecate_nonkeyword_arguments( - version=None, allowed_args=["self", "new_categories"] - ) - def add_categories( - self, new_categories, inplace: bool | NoDefault = no_default - ) -> Categorical | None: + def add_categories(self, new_categories) -> Categorical: """ Add new categories. @@ -1235,16 +1036,11 @@ def add_categories( ---------- new_categories : category or list-like of category The new categories to be included. - inplace : bool, default False - Whether or not to add the categories inplace or return a copy of - this categorical with added categories. - - .. deprecated:: 1.3.0 Returns ------- - cat : Categorical or None - Categorical with new categories added or None if ``inplace=True``. + cat : Categorical + Categorical with new categories added. 
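Note: `reorder_categories` and `add_categories` now follow the same copy-returning pattern. The docstring example continued below boils down to:

    import pandas as pd

    c = pd.Categorical(["c", "b", "c"])
    c = c.add_categories(["d", "a"])
    list(c.categories)    # ['b', 'c', 'd', 'a']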
Raises ------ @@ -1271,19 +1067,7 @@ def add_categories( ['c', 'b', 'c'] Categories (4, object): ['b', 'c', 'd', 'a'] """ - if inplace is not no_default: - warn( - "The `inplace` parameter in pandas.Categorical." - "add_categories is deprecated and will be removed in " - "a future version. Removing unused categories will always " - "return a new Categorical object.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - inplace = False - inplace = validate_bool_kwarg(inplace, "inplace") if not is_list_like(new_categories): new_categories = [new_categories] already_included = set(new_categories) & set(self.dtype.categories) @@ -1305,15 +1089,12 @@ def add_categories( new_categories = list(self.dtype.categories) + list(new_categories) new_dtype = CategoricalDtype(new_categories, self.ordered) - - cat = self if inplace else self.copy() + cat = self.copy() codes = coerce_indexer_dtype(cat._ndarray, new_dtype.categories) NDArrayBacked.__init__(cat, codes, new_dtype) - if not inplace: - return cat - return None + return cat - def remove_categories(self, removals, inplace=no_default): + def remove_categories(self, removals): """ Remove the specified categories. @@ -1324,16 +1105,11 @@ def remove_categories(self, removals, inplace=no_default): ---------- removals : category or list of categories The categories which should be removed. - inplace : bool, default False - Whether or not to remove the categories inplace or return a copy of - this categorical with removed categories. - - .. deprecated:: 1.3.0 Returns ------- - cat : Categorical or None - Categorical with removed categories or None if ``inplace=True``. + cat : Categorical + Categorical with removed categories. Raises ------ @@ -1359,19 +1135,6 @@ def remove_categories(self, removals, inplace=no_default): [NaN, 'c', 'b', 'c', NaN] Categories (2, object): ['b', 'c'] """ - if inplace is not no_default: - warn( - "The `inplace` parameter in pandas.Categorical." - "remove_categories is deprecated and will be removed in " - "a future version. Removing unused categories will always " - "return a new Categorical object.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - inplace = False - - inplace = validate_bool_kwarg(inplace, "inplace") if not is_list_like(removals): removals = [removals] @@ -1387,41 +1150,16 @@ def remove_categories(self, removals, inplace=no_default): if len(not_included) != 0: raise ValueError(f"removals must all be in old categories: {not_included}") - with catch_warnings(): - simplefilter("ignore") - return self.set_categories( - new_categories, ordered=self.ordered, rename=False, inplace=inplace - ) + return self.set_categories(new_categories, ordered=self.ordered, rename=False) - @overload - def remove_unused_categories( - self, *, inplace: Literal[False] | NoDefault = ... - ) -> Categorical: - ... - - @overload - def remove_unused_categories(self, *, inplace: Literal[True]) -> None: - ... - - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) - def remove_unused_categories( - self, inplace: bool | NoDefault = no_default - ) -> Categorical | None: + def remove_unused_categories(self) -> Categorical: """ Remove categories which are not used. - Parameters - ---------- - inplace : bool, default False - Whether or not to drop unused categories inplace or return a copy of - this categorical with unused categories dropped. - - .. deprecated:: 1.2.0 - Returns ------- - cat : Categorical or None - Categorical with unused categories dropped or None if ``inplace=True``. 
+ cat : Categorical + Categorical with unused categories dropped. See Also -------- @@ -1448,33 +1186,20 @@ def remove_unused_categories( ['a', 'c', 'a', 'c', 'c'] Categories (2, object): ['a', 'c'] """ - if inplace is not no_default: - warn( - "The `inplace` parameter in pandas.Categorical." - "remove_unused_categories is deprecated and " - "will be removed in a future version.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - inplace = False - - inplace = validate_bool_kwarg(inplace, "inplace") - cat = self if inplace else self.copy() - idx, inv = np.unique(cat._codes, return_inverse=True) + idx, inv = np.unique(self._codes, return_inverse=True) if idx.size != 0 and idx[0] == -1: # na sentinel idx, inv = idx[1:], inv - 1 - new_categories = cat.dtype.categories.take(idx) + new_categories = self.dtype.categories.take(idx) new_dtype = CategoricalDtype._from_fastpath( new_categories, ordered=self.ordered ) new_codes = coerce_indexer_dtype(inv, new_dtype.categories) + + cat = self.copy() NDArrayBacked.__init__(cat, new_codes, new_dtype) - if not inplace: - return cat - return None + return cat # ------------------------------------------------------------------ @@ -1836,10 +1561,8 @@ def check_for_ordered(self, op) -> None: "Categorical to an ordered one\n" ) - # error: Signature of "argsort" incompatible with supertype "ExtensionArray" - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) - def argsort( # type: ignore[override] - self, ascending: bool = True, kind: SortKind = "quicksort", **kwargs + def argsort( + self, *, ascending: bool = True, kind: SortKind = "quicksort", **kwargs ): """ Return the indices that would sort the Categorical. @@ -1909,9 +1632,12 @@ def sort_values( ) -> None: ... - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) def sort_values( - self, inplace: bool = False, ascending: bool = True, na_position: str = "last" + self, + *, + inplace: bool = False, + ascending: bool = True, + na_position: str = "last", ) -> Categorical | None: """ Sort the Categorical by category value returning a new @@ -2555,40 +2281,40 @@ def _replace(self, *, to_replace, value, inplace: bool = False): inplace = validate_bool_kwarg(inplace, "inplace") cat = self if inplace else self.copy() - # build a dict of (to replace -> value) pairs - if is_list_like(to_replace): - # if to_replace is list-like and value is scalar - replace_dict = {replace_value: value for replace_value in to_replace} - else: - # if both to_replace and value are scalar - replace_dict = {to_replace: value} - # other cases, like if both to_replace and value are list-like or if # to_replace is a dict, are handled separately in NDFrame - for replace_value, new_value in replace_dict.items(): - if new_value == replace_value: + if not is_list_like(to_replace): + to_replace = [to_replace] + + categories = cat.categories.tolist() + removals = set() + for replace_value in to_replace: + if value == replace_value: + continue + if replace_value not in cat.categories: + continue + if isna(value): + removals.add(replace_value) continue - if replace_value in cat.categories: - if isna(new_value): - with catch_warnings(): - simplefilter("ignore") - cat.remove_categories(replace_value, inplace=True) - continue - - categories = cat.categories.tolist() - index = categories.index(replace_value) - - if new_value in cat.categories: - value_index = categories.index(new_value) - cat._codes[cat._codes == index] = value_index - with catch_warnings(): - simplefilter("ignore") - 
cat.remove_categories(replace_value, inplace=True) - else: - categories[index] = new_value - with catch_warnings(): - simplefilter("ignore") - cat.rename_categories(categories, inplace=True) + + index = categories.index(replace_value) + + if value in cat.categories: + value_index = categories.index(value) + cat._codes[cat._codes == index] = value_index + removals.add(replace_value) + else: + categories[index] = value + cat._set_categories(categories) + + if len(removals): + new_categories = [c for c in categories if c not in removals] + new_dtype = CategoricalDtype(new_categories, ordered=self.dtype.ordered) + codes = recode_for_categories( + cat.codes, cat.categories, new_dtype.categories + ) + NDArrayBacked.__init__(cat, codes, new_dtype) + if not inplace: return cat @@ -2638,10 +2364,6 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): """ Accessor object for categorical properties of the Series values. - Be aware that assigning to `categories` is a inplace operation, while all - methods return new categorical data per default (but can be called with - `inplace=True`). - Parameters ---------- data : Series or CategoricalIndex diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index bcf4b5d58bf74..e82045eee6143 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -217,7 +217,7 @@ def _scalar_from_string(self, value: str) -> DTScalarOrNaT: raise AbstractMethodError(self) def _unbox_scalar( - self, value: DTScalarOrNaT, setitem: bool = False + self, value: DTScalarOrNaT ) -> np.int64 | np.datetime64 | np.timedelta64: """ Unbox the integer value of a scalar `value`. @@ -226,8 +226,6 @@ def _unbox_scalar( ---------- value : Period, Timestamp, Timedelta, or NaT Depending on subclass. - setitem : bool, default False - Whether to check compatibility with setitem strictness. Returns ------- @@ -240,9 +238,7 @@ def _unbox_scalar( """ raise AbstractMethodError(self) - def _check_compatible_with( - self, other: DTScalarOrNaT, setitem: bool = False - ) -> None: + def _check_compatible_with(self, other: DTScalarOrNaT) -> None: """ Verify that `self` and `other` are compatible. @@ -255,9 +251,6 @@ def _check_compatible_with( Parameters ---------- other - setitem : bool, default False - For __setitem__ we may have stricter compatibility restrictions than - for comparisons. 
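Note: `_check_compatible_with` loses its `setitem` mode here, and the matching timezone special case in datetimes.py disappears further below. Per the deprecation message being removed, a mismatched-timezone value assigned into a datetime64tz column is now cast to the existing timezone instead of raising; illustratively (behavior inferred from the removed warning text, not shown directly in this diff):

    import pandas as pd

    ser = pd.Series(pd.date_range("2020-01-01", periods=2, tz="UTC"))
    ser[0] = pd.Timestamp("2020-01-01 00:00", tz="US/Eastern")
    ser[0]    # Timestamp('2020-01-01 05:00:00+0000', tz='UTC')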
Raises ------ @@ -438,7 +431,6 @@ def astype(self, dtype, copy: bool = True): converted = ints_to_pydatetime( i8data, tz=self.tz, - freq=self.freq, box="timestamp", reso=self._creso, ) @@ -603,37 +595,6 @@ def _validate_comparison_value(self, other): return other - def _validate_shift_value(self, fill_value): - # TODO(2.0): once this deprecation is enforced, use _validate_scalar - if is_valid_na_for_dtype(fill_value, self.dtype): - fill_value = NaT - elif isinstance(fill_value, self._recognized_scalars): - fill_value = self._scalar_type(fill_value) - else: - new_fill: DatetimeLikeScalar - - # only warn if we're not going to raise - if self._scalar_type is Period and lib.is_integer(fill_value): - # kludge for #31971 since Period(integer) tries to cast to str - new_fill = Period._from_ordinal(fill_value, freq=self.freq) - else: - new_fill = self._scalar_type(fill_value) - - # stacklevel here is chosen to be correct when called from - # DataFrame.shift or Series.shift - warnings.warn( - f"Passing {type(fill_value)} to shift is deprecated and " - "will raise in a future version, pass " - f"{self._scalar_type.__name__} instead.", - FutureWarning, - # There is no way to hard-code the level since this might be - # reached directly or called from the Index or Block method - stacklevel=find_stack_level(), - ) - fill_value = new_fill - - return self._unbox(fill_value, setitem=True) - def _validate_scalar( self, value, @@ -694,7 +655,7 @@ def _validate_scalar( # this option exists to prevent a performance hit in # TimedeltaIndex.get_loc return value - return self._unbox_scalar(value, setitem=setitem) + return self._unbox_scalar(value) def _validation_error_message(self, value, allow_listlike: bool = False) -> str: """ @@ -788,19 +749,18 @@ def _validate_setitem_value(self, value): else: return self._validate_scalar(value, allow_listlike=True) - return self._unbox(value, setitem=True) + return self._unbox(value) - def _unbox( - self, other, setitem: bool = False - ) -> np.int64 | np.datetime64 | np.timedelta64 | np.ndarray: + @final + def _unbox(self, other) -> np.int64 | np.datetime64 | np.timedelta64 | np.ndarray: """ Unbox either a scalar with _unbox_scalar or an instance of our own type. """ if lib.is_scalar(other): - other = self._unbox_scalar(other, setitem=setitem) + other = self._unbox_scalar(other) else: # same type as self - self._check_compatible_with(other, setitem=setitem) + self._check_compatible_with(other) other = other._ndarray return other @@ -2303,7 +2263,7 @@ def validate_inferred_freq( "values does not conform to passed frequency " f"{freq.freqstr}" ) - elif freq is None: + if freq is None: freq = inferred_freq freq_infer = False diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 07d689d737c87..71002377293b7 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -216,8 +216,6 @@ def _scalar_type(self) -> type[Timestamp]: "hour", "minute", "second", - "weekofyear", - "week", "weekday", "dayofweek", "day_of_week", @@ -294,6 +292,7 @@ def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False): def _from_sequence_not_strict( cls, data, + *, dtype=None, copy: bool = False, tz=lib.no_default, @@ -302,6 +301,9 @@ def _from_sequence_not_strict( yearfirst: bool = False, ambiguous: TimeAmbiguous = "raise", ): + """ + A non-strict version of _from_sequence, called from DatetimeIndex.__new__. 
+ """ explicit_none = freq is None freq = freq if freq is not lib.no_default else None freq, freq_infer = dtl.maybe_infer_freq(freq) @@ -482,36 +484,19 @@ def _generate_range( # type: ignore[override] # ----------------------------------------------------------------- # DatetimeLike Interface - def _unbox_scalar(self, value, setitem: bool = False) -> np.datetime64: + def _unbox_scalar(self, value) -> np.datetime64: if not isinstance(value, self._scalar_type) and value is not NaT: raise ValueError("'value' should be a Timestamp.") - self._check_compatible_with(value, setitem=setitem) + self._check_compatible_with(value) return value.asm8 def _scalar_from_string(self, value) -> Timestamp | NaTType: return Timestamp(value, tz=self.tz) - def _check_compatible_with(self, other, setitem: bool = False): + def _check_compatible_with(self, other) -> None: if other is NaT: return self._assert_tzawareness_compat(other) - if setitem: - # Stricter check for setitem vs comparison methods - if self.tz is not None and not timezones.tz_compare(self.tz, other.tz): - # TODO(2.0): remove this check. GH#37605 - warnings.warn( - "Setitem-like behavior with mismatched timezones is deprecated " - "and will change in a future version. Instead of raising " - "(or for Index, Series, and DataFrame methods, coercing to " - "object dtype), the value being set (or passed as a " - "fill_value, or inserted) will be cast to the existing " - "DatetimeArray/DatetimeIndex/Series/DataFrame column's " - "timezone. To retain the old behavior, explicitly cast to " - "object dtype before the operation.", - FutureWarning, - stacklevel=find_stack_level(), - ) - raise ValueError(f"Timezones don't match. '{self.tz}' != '{other.tz}'") # ----------------------------------------------------------------- # Descriptive Properties @@ -520,12 +505,6 @@ def _box_func(self, x: np.datetime64) -> Timestamp | NaTType: # GH#42228 value = x.view("i8") ts = Timestamp._from_value_and_reso(value, reso=self._creso, tz=self.tz) - # Non-overlapping identity check (left operand type: "Timestamp", - # right operand type: "NaTType") - if ts is not NaT: # type: ignore[comparison-overlap] - # GH#41586 - # do this instead of passing to the constructor to avoid FutureWarning - ts._set_freq(self.freq) return ts @property @@ -626,7 +605,6 @@ def __iter__(self) -> Iterator: converted = ints_to_pydatetime( data[start_i:end_i], tz=self.tz, - freq=self.freq, box="timestamp", reso=self._creso, ) @@ -685,15 +663,10 @@ def astype(self, dtype, copy: bool = True): and dtype != self.dtype and is_unitless(dtype) ): - # TODO(2.0): just fall through to dtl.DatetimeLikeArrayMixin.astype - warnings.warn( - "Passing unit-less datetime64 dtype to .astype is deprecated " - "and will raise in a future version. Pass 'datetime64[ns]' instead", - FutureWarning, - stacklevel=find_stack_level(), + raise TypeError( + "Casting to unit-less dtype 'datetime64' is not supported. " + "Pass e.g. 'datetime64[ns]' instead." ) - # unit conversion e.g. datetime64[s] - return self._ndarray.astype(dtype) elif is_period_dtype(dtype): return self.to_period(freq=dtype.freq) @@ -1365,32 +1338,6 @@ def isocalendar(self) -> DataFrame: iso_calendar_df.iloc[self._isnan] = None return iso_calendar_df - @property - def weekofyear(self): - """ - The week ordinal of the year. - - .. deprecated:: 1.1.0 - - weekofyear and week have been deprecated. - Please use DatetimeIndex.isocalendar().week instead. 
- """ - warnings.warn( - "weekofyear and week have been deprecated, please use " - "DatetimeIndex.isocalendar().week instead, which returns " - "a Series. To exactly reproduce the behavior of week and " - "weekofyear and return an Index, you may call " - "pd.Int64Index(idx.isocalendar().week)", - FutureWarning, - stacklevel=find_stack_level(), - ) - week_series = self.isocalendar().week - if week_series.hasnans: - return week_series.to_numpy(dtype="float64", na_value=np.nan) - return week_series.to_numpy(dtype="int64") - - week = weekofyear - year = _field_accessor( "year", "Y", @@ -1985,14 +1932,13 @@ def std( # Constructor Helpers -def sequence_to_datetimes(data, require_iso8601: bool = False) -> DatetimeArray: +def sequence_to_datetimes(data) -> DatetimeArray: """ Parse/convert the passed data to either DatetimeArray or np.ndarray[object]. """ result, tz, freq = _sequence_to_dt64ns( data, allow_mixed=True, - require_iso8601=require_iso8601, ) unit = np.datetime_data(result.dtype)[0] @@ -2010,7 +1956,6 @@ def _sequence_to_dt64ns( yearfirst: bool = False, ambiguous: TimeAmbiguous = "raise", allow_mixed: bool = False, - require_iso8601: bool = False, ): """ Parameters @@ -2024,8 +1969,6 @@ def _sequence_to_dt64ns( See pandas._libs.tslibs.tzconversion.tz_localize_to_utc. allow_mixed : bool, default False Interpret integers as timestamps when datetime objects are also present. - require_iso8601 : bool, default False - Only consider ISO-8601 formats when parsing strings. Returns ------- @@ -2063,6 +2006,11 @@ def _sequence_to_dt64ns( copy = False if lib.infer_dtype(data, skipna=False) == "integer": data = data.astype(np.int64) + elif tz is not None and ambiguous == "raise": + # TODO: yearfirst/dayfirst/etc? + obj_data = np.asarray(data, dtype=object) + i8data = tslib.array_to_datetime_with_tz(obj_data, tz) + return i8data.view(DT64NS_DTYPE), tz, None else: # data comes back here as either i8 to denote UTC timestamps # or M8[ns] to denote wall times @@ -2072,7 +2020,6 @@ def _sequence_to_dt64ns( yearfirst=yearfirst, allow_object=False, allow_mixed=allow_mixed, - require_iso8601=require_iso8601, ) if tz and inferred_tz: # two timezones: convert to intended from base UTC repr @@ -2123,10 +2070,15 @@ def _sequence_to_dt64ns( # Convert tz-naive to UTC # TODO: if tz is UTC, are there situations where we *don't* want a # copy? tz_localize_to_utc always makes one. + shape = data.shape + if data.ndim > 1: + data = data.ravel() + data = tzconversion.tz_localize_to_utc( data.view("i8"), tz, ambiguous=ambiguous, creso=data_unit ) data = data.view(new_dtype) + data = data.reshape(shape) assert data.dtype == new_dtype, data.dtype result = data @@ -2259,27 +2211,12 @@ def maybe_convert_dtype(data, copy: bool, tz: tzinfo | None = None): return data, copy if is_float_dtype(data.dtype): - # Note: we must cast to datetime64[ns] here in order to treat these - # as wall-times instead of UTC timestamps. - data = data.astype(DT64NS_DTYPE) + # pre-2.0 we treated these as wall-times, inconsistent with ints + # GH#23675, GH#45573 deprecated to treat symmetrically with integer dtypes. + # Note: data.astype(np.int64) fails ARM tests, see + # https://github.com/pandas-dev/pandas/issues/49468. 
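With the float branch of `maybe_convert_dtype` rewritten above, float data is now read as epoch nanoseconds, symmetrically with integers, when a timezone is supplied (GH#23675, GH#45573). A sketch under that assumption:

>>> import numpy as np
>>> import pandas as pd
>>> ints = np.array([1_577_836_800_000_000_000], dtype=np.int64)  # 2020-01-01 UTC
>>> pd.DatetimeIndex(ints, tz="US/Eastern")[0]                    # UTC epoch, converted
>>> pd.DatetimeIndex(ints.astype(np.float64), tz="US/Eastern")[0] # floats now match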
+ data = data.astype("M8[ns]").view("i8") copy = False - if ( - tz is not None - and len(data) > 0 - and not timezones.is_utc(timezones.maybe_get_tz(tz)) - ): - # GH#23675, GH#45573 deprecate to treat symmetrically with integer dtypes - warnings.warn( - "The behavior of DatetimeArray._from_sequence with a timezone-aware " - "dtype and floating-dtype data is deprecated. In a future version, " - "this data will be interpreted as nanosecond UTC timestamps " - "instead of wall-times, matching the behavior with integer dtypes. " - "To retain the old behavior, explicitly cast to 'datetime64[ns]' " - "before passing the data to pandas. To get the future behavior, " - "first cast to 'int64'.", - FutureWarning, - stacklevel=find_stack_level(), - ) elif is_timedelta64_dtype(data.dtype) or is_bool_dtype(data.dtype): # GH#29794 enforcing deprecation introduced in GH#23539 @@ -2424,7 +2361,7 @@ def validate_tz_from_dtype( if dtz is not None: if tz is not None and not timezones.tz_compare(tz, dtz): raise ValueError("cannot supply both a tz and a dtype with a tz") - elif explicit_tz_none: + if explicit_tz_none: raise ValueError("Cannot pass both a timezone-aware dtype and tz=None") tz = dtz diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 2c15a7bbc88a7..77e2fdac26da9 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -43,10 +43,7 @@ ) from pandas.compat.numpy import function as nv from pandas.errors import IntCastingNaNError -from pandas.util._decorators import ( - Appender, - deprecate_nonkeyword_arguments, -) +from pandas.util._decorators import Appender from pandas.core.dtypes.cast import LossySetitemError from pandas.core.dtypes.common import ( @@ -317,17 +314,17 @@ def _simple_new( f"right [{type(right).__name__}] types" ) raise ValueError(msg) - elif is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype): + if is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype): # GH 19016 msg = ( "category, object, and string subtypes are not supported " "for IntervalArray" ) raise TypeError(msg) - elif isinstance(left, ABCPeriodIndex): + if isinstance(left, ABCPeriodIndex): msg = "Period dtypes are not supported, use a PeriodIndex instead" raise ValueError(msg) - elif isinstance(left, ABCDatetimeIndex) and str(left.tz) != str(right.tz): + if isinstance(left, ABCDatetimeIndex) and str(left.tz) != str(right.tz): msg = ( "left and right must have the same time zone, got " f"'{left.tz}' and '{right.tz}'" @@ -386,7 +383,8 @@ def _from_factorized( Left and right bounds for each interval. closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both - or neither. + or neither.\ + %(name)s copy : bool, default False Copy the data. dtype : dtype or None, default None @@ -411,6 +409,7 @@ def _from_factorized( _interval_shared_docs["from_breaks"] % { "klass": "IntervalArray", + "name": "", "examples": textwrap.dedent( """\ Examples @@ -446,7 +445,8 @@ def from_breaks( Right bounds for each interval. closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both - or neither. + or neither.\ + %(name)s copy : bool, default False Copy the data. 
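`validate_tz_from_dtype` keeps both error paths shown above; only the redundant `elif` changed. For reference, the two rejected combinations:

>>> import pandas as pd
>>> pd.DatetimeIndex(["2000"], dtype="datetime64[ns, UTC]", tz="US/Eastern")
... # ValueError: cannot supply both a tz and a dtype with a tz
>>> pd.DatetimeIndex(["2000"], dtype="datetime64[ns, UTC]", tz=None)
... # ValueError: Cannot pass both a timezone-aware dtype and tz=None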
dtype : dtype, optional @@ -488,6 +488,7 @@ def from_breaks( _interval_shared_docs["from_arrays"] % { "klass": "IntervalArray", + "name": "", "examples": textwrap.dedent( """\ >>> pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3]) @@ -523,7 +524,8 @@ def from_arrays( Array of tuples. closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both - or neither. + or neither.\ + %(name)s copy : bool, default False By-default copy the data, this is compat only and ignored. dtype : dtype or None, default None @@ -550,6 +552,7 @@ def from_arrays( _interval_shared_docs["from_tuples"] % { "klass": "IntervalArray", + "name": "", "examples": textwrap.dedent( """\ Examples @@ -796,16 +799,15 @@ def __lt__(self, other): def __le__(self, other): return self._cmp_method(other, operator.le) - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) def argsort( self, + *, ascending: bool = True, kind: SortKind = "quicksort", na_position: str = "last", - *args, **kwargs, ) -> np.ndarray: - ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) + ascending = nv.validate_argsort_with_ascending(ascending, (), kwargs) if ascending and kind == "quicksort" and na_position == "last": return np.lexsort((self.right, self.left)) @@ -1319,7 +1321,7 @@ def mid(self) -> Index: def overlaps(self, other): if isinstance(other, (IntervalArray, ABCIntervalIndex)): raise NotImplementedError - elif not isinstance(other, Interval): + if not isinstance(other, Interval): msg = f"`other` must be Interval-like, got {type(other).__name__}" raise TypeError(msg) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 746175ee3374d..2727d5c82ac83 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -655,7 +655,7 @@ def _arith_method(self, other, op): raise NotImplementedError( f"operator '{op_name}' not implemented for bool dtypes" ) - elif op_name in {"mod", "rmod"}: + if op_name in {"mod", "rmod"}: dtype = "int8" else: dtype = "bool" @@ -1034,7 +1034,7 @@ def _quantile( # I think this should be out_mask=self.isna().all(axis=1) # but am holding off until we have tests raise NotImplementedError - elif self.isna().all(): + if self.isna().all(): out_mask = np.ones(res.shape, dtype=bool) else: out_mask = np.zeros(res.shape, dtype=bool) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 5e1b0c4b18718..f7808a729fa0a 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -328,13 +328,12 @@ def _generate_range(cls, start, end, periods, freq, fields): def _unbox_scalar( # type: ignore[override] self, value: Period | NaTType, - setitem: bool = False, ) -> np.int64: if value is NaT: # error: Item "Period" of "Union[Period, NaTType]" has no attribute "value" return np.int64(value.value) # type: ignore[union-attr] elif isinstance(value, self._scalar_type): - self._check_compatible_with(value, setitem=setitem) + self._check_compatible_with(value) return np.int64(value.ordinal) else: raise ValueError(f"'value' should be a Period. 
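Dropping `deprecate_nonkeyword_arguments` from `IntervalArray.argsort` above makes the sort options keyword-only, as the enforced form of the old deprecation. Sketch:

>>> import pandas as pd
>>> arr = pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3])
>>> arr.argsort(ascending=False)   # keyword arguments: OK
>>> arr.argsort(False)             # TypeError: positional arguments no longer accepted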
Got '{value}' instead.") @@ -342,7 +341,7 @@ def _unbox_scalar( # type: ignore[override] def _scalar_from_string(self, value: str) -> Period: return Period(value, freq=self.freq) - def _check_compatible_with(self, other, setitem: bool = False) -> None: + def _check_compatible_with(self, other) -> None: if other is NaT: return self._require_matching_freq(other) @@ -1168,7 +1167,7 @@ def _make_field_arrays(*fields) -> list[np.ndarray]: if isinstance(x, (list, np.ndarray, ABCSeries)): if length is not None and len(x) != length: raise ValueError("Mismatched Period array lengths") - elif length is None: + if length is None: length = len(x) # error: Argument 2 to "repeat" has incompatible type "Optional[int]"; expected diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 38d3e0d73ef2c..93d6ac0ef6e06 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -78,7 +78,10 @@ notna, ) -from pandas.core import arraylike +from pandas.core import ( + arraylike, + ops, +) import pandas.core.algorithms as algos from pandas.core.array_algos.quantile import quantile_with_mask from pandas.core.arraylike import OpsMixin @@ -96,9 +99,8 @@ ) from pandas.core.missing import interpolate_2d from pandas.core.nanops import check_below_min_count -import pandas.core.ops as ops -import pandas.io.formats.printing as printing +from pandas.io.formats import printing # See https://github.com/python/typing/issues/684 if TYPE_CHECKING: @@ -118,6 +120,8 @@ class ellipsis(Enum): SparseIndexKind = Literal["integer", "block"] + from pandas.core.dtypes.dtypes import ExtensionDtype + from pandas import Series else: @@ -777,7 +781,7 @@ def fillna( ): raise ValueError("Must specify one of 'method' or 'value'.") - elif method is not None: + if method is not None: msg = "fillna with 'method' requires high memory usage." warnings.warn( msg, @@ -1168,8 +1172,7 @@ def _take_without_fill(self: SparseArrayT, indices) -> SparseArrayT: if (indices.max() >= n) or (indices.min() < -n): if n == 0: raise IndexError("cannot do a non-empty take from an empty axes.") - else: - raise IndexError("out of bounds value in 'indices'.") + raise IndexError("out of bounds value in 'indices'.") if to_shift.any(): indices = indices.copy() @@ -1326,14 +1329,13 @@ def astype(self, dtype: AstypeArg | None = None, copy: bool = True): future_dtype = pandas_dtype(dtype) if not isinstance(future_dtype, SparseDtype): # GH#34457 - warnings.warn( - "The behavior of .astype from SparseDtype to a non-sparse dtype " - "is deprecated. In a future version, this will return a non-sparse " - "array with the requested dtype. 
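`PeriodArray._check_compatible_with` still funnels through `_require_matching_freq`, so frequency mismatches on setitem keep raising; only the `setitem` flag is gone. A sketch (the `IncompatibleFrequency` error is long-standing behavior, not new to this diff):

>>> import pandas as pd
>>> parr = pd.period_range("2000-01-01", periods=3, freq="D").array
>>> parr[0] = pd.Period("2000-01", freq="M")   # raises IncompatibleFrequency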
To retain the old behavior, use " - "`obj.astype(SparseDtype(dtype))`", - FutureWarning, - stacklevel=find_stack_level(), - ) + if isinstance(future_dtype, np.dtype): + values = np.array(self) + return astype_nansafe(values, dtype=future_dtype) + else: + dtype = cast(ExtensionDtype, dtype) + cls = dtype.construct_array_type() + return cls._from_sequence(self, dtype=dtype, copy=copy) dtype = self.dtype.update_dtype(dtype) subtype = pandas_dtype(dtype._subtype_with_str) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 92b9222cfc9bc..65996b1df5e9a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -45,7 +45,6 @@ from pandas.compat.numpy import function as nv from pandas.util._validators import validate_endpoints -from pandas.core.dtypes.astype import astype_td64_unit_conversion from pandas.core.dtypes.common import ( TD64NS_DTYPE, is_dtype_equal, @@ -215,11 +214,15 @@ def _from_sequence(cls, data, *, dtype=None, copy: bool = False) -> TimedeltaArr def _from_sequence_not_strict( cls, data, + *, dtype=None, copy: bool = False, freq=lib.no_default, unit=None, ) -> TimedeltaArray: + """ + A non-strict version of _from_sequence, called from TimedeltaIndex.__new__. + """ if dtype: dtype = _validate_td64_dtype(dtype) @@ -288,20 +291,19 @@ def _generate_range(cls, start, end, periods, freq, closed=None): # ---------------------------------------------------------------- # DatetimeLike Interface - def _unbox_scalar(self, value, setitem: bool = False) -> np.timedelta64: + def _unbox_scalar(self, value) -> np.timedelta64: if not isinstance(value, self._scalar_type) and value is not NaT: raise ValueError("'value' should be a Timedelta.") - self._check_compatible_with(value, setitem=setitem) + self._check_compatible_with(value) if value is NaT: return np.timedelta64(value.value, "ns") else: return value._as_unit(self._unit).asm8 - return np.timedelta64(value.value, "ns") def _scalar_from_string(self, value) -> Timedelta | NaTType: return Timedelta(value) - def _check_compatible_with(self, other, setitem: bool = False) -> None: + def _check_compatible_with(self, other) -> None: # we don't have anything to validate. pass @@ -327,8 +329,11 @@ def astype(self, dtype, copy: bool = True): return type(self)._simple_new( res_values, dtype=res_values.dtype, freq=self.freq ) - - return astype_td64_unit_conversion(self._ndarray, dtype, copy=copy) + else: + raise ValueError( + f"Cannot convert from {self.dtype} to {dtype}. 
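With the GH#34457 deprecation enforced above, `SparseArray.astype` with a non-sparse dtype now really returns a non-sparse result; wrap the target in `SparseDtype` to stay sparse. Sketch:

>>> import numpy as np
>>> import pandas as pd
>>> sparr = pd.arrays.SparseArray([0, 0, 1])
>>> type(sparr.astype(np.float64))            # plain numpy.ndarray now
>>> sparr.astype(pd.SparseDtype(np.float64))  # still sparse, as before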
" + "Supported resolutions are 's', 'ms', 'us', 'ns'" + ) return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy) @@ -485,7 +490,7 @@ def __truediv__(self, other): if len(other) != len(self): raise ValueError("Cannot divide vectors with unequal lengths") - elif is_timedelta64_dtype(other.dtype): + if is_timedelta64_dtype(other.dtype): # let numpy handle it return self._ndarray / other @@ -549,7 +554,7 @@ def __rtruediv__(self, other): if len(other) != len(self): raise ValueError("Cannot divide vectors with unequal lengths") - elif is_timedelta64_dtype(other.dtype): + if is_timedelta64_dtype(other.dtype): # let numpy handle it return other / self._ndarray @@ -601,7 +606,7 @@ def __floordiv__(self, other): if len(other) != len(self): raise ValueError("Cannot divide with unequal lengths") - elif is_timedelta64_dtype(other.dtype): + if is_timedelta64_dtype(other.dtype): other = type(self)(other) # numpy timedelta64 does not natively support floordiv, so operate @@ -670,7 +675,7 @@ def __rfloordiv__(self, other): if len(other) != len(self): raise ValueError("Cannot divide with unequal lengths") - elif is_timedelta64_dtype(other.dtype): + if is_timedelta64_dtype(other.dtype): other = type(self)(other) # numpy timedelta64 does not natively support floordiv, so operate # on the i8 values diff --git a/pandas/core/base.py b/pandas/core/base.py index 4b147dc619692..afcab23e130cd 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -17,11 +17,10 @@ final, overload, ) -import warnings import numpy as np -import pandas._libs.lib as lib +from pandas._libs import lib from pandas._typing import ( Axis, AxisInt, @@ -38,7 +37,6 @@ cache_readonly, doc, ) -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( is_categorical_dtype, @@ -71,7 +69,6 @@ from pandas.core.arraylike import OpsMixin from pandas.core.arrays import ExtensionArray from pandas.core.construction import ( - create_series_with_explicit_dtype, ensure_wrapped_if_datetimelike, extract_array, ) @@ -295,6 +292,7 @@ def _values(self) -> ExtensionArray | np.ndarray: # must be defined here as a property for mypy raise AbstractMethodError(self) + @final def transpose(self: _T, *args, **kwargs) -> _T: """ Return the transpose, which is by definition self. @@ -331,6 +329,7 @@ def ndim(self) -> Literal[1]: """ return 1 + @final def item(self): """ Return the first element of the underlying data as a Python scalar. @@ -428,6 +427,7 @@ def array(self) -> ExtensionArray: """ raise AbstractMethodError(self) + @final def to_numpy( self, dtype: npt.DTypeLike | None = None, @@ -543,6 +543,7 @@ def to_numpy( result[np.asanyarray(self.isna())] = na_value return result + @final @property def empty(self) -> bool: return not self.size @@ -842,9 +843,12 @@ def _map_values(self, mapper, na_action=None): # expected to be pd.Series(np.nan, ...). 
As np.nan is # of dtype float64 the return value of this method should # be float64 as well - mapper = create_series_with_explicit_dtype( - mapper, dtype_if_empty=np.float64 - ) + from pandas import Series + + if len(mapper) == 0: + mapper = Series(mapper, dtype=np.float64) + else: + mapper = Series(mapper) if isinstance(mapper, ABCSeries): if na_action not in (None, "ignore"): @@ -900,6 +904,7 @@ def _map_values(self, mapper, na_action=None): return new_values + @final def value_counts( self, normalize: bool = False, @@ -1004,6 +1009,7 @@ def unique(self): result = unique1d(values) return result + @final def nunique(self, dropna: bool = True) -> int: """ Return number of unique elements in the object. @@ -1054,27 +1060,6 @@ def is_unique(self) -> bool: """ return self.nunique(dropna=False) == len(self) - @property - def is_monotonic(self) -> bool: - """ - Return boolean if values in the object are monotonically increasing. - - .. deprecated:: 1.5.0 - is_monotonic is deprecated and will be removed in a future version. - Use is_monotonic_increasing instead. - - Returns - ------- - bool - """ - warnings.warn( - "is_monotonic is deprecated and will be removed in a future version. " - "Use is_monotonic_increasing instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - return self.is_monotonic_increasing - @property def is_monotonic_increasing(self) -> bool: """ @@ -1101,6 +1086,7 @@ def is_monotonic_decreasing(self) -> bool: return Index(self).is_monotonic_decreasing + @final def _memory_usage(self, deep: bool = False) -> int: """ Memory usage of the values. @@ -1304,7 +1290,7 @@ def searchsorted( sorter=sorter, ) - def drop_duplicates(self, keep: DropKeep = "first"): + def drop_duplicates(self, *, keep: DropKeep = "first"): duplicated = self._duplicated(keep=keep) # error: Value of type "IndexOpsMixin" is not indexable return self[~duplicated] # type: ignore[index] diff --git a/pandas/core/common.py b/pandas/core/common.py index 817b889623d99..c73c31c2a103b 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -148,15 +148,13 @@ def is_bool_indexer(key: Any) -> bool: return False -def cast_scalar_indexer(val, warn_float: bool = False): +def cast_scalar_indexer(val): """ - To avoid numpy DeprecationWarnings, cast float to integer where valid. + Disallow indexing with a float key, even if that key is a round number. Parameters ---------- val : scalar - warn_float : bool, default False - If True, issue deprecation warning for a float indexer. Returns ------- @@ -164,14 +162,11 @@ def cast_scalar_indexer(val, warn_float: bool = False): """ # assumes lib.is_scalar(val) if lib.is_float(val) and val.is_integer(): - if warn_float: - warnings.warn( - "Indexing with a float is deprecated, and will raise an IndexError " - "in pandas 2.0. You can manually convert to an integer key instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - return int(val) + raise IndexError( + # GH#34193 + "Indexing with a float is no longer supported. Manually convert " + "to an integer key instead." 
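Two enforcements from this hunk worth illustrating: `is_monotonic` is removed in favor of `is_monotonic_increasing`, and float keys in scalar indexing now raise instead of being silently cast (GH#34193). Sketch:

>>> import pandas as pd
>>> idx = pd.Index([1, 2, 3])
>>> idx.is_monotonic_increasing   # replaces the removed .is_monotonic
True
>>> idx[1.0]                      # IndexError: convert to an integer key instead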
+ ) return val @@ -393,7 +388,7 @@ def standardize_mapping(into): into = type(into) if not issubclass(into, abc.Mapping): raise TypeError(f"unsupported type: {into}") - elif into == defaultdict: + if into == defaultdict: raise TypeError("to_dict() only accepts initialized defaultdicts") return into diff --git a/pandas/core/computation/engines.py b/pandas/core/computation/engines.py index 2ea5a5367611f..a3a05a9d75c6e 100644 --- a/pandas/core/computation/engines.py +++ b/pandas/core/computation/engines.py @@ -17,7 +17,7 @@ REDUCTIONS, ) -import pandas.io.formats.printing as printing +from pandas.io.formats import printing if TYPE_CHECKING: from pandas.core.computation.expr import Expr @@ -102,7 +102,6 @@ def _evaluate(self): ----- Must be implemented by subclasses. """ - pass class NumExprEngine(AbstractEngine): diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index bc8c37b9273ce..f0127ae05182a 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -344,7 +344,7 @@ def eval( "Multi-line expressions are only valid " "if all expressions contain an assignment" ) - elif inplace: + if inplace: raise ValueError("Cannot operate inplace if there is no assignment") # assign if needed diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index c91cfd65e3c40..e7474ea5dd9f8 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -44,7 +44,7 @@ ) from pandas.core.computation.scope import Scope -import pandas.io.formats.printing as printing +from pandas.io.formats import printing def _rewrite_assign(tok: tuple[int, str]) -> tuple[int, str]: diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index e6bdbbcb5aa12..b101b25a10a80 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -624,27 +624,11 @@ def use_inf_as_na_cb(key) -> None: auto, {others}. """ -_xls_options = ["xlwt"] _xlsm_options = ["openpyxl"] _xlsx_options = ["openpyxl", "xlsxwriter"] _ods_options = ["odf"] -with cf.config_prefix("io.excel.xls"): - cf.register_option( - "writer", - "auto", - writer_engine_doc.format(ext="xls", others=", ".join(_xls_options)), - validator=str, - ) -cf.deprecate_option( - "io.excel.xls.writer", - msg="As the xlwt package is no longer maintained, the xlwt engine will be " - "removed in a future version of pandas. This is the only engine in pandas that " - "supports writing in the xls format. 
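With the `io.excel.xls.writer` option removed above (the unmaintained xlwt engine is being dropped), writing `.xls` files is no longer possible; write `.xlsx` via openpyxl or xlsxwriter instead. Sketch (the filename is illustrative and the exact `.xls` error message is an assumption):

>>> import pandas as pd
>>> df = pd.DataFrame({"a": [1]})
>>> df.to_excel("out.xlsx")   # openpyxl / xlsxwriter remain supported
>>> df.to_excel("out.xls")    # expected ValueError: no engine for the 'xls' filetype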
Install openpyxl and write to an " - "xlsx file instead.", -) - with cf.config_prefix("io.excel.xlsm"): cf.register_option( "writer", diff --git a/pandas/core/construction.py b/pandas/core/construction.py index b7db95269439c..997611d7860db 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -8,7 +8,6 @@ from typing import ( TYPE_CHECKING, - Any, Optional, Sequence, Union, @@ -18,7 +17,7 @@ import warnings import numpy as np -import numpy.ma as ma +from numpy import ma from pandas._libs import lib from pandas._libs.tslibs.period import Period @@ -55,10 +54,7 @@ is_object_dtype, is_timedelta64_ns_dtype, ) -from pandas.core.dtypes.dtypes import ( - DatetimeTZDtype, - PandasDtype, -) +from pandas.core.dtypes.dtypes import PandasDtype from pandas.core.dtypes.generic import ( ABCExtensionArray, ABCIndex, @@ -503,7 +499,6 @@ def sanitize_array( index: Index | None, dtype: DtypeObj | None = None, copy: bool = False, - raise_cast_failure: bool = True, *, allow_2d: bool = False, ) -> ArrayLike: @@ -517,19 +512,12 @@ def sanitize_array( index : Index or None, default None dtype : np.dtype, ExtensionDtype, or None, default None copy : bool, default False - raise_cast_failure : bool, default True allow_2d : bool, default False If False, raise if we have a 2D Arraylike. Returns ------- np.ndarray or ExtensionArray - - Notes - ----- - raise_cast_failure=False is only intended to be True when called from the - DataFrame constructor, as the dtype keyword there may be interpreted as only - applying to a subset of columns, see GH#24435. """ if isinstance(data, ma.MaskedArray): data = sanitize_masked_array(data) @@ -556,8 +544,25 @@ def sanitize_array( data = construct_1d_arraylike_from_scalar(data, len(index), dtype) return data + elif isinstance(data, ABCExtensionArray): + # it is already ensured above this is not a PandasArray + # Until GH#49309 is fixed this check needs to come before the + # ExtensionDtype check + if dtype is not None: + subarr = data.astype(dtype, copy=copy) + elif copy: + subarr = data.copy() + else: + subarr = data + + elif isinstance(dtype, ExtensionDtype): + # create an extension array from its dtype + _sanitize_non_ordered(data) + cls = dtype.construct_array_type() + subarr = cls._from_sequence(data, dtype=dtype, copy=copy) + # GH#846 - if isinstance(data, np.ndarray): + elif isinstance(data, np.ndarray): if isinstance(data, np.matrix): data = data.A @@ -567,7 +572,10 @@ def sanitize_array( # GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int # casting aligning with IntCastingNaNError below with np.errstate(invalid="ignore"): - subarr = _try_cast(data, dtype, copy, True) + # GH#15832: Check if we are requesting a numeric dtype and + # that we can convert the data to the requested dtype. + subarr = maybe_cast_to_integer_array(data, dtype) + except IntCastingNaNError: warnings.warn( "In a future version, passing float-dtype values containing NaN " @@ -580,21 +588,10 @@ def sanitize_array( ) subarr = np.array(data, copy=copy) except ValueError: - if not raise_cast_failure: - # i.e. called via DataFrame constructor - warnings.warn( - "In a future version, passing float-dtype values and an " - "integer dtype to DataFrame will retain floating dtype " - "if they cannot be cast losslessly (matching Series behavior). 
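The reordered branches in `sanitize_array` above mean an `ExtensionDtype` request is satisfied by `dtype.construct_array_type()._from_sequence` before any ndarray handling. A minimal sketch of that path:

>>> import pandas as pd
>>> dtype = pd.Int64Dtype()
>>> cls = dtype.construct_array_type()             # IntegerArray
>>> cls._from_sequence([1, 2, None], dtype=dtype)  # IntegerArray([1, 2, <NA>])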
" - "To retain the old behavior, use DataFrame(data).astype(dtype)", - FutureWarning, - stacklevel=find_stack_level(), - ) - # GH#40110 until the deprecation is enforced, we _dont_ - # ignore the dtype for DataFrame, and _do_ cast even though - # it is lossy. - dtype = cast(np.dtype, dtype) - return np.array(data, dtype=dtype, copy=copy) + # Pre-2.0, we would have different behavior for Series vs DataFrame. + # DataFrame would call np.array(data, dtype=dtype, copy=copy), + # which would cast to the integer dtype even if the cast is lossy. + # See GH#40110. # We ignore the dtype arg and return floating values, # e.g. test_constructor_floating_data_int_dtype @@ -602,32 +599,31 @@ def sanitize_array( subarr = np.array(data, copy=copy) else: # we will try to copy by-definition here - subarr = _try_cast(data, dtype, copy, raise_cast_failure) - - elif isinstance(data, ABCExtensionArray): - # it is already ensured above this is not a PandasArray - subarr = data - - if dtype is not None: - subarr = subarr.astype(dtype, copy=copy) - elif copy: - subarr = subarr.copy() + subarr = _try_cast(data, dtype, copy) + + elif hasattr(data, "__array__"): + # e.g. dask array GH#38645 + data = np.array(data, copy=copy) + return sanitize_array( + data, + index=index, + dtype=dtype, + copy=False, + allow_2d=allow_2d, + ) else: - if isinstance(data, (set, frozenset)): - # Raise only for unordered sets, e.g., not for dict_keys - raise TypeError(f"'{type(data).__name__}' type is unordered") - + _sanitize_non_ordered(data) # materialize e.g. generators, convert e.g. tuples, abc.ValueView - if hasattr(data, "__array__"): - # e.g. dask array GH#38645 - data = np.array(data, copy=copy) - else: - data = list(data) + data = list(data) + + if len(data) == 0 and dtype is None: + # We default to float64, matching numpy + subarr = np.array([], dtype=np.float64) - if dtype is not None or len(data) == 0: + elif dtype is not None: try: - subarr = _try_cast(data, dtype, copy, raise_cast_failure) + subarr = _try_cast(data, dtype, copy) except ValueError: if is_integer_dtype(dtype): casted = np.array(data, copy=False) @@ -639,7 +635,6 @@ def sanitize_array( index, dtype, copy=False, - raise_cast_failure=raise_cast_failure, allow_2d=allow_2d, ) else: @@ -681,6 +676,14 @@ def range_to_ndarray(rng: range) -> np.ndarray: return arr +def _sanitize_non_ordered(data) -> None: + """ + Raise only for unordered sets, e.g., not for dict_keys + """ + if isinstance(data, (set, frozenset)): + raise TypeError(f"'{type(data).__name__}' type is unordered") + + def _sanitize_ndim( result: ArrayLike, data, @@ -695,7 +698,7 @@ def _sanitize_ndim( if getattr(result, "ndim", 0) == 0: raise ValueError("result should be arraylike with ndim > 0") - elif result.ndim == 1: + if result.ndim == 1: # the result that we want result = _maybe_repeat(result, index) @@ -751,9 +754,8 @@ def _maybe_repeat(arr: ArrayLike, index: Index | None) -> ArrayLike: def _try_cast( arr: list | np.ndarray, - dtype: DtypeObj | None, + dtype: np.dtype | None, copy: bool, - raise_cast_failure: bool, ) -> ArrayLike: """ Convert input to numpy ndarray and optionally cast to a given dtype. @@ -762,12 +764,9 @@ def _try_cast( ---------- arr : ndarray or list Excludes: ExtensionArray, Series, Index. - dtype : np.dtype, ExtensionDtype or None + dtype : np.dtype or None copy : bool If False, don't copy the data if not needed. - raise_cast_failure : bool - If True, and if a dtype is specified, raise errors during casting. - Otherwise an object array is returned. 
Returns ------- @@ -798,22 +797,6 @@ def _try_cast( return varr return maybe_infer_to_datetimelike(varr) - elif isinstance(dtype, ExtensionDtype): - # create an extension array from its dtype - if isinstance(dtype, DatetimeTZDtype): - # We can't go through _from_sequence because it handles dt64naive - # data differently; _from_sequence treats naive as wall times, - # while maybe_cast_to_datetime treats it as UTC - # see test_maybe_promote_any_numpy_dtype_with_datetimetz - # TODO(2.0): with deprecations enforced, should be able to remove - # special case. - return maybe_cast_to_datetime(arr, dtype) - # TODO: copy? - - array_type = dtype.construct_array_type()._from_sequence - subarr = array_type(arr, dtype=dtype, copy=copy) - return subarr - elif is_object_dtype(dtype): if not is_ndarray: subarr = construct_1d_object_array_from_listlike(arr) @@ -836,92 +819,13 @@ def _try_cast( elif dtype.kind in ["m", "M"]: return maybe_cast_to_datetime(arr, dtype) - try: - # GH#15832: Check if we are requesting a numeric dtype and - # that we can convert the data to the requested dtype. - if is_integer_dtype(dtype): - # this will raise if we have e.g. floats - - subarr = maybe_cast_to_integer_array(arr, dtype) - else: - # 4 tests fail if we move this to a try/except/else; see - # test_constructor_compound_dtypes, test_constructor_cast_failure - # test_constructor_dict_cast2, test_loc_setitem_dtype - subarr = np.array(arr, dtype=dtype, copy=copy) - - except (ValueError, TypeError): - if raise_cast_failure: - raise - else: - # we only get here with raise_cast_failure False, which means - # called via the DataFrame constructor - # GH#24435 - warnings.warn( - f"Could not cast to {dtype}, falling back to object. This " - "behavior is deprecated. In a future version, when a dtype is " - "passed to 'DataFrame', either all columns will be cast to that " - "dtype, or a TypeError will be raised.", - FutureWarning, - stacklevel=find_stack_level(), - ) - subarr = np.array(arr, dtype=object, copy=copy) - return subarr - - -def is_empty_data(data: Any) -> bool: - """ - Utility to check if a Series is instantiated with empty data, - which does not contain dtype information. - - Parameters - ---------- - data : array-like, Iterable, dict, or scalar value - Contains data stored in Series. - - Returns - ------- - bool - """ - is_none = data is None - is_list_like_without_dtype = is_list_like(data) and not hasattr(data, "dtype") - is_simple_empty = is_list_like_without_dtype and not data - return is_none or is_simple_empty + # GH#15832: Check if we are requesting a numeric dtype and + # that we can convert the data to the requested dtype. + elif is_integer_dtype(dtype): + # this will raise if we have e.g. floats + subarr = maybe_cast_to_integer_array(arr, dtype) + else: + subarr = np.array(arr, dtype=dtype, copy=copy) -def create_series_with_explicit_dtype( - data: Any = None, - index: ArrayLike | Index | None = None, - dtype: Dtype | None = None, - name: str | None = None, - copy: bool = False, - fastpath: bool = False, - dtype_if_empty: Dtype = object, -) -> Series: - """ - Helper to pass an explicit dtype when instantiating an empty Series. - - This silences a DeprecationWarning described in GitHub-17261. 
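`_try_cast` now routes integer requests straight through `maybe_cast_to_integer_array`, which only permits lossless casts; the object-array fallback for the DataFrame constructor is gone. Sketch:

>>> import pandas as pd
>>> pd.Series([1.0, 2.0], dtype="int64")   # lossless float-to-int: OK
>>> pd.Series([1.5, 2.5], dtype="int64")   # ValueError: cast would be lossy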
- - Parameters - ---------- - data : Mirrored from Series.__init__ - index : Mirrored from Series.__init__ - dtype : Mirrored from Series.__init__ - name : Mirrored from Series.__init__ - copy : Mirrored from Series.__init__ - fastpath : Mirrored from Series.__init__ - dtype_if_empty : str, numpy.dtype, or ExtensionDtype - This dtype will be passed explicitly if an empty Series will - be instantiated. - - Returns - ------- - Series - """ - from pandas.core.series import Series - - if is_empty_data(data) and dtype is None: - dtype = dtype_if_empty - return Series( - data=data, index=index, dtype=dtype, name=name, copy=copy, fastpath=fastpath - ) + return subarr diff --git a/pandas/core/describe.py b/pandas/core/describe.py index ce2fa950e6e62..33afbfe6489a6 100644 --- a/pandas/core/describe.py +++ b/pandas/core/describe.py @@ -17,7 +17,6 @@ Sequence, cast, ) -import warnings import numpy as np @@ -27,7 +26,6 @@ NDFrameT, npt, ) -from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_percentile from pandas.core.dtypes.common import ( @@ -56,7 +54,6 @@ def describe_ndframe( obj: NDFrameT, include: str | Sequence[str] | None, exclude: str | Sequence[str] | None, - datetime_is_numeric: bool, percentiles: Sequence[float] | np.ndarray | None, ) -> NDFrameT: """Describe series or dataframe. @@ -71,8 +68,6 @@ def describe_ndframe( A white list of data types to include in the result. Ignored for ``Series``. exclude : list-like of dtypes or None (default), optional, A black list of data types to omit from the result. Ignored for ``Series``. - datetime_is_numeric : bool, default False - Whether to treat datetime dtypes as numeric. percentiles : list-like of numbers, optional The percentiles to include in the output. All should fall between 0 and 1. The default is ``[.25, .5, .75]``, which returns the 25th, 50th, and @@ -88,14 +83,12 @@ def describe_ndframe( if obj.ndim == 1: describer = SeriesDescriber( obj=cast("Series", obj), - datetime_is_numeric=datetime_is_numeric, ) else: describer = DataFrameDescriber( obj=cast("DataFrame", obj), include=include, exclude=exclude, - datetime_is_numeric=datetime_is_numeric, ) result = describer.describe(percentiles=percentiles) @@ -109,13 +102,10 @@ class NDFrameDescriberAbstract(ABC): ---------- obj : Series or DataFrame Object to be described. - datetime_is_numeric : bool - Whether to treat datetime dtypes as numeric. """ - def __init__(self, obj: DataFrame | Series, datetime_is_numeric: bool) -> None: + def __init__(self, obj: DataFrame | Series) -> None: self.obj = obj - self.datetime_is_numeric = datetime_is_numeric @abstractmethod def describe(self, percentiles: Sequence[float] | np.ndarray) -> DataFrame | Series: @@ -136,7 +126,6 @@ class SeriesDescriber(NDFrameDescriberAbstract): def describe(self, percentiles: Sequence[float] | np.ndarray) -> Series: describe_func = select_describe_func( self.obj, - self.datetime_is_numeric, ) return describe_func(self.obj, percentiles) @@ -152,8 +141,6 @@ class DataFrameDescriber(NDFrameDescriberAbstract): A white list of data types to include in the result. exclude : list-like of dtypes or None A black list of data types to omit from the result. - datetime_is_numeric : bool - Whether to treat datetime dtypes as numeric. 
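With `datetime_is_numeric` removed throughout `describe`, datetime data is always summarized numerically and the keyword itself is rejected. Sketch:

>>> import pandas as pd
>>> ser = pd.Series(pd.date_range("2000-01-01", periods=3))
>>> ser.describe()                          # count/mean/min/.../max, no warning
>>> ser.describe(datetime_is_numeric=True)  # TypeError: keyword no longer exists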
""" def __init__( @@ -162,7 +149,6 @@ def __init__( *, include: str | Sequence[str] | None, exclude: str | Sequence[str] | None, - datetime_is_numeric: bool, ) -> None: self.include = include self.exclude = exclude @@ -170,14 +156,14 @@ def __init__( if obj.ndim == 2 and obj.columns.size == 0: raise ValueError("Cannot describe a DataFrame without columns") - super().__init__(obj, datetime_is_numeric=datetime_is_numeric) + super().__init__(obj) def describe(self, percentiles: Sequence[float] | np.ndarray) -> DataFrame: data = self._select_data() ldesc: list[Series] = [] for _, series in data.items(): - describe_func = select_describe_func(series, self.datetime_is_numeric) + describe_func = select_describe_func(series) ldesc.append(describe_func(series, percentiles)) col_names = reorder_columns(ldesc) @@ -193,9 +179,7 @@ def _select_data(self): """Select columns to be described.""" if (self.include is None) and (self.exclude is None): # when some numerics are found, keep only numerics - default_include: list[npt.DTypeLike] = [np.number] - if self.datetime_is_numeric: - default_include.append("datetime") + default_include: list[npt.DTypeLike] = [np.number, "datetime"] data = self.obj.select_dtypes(include=default_include) if len(data.columns) == 0: data = self.obj @@ -360,7 +344,6 @@ def describe_timestamp_1d(data: Series, percentiles: Sequence[float]) -> Series: def select_describe_func( data: Series, - datetime_is_numeric: bool, ) -> Callable: """Select proper function for describing series based on data type. @@ -368,26 +351,13 @@ def select_describe_func( ---------- data : Series Series to be described. - datetime_is_numeric : bool - Whether to treat datetime dtypes as numeric. """ if is_bool_dtype(data.dtype): return describe_categorical_1d elif is_numeric_dtype(data): return describe_numeric_1d elif is_datetime64_any_dtype(data.dtype): - if datetime_is_numeric: - return describe_timestamp_1d - else: - warnings.warn( - "Treating datetime data as categorical rather than numeric in " - "`.describe` is deprecated and will be removed in a future " - "version of pandas. Specify `datetime_is_numeric=True` to " - "silence this warning and adopt the future behavior now.", - FutureWarning, - stacklevel=find_stack_level(), - ) - return describe_timestamp_as_categorical_1d + return describe_timestamp_1d elif is_timedelta64_dtype(data.dtype): return describe_numeric_1d else: diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index 718badc2e4085..4dd49ec6b64bb 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -13,11 +13,6 @@ import numpy as np from pandas._libs import lib -from pandas._libs.tslibs import ( - get_unit_from_dtype, - is_supported_unit, - is_unitless, -) from pandas._libs.tslibs.timedeltas import array_to_timedelta64 from pandas._typing import ( ArrayLike, @@ -129,15 +124,12 @@ def astype_nansafe( return arr.view(dtype) elif dtype.kind == "m": - # TODO(2.0): change to use the same logic as TDA.astype, i.e. - # giving the requested dtype for supported units (s, ms, us, ns) + # give the requested dtype for supported units (s, ms, us, ns) # and doing the old convert-to-float behavior otherwise. 
- if is_supported_unit(get_unit_from_dtype(arr.dtype)): - from pandas.core.construction import ensure_wrapped_if_datetimelike + from pandas.core.construction import ensure_wrapped_if_datetimelike - arr = ensure_wrapped_if_datetimelike(arr) - return arr.astype(dtype, copy=copy) - return astype_td64_unit_conversion(arr, dtype, copy=copy) + arr = ensure_wrapped_if_datetimelike(arr) + return arr.astype(dtype, copy=copy) raise TypeError(f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]") @@ -223,6 +215,7 @@ def astype_array(values: ArrayLike, dtype: DtypeObj, copy: bool = False) -> Arra # Series.astype behavior pre-2.0 did # values.tz_localize("UTC").tz_convert(dtype.tz) # which did not match the DTA/DTI behavior. + # We special-case here to give a Series-specific exception message. raise TypeError( "Cannot use .astype to convert from timezone-naive dtype to " "timezone-aware dtype. Use ser.dt.tz_localize instead." @@ -292,20 +285,6 @@ def astype_array_safe( # Ensure we don't end up with a PandasArray dtype = dtype.numpy_dtype - if ( - is_datetime64_dtype(values.dtype) - # need to do np.dtype check instead of is_datetime64_dtype - # otherwise pyright complains - and isinstance(dtype, np.dtype) - and dtype.kind == "M" - and not is_unitless(dtype) - and not is_dtype_equal(dtype, values.dtype) - and not is_supported_unit(get_unit_from_dtype(dtype)) - ): - # Supported units we handle in DatetimeArray.astype; but that raises - # on non-supported units, so we handle that here. - return np.asarray(values).astype(dtype) - try: new_values = astype_array(values, dtype, copy=copy) except (ValueError, TypeError): @@ -317,36 +296,3 @@ def astype_array_safe( raise return new_values - - -def astype_td64_unit_conversion( - values: np.ndarray, dtype: np.dtype, copy: bool -) -> np.ndarray: - """ - By pandas convention, converting to non-nano timedelta64 - returns an int64-dtyped array with ints representing multiples - of the desired timedelta unit. This is essentially division. 
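The Series-specific message noted above makes the naive-to-aware `astype` failure explicit; localize instead. Sketch:

>>> import pandas as pd
>>> ser = pd.Series(pd.date_range("2000-01-01", periods=2))
>>> ser.astype("datetime64[ns, UTC]")   # TypeError: use ser.dt.tz_localize instead
>>> ser.dt.tz_localize("UTC")           # the supported spelling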
- - Parameters - ---------- - values : np.ndarray[timedelta64[ns]] - dtype : np.dtype - timedelta64 with unit not-necessarily nano - copy : bool - - Returns - ------- - np.ndarray - """ - if is_dtype_equal(values.dtype, dtype): - if copy: - return values.copy() - return values - - # otherwise we are converting to non-nano - result = values.astype(dtype, copy=False) # avoid double-copying - result = result.astype(np.float64) - - mask = isna(values) - np.putmask(result, mask, np.nan) - return result diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 9830d22f3e2e5..793f407b78714 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -20,7 +20,6 @@ ) import warnings -from dateutil.parser import ParserError import numpy as np from pandas._libs import lib @@ -42,6 +41,7 @@ Dtype, DtypeObj, Scalar, + npt, ) from pandas.errors import ( IntCastingNaNError, @@ -65,7 +65,6 @@ is_complex, is_complex_dtype, is_datetime64_dtype, - is_datetime64tz_dtype, is_dtype_equal, is_extension_array_dtype, is_float, @@ -496,7 +495,7 @@ def maybe_cast_to_extension_array( try: result = cls._from_sequence(obj, dtype=dtype) - except Exception: + except Exception: # pylint: disable=broad-except # We can't predict what downstream EA constructors may raise result = obj return result @@ -1201,7 +1200,7 @@ def convert_dtypes( def maybe_infer_to_datetimelike( - value: np.ndarray, + value: npt.NDArray[np.object_], ) -> np.ndarray | DatetimeArray | TimedeltaArray | PeriodArray | IntervalArray: """ we might have a array (or single object) that is datetime like, @@ -1226,101 +1225,52 @@ def maybe_infer_to_datetimelike( v = np.array(value, copy=False) - shape = v.shape if v.ndim != 1: v = v.ravel() if not len(v): return value - def try_datetime(v: np.ndarray) -> ArrayLike: - # Coerce to datetime64, datetime64tz, or in corner cases - # object[datetimes] - from pandas.core.arrays.datetimes import sequence_to_datetimes - - try: - # GH#19671 we pass require_iso8601 to be relatively strict - # when parsing strings. - dta = sequence_to_datetimes(v, require_iso8601=True) - except (ValueError, TypeError): - # e.g. is not convertible to datetime - return v.reshape(shape) - else: - # GH#19761 we may have mixed timezones, in which cast 'dta' is - # an ndarray[object]. Only 1 test - # relies on this behavior, see GH#40111 - return dta.reshape(shape) - - def try_timedelta(v: np.ndarray) -> np.ndarray: - # safe coerce to timedelta64 + inferred_type = lib.infer_datetimelike_array(ensure_object(v)) - # will try first with a string & object conversion - try: - # bc we know v.dtype == object, this is equivalent to - # `np.asarray(to_timedelta(v))`, but using a lower-level API that - # does not require a circular import. 
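`maybe_infer_to_datetimelike` still upgrades object arrays of datetimelike scalars; only the ad-hoc string-parsing helpers are being removed below. A sketch of the inference path that keeps working:

>>> import numpy as np
>>> import pandas as pd
>>> vals = np.array([pd.Timestamp("2000-01-01"), pd.NaT], dtype=object)
>>> pd.Series(vals).dtype   # datetime64[ns], inferred from the scalars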
- td_values = array_to_timedelta64(v).view("m8[ns]") - except (ValueError, OverflowError): - return v.reshape(shape) - else: - return td_values.reshape(shape) - - inferred_type, seen_str = lib.infer_datetimelike_array(ensure_object(v)) - if inferred_type in ["period", "interval"]: + if inferred_type in ["period", "interval", "timedelta", "datetime"]: # Incompatible return value type (got "Union[ExtensionArray, ndarray]", # expected "Union[ndarray, DatetimeArray, TimedeltaArray, PeriodArray, # IntervalArray]") return lib.maybe_convert_objects( # type: ignore[return-value] - v, convert_period=True, convert_interval=True + v, + convert_period=True, + convert_interval=True, + convert_timedelta=True, + convert_datetime=True, + dtype_if_all_nat=np.dtype("M8[ns]"), ) - if inferred_type == "datetime": - # error: Incompatible types in assignment (expression has type "ExtensionArray", - # variable has type "Union[ndarray, List[Any]]") - value = try_datetime(v) # type: ignore[assignment] - elif inferred_type == "timedelta": - value = try_timedelta(v) elif inferred_type == "nat": - # if all NaT, return as datetime - if isna(v).all(): - # error: Incompatible types in assignment (expression has type - # "ExtensionArray", variable has type "Union[ndarray, List[Any]]") - value = try_datetime(v) # type: ignore[assignment] - else: - - # We have at least a NaT and a string - # try timedelta first to avoid spurious datetime conversions - # e.g. '00:00:01' is a timedelta but technically is also a datetime - value = try_timedelta(v) - if lib.infer_dtype(value, skipna=False) in ["mixed"]: - # cannot skip missing values, as NaT implies that the string - # is actually a datetime + # only reached if we have at least 1 NaT and the rest (NaT or None or np.nan) + # This is slightly different from what we'd get with maybe_convert_objects, + # which only converts of all-NaT + from pandas.core.arrays.datetimes import sequence_to_datetimes - # error: Incompatible types in assignment (expression has type - # "ExtensionArray", variable has type "Union[ndarray, List[Any]]") - value = try_datetime(v) # type: ignore[assignment] + # Incompatible types in assignment (expression has type "DatetimeArray", + # variable has type "ndarray[Any, Any]") + value = sequence_to_datetimes(v) # type: ignore[assignment] + assert value.dtype == "M8[ns]" - if value.dtype.kind in ["m", "M"] and seen_str: - # TODO(2.0): enforcing this deprecation should close GH#40111 - warnings.warn( - f"Inferring {value.dtype} from data containing strings is deprecated " - "and will be removed in a future version. To retain the old behavior " - f"explicitly pass Series(data, dtype={value.dtype})", - FutureWarning, - stacklevel=find_stack_level(), - ) return value def maybe_cast_to_datetime( - value: ExtensionArray | np.ndarray | list, dtype: DtypeObj | None + value: ExtensionArray | np.ndarray | list, dtype: np.dtype | None ) -> ExtensionArray | np.ndarray: """ try to cast the array/value to a datetimelike dtype, converting float nan to iNaT We allow a list *only* when dtype is not None. + + Caller is responsible for handling ExtensionDtype cases. 
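Dropping the `seen_str` warning enforces GH#40111: strings mixed into otherwise-datetimelike object data are no longer parsed during dtype inference; pass an explicit dtype to parse. Sketch under that assumption:

>>> import pandas as pd
>>> pd.Series([pd.Timestamp("2000-01-01"), "2000-01-02"]).dtype       # object now
>>> pd.Series(["2000-01-01", "2000-01-02"], dtype="datetime64[ns]")   # explicit parse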
""" from pandas.core.arrays.datetimes import sequence_to_datetimes from pandas.core.arrays.timedeltas import TimedeltaArray @@ -1332,18 +1282,22 @@ def maybe_cast_to_datetime( # TODO: _from_sequence would raise ValueError in cases where # _ensure_nanosecond_dtype raises TypeError dtype = cast(np.dtype, dtype) - dtype = _ensure_nanosecond_dtype(dtype) + # Incompatible types in assignment (expression has type "Union[dtype[Any], + # ExtensionDtype]", variable has type "Optional[dtype[Any]]") + dtype = _ensure_nanosecond_dtype(dtype) # type: ignore[assignment] res = TimedeltaArray._from_sequence(value, dtype=dtype) return res if dtype is not None: is_datetime64 = is_datetime64_dtype(dtype) - is_datetime64tz = is_datetime64tz_dtype(dtype) vdtype = getattr(value, "dtype", None) - if is_datetime64 or is_datetime64tz: - dtype = _ensure_nanosecond_dtype(dtype) + if is_datetime64: + # Incompatible types in assignment (expression has type + # "Union[dtype[Any], ExtensionDtype]", variable has type + # "Optional[dtype[Any]]") + dtype = _ensure_nanosecond_dtype(dtype) # type: ignore[assignment] value = np.array(value, copy=False) @@ -1351,65 +1305,21 @@ def maybe_cast_to_datetime( if value.size or not is_dtype_equal(value.dtype, dtype): _disallow_mismatched_datetimelike(value, dtype) - try: - if is_datetime64: - dta = sequence_to_datetimes(value) - # GH 25843: Remove tz information since the dtype - # didn't specify one - - if dta.tz is not None: - raise ValueError( - "Cannot convert timezone-aware data to " - "timezone-naive dtype. Use " - "pd.Series(values).dt.tz_localize(None) instead." - ) - - # TODO(2.0): Do this astype in sequence_to_datetimes to - # avoid potential extra copy? - dta = dta.astype(dtype, copy=False) - value = dta - elif is_datetime64tz: - dtype = cast(DatetimeTZDtype, dtype) - # The string check can be removed once issue #13712 - # is solved. String data that is passed with a - # datetime64tz is assumed to be naive which should - # be localized to the timezone. - is_dt_string = is_string_dtype(value.dtype) - dta = sequence_to_datetimes(value) - if dta.tz is not None: - value = dta.astype(dtype, copy=False) - elif is_dt_string: - # Strings here are naive, so directly localize - # equiv: dta.astype(dtype) # though deprecated - - value = dta.tz_localize(dtype.tz) - else: - # Numeric values are UTC at this point, - # so localize and convert - # equiv: Series(dta).astype(dtype) # though deprecated - if getattr(vdtype, "kind", None) == "M": - # GH#24559, GH#33401 deprecate behavior inconsistent - # with DatetimeArray/DatetimeIndex - warnings.warn( - "In a future version, constructing a Series " - "from datetime64[ns] data and a " - "DatetimeTZDtype will interpret the data " - "as wall-times instead of " - "UTC times, matching the behavior of " - "DatetimeIndex. To treat the data as UTC " - "times, use pd.Series(data).dt" - ".tz_localize('UTC').tz_convert(dtype.tz) " - "or pd.Series(data.view('int64'), dtype=dtype)", - FutureWarning, - stacklevel=find_stack_level(), - ) - - value = dta.tz_localize("UTC").tz_convert(dtype.tz) - except OutOfBoundsDatetime: - raise - except ParserError: - # Note: this is dateutil's ParserError, not ours. - pass + dta = sequence_to_datetimes(value) + # GH 25843: Remove tz information since the dtype + # didn't specify one + + if dta.tz is not None: + raise ValueError( + "Cannot convert timezone-aware data to " + "timezone-naive dtype. Use " + "pd.Series(values).dt.tz_localize(None) instead." 
+ ) + + # TODO(2.0): Do this astype in sequence_to_datetimes to + # avoid potential extra copy? + dta = dta.astype(dtype, copy=False) + value = dta elif getattr(vdtype, "kind", None) in ["m", "M"]: # we are already datetimelike and want to coerce to non-datetimelike; @@ -1504,7 +1414,7 @@ def _ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj: # TODO: other value-dependent functions to standardize here include -# dtypes.concat.cast_to_common_type and Index._find_common_type_compat +# Index._find_common_type_compat def find_result_type(left: ArrayLike, right: Any) -> DtypeObj: """ Find the type/dtype for a the result of an operation between these objects. @@ -1956,7 +1866,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: return element raise LossySetitemError - elif is_integer(element) or (is_float(element) and element.is_integer()): + if is_integer(element) or (is_float(element) and element.is_integer()): # e.g. test_setitem_series_int8 if we have a python int 1 # tipo may be np.int32, despite the fact that it will fit # in smaller int dtypes. @@ -1983,7 +1893,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: # Anything other than integer we cannot hold raise LossySetitemError - elif ( + if ( dtype.kind == "u" and isinstance(element, np.ndarray) and element.dtype.kind == "i" @@ -1995,9 +1905,9 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: # itemsize issues there? return casted raise LossySetitemError - elif dtype.itemsize < tipo.itemsize: + if dtype.itemsize < tipo.itemsize: raise LossySetitemError - elif not isinstance(tipo, np.dtype): + if not isinstance(tipo, np.dtype): # i.e. nullable IntegerDtype; we can put this into an ndarray # losslessly iff it has no NAs if element._hasna: @@ -2008,7 +1918,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: raise LossySetitemError - elif dtype.kind == "f": + if dtype.kind == "f": if lib.is_integer(element) or lib.is_float(element): casted = dtype.type(element) if np.isnan(casted) or casted == element: @@ -2021,7 +1931,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: if tipo.kind not in ["f", "i", "u"]: # Anything other than float/integer we cannot hold raise LossySetitemError - elif not isinstance(tipo, np.dtype): + if not isinstance(tipo, np.dtype): # i.e. nullable IntegerDtype or FloatingDtype; # we can put this into an ndarray losslessly iff it has no NAs if element._hasna: @@ -2040,7 +1950,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: raise LossySetitemError - elif dtype.kind == "c": + if dtype.kind == "c": if lib.is_integer(element) or lib.is_complex(element) or lib.is_float(element): if np.isnan(element): # see test_where_complex GH#6345 @@ -2058,7 +1968,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: raise LossySetitemError raise LossySetitemError - elif dtype.kind == "b": + if dtype.kind == "b": if tipo is not None: if tipo.kind == "b": if not isinstance(tipo, np.dtype): @@ -2072,7 +1982,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: return element raise LossySetitemError - elif dtype.kind == "S": + if dtype.kind == "S": # TODO: test tests.frame.methods.test_replace tests get here, # need more targeted tests. 
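The `ValueError` completed above is the companion rule: parsed data that carries a timezone cannot be stored under a naive dtype. A sketch, assuming string parsing yields tz-aware values here:

>>> import pandas as pd
>>> pd.Series(["2000-01-01 00:00:00+00:00"], dtype="datetime64[ns]")
... # ValueError: Cannot convert timezone-aware data to timezone-naive dtype
>>> pd.Series(pd.to_datetime(["2000-01-01 00:00:00+00:00"]).tz_localize(None),
...           dtype="datetime64[ns]")   # drop the tz explicitly first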
xref phofl has a PR about this if tipo is not None: diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 3c2aa1f6bab5d..e9d3721bbb5f5 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -500,6 +500,9 @@ def is_string_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of the string dtype. + If an array is passed with an object dtype, the elements must be + inferred as strings. + Parameters ---------- arr_or_dtype : array-like or dtype @@ -518,21 +521,23 @@ def is_string_dtype(arr_or_dtype) -> bool: True >>> is_string_dtype(int) False - >>> >>> is_string_dtype(np.array(['a', 'b'])) True >>> is_string_dtype(pd.Series([1, 2])) False + >>> is_string_dtype(pd.Series([1, 2], dtype=object)) + False """ - # TODO: gh-15585: consider making the checks stricter. - def condition(dtype) -> bool: - return dtype.kind in ("O", "S", "U") and not is_excluded_dtype(dtype) + if hasattr(arr_or_dtype, "dtype") and get_dtype(arr_or_dtype).kind == "O": + return is_all_strings(arr_or_dtype) - def is_excluded_dtype(dtype) -> bool: - """ - These have kind = "O" but aren't string dtypes so need to be explicitly excluded - """ - return isinstance(dtype, (PeriodDtype, IntervalDtype, CategoricalDtype)) + def condition(dtype) -> bool: + if is_string_or_object_np_dtype(dtype): + return True + try: + return dtype == "string" + except TypeError: + return False return _is_dtype(arr_or_dtype, condition) @@ -1466,7 +1471,7 @@ def get_dtype(arr_or_dtype) -> DtypeObj: raise TypeError("Cannot deduce dtype from null object") # fastpath - elif isinstance(arr_or_dtype, np.dtype): + if isinstance(arr_or_dtype, np.dtype): return arr_or_dtype elif isinstance(arr_or_dtype, type): return np.dtype(arr_or_dtype) @@ -1634,8 +1639,7 @@ def validate_all_hashable(*args, error_name: str | None = None) -> None: if not all(is_hashable(arg) for arg in args): if error_name: raise TypeError(f"{error_name} must be a hashable type") - else: - raise TypeError("All elements must be hashable") + raise TypeError("All elements must be hashable") def pandas_dtype(dtype) -> DtypeObj: diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 5b11945a8589e..03429fd9fee1d 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -3,19 +3,12 @@ """ from __future__ import annotations -from typing import ( - TYPE_CHECKING, - cast, -) +from typing import TYPE_CHECKING import warnings import numpy as np -from pandas._typing import ( - ArrayLike, - AxisInt, - DtypeObj, -) +from pandas._typing import AxisInt from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.astype import astype_array @@ -23,10 +16,7 @@ common_dtype_categorical_compat, find_common_type, ) -from pandas.core.dtypes.common import ( - is_dtype_equal, - is_sparse, -) +from pandas.core.dtypes.common import is_dtype_equal from pandas.core.dtypes.dtypes import ( DatetimeTZDtype, ExtensionDtype, @@ -39,34 +29,6 @@ if TYPE_CHECKING: from pandas.core.arrays import Categorical - from pandas.core.arrays.sparse import SparseArray - - -def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike: - """ - Helper function for `arr.astype(common_dtype)` but handling all special - cases. - """ - if is_dtype_equal(arr.dtype, dtype): - return arr - - if is_sparse(arr) and not is_sparse(dtype): - # TODO(2.0): remove special case once SparseArray.astype deprecation - # is enforced. 
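The tightened `is_string_dtype` treats object dtype as string only when the elements actually infer as strings, per the updated docstring above. Sketch:

>>> import pandas as pd
>>> from pandas.api.types import is_string_dtype
>>> is_string_dtype(pd.Series(["a", "b"]))            # True
>>> is_string_dtype(pd.Series([1, 2], dtype=object))  # False: not all strings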
- # problem case: SparseArray.astype(dtype) doesn't follow the specified - # dtype exactly, but converts this to Sparse[dtype] -> first manually - # convert to dense array - - # error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has incompatible type - # "Union[dtype[Any], ExtensionDtype]"; expected "Union[dtype[Any], None, type, _ - # SupportsDType[dtype[Any]], str, Union[Tuple[Any, int], Tuple[Any, - # Union[SupportsIndex, Sequence[SupportsIndex]]], List[Any], _DTypeDict, - # Tuple[Any, Any]]]" [arg-type] - arr = cast("SparseArray", arr) - return arr.to_dense().astype(dtype, copy=False) # type: ignore[arg-type] - - # astype_array includes ensure_wrapped_if_datetimelike - return astype_array(arr, dtype=dtype, copy=False) def concat_compat(to_concat, axis: AxisInt = 0, ea_compat_axis: bool = False): @@ -126,7 +88,9 @@ def is_nonempty(x) -> bool: if not single_dtype: target_dtype = find_common_type([x.dtype for x in to_concat]) target_dtype = common_dtype_categorical_compat(to_concat, target_dtype) - to_concat = [cast_to_common_type(arr, target_dtype) for arr in to_concat] + to_concat = [ + astype_array(arr, target_dtype, copy=False) for arr in to_concat + ] if isinstance(to_concat[0], ABCExtensionArray): # TODO: what about EA-backed Index? @@ -320,8 +284,7 @@ def _maybe_unwrap(x): if all(c.ordered for c in to_union): msg = "to union ordered Categoricals, all categories must be the same" raise TypeError(msg) - else: - raise TypeError("Categorical.ordered must be the same") + raise TypeError("Categorical.ordered must be the same") if ignore_order: ordered = False diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index cd1753bc8fec1..ba63ba2638c2d 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -116,8 +116,6 @@ class CategoricalDtypeType(type): the type of CategoricalDtype, this metaclass determines subclass ability """ - pass - @register_extension_dtype class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): @@ -525,7 +523,7 @@ def validate_categories(categories, fastpath: bool = False) -> Index: raise TypeError( f"Parameter 'categories' must be list-like, was {repr(categories)}" ) - elif not isinstance(categories, ABCIndex): + if not isinstance(categories, ABCIndex): categories = Index._with_infer(categories, tupleize_cols=False) if not fastpath: @@ -986,10 +984,7 @@ def is_dtype(cls, dtype: object) -> bool: # but doesn't regard freq str like "U" as dtype. 
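With ``cast_to_common_type`` deleted above, ``concat_compat`` now casts each array through ``astype_array`` after computing a common dtype. A minimal sketch of the (unchanged) end result, using the internal helper and its assumed import path:

>>> import numpy as np
>>> from pandas.core.dtypes.concat import concat_compat
>>> concat_compat([np.array([1, 2]), np.array([1.5])])
array([1. , 2. , 1.5])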
if dtype.startswith("period[") or dtype.startswith("Period["): try: - if cls._parse_dtype_strict(dtype) is not None: - return True - else: - return False + return cls._parse_dtype_strict(dtype) is not None except ValueError: return False else: @@ -1254,10 +1249,7 @@ def is_dtype(cls, dtype: object) -> bool: if isinstance(dtype, str): if dtype.lower().startswith("interval"): try: - if cls.construct_from_string(dtype) is not None: - return True - else: - return False + return cls.construct_from_string(dtype) is not None except (ValueError, TypeError): return False else: diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index 893e4a9be58ef..de240a39e2951 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -6,13 +6,10 @@ from numbers import Number import re from typing import Pattern -import warnings import numpy as np from pandas._libs import lib -from pandas._typing import ArrayLike -from pandas.util._exceptions import find_stack_level is_bool = lib.is_bool @@ -425,42 +422,3 @@ def is_dataclass(item): return is_dataclass(item) and not isinstance(item, type) except ImportError: return False - - -def is_inferred_bool_dtype(arr: ArrayLike) -> bool: - """ - Check if this is a ndarray[bool] or an ndarray[object] of bool objects. - - Parameters - ---------- - arr : np.ndarray or ExtensionArray - - Returns - ------- - bool - - Notes - ----- - This does not include the special treatment is_bool_dtype uses for - Categorical. - """ - if not isinstance(arr, np.ndarray): - return False - - dtype = arr.dtype - if dtype == np.dtype(bool): - return True - elif dtype == np.dtype("object"): - result = lib.is_bool_array(arr) - if result: - # GH#46188 - warnings.warn( - "In a future version, object-dtype columns with all-bool values " - "will not be included in reductions with bool_only=True. " - "Explicitly cast to bool dtype instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - return result - - return False diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cc2bca1bcece6..a8631f42fb2d6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -33,7 +33,7 @@ import warnings import numpy as np -import numpy.ma as ma +from numpy import ma from pandas._config import get_option @@ -95,7 +95,6 @@ from pandas.util._decorators import ( Appender, Substitution, - deprecate_kwarg, deprecate_nonkeyword_arguments, doc, rewrite_axis_style_signature, @@ -124,7 +123,6 @@ is_1d_only_ea_dtype, is_bool_dtype, is_dataclass, - is_datetime64_any_dtype, is_dict_like, is_dtype_equal, is_extension_array_dtype, @@ -189,8 +187,7 @@ ) from pandas.core.indexing import ( check_bool_indexer, - check_deprecated_indexers, - convert_to_index_sliceable, + check_dict_or_set_indexers, ) from pandas.core.internals import ( ArrayManager, @@ -490,7 +487,9 @@ class DataFrame(NDFrame, OpsMixin): data : ndarray (structured or homogeneous), Iterable, dict, or DataFrame Dict can contain Series, arrays, constants, dataclass or list-like objects. If data is a dict, column order follows insertion-order. If a dict contains Series - which have an index defined, it is aligned by its index. + which have an index defined, it is aligned by its index. This alignment also + occurs if data is a Series or a DataFrame itself. Alignment is done on + Series/DataFrame inputs. .. versionchanged:: 0.25.0 If data is a list of dicts, column order follows insertion-order. 
@@ -592,6 +591,22 @@ class DataFrame(NDFrame, OpsMixin): 0 0 0 1 0 3 2 2 3 + + Constructing DataFrame from Series/DataFrame: + + >>> ser = pd.Series([1, 2, 3], index=["a", "b", "c"]) + >>> df = pd.DataFrame(data=ser, index=["a", "c"]) + >>> df + 0 + a 1 + c 3 + + >>> df1 = pd.DataFrame([1, 2, 3], index=["a", "b", "c"], columns=["x"]) + >>> df2 = pd.DataFrame(data=df1, index=["a", "c"]) + >>> df2 + x + a 1 + c 3 """ _internal_names_set = {"columns", "index"} | NDFrame._internal_names_set @@ -667,7 +682,7 @@ def __init__( # GH#38939 de facto copy defaults to False only in non-dict cases mgr = dict_to_mgr(data, index, columns, dtype=dtype, copy=copy, typ=manager) elif isinstance(data, ma.MaskedArray): - import numpy.ma.mrecords as mrecords + from numpy.ma import mrecords # masked recarray if isinstance(data, mrecords.MaskedRecords): @@ -1644,7 +1659,7 @@ def from_dict( 'tight' as an allowed value for the ``orient`` argument dtype : dtype, default None - Data type to force, otherwise infer. + Data type to force after DataFrame construction, otherwise infer. columns : list, default None Column labels to use when ``orient='index'``. Raises a ValueError if used with ``orient='columns'`` or ``orient='tight'``. @@ -1855,9 +1870,6 @@ def to_dict( [{column -> value}, ... , {column -> value}] - 'index' : dict like {index -> {column -> value}} - Abbreviations are allowed. `s` indicates `series` and `sp` - indicates `split`. - .. versionadded:: 1.4.0 'tight' as an allowed value for the ``orient`` argument @@ -1948,36 +1960,6 @@ def to_dict( # variable has type "Literal['dict', 'list', 'series', 'split', 'tight', # 'records', 'index']") orient = orient.lower() # type: ignore[assignment] - # GH32515 - if orient.startswith(("d", "l", "s", "r", "i")) and orient not in { - "dict", - "list", - "series", - "split", - "records", - "index", - }: - warnings.warn( - "Using short name for 'orient' is deprecated. Only the " - "options: ('dict', list, 'series', 'split', 'records', 'index') " - "will be used in a future version. Use one of the above " - "to silence this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) - - if orient.startswith("d"): - orient = "dict" - elif orient.startswith("l"): - orient = "list" - elif orient.startswith("sp"): - orient = "split" - elif orient.startswith("s"): - orient = "series" - elif orient.startswith("r"): - orient = "records" - elif orient.startswith("i"): - orient = "index" if not index and orient not in ["split", "tight"]: raise ValueError( @@ -2583,7 +2565,6 @@ def _from_arrays( storage_options=_shared_docs["storage_options"], compression_options=_shared_docs["compression_options"] % "path", ) - @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") def to_stata( self, path: FilePath | WriteBuffer[bytes], @@ -2743,7 +2724,6 @@ def to_stata( ) writer.write_file() - @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") def to_feather(self, path: FilePath | WriteBuffer[bytes], **kwargs) -> None: """ Write a DataFrame to the binary Feather format. @@ -2853,7 +2833,6 @@ def to_parquet( ... 
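With the ``orient`` abbreviation handling removed above, only the full option names are accepted. A sketch of the enforced behavior (the error text comes from the existing fallback branch and is an assumption here):

>>> df = pd.DataFrame({"A": [1, 2]})
>>> df.to_dict(orient="list")
{'A': [1, 2]}
>>> df.to_dict(orient="l")
Traceback (most recent call last):
    ...
ValueError: orient 'l' not understood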
@doc(storage_options=_shared_docs["storage_options"]) - @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") def to_parquet( self, path: FilePath | WriteBuffer[bytes] | None = None, @@ -3417,17 +3396,7 @@ def info( max_cols: int | None = None, memory_usage: bool | str | None = None, show_counts: bool | None = None, - null_counts: bool | None = None, ) -> None: - if null_counts is not None: - if show_counts is not None: - raise ValueError("null_counts used with show_counts. Use show_counts.") - warnings.warn( - "null_counts is deprecated. Use show_counts instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - show_counts = null_counts info = DataFrameInfo( data=self, memory_usage=memory_usage, @@ -3733,7 +3702,7 @@ def _iter_column_arrays(self) -> Iterator[ArrayLike]: yield self._get_column_array(i) def __getitem__(self, key): - check_deprecated_indexers(key) + check_dict_or_set_indexers(key) key = lib.item_from_zerodim(key) key = com.apply_if_callable(key, self) @@ -3753,17 +3722,18 @@ def __getitem__(self, key): elif is_mi and self.columns.is_unique and key in self.columns: return self._getitem_multilevel(key) # Do we have a slicer (on rows)? - indexer = convert_to_index_sliceable(self, key) - if indexer is not None: + if isinstance(key, slice): + indexer = self.index._convert_slice_indexer( + key, kind="getitem", is_frame=True + ) if isinstance(indexer, np.ndarray): + # reachable with DatetimeIndex indexer = lib.maybe_indices_to_slice( indexer.astype(np.intp, copy=False), len(self) ) if isinstance(indexer, np.ndarray): # GH#43223 If we can not convert, use take return self.take(indexer, axis=0) - # either we have a slice or we have a string that can be converted - # to a slice for partial-string date indexing return self._slice(indexer, axis=0) # Do we have a (boolean) DataFrame? @@ -3933,11 +3903,9 @@ def __setitem__(self, key, value): key = com.apply_if_callable(key, self) # see if we can slice the rows - indexer = convert_to_index_sliceable(self, key) - if indexer is not None: - # either we have a slice or we have a string that can be converted - # to a slice for partial-string date indexing - return self._setitem_slice(indexer, value) + if isinstance(key, slice): + slc = self.index._convert_slice_indexer(key, kind="getitem", is_frame=True) + return self._setitem_slice(slc, value) if isinstance(key, DataFrame) or getattr(key, "ndim", None) == 2: self._setitem_frame(key, value) @@ -4900,69 +4868,6 @@ def _series(self): for idx, item in enumerate(self.columns) } - def lookup( - self, row_labels: Sequence[IndexLabel], col_labels: Sequence[IndexLabel] - ) -> np.ndarray: - """ - Label-based "fancy indexing" function for DataFrame. - - .. deprecated:: 1.2.0 - DataFrame.lookup is deprecated, - use pandas.factorize and NumPy indexing instead. - For further details see - :ref:`Looking up values by index/column labels `. - - Given equal-length arrays of row and column labels, return an - array of the values corresponding to each (row, col) pair. - - Parameters - ---------- - row_labels : sequence - The row labels to use for lookup. - col_labels : sequence - The column labels to use for lookup. - - Returns - ------- - numpy.ndarray - The found values. - """ - msg = ( - "The 'lookup' method is deprecated and will be " - "removed in a future version. " - "You can use DataFrame.melt and DataFrame.loc " - "as a substitute." 
- ) - warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) - - n = len(row_labels) - if n != len(col_labels): - raise ValueError("Row labels must have same size as column labels") - if not (self.index.is_unique and self.columns.is_unique): - # GH#33041 - raise ValueError("DataFrame.lookup requires unique index and columns") - - thresh = 1000 - if not self._is_mixed_type or n > thresh: - values = self.values - ridx = self.index.get_indexer(row_labels) - cidx = self.columns.get_indexer(col_labels) - if (ridx == -1).any(): - raise KeyError("One or more row labels was not found") - if (cidx == -1).any(): - raise KeyError("One or more column labels was not found") - flat_index = ridx * len(self.columns) + cidx - result = values.flat[flat_index] - else: - result = np.empty(n, dtype="O") - for i, (r, c) in enumerate(zip(row_labels, col_labels)): - result[i] = self._get_value(r, c) - - if is_object_dtype(result): - result = lib.maybe_convert_objects(result) - - return result - # ---------------------------------------------------------------------- # Reindexing and alignment @@ -5076,40 +4981,6 @@ def align( broadcast_axis=broadcast_axis, ) - @overload - def set_axis( - self, - labels, - *, - axis: Axis = ..., - inplace: Literal[False] | lib.NoDefault = ..., - copy: bool | lib.NoDefault = ..., - ) -> DataFrame: - ... - - @overload - def set_axis( - self, - labels, - *, - axis: Axis = ..., - inplace: Literal[True], - copy: bool | lib.NoDefault = ..., - ) -> None: - ... - - @overload - def set_axis( - self, - labels, - *, - axis: Axis = ..., - inplace: bool | lib.NoDefault = ..., - copy: bool | lib.NoDefault = ..., - ) -> DataFrame | None: - ... - - # error: Signature of "set_axis" incompatible with supertype "NDFrame" @Appender( """ Examples @@ -5153,10 +5024,9 @@ def set_axis( labels, *, axis: Axis = 0, - inplace: bool | lib.NoDefault = lib.no_default, - copy: bool | lib.NoDefault = lib.no_default, - ): - return super().set_axis(labels, axis=axis, inplace=inplace, copy=copy) + copy: bool = True, + ) -> DataFrame: + return super().set_axis(labels, axis=axis, copy=copy) @Substitution(**_shared_doc_kwargs) @Appender(NDFrame.reindex.__doc__) @@ -5221,12 +5091,10 @@ def drop( ) -> DataFrame | None: ... - # error: Signature of "drop" incompatible with supertype "NDFrame" - # github.com/python/mypy/issues/12387 - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) - def drop( # type: ignore[override] + def drop( self, labels: IndexLabel = None, + *, axis: Axis = 0, index: IndexLabel = None, columns: IndexLabel = None, @@ -5596,11 +5464,11 @@ def fillna( ... # error: Signature of "fillna" incompatible with supertype "NDFrame" - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "value"]) @doc(NDFrame.fillna, **_shared_doc_kwargs) def fillna( # type: ignore[override] self, value: Hashable | Mapping | Series | DataFrame = None, + *, method: FillnaOptions | None = None, axis: Axis | None = None, inplace: bool = False, @@ -5853,10 +5721,10 @@ def set_index( ) -> None: ... - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "keys"]) def set_index( self, keys, + *, drop: bool = True, append: bool = False, inplace: bool = False, @@ -6098,10 +5966,10 @@ def reset_index( ) -> DataFrame | None: ... 
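Alongside the ``DataFrame.lookup`` removal above, one equivalent spelling uses indexer arrays plus NumPy fancy indexing; a minimal sketch for unique row/column labels (not part of the diff):

>>> df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["x", "y"])
>>> rows = df.index.get_indexer(["x", "y"])
>>> cols = df.columns.get_indexer(["B", "A"])
>>> df.to_numpy()[rows, cols]
array([3, 2])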
- @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "level"]) def reset_index( self, level: IndexLabel = None, + *, drop: bool = False, inplace: bool = False, col_level: Hashable = 0, @@ -6394,9 +6262,9 @@ def dropna( ) -> None: ... - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) def dropna( self, + *, axis: Axis = 0, how: AnyAll | NoDefault = no_default, thresh: int | NoDefault = no_default, @@ -6556,10 +6424,10 @@ def dropna( self._update_inplace(result) return None - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "subset"]) def drop_duplicates( self, subset: Hashable | Sequence[Hashable] | None = None, + *, keep: DropKeep = "first", inplace: bool = False, ignore_index: bool = False, @@ -6964,10 +6832,9 @@ def sort_index( ) -> DataFrame | None: ... - # error: Signature of "sort_index" incompatible with supertype "NDFrame" - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) - def sort_index( # type: ignore[override] + def sort_index( self, + *, axis: Axis = 0, level: IndexLabel = None, ascending: bool | Sequence[bool] = True, @@ -8055,7 +7922,7 @@ def combine_first(self, other: DataFrame) -> DataFrame: 1 0.0 3.0 1.0 2 NaN 3.0 1.0 """ - import pandas.core.computation.expressions as expressions + from pandas.core.computation import expressions def combiner(x, y): mask = extract_array(isna(x)) @@ -8198,7 +8065,7 @@ def update( 1 2 500.0 2 3 6.0 """ - import pandas.core.computation.expressions as expressions + from pandas.core.computation import expressions # TODO: Support other joins if join != "left": # pragma: no cover @@ -8519,9 +8386,8 @@ def groupby( @Substitution("") @Appender(_shared_docs["pivot"]) - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) def pivot( - self, index=lib.NoDefault, columns=lib.NoDefault, values=lib.NoDefault + self, *, index=lib.NoDefault, columns=lib.NoDefault, values=lib.NoDefault ) -> DataFrame: from pandas.core.reshape.pivot import pivot @@ -8975,7 +8841,11 @@ def explode( 3 4 1 e """ if not self.columns.is_unique: - raise ValueError("columns must be unique") + duplicate_cols = self.columns[self.columns.duplicated()].tolist() + raise ValueError( + "DataFrame columns must be unique. " + + f"Duplicate columns: {duplicate_cols}" + ) columns: list[Hashable] if is_scalar(column) or isinstance(column, tuple): @@ -9625,118 +9495,6 @@ def infer(x): # ---------------------------------------------------------------------- # Merging / joining methods - def append( - self, - other, - ignore_index: bool = False, - verify_integrity: bool = False, - sort: bool = False, - ) -> DataFrame: - """ - Append rows of `other` to the end of caller, returning a new object. - - .. deprecated:: 1.4.0 - Use :func:`concat` instead. For further details see - :ref:`whatsnew_140.deprecations.frame_series_append` - - Columns in `other` that are not in the caller are added as new columns. - - Parameters - ---------- - other : DataFrame or Series/dict-like object, or list of these - The data to append. - ignore_index : bool, default False - If True, the resulting axis will be labeled 0, 1, …, n - 1. - verify_integrity : bool, default False - If True, raise ValueError on creating index with duplicates. - sort : bool, default False - Sort columns if the columns of `self` and `other` are not aligned. - - .. versionchanged:: 1.0.0 - - Changed to not sort by default. - - Returns - ------- - DataFrame - A new DataFrame consisting of the rows of caller and the rows of `other`. 
- - See Also - -------- - concat : General function to concatenate DataFrame or Series objects. - - Notes - ----- - If a list of dict/series is passed and the keys are all contained in - the DataFrame's index, the order of the columns in the resulting - DataFrame will be unchanged. - - Iteratively appending rows to a DataFrame can be more computationally - intensive than a single concatenate. A better solution is to append - those rows to a list and then concatenate the list with the original - DataFrame all at once. - - Examples - -------- - >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=list('AB'), index=['x', 'y']) - >>> df - A B - x 1 2 - y 3 4 - >>> df2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB'), index=['x', 'y']) - >>> df.append(df2) - A B - x 1 2 - y 3 4 - x 5 6 - y 7 8 - - With `ignore_index` set to True: - - >>> df.append(df2, ignore_index=True) - A B - 0 1 2 - 1 3 4 - 2 5 6 - 3 7 8 - - The following, while not recommended methods for generating DataFrames, - show two ways to generate a DataFrame from multiple data sources. - - Less efficient: - - >>> df = pd.DataFrame(columns=['A']) - >>> for i in range(5): - ... df = df.append({'A': i}, ignore_index=True) - >>> df - A - 0 0 - 1 1 - 2 2 - 3 3 - 4 4 - - More efficient: - - >>> pd.concat([pd.DataFrame([i], columns=['A']) for i in range(5)], - ... ignore_index=True) - A - 0 0 - 1 1 - 2 2 - 3 3 - 4 4 - """ - warnings.warn( - "The frame.append method is deprecated " - "and will be removed from pandas in a future version. " - "Use pandas.concat instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - - return self._append(other, ignore_index, verify_integrity, sort) - def _append( self, other, @@ -10755,29 +10513,6 @@ def _reduce( assert filter_type is None or filter_type == "bool", filter_type out_dtype = "bool" if filter_type == "bool" else None - if numeric_only is None and name in ["mean", "median"]: - own_dtypes = [arr.dtype for arr in self._mgr.arrays] - - dtype_is_dt = np.array( - [is_datetime64_any_dtype(dtype) for dtype in own_dtypes], - dtype=bool, - ) - if dtype_is_dt.any(): - warnings.warn( - "DataFrame.mean and DataFrame.median with numeric_only=None " - "will include datetime64 and datetime64tz columns in a " - "future version.", - FutureWarning, - stacklevel=find_stack_level(), - ) - # Non-copy equivalent to - # dt64_cols = self.dtypes.apply(is_datetime64_any_dtype) - # cols = self.columns[~dt64_cols] - # self = self[cols] - predicate = lambda x: not is_datetime64_any_dtype(x.dtype) - mgr = self._mgr._get_data_subset(predicate) - self = type(self)(mgr) - # TODO: Make other agg func handle axis=None properly GH#21597 axis = self._get_axis_number(axis) labels = self._get_agg_axis(axis) @@ -11352,8 +11087,6 @@ def resample( label: str | None = None, convention: str = "start", kind: str | None = None, - loffset=None, - base: int | None = None, on: Level = None, level: Level = None, origin: str | TimestampConvertibleTypes = "start_day", @@ -11367,8 +11100,6 @@ def resample( label=label, convention=convention, kind=kind, - loffset=loffset, - base=base, on=on, level=level, origin=origin, @@ -11794,10 +11525,10 @@ def clip( ) -> DataFrame | None: return super().clip(lower, upper, axis=axis, inplace=inplace, **kwargs) - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "method"]) def interpolate( self: DataFrame, method: str = "linear", + *, axis: Axis = 0, limit: int | None = None, inplace: bool = False, @@ -11807,13 +11538,13 @@ def interpolate( **kwargs, ) -> DataFrame | None: return 
super().interpolate( - method, - axis, - limit, - inplace, - limit_direction, - limit_area, - downcast, + method=method, + axis=axis, + limit=limit, + inplace=inplace, + limit_direction=limit_direction, + limit_area=limit_area, + downcast=downcast, **kwargs, ) @@ -11826,7 +11557,6 @@ def where( inplace: Literal[False] = ..., axis: Axis | None = ..., level: Level = ..., - errors: IgnoreRaise | lib.NoDefault = ..., ) -> DataFrame: ... @@ -11839,7 +11569,6 @@ def where( inplace: Literal[True], axis: Axis | None = ..., level: Level = ..., - errors: IgnoreRaise | lib.NoDefault = ..., ) -> None: ... @@ -11852,13 +11581,10 @@ def where( inplace: bool = ..., axis: Axis | None = ..., level: Level = ..., - errors: IgnoreRaise | lib.NoDefault = ..., ) -> DataFrame | None: ... - # error: Signature of "where" incompatible with supertype "NDFrame" - @deprecate_kwarg(old_arg_name="errors", new_arg_name=None) - def where( # type: ignore[override] + def where( self, cond, other=lib.no_default, @@ -11866,7 +11592,6 @@ def where( # type: ignore[override] inplace: bool = False, axis: Axis | None = None, level: Level = None, - errors: IgnoreRaise | lib.NoDefault = "raise", ) -> DataFrame | None: return super().where( cond, @@ -11885,7 +11610,6 @@ def mask( inplace: Literal[False] = ..., axis: Axis | None = ..., level: Level = ..., - errors: IgnoreRaise | lib.NoDefault = ..., ) -> DataFrame: ... @@ -11898,7 +11622,6 @@ def mask( inplace: Literal[True], axis: Axis | None = ..., level: Level = ..., - errors: IgnoreRaise | lib.NoDefault = ..., ) -> None: ... @@ -11911,13 +11634,10 @@ def mask( inplace: bool = ..., axis: Axis | None = ..., level: Level = ..., - errors: IgnoreRaise | lib.NoDefault = ..., ) -> DataFrame | None: ... - # error: Signature of "mask" incompatible with supertype "NDFrame" - @deprecate_kwarg(old_arg_name="errors", new_arg_name=None) - def mask( # type: ignore[override] + def mask( self, cond, other=lib.no_default, @@ -11925,7 +11645,6 @@ def mask( # type: ignore[override] inplace: bool = False, axis: Axis | None = None, level: Level = None, - errors: IgnoreRaise | lib.NoDefault = "raise", ) -> DataFrame | None: return super().mask( cond, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a1f799ec5122a..4a0f31357079f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3,7 +3,6 @@ import collections from datetime import timedelta -import functools import gc import json import operator @@ -87,8 +86,6 @@ SettingWithCopyWarning, ) from pandas.util._decorators import ( - deprecate_kwarg, - deprecate_nonkeyword_arguments, doc, rewrite_axis_style_signature, ) @@ -145,10 +142,7 @@ from pandas.core.array_algos.replace import should_use_regex from pandas.core.arrays import ExtensionArray from pandas.core.base import PandasObject -from pandas.core.construction import ( - create_series_with_explicit_dtype, - extract_array, -) +from pandas.core.construction import extract_array from pandas.core.describe import describe_ndframe from pandas.core.flags import Flags from pandas.core.indexes.api import ( @@ -698,47 +692,13 @@ def size(self) -> int: # expected "int") [return-value] return np.prod(self.shape) # type: ignore[return-value] - @overload - def set_axis( - self: NDFrameT, - labels, - *, - axis: Axis = ..., - inplace: Literal[False] | lib.NoDefault = ..., - copy: bool_t | lib.NoDefault = ..., - ) -> NDFrameT: - ... - - @overload - def set_axis( - self, - labels, - *, - axis: Axis = ..., - inplace: Literal[True], - copy: bool_t | lib.NoDefault = ..., - ) -> None: - ... 
- - @overload - def set_axis( - self: NDFrameT, - labels, - *, - axis: Axis = ..., - inplace: bool_t | lib.NoDefault = ..., - copy: bool_t | lib.NoDefault = ..., - ) -> NDFrameT | None: - ... - def set_axis( self: NDFrameT, labels, *, axis: Axis = 0, - inplace: bool_t | lib.NoDefault = lib.no_default, - copy: bool_t | lib.NoDefault = lib.no_default, - ) -> NDFrameT | None: + copy: bool_t = True, + ) -> NDFrameT: """ Assign desired index to given axis. @@ -754,11 +714,6 @@ def set_axis( The axis to update. The value 0 identifies the rows. For `Series` this parameter is unused and defaults to 0. - inplace : bool, default False - Whether to return a new %(klass)s instance. - - .. deprecated:: 1.5.0 - copy : bool, default True Whether to make a copy of the underlying data. @@ -766,33 +721,14 @@ def set_axis( Returns ------- - renamed : %(klass)s or None - An object of type %(klass)s or None if ``inplace=True``. + renamed : %(klass)s + An object of type %(klass)s. See Also -------- %(klass)s.rename_axis : Alter the name of the index%(see_also_sub)s. """ - if inplace is not lib.no_default: - warnings.warn( - f"{type(self).__name__}.set_axis 'inplace' keyword is deprecated " - "and will be removed in a future version. Use " - "`obj = obj.set_axis(..., copy=False)` instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - inplace = False - - if inplace: - if copy is True: - raise ValueError("Cannot specify both inplace=True and copy=True") - copy = False - elif copy is lib.no_default: - copy = True - - self._check_inplace_and_allows_duplicate_labels(inplace) - return self._set_axis_nocheck(labels, axis, inplace, copy=copy) + return self._set_axis_nocheck(labels, axis, inplace=False, copy=copy) @final def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool_t, copy: bool_t): @@ -1054,7 +990,7 @@ def _rename( raise TypeError( "Cannot specify both 'axis' and any of 'index' or 'columns'" ) - elif mapper is not None: + if mapper is not None: raise TypeError( "Cannot specify both 'mapper' and any of 'index' or 'columns'" ) @@ -2192,8 +2128,6 @@ def _repr_data_resource_(self): # I/O Methods @final - @deprecate_kwarg(old_arg_name="verbose", new_arg_name=None) - @deprecate_kwarg(old_arg_name="encoding", new_arg_name=None) @doc( klass="object", storage_options=_shared_docs["storage_options"], @@ -2213,9 +2147,7 @@ def to_excel( startcol: int = 0, engine: str | None = None, merge_cells: bool_t = True, - encoding: lib.NoDefault = lib.no_default, inf_rep: str = "inf", - verbose: lib.NoDefault = lib.no_default, freeze_panes: tuple[int, int] | None = None, storage_options: StorageOptions = None, ) -> None: @@ -2260,35 +2192,14 @@ def to_excel( Upper left cell column to dump data frame. engine : str, optional Write engine to use, 'openpyxl' or 'xlsxwriter'. You can also set this - via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and + via the options ``io.excel.xlsx.writer`` or ``io.excel.xlsm.writer``. - .. deprecated:: 1.2.0 - - As the `xlwt `__ package is no longer - maintained, the ``xlwt`` engine will be removed in a future version - of pandas. - merge_cells : bool, default True Write MultiIndex and Hierarchical Rows as merged cells. - encoding : str, optional - Encoding of the resulting excel file. Only necessary for xlwt, - other writers support unicode natively. - - .. deprecated:: 1.5.0 - - This keyword was not used. - inf_rep : str, default 'inf' Representation for infinity (there is no native representation for infinity in Excel). 
- verbose : bool, default True - Display more information in the error logs. - - .. deprecated:: 1.5.0 - - This keyword was not used. - freeze_panes : tuple of int (length 2), optional Specifies the one-based bottommost row and rightmost column that is to be frozen. @@ -3537,7 +3448,6 @@ def to_csv( storage_options=_shared_docs["storage_options"], compression_options=_shared_docs["compression_options"] % "path_or_buf", ) - @deprecate_kwarg(old_arg_name="line_terminator", new_arg_name="lineterminator") def to_csv( self, path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, @@ -3998,12 +3908,7 @@ class animal locomotion labels = self._get_axis(axis) if isinstance(key, list): - warnings.warn( - "Passing lists as key for xs is deprecated and will be removed in a " - "future version. Pass key as a tuple instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) + raise TypeError("list keys are not supported in xs, pass a tuple instead") if level is not None: if not isinstance(labels, MultiIndex): @@ -4192,7 +4097,7 @@ def _check_setitem_copy(self, t: str = "setting", force: bool_t = False): if value == "raise": raise SettingWithCopyError(t) - elif value == "warn": + if value == "warn": warnings.warn(t, SettingWithCopyWarning, stacklevel=find_stack_level()) def __delitem__(self, key) -> None: @@ -4462,10 +4367,10 @@ def drop( ) -> NDFrameT | None: ... - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) def drop( self: NDFrameT, labels: IndexLabel = None, + *, axis: Axis = 0, index: IndexLabel = None, columns: IndexLabel = None, @@ -4663,7 +4568,7 @@ def add_prefix(self: NDFrameT, prefix: str, axis: Axis | None = None) -> NDFrame 2 3 5 3 4 6 """ - f = functools.partial("{prefix}{}".format, prefix=prefix) + f = lambda x: f"{prefix}{x}" axis_name = self._info_axis_name if axis is not None: @@ -4737,7 +4642,7 @@ def add_suffix(self: NDFrameT, suffix: str, axis: Axis | None = None) -> NDFrame 2 3 5 3 4 6 """ - f = functools.partial("{}{suffix}".format, suffix=suffix) + f = lambda x: f"{x}{suffix}" axis_name = self._info_axis_name if axis is not None: @@ -5000,6 +4905,7 @@ def sort_index( def sort_index( self: NDFrameT, + *, axis: Axis = 0, level: IndexLabel = None, ascending: bool_t | Sequence[bool_t] = True, @@ -6848,9 +6754,9 @@ def fillna( if inplace: return None return self.copy() - value = create_series_with_explicit_dtype( - value, dtype_if_empty=object - ) + from pandas import Series + + value = Series(value) value = value.reindex(self.index, copy=False) value = value._values elif not is_list_like(value): @@ -7312,6 +7218,7 @@ def replace( def interpolate( self: NDFrameT, method: str = "linear", + *, axis: Axis = 0, limit: int | None = None, inplace: bool_t = False, @@ -8420,7 +8327,7 @@ def between_time( ) # If any of the deprecated arguments ('include_start', 'include_end') # have been passed - elif old_include_arg_used: + if old_include_arg_used: warnings.warn( "`include_start` and `include_end` are deprecated in " "favour of `inclusive`.", @@ -8458,8 +8365,6 @@ def resample( label: str | None = None, convention: str = "start", kind: str | None = None, - loffset=None, - base: int | None = None, on: Level = None, level: Level = None, origin: str | TimestampConvertibleTypes = "start_day", @@ -8497,20 +8402,6 @@ def resample( Pass 'timestamp' to convert the resulting index to a `DateTimeIndex` or 'period' to convert it to a `PeriodIndex`. By default the input representation is retained. 
- loffset : timedelta, default None - Adjust the resampled time labels. - - .. deprecated:: 1.1.0 - You should add the loffset to the `df.index` after the resample. - See below. - - base : int, default 0 - For frequencies that evenly subdivide 1 day, the "origin" of the - aggregated intervals. For example, for '5min' frequency, base could - range from 0 through 4. Defaults to 0. - - .. deprecated:: 1.1.0 - The new arguments that you should use are 'offset' or 'origin'. on : str, optional For a DataFrame, column to use instead of index for resampling. @@ -8846,31 +8737,6 @@ def resample( 2000-10-02 00:12:00 45 2000-10-02 00:29:00 45 Freq: 17T, dtype: int64 - - To replace the use of the deprecated `base` argument, you can now use `offset`, - in this example it is equivalent to have `base=2`: - - >>> ts.resample('17min', offset='2min').sum() - 2000-10-01 23:16:00 0 - 2000-10-01 23:33:00 9 - 2000-10-01 23:50:00 36 - 2000-10-02 00:07:00 39 - 2000-10-02 00:24:00 24 - Freq: 17T, dtype: int64 - - To replace the use of the deprecated `loffset` argument: - - >>> from pandas.tseries.frequencies import to_offset - >>> loffset = '19min' - >>> ts_out = ts.resample('17min').sum() - >>> ts_out.index = ts_out.index + to_offset(loffset) - >>> ts_out - 2000-10-01 23:33:00 0 - 2000-10-01 23:50:00 9 - 2000-10-02 00:07:00 21 - 2000-10-02 00:24:00 54 - 2000-10-02 00:41:00 24 - Freq: 17T, dtype: int64 """ from pandas.core.resample import get_resampler @@ -8882,9 +8748,7 @@ def resample( closed=closed, axis=axis, kind=kind, - loffset=loffset, convention=convention, - base=base, key=on, level=level, origin=origin, @@ -9703,7 +9567,7 @@ def _where( if axis is None and not other._indexed_same(self): raise InvalidIndexError - elif other.ndim < self.ndim: + if other.ndim < self.ndim: # TODO(EA2D): avoid object-dtype cast in EA case GH#38729 other = other._values if axis == 0: @@ -9774,7 +9638,6 @@ def where( inplace: Literal[False] = ..., axis: Axis | None = ..., level: Level = ..., - errors: IgnoreRaise | lib.NoDefault = ..., ) -> NDFrameT: ... @@ -9787,7 +9650,6 @@ def where( inplace: Literal[True], axis: Axis | None = ..., level: Level = ..., - errors: IgnoreRaise | lib.NoDefault = ..., ) -> None: ... @@ -9800,11 +9662,9 @@ def where( inplace: bool_t = ..., axis: Axis | None = ..., level: Level = ..., - errors: IgnoreRaise | lib.NoDefault = ..., ) -> NDFrameT | None: ... - @deprecate_kwarg(old_arg_name="errors", new_arg_name=None) @doc( klass=_shared_doc_kwargs["klass"], cond="True", @@ -9820,7 +9680,6 @@ def where( inplace: bool_t = False, axis: Axis | None = None, level: Level = None, - errors: IgnoreRaise | lib.NoDefault = "raise", ) -> NDFrameT | None: """ Replace values where the condition is {cond_rev}. @@ -9849,15 +9708,6 @@ def where( unused and defaults to 0. level : int, default None Alignment level if needed. - errors : str, {{'raise', 'ignore'}}, default 'raise' - Note that currently this parameter won't affect - the results and will always coerce to a suitable dtype. - - - 'raise' : allow exceptions to be raised. - - 'ignore' : suppress exceptions. On error return original object. - - .. deprecated:: 1.5.0 - This argument had no effect. Returns ------- @@ -9980,7 +9830,6 @@ def mask( inplace: Literal[False] = ..., axis: Axis | None = ..., level: Level = ..., - errors: IgnoreRaise | lib.NoDefault = ..., ) -> NDFrameT: ... @@ -9993,7 +9842,6 @@ def mask( inplace: Literal[True], axis: Axis | None = ..., level: Level = ..., - errors: IgnoreRaise | lib.NoDefault = ..., ) -> None: ... 
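The migration pattern from the examples removed above remains the documented one: ``offset`` replaces ``base``, and ``loffset`` becomes index arithmetic after resampling. A condensed sketch, assuming a datetime-indexed Series ``ts``:

>>> ts.resample('17min', offset='2min').sum()            # previously base=2
>>> from pandas.tseries.frequencies import to_offset
>>> out = ts.resample('17min').sum()
>>> out.index = out.index + to_offset('19min')           # previously loffset='19min'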
@@ -10006,11 +9854,9 @@ def mask( inplace: bool_t = ..., axis: Axis | None = ..., level: Level = ..., - errors: IgnoreRaise | lib.NoDefault = ..., ) -> NDFrameT | None: ... - @deprecate_kwarg(old_arg_name="errors", new_arg_name=None) @doc( where, klass=_shared_doc_kwargs["klass"], @@ -10027,7 +9873,6 @@ def mask( inplace: bool_t = False, axis: Axis | None = None, level: Level = None, - errors: IgnoreRaise | lib.NoDefault = "raise", ) -> NDFrameT | None: inplace = validate_bool_kwarg(inplace, "inplace") @@ -10392,8 +10237,7 @@ def _tz_convert(ax, tz): raise TypeError( f"{ax_name} is not a valid DatetimeIndex or PeriodIndex" ) - else: - ax = DatetimeIndex([], tz=tz) + ax = DatetimeIndex([], tz=tz) else: ax = ax.tz_convert(tz) return ax @@ -10562,8 +10406,7 @@ def _tz_localize(ax, tz, ambiguous, nonexistent): raise TypeError( f"{ax_name} is not a valid DatetimeIndex or PeriodIndex" ) - else: - ax = DatetimeIndex([], tz=tz) + ax = DatetimeIndex([], tz=tz) else: ax = ax.tz_localize(tz, ambiguous=ambiguous, nonexistent=nonexistent) return ax @@ -10592,7 +10435,6 @@ def describe( percentiles=None, include=None, exclude=None, - datetime_is_numeric: bool_t = False, ) -> NDFrameT: """ Generate descriptive statistics. @@ -10638,12 +10480,6 @@ def describe( ``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``). To exclude pandas categorical columns, use ``'category'`` - None (default) : The result will exclude nothing. - datetime_is_numeric : bool, default False - Whether to treat datetime dtypes as numeric. This affects statistics - calculated for the column. For DataFrame input, this also - controls whether datetime columns are included by default. - - .. versionadded:: 1.1.0 Returns ------- @@ -10721,7 +10557,7 @@ def describe( ... np.datetime64("2010-01-01"), ... np.datetime64("2010-01-01") ... ]) - >>> s.describe(datetime_is_numeric=True) + >>> s.describe() count 3 mean 2006-09-01 08:00:00 min 2000-01-01 00:00:00 @@ -10839,7 +10675,6 @@ def describe( obj=self, include=include, exclude=exclude, - datetime_is_numeric=datetime_is_numeric, percentiles=percentiles, ) @@ -11429,11 +11264,6 @@ def _add_numeric_operations(cls) -> None: """ axis_descr, name1, name2 = _doc_params(cls) - @deprecate_nonkeyword_arguments( - version=None, - allowed_args=["self"], - name="DataFrame.any and Series.any", - ) @doc( _bool_doc, desc=_any_desc, @@ -11446,13 +11276,21 @@ def _add_numeric_operations(cls) -> None: ) def any( self, + *, axis: Axis = 0, bool_only=None, skipna: bool_t = True, level=None, **kwargs, ): - return NDFrame.any(self, axis, bool_only, skipna, level, **kwargs) + return NDFrame.any( + self, + axis=axis, + bool_only=bool_only, + skipna=skipna, + level=level, + **kwargs, + ) setattr(cls, "any", any) diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index 42630845bf6b2..a2e9c059cbcc9 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -4,10 +4,7 @@ from __future__ import annotations import dataclasses -from typing import ( - Hashable, - Literal, -) +from typing import Hashable @dataclasses.dataclass(order=True, frozen=True) @@ -61,15 +58,6 @@ class OutputKey: # produces a result that has the same shape as the group. 
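With ``maybe_normalize_deprecated_kernels`` removed above, the deprecated ``pad``/``backfill`` groupby kernel aliases are no longer rewritten to ``ffill``/``bfill``; only the canonical names remain. Sketch of the surviving spelling:

>>> df = pd.DataFrame({"g": [1, 1], "v": [None, 2.0]})
>>> df.groupby("g")["v"].bfill()
0    2.0
1    2.0
Name: v, dtype: float64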
-# TODO(2.0) Remove after pad/backfill deprecation enforced -def maybe_normalize_deprecated_kernels(kernel) -> Literal["bfill", "ffill"]: - if kernel == "backfill": - kernel = "bfill" - elif kernel == "pad": - kernel = "ffill" - return kernel - - transformation_kernels = frozenset( [ "bfill", diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 16732f5421df7..8e4607adddd50 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -80,7 +80,6 @@ ) from pandas.core.arrays.categorical import Categorical import pandas.core.common as com -from pandas.core.construction import create_series_with_explicit_dtype from pandas.core.frame import DataFrame from pandas.core.groupby import base from pandas.core.groupby.groupby import ( @@ -96,6 +95,7 @@ Index, MultiIndex, all_indexes_same, + default_index, ) from pandas.core.indexes.category import CategoricalIndex from pandas.core.series import Series @@ -295,9 +295,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) # result is a dict whose keys are the elements of result_index index = self.grouper.result_index - return create_series_with_explicit_dtype( - result, index=index, dtype_if_empty=object - ) + return Series(result, index=index) agg = aggregate @@ -309,7 +307,7 @@ def _aggregate_multiple_funcs(self, arg) -> DataFrame: # GH 15931 raise SpecificationError("nested renamer is not supported") - elif any(isinstance(x, (tuple, list)) for x in arg): + if any(isinstance(x, (tuple, list)) for x in arg): arg = [(x, x) if not isinstance(x, (tuple, list)) else x for x in arg] # indicated column order @@ -332,7 +330,7 @@ def _aggregate_multiple_funcs(self, arg) -> DataFrame: from pandas import concat res_df = concat( - results.values(), axis=1, keys=[key.label for key in results.keys()] + results.values(), axis=1, keys=[key.label for key in results] ) return res_df @@ -1141,8 +1139,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) result = gba.agg() except ValueError as err: - if "no results" not in str(err): - # raised directly by _aggregate_multiple_funcs + if "No objects to concatenate" not in str(err): raise result = self._aggregate_frame(func) @@ -1163,7 +1160,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) if not self.as_index: self._insert_inaxis_grouper_inplace(result) - result.index = Index(range(len(result))) + result.index = default_index(len(result)) return result @@ -1294,10 +1291,8 @@ def _wrap_applied_output_series( key_index, override_group_keys: bool, ) -> DataFrame | Series: - # this is to silence a DeprecationWarning - # TODO(2.0): Remove when default dtype of empty Series is object kwargs = first_not_none._construct_axes_dict() - backup = create_series_with_explicit_dtype(dtype_if_empty=object, **kwargs) + backup = Series(**kwargs) values = [x if (x is not None) else backup for x in values] all_indexed_same = all_indexes_same(x.index for x in values) @@ -1595,12 +1590,10 @@ def __getitem__(self, key) -> DataFrameGroupBy | SeriesGroupBy: # per GH 23566 if isinstance(key, tuple) and len(key) > 1: # if len == 1, then it becomes a SeriesGroupBy and this is actually - # valid syntax, so don't raise warning - warnings.warn( - "Indexing with multiple keys (implicitly converted to a tuple " - "of keys) will be deprecated, use a list instead.", - FutureWarning, - stacklevel=find_stack_level(), + # valid syntax, so don't raise + raise ValueError( + "Cannot subset columns with a tuple 
with more than one element. " + "Use a list instead." ) return super().__getitem__(key) @@ -1786,7 +1779,7 @@ def nunique(self, dropna: bool = True) -> DataFrame: ) if not self.as_index: - results.index = Index(range(len(results))) + results.index = default_index(len(results)) self._insert_inaxis_grouper_inplace(results) return results diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index a0f83e13c4ece..edda5492aca6d 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -88,9 +88,12 @@ class providing the base-class of operations. notna, ) -from pandas.core import nanops +from pandas.core import ( + algorithms, + nanops, + sample, +) from pandas.core._numba import executor -import pandas.core.algorithms as algorithms from pandas.core.arrays import ( BaseMaskedArray, BooleanArray, @@ -121,7 +124,6 @@ class providing the base-class of operations. RangeIndex, ) from pandas.core.internals.blocks import ensure_block_shape -import pandas.core.sample as sample from pandas.core.series import Series from pandas.core.sorting import get_group_index_sorter from pandas.core.util.numba_ import ( @@ -1757,7 +1759,7 @@ def _cython_agg_general( raise NotImplementedError( f"{type(self).__name__}.{how} does not implement {kwd_name}." ) - elif not is_ser: + if not is_ser: data = data.get_numeric_data(copy=False) def array_func(values: ArrayLike) -> ArrayLike: diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 7ae6495f15541..1cc5e90f9a3a4 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -11,7 +11,6 @@ Iterator, final, ) -import warnings import numpy as np @@ -23,7 +22,6 @@ ) from pandas.errors import InvalidIndexError from pandas.util._decorators import cache_readonly -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( is_categorical_dtype, @@ -31,7 +29,7 @@ is_scalar, ) -import pandas.core.algorithms as algorithms +from pandas.core import algorithms from pandas.core.arrays import ( Categorical, ExtensionArray, @@ -86,23 +84,6 @@ class Grouper: Only when `freq` parameter is passed. convention : {'start', 'end', 'e', 's'} If grouper is PeriodIndex and `freq` parameter is passed. - base : int, default 0 - Only when `freq` parameter is passed. - For frequencies that evenly subdivide 1 day, the "origin" of the - aggregated intervals. For example, for '5min' frequency, base could - range from 0 through 4. Defaults to 0. - - .. deprecated:: 1.1.0 - The new arguments that you should use are 'offset' or 'origin'. - - loffset : str, DateOffset, timedelta object - Only when `freq` parameter is passed. - - .. deprecated:: 1.1.0 - loffset is only working for ``.resample(...)`` and not for - Grouper (:issue:`28302`). - However, loffset is also deprecated for ``.resample(...)`` - See: :class:`DataFrame.resample` origin : Timestamp or str, default 'start_day' The timestamp on which to adjust the grouping. 
The timezone of origin must @@ -266,7 +247,6 @@ def __new__(cls, *args, **kwargs): if kwargs.get("freq") is not None: from pandas.core.resample import TimeGrouper - _check_deprecated_resample_kwargs(kwargs, origin=cls) cls = TimeGrouper return super().__new__(cls) @@ -921,7 +901,7 @@ def is_in_obj(gpr) -> bool: if len(groupings) == 0 and len(obj): raise ValueError("No group keys passed!") - elif len(groupings) == 0: + if len(groupings) == 0: groupings.append(Grouping(Index([], dtype="int"), np.array([], dtype=np.intp))) # create the internals grouper @@ -954,51 +934,3 @@ def _convert_grouper(axis: Index, grouper): return grouper else: return grouper - - -def _check_deprecated_resample_kwargs(kwargs, origin) -> None: - """ - Check for use of deprecated parameters in ``resample`` and related functions. - - Raises the appropriate warnings if these parameters are detected. - Only sets an approximate ``stacklevel`` for the warnings (see #37603, #36629). - - Parameters - ---------- - kwargs : dict - Dictionary of keyword arguments to check for deprecated parameters. - origin : object - From where this function is being called; either Grouper or TimeGrouper. Used - to determine an approximate stacklevel. - """ - # Deprecation warning of `base` and `loffset` since v1.1.0: - # we are raising the warning here to be able to set the `stacklevel` - # properly since we need to raise the `base` and `loffset` deprecation - # warning from three different cases: - # core/generic.py::NDFrame.resample - # core/groupby/groupby.py::GroupBy.resample - # core/groupby/grouper.py::Grouper - # raising these warnings from TimeGrouper directly would fail the test: - # tests/resample/test_deprecated.py::test_deprecating_on_loffset_and_base - - if kwargs.get("base", None) is not None: - warnings.warn( - "'base' in .resample() and in Grouper() is deprecated.\n" - "The new arguments that you should use are 'offset' or 'origin'.\n" - '\n>>> df.resample(freq="3s", base=2)\n' - "\nbecomes:\n" - '\n>>> df.resample(freq="3s", offset="2s")\n', - FutureWarning, - stacklevel=find_stack_level(), - ) - if kwargs.get("loffset", None) is not None: - warnings.warn( - "'loffset' in .resample() and in Grouper() is deprecated.\n" - '\n>>> df.resample(freq="3s", loffset="8H")\n' - "\nbecomes:\n" - "\n>>> from pandas.tseries.frequencies import to_offset" - '\n>>> df = df.resample(freq="3s").mean()' - '\n>>> df.index = df.index.to_timestamp() + to_offset("8H")\n', - FutureWarning, - stacklevel=find_stack_level(), - ) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index bf3f74330e8cb..c20fe34a178f5 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -169,7 +169,7 @@ def _get_cython_function( f"function is not implemented for this dtype: " f"[how->{how},dtype->{dtype_str}]" ) - elif "object" not in f.__signatures__: + if "object" not in f.__signatures__: # raise NotImplementedError here rather than TypeError later raise NotImplementedError( f"function is not implemented for this dtype: " @@ -241,10 +241,10 @@ def _disallow_invalid_ops(self, dtype: DtypeObj, is_numeric: bool = False): # non-cython implementation. if how in ["sum", "prod", "cumsum", "cumprod"]: raise TypeError(f"{dtype} type does not support {how} operations") - elif how not in ["rank"]: + if how not in ["rank"]: # only "rank" is implemented in cython raise NotImplementedError(f"{dtype} dtype not supported") - elif not dtype.ordered: + if not dtype.ordered: # TODO: TypeError? 
raise NotImplementedError(f"{dtype} dtype not supported") @@ -628,7 +628,7 @@ def cython_operation( """ if values.ndim > 2: raise NotImplementedError("number of dimensions is currently limited to 2") - elif values.ndim == 2: + if values.ndim == 2: assert axis == 1, axis elif not is_1d_only_ea_dtype(values.dtype): # Note: it is *not* the case that axis is always 0 for 1-dim values, diff --git a/pandas/core/index.py b/pandas/core/index.py deleted file mode 100644 index 19e9c6b27e4e7..0000000000000 --- a/pandas/core/index.py +++ /dev/null @@ -1,37 +0,0 @@ -# pyright: reportUnusedImport = false -from __future__ import annotations - -import warnings - -from pandas.util._exceptions import find_stack_level - -from pandas.core.indexes.api import ( # noqa:F401 - CategoricalIndex, - DatetimeIndex, - Float64Index, - Index, - Int64Index, - IntervalIndex, - MultiIndex, - NaT, - NumericIndex, - PeriodIndex, - RangeIndex, - TimedeltaIndex, - UInt64Index, - _new_Index, - ensure_index, - ensure_index_from_sequences, - get_objs_combined_axis, -) -from pandas.core.indexes.multi import sparsify_labels # noqa:F401 - -# GH#30193 -warnings.warn( - "pandas.core.index is deprecated and will be removed in a future version. " - "The public classes are available in the top-level namespace.", - FutureWarning, - stacklevel=find_stack_level(), -) - -__all__: list[str] = [] diff --git a/pandas/core/indexers/__init__.py b/pandas/core/indexers/__init__.py index 6431f12a08dc8..ba8a4f1d0ee7a 100644 --- a/pandas/core/indexers/__init__.py +++ b/pandas/core/indexers/__init__.py @@ -2,7 +2,7 @@ check_array_indexer, check_key_length, check_setitem_lengths, - deprecate_ndim_indexing, + disallow_ndim_indexing, is_empty_indexer, is_list_like_indexer, is_scalar_indexer, @@ -23,7 +23,7 @@ "validate_indices", "maybe_convert_indices", "length_of_indexer", - "deprecate_ndim_indexing", + "disallow_ndim_indexing", "unpack_1tuple", "check_key_length", "check_array_indexer", diff --git a/pandas/core/indexers/utils.py b/pandas/core/indexers/utils.py index 0f3cdc4195c85..90503876ee5d5 100644 --- a/pandas/core/indexers/utils.py +++ b/pandas/core/indexers/utils.py @@ -7,12 +7,10 @@ TYPE_CHECKING, Any, ) -import warnings import numpy as np from pandas._typing import AnyArrayLike -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( is_array_like, @@ -333,22 +331,18 @@ def length_of_indexer(indexer, target=None) -> int: raise AssertionError("cannot find the length of the indexer") -def deprecate_ndim_indexing(result, stacklevel: int = 3) -> None: +def disallow_ndim_indexing(result) -> None: """ - Helper function to raise the deprecation warning for multi-dimensional - indexing on 1D Series/Index. + Helper function to disallow multi-dimensional indexing on 1D Series/Index. GH#27125 indexer like idx[:, None] expands dim, but we cannot do that - and keep an index, so we currently return ndarray, which is deprecated - (Deprecation GH#30588). + and keep an index, so we used to return ndarray, which was deprecated + in GH#30588. """ if np.ndim(result) > 1: - warnings.warn( - "Support for multi-dimensional indexing (e.g. `obj[:, None]`) " - "is deprecated and will be removed in a future " - "version. Convert to a numpy array before indexing instead.", - FutureWarning, - stacklevel=find_stack_level(), + raise ValueError( + "Multi-dimensional indexing (e.g. `obj[:, None]`) is no longer " + "supported. Convert to a numpy array before indexing instead." 
) @@ -367,12 +361,9 @@ def unpack_1tuple(tup): if isinstance(tup, list): # GH#31299 - warnings.warn( + raise ValueError( "Indexing with a single-item list containing a " - "slice is deprecated and will raise in a future " - "version. Pass a tuple instead.", - FutureWarning, - stacklevel=find_stack_level(), + "slice is not allowed. Pass a tuple instead.", ) return tup[0] diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 46959aa5cd3e2..da2a0a2a87137 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -4,12 +4,9 @@ from __future__ import annotations from typing import TYPE_CHECKING -import warnings import numpy as np -from pandas.util._exceptions import find_stack_level - from pandas.core.dtypes.common import ( is_categorical_dtype, is_datetime64_dtype, @@ -276,31 +273,6 @@ def isocalendar(self) -> DataFrame: """ return self._get_values().isocalendar().set_index(self._parent.index) - @property - def weekofyear(self): - """ - The week ordinal of the year according to the ISO 8601 standard. - - .. deprecated:: 1.1.0 - - Series.dt.weekofyear and Series.dt.week have been deprecated. Please - call :func:`Series.dt.isocalendar` and access the ``week`` column - instead. - """ - warnings.warn( - "Series.dt.weekofyear and Series.dt.week have been deprecated. " - "Please use Series.dt.isocalendar().week instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - week_series = self.isocalendar().week - week_series.name = self.name - if week_series.hasnans: - return week_series.astype("float64") - return week_series.astype("int64") - - week = weekofyear - @delegate_names( delegate=TimedeltaArray, accessors=TimedeltaArray._datetimelike_ops, typ="property" diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 58dd207bb4353..c562eaffd241d 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -13,7 +13,6 @@ from pandas.errors import InvalidIndexError from pandas.core.dtypes.cast import find_common_type -from pandas.core.dtypes.common import is_dtype_equal from pandas.core.algorithms import safe_sort from pandas.core.indexes.base import ( @@ -276,7 +275,6 @@ def _find_common_index_dtype(inds): if kind == "special": result = indexes[0] - first = result dtis = [x for x in indexes if isinstance(x, DatetimeIndex)] dti_tzs = [x for x in dtis if x.tz is not None] @@ -289,12 +287,6 @@ def _find_common_index_dtype(inds): if len(dtis) == len(indexes): sort = True - if not all(is_dtype_equal(x.dtype, first.dtype) for x in indexes): - # i.e. timezones mismatch - # TODO(2.0): once deprecation is enforced, this union will - # cast to UTC automatically. 
- indexes = [x.tz_convert("UTC") for x in indexes] - result = indexes[0] elif len(dtis) > 1: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1d3efe8bedd94..10c2349f05dfd 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -41,7 +41,6 @@ IncompatibleFrequency, OutOfBoundsDatetime, Timestamp, - is_unitless, tz_compare, ) from pandas._typing import ( @@ -61,13 +60,11 @@ from pandas.compat.numpy import function as nv from pandas.errors import ( DuplicateLabelError, - IntCastingNaNError, InvalidIndexError, ) from pandas.util._decorators import ( Appender, cache_readonly, - deprecate_nonkeyword_arguments, doc, ) from pandas.util._exceptions import ( @@ -107,7 +104,6 @@ is_scalar, is_signed_integer_dtype, is_string_dtype, - is_unsigned_integer_dtype, needs_i8_conversion, pandas_dtype, validate_all_hashable, @@ -152,11 +148,6 @@ Categorical, ExtensionArray, ) -from pandas.core.arrays.datetimes import ( - tz_to_dtype, - validate_tz_from_dtype, -) -from pandas.core.arrays.sparse import SparseDtype from pandas.core.arrays.string_ import StringArray from pandas.core.base import ( IndexOpsMixin, @@ -168,7 +159,7 @@ extract_array, sanitize_array, ) -from pandas.core.indexers import deprecate_ndim_indexing +from pandas.core.indexers import disallow_ndim_indexing from pandas.core.indexes.frozen import FrozenList from pandas.core.ops import get_op_result_name from pandas.core.ops.invalid import make_invalid_op @@ -223,7 +214,8 @@ def _maybe_return_indexers(meth: F) -> F: @functools.wraps(meth) def join( self, - other, + other: Index, + *, how: str_t = "left", level=None, return_indexers: bool = False, @@ -242,11 +234,6 @@ def join( return cast(F, join) -def disallow_kwargs(kwargs: dict[str, Any]) -> None: - if kwargs: - raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}") - - def _new_Index(cls, d): """ This is called upon unpickling, rather than the default which doesn't @@ -325,13 +312,6 @@ class Index(IndexOpsMixin, PandasObject): Index(['a', 'b', 'c'], dtype='object') """ - # tolist is not actually deprecated, just suppressed in the __dir__ - _hidden_attrs: frozenset[str] = ( - PandasObject._hidden_attrs - | IndexOpsMixin._hidden_attrs - | frozenset(["contains", "set_value"]) - ) - # To hand over control to subclasses _join_precedence = 1 @@ -438,18 +418,8 @@ def __new__( copy: bool = False, name=None, tupleize_cols: bool = True, - **kwargs, ) -> Index: - if kwargs: - warnings.warn( - "Passing keywords other than 'data', 'dtype', 'copy', 'name', " - "'tupleize_cols' is deprecated and will raise TypeError in a " - "future version. 
Use the specific Index subclass directly instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - from pandas.core.arrays import PandasArray from pandas.core.indexes.range import RangeIndex @@ -457,10 +427,6 @@ def __new__( if dtype is not None: dtype = pandas_dtype(dtype) - if "tz" in kwargs: - tz = kwargs.pop("tz") - validate_tz_from_dtype(dtype, tz) - dtype = tz_to_dtype(tz) if type(data) is PandasArray: # ensure users don't accidentally put a PandasArray in an index, @@ -482,18 +448,17 @@ def __new__( # non-EA dtype indexes have special casting logic, so we punt here klass = cls._dtype_to_subclass(dtype) if klass is not Index: - return klass(data, dtype=dtype, copy=copy, name=name, **kwargs) + return klass(data, dtype=dtype, copy=copy, name=name) ea_cls = dtype.construct_array_type() data = ea_cls._from_sequence(data, dtype=dtype, copy=copy) - disallow_kwargs(kwargs) return Index._simple_new(data, name=name) elif is_ea_or_datetimelike_dtype(data_dtype): data_dtype = cast(DtypeObj, data_dtype) klass = cls._dtype_to_subclass(data_dtype) if klass is not Index: - result = klass(data, copy=copy, name=name, **kwargs) + result = klass(data, copy=copy, name=name) if dtype is not None: return result.astype(dtype, copy=False) return result @@ -501,7 +466,6 @@ def __new__( # GH#45206 data = data.astype(dtype, copy=False) - disallow_kwargs(kwargs) data = extract_array(data, extract_numpy=True) return Index._simple_new(data, name=name) @@ -543,18 +507,14 @@ def __new__( ) dtype = arr.dtype - if kwargs: - return cls(arr, dtype, copy=copy, name=name, **kwargs) - klass = cls._dtype_to_subclass(arr.dtype) arr = klass._ensure_array(arr, dtype, copy) - disallow_kwargs(kwargs) return klass._simple_new(arr, name) elif is_scalar(data): raise cls._raise_scalar_data_error(data) elif hasattr(data, "__array__"): - return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs) + return Index(np.asarray(data), dtype=dtype, copy=copy, name=name) else: if tupleize_cols and is_list_like(data): @@ -567,9 +527,7 @@ def __new__( # 10697 from pandas.core.indexes.multi import MultiIndex - return MultiIndex.from_tuples( - data, names=name or kwargs.get("names") - ) + return MultiIndex.from_tuples(data, names=name) # other iterable of some kind subarr = com.asarray_tuplesafe(data, dtype=_dtype_obj) @@ -579,7 +537,7 @@ def __new__( subarr, cast_numeric_deprecated=False ) dtype = subarr.dtype - return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs) + return Index(subarr, dtype=dtype, copy=copy, name=name) @classmethod def _ensure_array(cls, data, dtype, copy: bool): @@ -618,17 +576,6 @@ def _dtype_to_subclass(cls, dtype: DtypeObj): return PeriodIndex - elif isinstance(dtype, SparseDtype): - warnings.warn( - "In a future version, passing a SparseArray to pd.Index " - "will store that array directly instead of converting to a " - "dense numpy ndarray. 
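With the `**kwargs` escape hatch gone from `Index.__new__`, keywords such as `tz` are no longer forwarded; the specific subclass has to be constructed directly, as the removed warning text suggested. A sketch of the two obvious spellings (values illustrative):

```python
import pandas as pd

# pre-2.0, pd.Index(data, tz=...) warned and forwarded tz; now anything
# beyond data/dtype/copy/name/tupleize_cols is a TypeError
idx = pd.DatetimeIndex(["2021-01-01", "2021-01-02"], tz="US/Eastern")

# equivalently, localize after construction
idx2 = pd.to_datetime(["2021-01-01", "2021-01-02"]).tz_localize("US/Eastern")
```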
To retain the old behavior, use " - "pd.Index(arr.to_numpy()) instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - return cls._dtype_to_subclass(dtype.subtype) - return Index if dtype.kind == "M": @@ -641,20 +588,20 @@ def _dtype_to_subclass(cls, dtype: DtypeObj): return TimedeltaIndex - elif is_float_dtype(dtype): + elif dtype.kind == "f": from pandas.core.api import Float64Index return Float64Index - elif is_unsigned_integer_dtype(dtype): + elif dtype.kind == "u": from pandas.core.api import UInt64Index return UInt64Index - elif is_signed_integer_dtype(dtype): + elif dtype.kind == "i": from pandas.core.api import Int64Index return Int64Index - elif dtype == _dtype_obj: + elif dtype.kind == "O": # NB: assuming away MultiIndex return Index @@ -704,9 +651,7 @@ def _with_infer(cls, *args, **kwargs): Constructor that uses the 1.0.x behavior inferring numeric dtypes for ndarray[object] inputs. """ - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", ".*the Index constructor", FutureWarning) - result = cls(*args, **kwargs) + result = cls(*args, **kwargs) if result.dtype == _dtype_obj and not result._is_multi: # error: Argument 1 to "maybe_convert_objects" has incompatible type @@ -991,34 +936,19 @@ def dtype(self) -> DtypeObj: return self._data.dtype @final - def ravel(self, order: str_t = "C"): + def ravel(self, order: str_t = "C") -> Index: """ - Return an ndarray of the flattened values of the underlying data. + Return a view on self. Returns ------- - numpy.ndarray - Flattened array. + Index See Also -------- numpy.ndarray.ravel : Return a flattened array. """ - warnings.warn( - "Index.ravel returning ndarray is deprecated; in a future version " - "this will return a view on self.", - FutureWarning, - stacklevel=find_stack_level(), - ) - if needs_i8_conversion(self.dtype): - # Item "ndarray[Any, Any]" of "Union[ExtensionArray, ndarray[Any, Any]]" - # has no attribute "_ndarray" - values = self._data._ndarray # type: ignore[union-attr] - elif is_interval_dtype(self.dtype): - values = np.asarray(self._data) - else: - values = self._get_engine_target() - return values.ravel(order=order) + return self[:] def view(self, cls=None): @@ -1084,11 +1014,6 @@ def astype(self, dtype, copy: bool = True): values = self._data if isinstance(values, ExtensionArray): - if isinstance(dtype, np.dtype) and dtype.kind == "M" and is_unitless(dtype): - # TODO(2.0): remove this special-casing once this is enforced - # in DTA.astype - raise TypeError(f"Cannot cast {type(self).__name__} to dtype") - with rewrite_exception(type(values).__name__, type(self).__name__): new_values = values.astype(dtype, copy=copy) @@ -1107,22 +1032,12 @@ def astype(self, dtype, copy: bool = True): new_values = cls._from_sequence(self, dtype=dtype, copy=copy) else: - try: - if dtype == str: - # GH#38607 - new_values = values.astype(dtype, copy=copy) - else: - # GH#13149 specifically use astype_nansafe instead of astype - new_values = astype_nansafe(values, dtype=dtype, copy=copy) - except IntCastingNaNError: - raise - except (TypeError, ValueError) as err: - if dtype.kind == "u" and "losslessly" in str(err): - # keep the message from _astype_float_to_int_nansafe - raise - raise TypeError( - f"Cannot cast {type(self).__name__} to dtype {dtype}" - ) from err + if dtype == str: + # GH#38607 see test_astype_str_from_bytes + new_values = values.astype(dtype, copy=copy) + else: + # GH#13149 specifically use astype_nansafe instead of astype + new_values = astype_nansafe(values, dtype=dtype, copy=copy) # pass 
copy=False because any copying will be done in the astype above if self._is_backward_compat_public_numeric_index: @@ -1605,7 +1520,7 @@ def to_frame( index : bool, default True Set the index of the returned DataFrame as the original Index. - name : object, default None + name : object, defaults to index.name The passed name should substitute for the index name (if it has one). @@ -1647,17 +1562,6 @@ def to_frame( """ from pandas import DataFrame - if name is None: - warnings.warn( - "Explicitly passing `name=None` currently preserves the Index's name " - "or uses a default name of 0. This behaviour is deprecated, and in " - "the future `None` will be used as the name of the resulting " - "DataFrame column.", - FutureWarning, - stacklevel=find_stack_level(), - ) - name = lib.no_default - if name is lib.no_default: name = self._get_level_names() result = DataFrame({name: self._values.copy()}) @@ -1699,7 +1603,7 @@ def _validate_names( if names is not None and name is not None: raise TypeError("Can only provide one of `names` and `name`") - elif names is None and name is None: + if names is None and name is None: new_names = deepcopy(self.names) if deep else self.names elif names is not None: if not is_list_like(names): @@ -1803,9 +1707,8 @@ def set_names( ) -> _IndexT | None: ... - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "names"]) def set_names( - self: _IndexT, names, level=None, inplace: bool = False + self: _IndexT, names, *, level=None, inplace: bool = False ) -> _IndexT | None: """ Set Index or MultiIndex name. @@ -1881,16 +1784,16 @@ def set_names( if level is not None and not isinstance(self, ABCMultiIndex): raise ValueError("Level must be None for non-MultiIndex") - elif level is not None and not is_list_like(level) and is_list_like(names): + if level is not None and not is_list_like(level) and is_list_like(names): raise TypeError("Names must be a string when a single level is provided.") - elif not is_list_like(names) and level is None and self.nlevels > 1: + if not is_list_like(names) and level is None and self.nlevels > 1: raise TypeError("Must pass list-like as `names`.") - elif is_dict_like(names) and not isinstance(self, ABCMultiIndex): + if is_dict_like(names) and not isinstance(self, ABCMultiIndex): raise TypeError("Can only pass dict-like as `names` for MultiIndex.") - elif is_dict_like(names) and level is not None: + if is_dict_like(names) and level is not None: raise TypeError("Can not pass level for dictlike `names`.") if isinstance(self, ABCMultiIndex) and is_dict_like(names) and level is None: @@ -1999,7 +1902,7 @@ def _validate_index_level(self, level) -> None: "Too many levels: Index has only 1 level, " f"{level} is not a valid level number" ) - elif level > 0: + if level > 0: raise IndexError( f"Too many levels: Index has only 1 level, not {level + 1}" ) @@ -2251,24 +2154,6 @@ def _can_hold_na(self) -> bool: return False return True - @final - @property - def is_monotonic(self) -> bool: - """ - Alias for is_monotonic_increasing. - - .. deprecated:: 1.5.0 - is_monotonic is deprecated and will be removed in a future version. - Use is_monotonic_increasing instead. - """ - warnings.warn( - "is_monotonic is deprecated and will be removed in a future version. 
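For the `set_names` hunk above, dropping `deprecate_nonkeyword_arguments` makes `level` and `inplace` keyword-only, and `to_frame` now simply falls back to the index's own name. Roughly (names illustrative):

```python
import pandas as pd

idx = pd.Index([1, 2, 3], name="id")

idx.to_frame()                    # column is named "id" (defaults to index.name)
idx.set_names("key")              # `names` may still be passed positionally
idx.set_names("key", level=None)  # level/inplace must now be keywords
# idx.set_names("key", None)      # TypeError as of this change
```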
" - "Use is_monotonic_increasing instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - return self.is_monotonic_increasing - @property def is_monotonic_increasing(self) -> bool: """ @@ -2680,20 +2565,6 @@ def _is_all_dates(self) -> bool: return False return is_datetime_array(ensure_object(self._values)) - @cache_readonly - @final - def is_all_dates(self) -> bool: - """ - Whether or not the index values only consist of dates. - """ - warnings.warn( - "Index.is_all_dates is deprecated, will be removed in a future version. " - "check index.inferred_type instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - return self._is_all_dates - @final @cache_readonly def _is_multi(self) -> bool: @@ -2944,8 +2815,7 @@ def unique(self: _IndexT, level: Hashable | None = None) -> _IndexT: result = super().unique() return self._shallow_copy(result) - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) - def drop_duplicates(self: _IndexT, keep: DropKeep = "first") -> _IndexT: + def drop_duplicates(self: _IndexT, *, keep: DropKeep = "first") -> _IndexT: """ Return Index with duplicate values removed. @@ -3127,10 +2997,9 @@ def _validate_sort_keyword(self, sort): ) @final - def _deprecate_dti_setop(self, other: Index, setop: str_t) -> None: + def _dti_setop_align_tzs(self, other: Index, setop: str_t) -> tuple[Index, Index]: """ - Deprecate setop behavior between timezone-aware DatetimeIndexes with - mismatched timezones. + With mismatched timezones, cast both to UTC. """ # Caller is responsibelf or checking # `not is_dtype_equal(self.dtype, other.dtype)` @@ -3141,14 +3010,10 @@ def _deprecate_dti_setop(self, other: Index, setop: str_t) -> None: and other.tz is not None ): # GH#39328, GH#45357 - warnings.warn( - f"In a future version, the {setop} of DatetimeIndex objects " - "with mismatched timezones will cast both to UTC instead of " - "object dtype. To retain the old behavior, " - f"use `index.astype(object).{setop}(other)`", - FutureWarning, - stacklevel=find_stack_level(), - ) + left = self.tz_convert("UTC") + right = other.tz_convert("UTC") + return left, right + return self, other @final def union(self, other, sort=None): @@ -3248,7 +3113,7 @@ def union(self, other, sort=None): "Can only union MultiIndex with MultiIndex or Index of tuples, " "try mi.to_flat_index().union(other) instead." ) - self._deprecate_dti_setop(other, "union") + self, other = self._dti_setop_align_tzs(other, "union") dtype = self._find_common_type_compat(other) left = self.astype(dtype, copy=False) @@ -3385,7 +3250,7 @@ def intersection(self, other, sort: bool = False): other, result_name = self._convert_can_do_setop(other) if not is_dtype_equal(self.dtype, other.dtype): - self._deprecate_dti_setop(other, "intersection") + self, other = self._dti_setop_align_tzs(other, "intersection") if self.equals(other): if self.has_duplicates: @@ -3533,7 +3398,7 @@ def difference(self, other, sort=None): self._assert_can_do_setop(other) other, result_name = self._convert_can_do_setop(other) - # Note: we do NOT call _deprecate_dti_setop here, as there + # Note: we do NOT call _dti_setop_align_tzs here, as there # is no requirement that .difference be commutative, so it does # not cast to object. 
@@ -3617,7 +3482,7 @@ def symmetric_difference(self, other, result_name=None, sort=None): result_name = result_name_update if not is_dtype_equal(self.dtype, other.dtype): - self._deprecate_dti_setop(other, "symmetric_difference") + self, other = self._dti_setop_align_tzs(other, "symmetric_difference") if not self._should_compare(other): return self.union(other, sort=sort).rename(result_name) @@ -3965,7 +3830,7 @@ def _check_indexing_method( "method='nearest' not implemented yet " "for MultiIndex; see GitHub issue 9365" ) - elif method in ("pad", "backfill"): + if method in ("pad", "backfill"): if tolerance is not None: raise NotImplementedError( "tolerance not implemented yet for MultiIndex" @@ -4241,12 +4106,9 @@ def is_int(v): elif is_positional: if kind == "loc": # GH#16121, GH#24612, GH#31810 - warnings.warn( - "Slicing a positional slice with .loc is not supported, " - "and will raise TypeError in a future version. " + raise TypeError( + "Slicing a positional slice with .loc is not allowed, " "Use .loc with labels or .iloc with positions instead.", - FutureWarning, - stacklevel=find_stack_level(), ) indexer = key else: @@ -4270,8 +4132,7 @@ def _raise_invalid_indexer( ) if reraise is not lib.no_default: raise TypeError(msg) from reraise - else: - raise TypeError(msg) + raise TypeError(msg) # -------------------------------------------------------------------- # Reindex Methods @@ -4402,12 +4263,7 @@ def reindex( if not self.is_unique: # GH#42568 - warnings.warn( - "reindexing with a non-unique Index is deprecated and " - "will raise in a future version.", - FutureWarning, - stacklevel=find_stack_level(), - ) + raise ValueError("cannot reindex on an axis with duplicate labels") target = self._wrap_reindex_result(target, indexer, preserve_names) return target, indexer @@ -4537,11 +4393,11 @@ def join( ... @final - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "other"]) @_maybe_return_indexers def join( self, other: Index, + *, how: str_t = "left", level: Level = None, return_indexers: bool = False, @@ -5276,7 +5132,7 @@ def __getitem__(self, key): if is_integer(key) or is_float(key): # GH#44051 exclude bool, which would return a 2d ndarray - key = com.cast_scalar_indexer(key, warn_float=True) + key = com.cast_scalar_indexer(key) return getitem(key) if isinstance(key, slice): @@ -5299,15 +5155,7 @@ def __getitem__(self, key): result = getitem(key) # Because we ruled out integer above, we always get an arraylike here if result.ndim > 1: - deprecate_ndim_indexing(result) - if hasattr(result, "_ndarray"): - # i.e. NDArrayBackedExtensionArray - # Unpack to ndarray for MPL compat - # error: Item "ndarray[Any, Any]" of - # "Union[ExtensionArray, ndarray[Any, Any]]" - # has no attribute "_ndarray" - return result._ndarray # type: ignore[union-attr] - return result + disallow_ndim_indexing(result) # NB: Using _constructor._simple_new would break if MultiIndex # didn't override __getitem__ @@ -5852,20 +5700,6 @@ def _should_fallback_to_positional(self) -> bool: """ return not self.holds_integer() - def _get_values_for_loc(self, series: Series, loc, key): - """ - Do a positional lookup on the given Series, returning either a scalar - or a Series. - - Assumes that `series.index is self` - - key is included for MultiIndex compat. 
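The `reindex` hunk above enforces GH#42568, so reindexing on a duplicated axis is now a hard error rather than a FutureWarning. For example:

```python
import pandas as pd

ser = pd.Series([1, 2, 3], index=["a", "a", "b"])

try:
    ser.reindex(["a", "b"])
except ValueError as err:
    print(err)  # cannot reindex on an axis with duplicate labels
```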
- """ - if is_integer(loc): - return series._values[loc] - - return series.iloc[loc] - _index_shared_docs[ "get_indexer_non_unique" ] = """ @@ -6858,6 +6692,7 @@ def _cmp_method(self, other, op): return result + @final def _construct_result(self, result, name): if isinstance(result, tuple): return ( @@ -7093,19 +6928,6 @@ def shape(self) -> Shape: # See GH#27775, GH#27384 for history/reasoning in how this is defined. return (len(self),) - @final - def _deprecated_arg(self, value, name: str_t, methodname: str_t) -> None: - """ - Issue a FutureWarning if the arg/kwarg is not no_default. - """ - if value is not no_default: - warnings.warn( - f"'{name}' argument in {methodname} is deprecated " - "and will be removed in a future version. Do not pass it.", - FutureWarning, - stacklevel=find_stack_level(), - ) - def ensure_index_from_sequences(sequences, names=None) -> Index: """ @@ -7258,14 +7080,6 @@ def maybe_extract_name(name, obj, cls) -> Hashable: return name -_cast_depr_msg = ( - "In a future version, passing an object-dtype arraylike to pd.Index will " - "not infer numeric values to numeric dtype (matching the Series behavior). " - "To retain the old behavior, explicitly pass the desired dtype or use the " - "desired Index subclass" -) - - def _maybe_cast_data_without_dtype( subarr: np.ndarray, cast_numeric_deprecated: bool = True ) -> ArrayLike: @@ -7296,13 +7110,8 @@ def _maybe_cast_data_without_dtype( if not cast_numeric_deprecated: # i.e. we started with a list, not an ndarray[object] return result + return subarr - warnings.warn( - "In a future version, the Index constructor will not infer numeric " - "dtypes when passed object-dtype sequences (matching Series behavior)", - FutureWarning, - stacklevel=find_stack_level(), - ) result = ensure_wrapped_if_datetimelike(result) return result diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 58b533cb576d9..287f94fb6b723 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -4,7 +4,6 @@ Any, Hashable, ) -import warnings import numpy as np @@ -18,7 +17,6 @@ cache_readonly, doc, ) -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( is_categorical_dtype, @@ -216,18 +214,8 @@ def __new__( name = maybe_extract_name(name, data, cls) - if data is None: - # GH#38944 - warnings.warn( - "Constructing a CategoricalIndex without passing data is " - "deprecated and will raise in a future version. " - "Use CategoricalIndex([], ...) 
instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - data = [] - if is_scalar(data): + # GH#38944 include None here, which pre-2.0 subbed in [] cls._raise_scalar_data_error(data) data = Categorical( @@ -377,7 +365,6 @@ def __contains__(self, key: Any) -> bool: return contains(self, key, container=self._engine) - # TODO(2.0): remove reindex once non-unique deprecation is enforced def reindex( self, target, method=None, level=None, limit=None, tolerance=None ) -> tuple[Index, npt.NDArray[np.intp] | None]: @@ -404,56 +391,7 @@ def reindex( raise NotImplementedError( "argument limit is not implemented for CategoricalIndex.reindex" ) - - target = ibase.ensure_index(target) - - if self.equals(target): - indexer = None - missing = np.array([], dtype=np.intp) - else: - indexer, missing = self.get_indexer_non_unique(target) - if not self.is_unique: - # GH#42568 - warnings.warn( - "reindexing with a non-unique Index is deprecated and will " - "raise in a future version.", - FutureWarning, - stacklevel=find_stack_level(), - ) - - new_target: Index - if len(self) and indexer is not None: - new_target = self.take(indexer) - else: - new_target = target - - # filling in missing if needed - if len(missing): - cats = self.categories.get_indexer(target) - - if not isinstance(target, CategoricalIndex) or (cats == -1).any(): - new_target, indexer, _ = super()._reindex_non_unique(target) - else: - # error: "Index" has no attribute "codes" - codes = new_target.codes.copy() # type: ignore[attr-defined] - codes[indexer == -1] = cats[missing] - cat = self._data._from_backing_data(codes) - new_target = type(self)._simple_new(cat, name=self.name) - - # we always want to return an Index type here - # to be consistent with .reindex for other index types (e.g. they don't - # coerce based on the actual values, only on the dtype) - # unless we had an initial Categorical to begin with - # in which case we are going to conform to the passed Categorical - if is_categorical_dtype(target): - cat = Categorical(new_target, dtype=target.dtype) - new_target = type(self)._simple_new(cat, name=self.name) - else: - # e.g. 
test_reindex_with_categoricalindex, test_reindex_duplicate_target - new_target_array = np.asarray(new_target) - new_target = Index._with_infer(new_target_array, name=self.name) - - return new_target, indexer + return super().reindex(target) # -------------------------------------------------------------------- # Indexing Methods diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 667deec23757f..f4eb67611b0e7 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -429,18 +429,6 @@ def _can_range_setop(self, other) -> bool: return False return super()._can_range_setop(other) - def _maybe_utc_convert(self, other: Index) -> tuple[DatetimeIndex, Index]: - this = self - - if isinstance(other, DatetimeIndex): - if (self.tz is None) ^ (other.tz is None): - raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") - - if not timezones.tz_compare(self.tz, other.tz): - this = self.tz_convert("UTC") - other = other.tz_convert("UTC") - return this, other - # -------------------------------------------------------------------- def _get_time_micros(self) -> npt.NDArray[np.int64]: @@ -1011,7 +999,7 @@ def date_range( "Deprecated argument `closed` cannot be passed" "if argument `inclusive` is not None" ) - elif closed is not lib.no_default: + if closed is not lib.no_default: warnings.warn( "Argument `closed` is deprecated in favor of `inclusive`.", FutureWarning, diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 8507280a6cc8d..4a24322d330f3 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -236,6 +236,11 @@ def __new__( _interval_shared_docs["from_breaks"] % { "klass": "IntervalIndex", + "name": textwrap.dedent( + """ + name : str, optional + Name of the resulting IntervalIndex.""" + ), "examples": textwrap.dedent( """\ Examples @@ -266,6 +271,11 @@ def from_breaks( _interval_shared_docs["from_arrays"] % { "klass": "IntervalIndex", + "name": textwrap.dedent( + """ + name : str, optional + Name of the resulting IntervalIndex.""" + ), "examples": textwrap.dedent( """\ Examples @@ -297,6 +307,11 @@ def from_arrays( _interval_shared_docs["from_tuples"] % { "klass": "IntervalIndex", + "name": textwrap.dedent( + """ + name : str, optional + Name of the resulting IntervalIndex.""" + ), "examples": textwrap.dedent( """\ Examples @@ -645,7 +660,7 @@ def get_loc( matches = mask.sum() if matches == 0: raise KeyError(key) - elif matches == 1: + if matches == 1: return mask.argmax() res = lib.maybe_booleans_to_slice(mask.view("u1")) @@ -771,7 +786,7 @@ def _convert_slice_indexer(self, key: slice, kind: str, is_frame: bool = False): msg = "label-based slicing with step!=1 is not supported for IntervalIndex" if kind == "loc": raise ValueError(msg) - elif kind == "getitem": + if kind == "getitem": if not is_valid_positional_slice(key): # i.e. 
this cannot be interpreted as a positional slice raise ValueError(msg) @@ -1060,7 +1075,7 @@ def interval_range( if not _is_valid_endpoint(start): raise ValueError(f"start must be numeric or datetime-like, got {start}") - elif not _is_valid_endpoint(end): + if not _is_valid_endpoint(end): raise ValueError(f"end must be numeric or datetime-like, got {end}") if is_float(periods): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index bb7a48946d4ca..f35f02de533e2 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -14,7 +14,6 @@ Sequence, Tuple, cast, - overload, ) import warnings @@ -50,7 +49,6 @@ from pandas.util._decorators import ( Appender, cache_readonly, - deprecate_nonkeyword_arguments, doc, ) from pandas.util._exceptions import find_stack_level @@ -205,7 +203,7 @@ def names_compat(meth: F) -> F: def new_meth(self_or_cls, *args, **kwargs): if "name" in kwargs and "names" in kwargs: raise TypeError("Can only provide one of `names` and `name`") - elif "name" in kwargs: + if "name" in kwargs: kwargs["names"] = kwargs.pop("name") return meth(self_or_cls, *args, **kwargs) @@ -483,7 +481,7 @@ def from_arrays( error_msg = "Input must be a list / sequence of array-likes." if not is_list_like(arrays): raise TypeError(error_msg) - elif is_iterator(arrays): + if is_iterator(arrays): arrays = list(arrays) # Check if elements of array are list-like @@ -554,7 +552,7 @@ def from_tuples( """ if not is_list_like(tuples): raise TypeError("Input must be a list / sequence of tuple-likes.") - elif is_iterator(tuples): + if is_iterator(tuples): tuples = list(tuples) tuples = cast(Collection[Tuple[Hashable, ...]], tuples) @@ -643,7 +641,7 @@ def from_product( if not is_list_like(iterables): raise TypeError("Input must be a list / sequence of iterables.") - elif is_iterator(iterables): + if is_iterator(iterables): iterables = list(iterables) codes, levels = factorize_from_iterables(iterables) @@ -734,23 +732,13 @@ def _values(self) -> np.ndarray: vals = cast("CategoricalIndex", vals) vals = vals._data._internal_get_values() - is_dti = isinstance(vals, ABCDatetimeIndex) - - if is_dti: - # TODO: this can be removed after Timestamp.freq is removed - # The astype(object) below does not remove the freq from - # the underlying Timestamps so we remove it here to match - # the behavior of self._get_level_values - vals = algos.take_nd(vals, codes, fill_value=index._na_value) - if isinstance(vals.dtype, ExtensionDtype) or isinstance( vals, (ABCDatetimeIndex, ABCTimedeltaIndex) ): vals = vals.astype(object) vals = np.array(vals, copy=False) - if not is_dti: - vals = algos.take_nd(vals, codes, fill_value=index._na_value) + vals = algos.take_nd(vals, codes, fill_value=index._na_value) values.append(vals) arr = lib.fast_zip(values) @@ -1500,7 +1488,7 @@ def _get_level_number(self, level) -> int: except ValueError as err: if not is_integer(level): raise KeyError(f"Level {level} not found") from err - elif level < 0: + if level < 0: level += self.nlevels if level < 0: orig_level = level - self.nlevels @@ -1739,17 +1727,6 @@ def to_frame( """ from pandas import DataFrame - if name is None: - warnings.warn( - "Explicitly passing `name=None` currently preserves the Index's name " - "or uses a default name of 0. 
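Note that `date_range`'s `closed` keyword (a few hunks up) is still only deprecated here, the edit merely tightens an `elif`; `inclusive` is the forward-compatible spelling. A quick illustration (dates arbitrary):

```python
import pandas as pd

# `closed="left"` still works but warns; `inclusive` does not
pd.date_range("2021-01-01", "2021-01-04", inclusive="left")
# DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03'], freq='D')
```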
This behaviour is deprecated, and in " - "the future `None` will be used as the name of the resulting " - "DataFrame column.", - FutureWarning, - stacklevel=find_stack_level(), - ) - name = lib.no_default - if name is not lib.no_default: if not is_list_like(name): raise TypeError("'name' must be a list / sequence of column names.") @@ -2032,7 +2009,7 @@ def __reduce__(self): def __getitem__(self, key): if is_scalar(key): - key = com.cast_scalar_indexer(key, warn_float=True) + key = com.cast_scalar_indexer(key) retval = [] for lev, level_codes in zip(self.levels, self.codes): @@ -2540,26 +2517,6 @@ def _should_fallback_to_positional(self) -> bool: # GH#33355 return self.levels[0]._should_fallback_to_positional - def _get_values_for_loc(self, series: Series, loc, key): - """ - Do a positional lookup on the given Series, returning either a scalar - or a Series. - - Assumes that `series.index is self` - """ - new_values = series._values[loc] - if is_scalar(loc): - return new_values - - if len(new_values) == 1 and not self.nlevels > 1: - # If more than one level left, we can not return a scalar - return new_values[0] - - new_index = self[loc] - new_index = maybe_droplevels(new_index, key) - new_ser = series._constructor(new_values, index=new_index, name=series.name) - return new_ser.__finalize__(series) - def _get_indexer_strict( self, key, axis_name: str ) -> tuple[Index, npt.NDArray[np.intp]]: @@ -3070,10 +3027,10 @@ def maybe_mi_droplevels(indexer, levels): ): # everything continue - else: - # e.g. test_xs_IndexSlice_argument_not_implemented - k_index = np.zeros(len(self), dtype=bool) - k_index[loc_level] = True + + # e.g. test_xs_IndexSlice_argument_not_implemented + k_index = np.zeros(len(self), dtype=bool) + k_index[loc_level] = True else: k_index = loc_level @@ -3611,7 +3568,6 @@ def _union(self, other, sort) -> MultiIndex: RuntimeWarning, stacklevel=find_stack_level(), ) - pass return result def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: @@ -3686,12 +3642,12 @@ def astype(self, dtype, copy: bool = True): if is_categorical_dtype(dtype): msg = "> 1 ndim Categorical are not supported at this time" raise NotImplementedError(msg) - elif not is_object_dtype(dtype): + if not is_object_dtype(dtype): raise TypeError( "Setting a MultiIndex dtype to anything other than object " "is not supported" ) - elif copy is True: + if copy is True: return self._view() return self @@ -3772,30 +3728,9 @@ def isin(self, values, level=None) -> npt.NDArray[np.bool_]: return np.zeros(len(levs), dtype=np.bool_) return levs.isin(values) - @overload - def set_names( - self, names, *, level=..., inplace: Literal[False] = ... - ) -> MultiIndex: - ... - - @overload - def set_names(self, names, *, level=..., inplace: Literal[True]) -> None: - ... - - @overload - def set_names(self, names, *, level=..., inplace: bool = ...) -> MultiIndex | None: - ... 
- - # error: Signature of "set_names" incompatible with supertype "Index" - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "names"]) - def set_names( # type: ignore[override] - self, names, level=None, inplace: bool = False - ) -> MultiIndex | None: - return super().set_names(names=names, level=level, inplace=inplace) - # error: Incompatible types in assignment (expression has type overloaded function, # base class "Index" defined the type as "Callable[[Index, Any, bool], Any]") - rename = set_names # type: ignore[assignment] + rename = Index.set_names # type: ignore[assignment] # --------------------------------------------------------------- # Arithmetic/Numeric Methods - Disabled diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 648dca5d1b58b..816a1752c5bf0 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -254,11 +254,11 @@ def _convert_tolerance(self, tolerance, target): f"tolerance argument for {type(self).__name__} must contain " "numeric elements if it is list type" ) - else: - raise ValueError( - f"tolerance argument for {type(self).__name__} must be numeric " - f"if it is a scalar: {repr(tolerance)}" - ) + + raise ValueError( + f"tolerance argument for {type(self).__name__} must be numeric " + f"if it is a scalar: {repr(tolerance)}" + ) return tolerance @classmethod diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 70b0ee4b1d354..b6fd298a2d41a 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -687,7 +687,7 @@ def _get_setitem_indexer(self, key): if isinstance(key, tuple): for x in key: - check_deprecated_indexers(x) + check_dict_or_set_indexers(x) if self.axis is not None: key = _tupleize_axis_indexer(self.ndim, self.axis, key) @@ -813,7 +813,7 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None: @final def __setitem__(self, key, value) -> None: - check_deprecated_indexers(key) + check_dict_or_set_indexers(key) if isinstance(key, tuple): key = tuple(list(x) if is_iterator(x) else x for x in key) key = tuple(com.apply_if_callable(x, self.obj) for x in key) @@ -1004,7 +1004,7 @@ def _getitem_nested_tuple(self, tup: tuple): # we should be able to match up the dimensionality here for key in tup: - check_deprecated_indexers(key) + check_dict_or_set_indexers(key) # we have too many indexers for our dim, but have at least 1 # multi-index dimension, try to see if we have something like @@ -1062,7 +1062,7 @@ def _convert_to_indexer(self, key, axis: AxisInt): @final def __getitem__(self, key): - check_deprecated_indexers(key) + check_dict_or_set_indexers(key) if type(key) is tuple: key = tuple(list(x) if is_iterator(x) else x for x in key) key = tuple(com.apply_if_callable(x, self.obj) for x in key) @@ -1499,12 +1499,9 @@ def _has_valid_setitem_indexer(self, indexer) -> bool: raise IndexError("iloc cannot enlarge its target object") if isinstance(indexer, ABCDataFrame): - warnings.warn( - "DataFrame indexer for .iloc is deprecated and will be removed in " - "a future version.\n" - "consider using .loc with a DataFrame indexer for automatic alignment.", - FutureWarning, - stacklevel=find_stack_level(), + raise TypeError( + "DataFrame indexer for .iloc is not supported. 
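The rename from `check_deprecated_indexers` to `check_dict_or_set_indexers` above reflects the enforcement: all three indexing entry points now raise for set or dict indexers instead of warning. Roughly:

```python
import pandas as pd

ser = pd.Series([10, 20, 30])

try:
    ser[{0, 2}]          # TypeError: sets are not valid indexers anymore
except TypeError:
    result = ser[[0, 2]]  # pass a list instead
```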
" + "Consider using .loc with a DataFrame indexer for automatic alignment.", ) if not isinstance(indexer, tuple): @@ -2493,40 +2490,6 @@ def _tupleize_axis_indexer(ndim: int, axis: AxisInt, key) -> tuple: return tuple(new_key) -def convert_to_index_sliceable(obj: DataFrame, key): - """ - If we are index sliceable, then return my slicer, otherwise return None. - """ - idx = obj.index - if isinstance(key, slice): - return idx._convert_slice_indexer(key, kind="getitem", is_frame=True) - - elif isinstance(key, str): - - # we are an actual column - if key in obj.columns: - return None - - # We might have a datetimelike string that we can translate to a - # slice here via partial string indexing - if idx._supports_partial_string_indexing: - try: - res = idx._get_string_slice(str(key)) - warnings.warn( - "Indexing a DataFrame with a datetimelike index using a single " - "string to slice the rows, like `frame[string]`, is deprecated " - "and will be removed in a future version. Use `frame.loc[string]` " - "instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - return res - except (KeyError, ValueError, NotImplementedError): - return None - - return None - - def check_bool_indexer(index: Index, key) -> np.ndarray: """ Check if key is a valid boolean indexer for an object with such index and @@ -2661,27 +2624,24 @@ def need_slice(obj: slice) -> bool: ) -def check_deprecated_indexers(key) -> None: - """Checks if the key is a deprecated indexer.""" +def check_dict_or_set_indexers(key) -> None: + """ + Check if the indexer is or contains a dict or set, which is no longer allowed. + """ if ( isinstance(key, set) or isinstance(key, tuple) and any(isinstance(x, set) for x in key) ): - warnings.warn( - "Passing a set as an indexer is deprecated and will raise in " - "a future version. Use a list instead.", - FutureWarning, - stacklevel=find_stack_level(), + raise TypeError( + "Passing a set as an indexer is not supported. Use a list instead." ) + if ( isinstance(key, dict) or isinstance(key, tuple) and any(isinstance(x, dict) for x in key) ): - warnings.warn( - "Passing a dict as an indexer is deprecated and will raise in " - "a future version. Use a list instead.", - FutureWarning, - stacklevel=find_stack_level(), + raise TypeError( + "Passing a dict as an indexer is not supported. Use a list instead." ) diff --git a/pandas/core/interchange/dataframe_protocol.py b/pandas/core/interchange/dataframe_protocol.py index 3ab87d9a60399..4ec905eef8646 100644 --- a/pandas/core/interchange/dataframe_protocol.py +++ b/pandas/core/interchange/dataframe_protocol.py @@ -134,7 +134,6 @@ def bufsize(self) -> int: """ Buffer size in bytes. """ - pass @property @abstractmethod @@ -142,7 +141,6 @@ def ptr(self) -> int: """ Pointer to start of the buffer as an integer. """ - pass @abstractmethod def __dlpack__(self): @@ -166,7 +164,6 @@ def __dlpack_device__(self) -> tuple[DlpackDeviceType, int | None]: Uses device type codes matching DLPack. Note: must be implemented even if ``__dlpack__`` is not. """ - pass class Column(ABC): @@ -222,7 +219,6 @@ def size(self) -> int: Corresponds to DataFrame.num_rows() if column is a single chunk; equal to size of this current chunk otherwise. """ - pass @property @abstractmethod @@ -234,7 +230,6 @@ def offset(self) -> int: equal size M (only the last chunk may be shorter), ``offset = n * M``, ``n = 0 .. N-1``. 
""" - pass @property @abstractmethod @@ -266,7 +261,6 @@ def dtype(self) -> tuple[DtypeKind, int, str, str]: - Data types not included: complex, Arrow-style null, binary, decimal, and nested (list, struct, map, union) dtypes. """ - pass @property @abstractmethod @@ -289,7 +283,6 @@ def describe_categorical(self) -> CategoricalDescription: TBD: are there any other in-memory representations that are needed? """ - pass @property @abstractmethod @@ -302,7 +295,6 @@ def describe_null(self) -> tuple[ColumnNullType, Any]: mask or a byte mask, the value (0 or 1) indicating a missing value. None otherwise. """ - pass @property @abstractmethod @@ -312,7 +304,6 @@ def null_count(self) -> int | None: Note: Arrow uses -1 to indicate "unknown", but None seems cleaner. """ - pass @property @abstractmethod @@ -320,14 +311,12 @@ def metadata(self) -> dict[str, Any]: """ The metadata for the column. See `DataFrame.metadata` for more details. """ - pass @abstractmethod def num_chunks(self) -> int: """ Return the number of chunks the column consists of. """ - pass @abstractmethod def get_chunks(self, n_chunks: int | None = None) -> Iterable[Column]: @@ -336,7 +325,6 @@ def get_chunks(self, n_chunks: int | None = None) -> Iterable[Column]: See `DataFrame.get_chunks` for details on ``n_chunks``. """ - pass @abstractmethod def get_buffers(self) -> ColumnBuffers: @@ -360,7 +348,6 @@ def get_buffers(self) -> ColumnBuffers: if the data buffer does not have an associated offsets buffer. """ - pass # def get_children(self) -> Iterable[Column]: @@ -391,7 +378,6 @@ class DataFrame(ABC): @abstractmethod def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True): """Construct a new interchange object, potentially changing the parameters.""" - pass @property @abstractmethod @@ -405,14 +391,12 @@ def metadata(self) -> dict[str, Any]: entries, please add name the keys with the name of the library followed by a period and the desired name, e.g, ``pandas.indexcol``. """ - pass @abstractmethod def num_columns(self) -> int: """ Return the number of columns in the DataFrame. """ - pass @abstractmethod def num_rows(self) -> int | None: @@ -422,56 +406,48 @@ def num_rows(self) -> int | None: """ Return the number of rows in the DataFrame, if available. """ - pass @abstractmethod def num_chunks(self) -> int: """ Return the number of chunks the DataFrame consists of. """ - pass @abstractmethod def column_names(self) -> Iterable[str]: """ Return an iterator yielding the column names. """ - pass @abstractmethod def get_column(self, i: int) -> Column: """ Return the column at the indicated position. """ - pass @abstractmethod def get_column_by_name(self, name: str) -> Column: """ Return the column whose name is the indicated name. """ - pass @abstractmethod def get_columns(self) -> Iterable[Column]: """ Return an iterator yielding the columns. """ - pass @abstractmethod def select_columns(self, indices: Sequence[int]) -> DataFrame: """ Create a new DataFrame by selecting a subset of columns by index. """ - pass @abstractmethod def select_columns_by_name(self, names: Sequence[str]) -> DataFrame: """ Create a new DataFrame by selecting a subset of columns by name. """ - pass @abstractmethod def get_chunks(self, n_chunks: int | None = None) -> Iterable[DataFrame]: @@ -483,4 +459,3 @@ def get_chunks(self, n_chunks: int | None = None) -> Iterable[DataFrame]: ``self.num_chunks()``, meaning the producer must subdivide each chunk before yielding it. 
""" - pass diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index ea69b567611e4..0797e62de7a9f 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -38,22 +38,3 @@ # this is preserved here for downstream compatibility (GH-33892) "create_block_manager_from_blocks", ] - - -def __getattr__(name: str): - import warnings - - from pandas.util._exceptions import find_stack_level - - if name == "CategoricalBlock": - warnings.warn( - "CategoricalBlock is deprecated and will be removed in a future version. " - "Use ExtensionBlock instead.", - DeprecationWarning, - stacklevel=find_stack_level(), - ) - from pandas.core.internals.blocks import CategoricalBlock - - return CategoricalBlock - - raise AttributeError(f"module 'pandas.core.internals' has no attribute '{name}'") diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 4aa16257b0802..f6e50d658a580 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -52,7 +52,6 @@ ABCDataFrame, ABCSeries, ) -from pandas.core.dtypes.inference import is_inferred_bool_dtype from pandas.core.dtypes.missing import ( array_equals, isna, @@ -488,7 +487,7 @@ def get_bool_data(self: T, copy: bool = False) -> T: copy : bool, default False Whether to copy the blocks """ - return self._get_data_subset(is_inferred_bool_dtype) + return self._get_data_subset(lambda x: x.dtype == np.dtype(bool)) def get_numeric_data(self: T, copy: bool = False) -> T: """ diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 83c1ca0084724..66071ea4edf52 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -11,7 +11,6 @@ cast, final, ) -import warnings import numpy as np @@ -36,7 +35,6 @@ ) from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly -from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.astype import astype_array_safe @@ -59,7 +57,6 @@ is_string_dtype, ) from pandas.core.dtypes.dtypes import ( - CategoricalDtype, ExtensionDtype, PandasDtype, PeriodDtype, @@ -70,13 +67,13 @@ ABCPandasArray, ABCSeries, ) -from pandas.core.dtypes.inference import is_inferred_bool_dtype from pandas.core.dtypes.missing import ( is_valid_na_for_dtype, isna, na_value_for_dtype, ) +from pandas.core import missing import pandas.core.algorithms as algos from pandas.core.array_algos.putmask import ( extract_bool_array, @@ -104,13 +101,12 @@ from pandas.core.arrays.sparse import SparseDtype from pandas.core.base import PandasObject import pandas.core.common as com -import pandas.core.computation.expressions as expressions +from pandas.core.computation import expressions from pandas.core.construction import ( ensure_wrapped_if_datetimelike, extract_array, ) from pandas.core.indexers import check_setitem_lengths -import pandas.core.missing as missing if TYPE_CHECKING: from pandas import ( @@ -176,25 +172,13 @@ def _can_hold_na(self) -> bool: return dtype.kind not in ["b", "i", "u"] return dtype._can_hold_na - @final - @cache_readonly - def is_categorical(self) -> bool: - warnings.warn( - "Block.is_categorical is deprecated and will be removed in a " - "future version. Use isinstance(block.values, Categorical) " - "instead. 
See https://github.com/pandas-dev/pandas/issues/40226", - DeprecationWarning, - stacklevel=find_stack_level(), - ) - return isinstance(self.values, Categorical) - @final @property def is_bool(self) -> bool: """ We can be bool if a) we are bool dtype or b) object dtype with bool objects. """ - return is_inferred_bool_dtype(self.values) + return self.values.dtype == np.dtype(bool) @final def external_values(self): @@ -241,24 +225,11 @@ def make_block_same_class( self, values, placement: BlockPlacement | None = None ) -> Block: """Wrap given values in a block of same type as self.""" + # Pre-2.0 we called ensure_wrapped_if_datetimelike because fastparquet + # relied on it, as of 2.0 the caller is responsible for this. if placement is None: placement = self._mgr_locs - if values.dtype.kind in ["m", "M"]: - - new_values = ensure_wrapped_if_datetimelike(values) - if new_values is not values: - # TODO(2.0): remove once fastparquet has stopped relying on it - warnings.warn( - "In a future version, Block.make_block_same_class will " - "assume that datetime64 and timedelta64 ndarrays have " - "already been cast to DatetimeArray and TimedeltaArray, " - "respectively.", - DeprecationWarning, - stacklevel=find_stack_level(), - ) - values = new_values - # We assume maybe_coerce_values has already been called return type(self)(values, placement=placement, ndim=self.ndim) @@ -572,7 +543,6 @@ def replace( # Note: the checks we do in NDFrame.replace ensure we never get # here with listlike to_replace or value, as those cases # go through replace_list - values = self.values if isinstance(values, Categorical): @@ -611,7 +581,10 @@ def replace( return blocks elif self.ndim == 1 or self.shape[0] == 1: - blk = self.coerce_to_target_dtype(value) + if value is None: + blk = self.astype(np.dtype(object)) + else: + blk = self.coerce_to_target_dtype(value) return blk.replace( to_replace=to_replace, value=value, @@ -1570,35 +1543,6 @@ def putmask(self, mask, new) -> list[Block]: return [self] - def fillna( - self, value, limit: int | None = None, inplace: bool = False, downcast=None - ) -> list[Block]: - # Caller is responsible for validating limit; if int it is strictly positive - - if self.dtype.kind == "m": - try: - res_values = self.values.fillna(value, limit=limit) - except (ValueError, TypeError): - # GH#45746 - warnings.warn( - "The behavior of fillna with timedelta64[ns] dtype and " - f"an incompatible value ({type(value)}) is deprecated. " - "In a future version, this will cast to a common dtype " - "(usually object) instead of raising, matching the " - "behavior of other dtypes.", - FutureWarning, - stacklevel=find_stack_level(), - ) - raise - else: - res_blk = self.make_block(res_values) - return [res_blk] - - # TODO: since this now dispatches to super, which in turn dispatches - # to putmask, it may *actually* respect 'inplace=True'. If so, add - # tests for this. - return super().fillna(value, limit=limit, inplace=inplace, downcast=downcast) - def delete(self, loc) -> Block: # This will be unnecessary if/when __array_function__ is implemented values = self.values.delete(loc) @@ -1648,7 +1592,7 @@ class ExtensionBlock(libinternals.Block, EABackedBlock): Notes ----- This holds all 3rd-party extension array types. It's also the immediate - parent class for our internal extension types' blocks, CategoricalBlock. + parent class for our internal extension types' blocks. ExtensionArrays are limited to 1-D. 
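`Block.is_bool` (like `ArrayManager.get_bool_data` earlier) now tests the dtype itself rather than inferring over object arrays of booleans. A presumed user-visible consequence via `bool_only` reductions; this is a sketch of the expected behavior, not verified against this exact build:

```python
import numpy as np
import pandas as pd

df = pd.DataFrame(
    {
        "a": np.array([True, False]),                # bool dtype
        "b": np.array([True, False], dtype=object),  # object dtype holding bools
    }
)

# presumably only the true bool-dtype column participates now
df.all(bool_only=True)
```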
""" @@ -1680,7 +1624,7 @@ def iget(self, i: int | tuple[int, int] | tuple[slice, int]): col, loc = i if not com.is_null_slice(col) and col != 0: raise IndexError(f"{self} only contains one item") - elif isinstance(col, slice): + if isinstance(col, slice): # the is_null_slice check above assures that col is slice(None) # so what we want is a view on all our columns and row loc if loc < 0: @@ -2065,17 +2009,6 @@ def convert( return [self.make_block(res_values)] -class CategoricalBlock(ExtensionBlock): - # this Block type is kept for backwards-compatibility - __slots__ = () - - # GH#43232, GH#43334 self.values.dtype can be changed inplace until 2.0, - # so this cannot be cached - @property - def dtype(self) -> DtypeObj: - return self.values.dtype - - # ----------------------------------------------------------------- # Constructor Helpers @@ -2131,8 +2064,6 @@ def get_block_type(dtype: DtypeObj): if isinstance(dtype, SparseDtype): # Need this first(ish) so that Sparse[datetime] is sparse cls = ExtensionBlock - elif isinstance(dtype, CategoricalDtype): - cls = CategoricalBlock elif vtype is Timestamp: cls = DatetimeTZBlock elif isinstance(dtype, PeriodDtype): @@ -2200,7 +2131,7 @@ def check_ndim(values, placement: BlockPlacement, ndim: int) -> None: f"values.ndim > ndim [{values.ndim} > {ndim}]" ) - elif not is_1d_only_ea_dtype(values.dtype): + if not is_1d_only_ea_dtype(values.dtype): # TODO(EA2D): special case not needed with 2D EAs if values.ndim != ndim: raise ValueError( @@ -2373,7 +2304,7 @@ def external_values(values: ArrayLike) -> ArrayLike: elif isinstance(values, (DatetimeArray, TimedeltaArray)): # NB: for datetime64tz this is different from np.asarray(values), since # that returns an object-dtype ndarray of Timestamps. - # Avoid FutureWarning in .astype in casting from dt64tz to dt64 + # Avoid raising in .astype in casting from dt64tz to dt64 return values._data else: return values diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index c8ad7dd328edf..0592db8ad608d 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -24,6 +24,7 @@ ) from pandas.util._decorators import cache_readonly +from pandas.core.dtypes.astype import astype_array from pandas.core.dtypes.cast import ( ensure_dtype_can_hold_na, find_common_type, @@ -34,10 +35,7 @@ is_scalar, needs_i8_conversion, ) -from pandas.core.dtypes.concat import ( - cast_to_common_type, - concat_compat, -) +from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.dtypes import ( DatetimeTZDtype, ExtensionDtype, @@ -153,7 +151,7 @@ def concat_arrays(to_concat: list) -> ArrayLike: to_concat = [ arr.to_array(target_dtype) if isinstance(arr, NullArrayProxy) - else cast_to_common_type(arr, target_dtype) + else astype_array(arr, target_dtype, copy=False) for arr in to_concat ] diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index c1d0ab730fe7e..f7c8a0b91c3f4 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -15,13 +15,14 @@ import warnings import numpy as np -import numpy.ma as ma +from numpy import ma from pandas._libs import lib from pandas._typing import ( ArrayLike, DtypeObj, Manager, + npt, ) from pandas.util._exceptions import find_stack_level @@ -43,6 +44,7 @@ is_named_tuple, is_object_dtype, ) +from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.generic import ( ABCDataFrame, ABCSeries, @@ -330,14 +332,11 @@ def ndarray_to_mgr( if dtype is not 
None and not is_dtype_equal(values.dtype, dtype): # GH#40110 see similar check inside sanitize_array - rcf = not (is_integer_dtype(dtype) and values.dtype.kind == "f") - values = sanitize_array( values, None, dtype=dtype, copy=copy_on_sanitize, - raise_cast_failure=rcf, allow_2d=True, ) @@ -602,7 +601,7 @@ def _homogenize(data, index: Index, dtype: DtypeObj | None) -> list[ArrayLike]: else: if isinstance(val, dict): # GH#41785 this _should_ be equivalent to (but faster than) - # val = create_series_with_explicit_dtype(val, index=index)._values + # val = Series(val, index=index)._values if oindex is None: oindex = index.astype("O") @@ -614,9 +613,7 @@ def _homogenize(data, index: Index, dtype: DtypeObj | None) -> list[ArrayLike]: val = dict(val) val = lib.fast_multiget(val, oindex._values, default=np.nan) - val = sanitize_array( - val, index, dtype=dtype, copy=False, raise_cast_failure=False - ) + val = sanitize_array(val, index, dtype=dtype, copy=False) com.require_length_match(val, index) homogenized.append(val) @@ -655,7 +652,7 @@ def _extract_index(data) -> Index: if not indexes and not raw_lengths: raise ValueError("If using all scalar values, you must pass an index") - elif have_series: + if have_series: index = union_indexes(indexes) elif have_dicts: index = union_indexes(indexes, sort=False) @@ -1018,7 +1015,7 @@ def _validate_or_indexify_columns( f"{len(columns)} columns passed, passed data had " f"{len(content)} columns" ) - elif is_mi_list: + if is_mi_list: # check if nested list column, length of each sub-list should be equal if len({len(col) for col in columns}) > 1: @@ -1027,7 +1024,7 @@ def _validate_or_indexify_columns( ) # if columns is not empty and length of sublist is not equal to content - elif columns and len(columns[0]) != len(content): + if columns and len(columns[0]) != len(content): raise ValueError( f"{len(columns[0])} columns passed, passed data had " f"{len(content)} columns" @@ -1036,7 +1033,7 @@ def _validate_or_indexify_columns( def _convert_object_array( - content: list[np.ndarray], dtype: DtypeObj | None + content: list[npt.NDArray[np.object_]], dtype: DtypeObj | None ) -> list[ArrayLike]: """ Internal function to convert object array. @@ -1054,7 +1051,16 @@ def _convert_object_array( def convert(arr): if dtype != np.dtype("O"): arr = lib.maybe_convert_objects(arr) - arr = maybe_cast_to_datetime(arr, dtype) + + if isinstance(dtype, ExtensionDtype): + # TODO: test(s) that get here + # TODO: try to de-duplicate this convert function with + # core.construction functions + cls = dtype.construct_array_type() + arr = cls._from_sequence(arr, dtype=dtype, copy=False) + else: + arr = maybe_cast_to_datetime(arr, dtype) + return arr arrays = [convert(arr) for arr in content] diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index f21c02a7823ae..91bb3a128ae27 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -57,6 +57,7 @@ import pandas.core.algorithms as algos from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.arrays.sparse import SparseDtype +import pandas.core.common as com from pandas.core.construction import ( ensure_wrapped_if_datetimelike, extract_array, @@ -74,7 +75,6 @@ ) from pandas.core.internals.blocks import ( Block, - DatetimeTZBlock, NumpyBlock, ensure_block_shape, extend_blocks, @@ -148,6 +148,7 @@ class BaseBlockManager(DataManager): blocks: tuple[Block, ...] 
axes: list[Index] refs: list[weakref.ref | None] | None + parent: object @property def ndim(self) -> int: @@ -165,6 +166,7 @@ def from_blocks( blocks: list[Block], axes: list[Index], refs: list[weakref.ref | None] | None = None, + parent: object = None, ) -> T: raise NotImplementedError @@ -264,6 +266,8 @@ def _clear_reference_block(self, blkno: int) -> None: """ if self.refs is not None: self.refs[blkno] = None + if com.all_none(*self.refs): + self.parent = None def get_dtypes(self): dtypes = np.array([blk.dtype for blk in self.blocks]) @@ -605,7 +609,9 @@ def _combine( axes[-1] = index axes[0] = self.items.take(indexer) - return type(self).from_blocks(new_blocks, axes, new_refs) + return type(self).from_blocks( + new_blocks, axes, new_refs, parent=None if copy else self + ) @property def nblocks(self) -> int: @@ -648,11 +654,14 @@ def copy_func(ax): new_refs: list[weakref.ref | None] | None if deep: new_refs = None + parent = None else: new_refs = [weakref.ref(blk) for blk in self.blocks] + parent = self res.axes = new_axes res.refs = new_refs + res.parent = parent if self.ndim > 1: # Avoid needing to re-compute these @@ -744,6 +753,7 @@ def reindex_indexer( only_slice=only_slice, use_na_proxy=use_na_proxy, ) + parent = None if com.all_none(*new_refs) else self else: new_blocks = [ blk.take_nd( @@ -756,11 +766,12 @@ def reindex_indexer( for blk in self.blocks ] new_refs = None + parent = None new_axes = list(self.axes) new_axes[axis] = new_axis - new_mgr = type(self).from_blocks(new_blocks, new_axes, new_refs) + new_mgr = type(self).from_blocks(new_blocks, new_axes, new_refs, parent=parent) if axis == 1: # We can avoid the need to rebuild these new_mgr._blknos = self.blknos.copy() @@ -995,6 +1006,7 @@ def __init__( blocks: Sequence[Block], axes: Sequence[Index], refs: list[weakref.ref | None] | None = None, + parent: object = None, verify_integrity: bool = True, ) -> None: @@ -1008,27 +1020,9 @@ def __init__( f"Number of Block dimensions ({block.ndim}) must equal " f"number of axes ({self.ndim})" ) - if isinstance(block, DatetimeTZBlock) and block.values.ndim == 1: - # TODO(2.0): remove once fastparquet no longer needs this - warnings.warn( - "In a future version, the BlockManager constructor " - "will assume that a DatetimeTZBlock with block.ndim==2 " - "has block.values.ndim == 2.", - DeprecationWarning, - stacklevel=find_stack_level(), - ) - - # error: Incompatible types in assignment (expression has type - # "Union[ExtensionArray, ndarray]", variable has type - # "DatetimeArray") - block.values = ensure_block_shape( # type: ignore[assignment] - block.values, self.ndim - ) - try: - block._cache.clear() - except AttributeError: - # _cache not initialized - pass + # As of 2.0, the caller is responsible for ensuring that + # DatetimeTZBlock with block.ndim == 2 has block.values.ndim ==2; + # previously there was a special check for fastparquet compat. self._verify_integrity() @@ -1059,11 +1053,13 @@ def from_blocks( blocks: list[Block], axes: list[Index], refs: list[weakref.ref | None] | None = None, + parent: object = None, ) -> BlockManager: """ Constructor for BlockManager and SingleBlockManager with same signature. 
""" - return cls(blocks, axes, refs, verify_integrity=False) + parent = parent if _using_copy_on_write() else None + return cls(blocks, axes, refs, parent, verify_integrity=False) # ---------------------------------------------------------------- # Indexing @@ -1085,7 +1081,7 @@ def fast_xs(self, loc: int) -> SingleBlockManager: block = new_block(result, placement=slice(0, len(result)), ndim=1) # in the case of a single block, the new block is a view ref = weakref.ref(self.blocks[0]) - return SingleBlockManager(block, self.axes[0], [ref]) + return SingleBlockManager(block, self.axes[0], [ref], parent=self) dtype = interleaved_dtype([blk.dtype for blk in self.blocks]) @@ -1119,7 +1115,7 @@ def fast_xs(self, loc: int) -> SingleBlockManager: block = new_block(result, placement=slice(0, len(result)), ndim=1) return SingleBlockManager(block, self.axes[0]) - def iget(self, i: int) -> SingleBlockManager: + def iget(self, i: int, track_ref: bool = True) -> SingleBlockManager: """ Return the data as a SingleBlockManager. """ @@ -1129,7 +1125,9 @@ def iget(self, i: int) -> SingleBlockManager: # shortcut for select a single-dim from a 2-dim BM bp = BlockPlacement(slice(0, len(values))) nb = type(block)(values, placement=bp, ndim=1) - return SingleBlockManager(nb, self.axes[1], [weakref.ref(block)]) + ref = weakref.ref(block) if track_ref else None + parent = self if track_ref else None + return SingleBlockManager(nb, self.axes[1], [ref], parent) def iget_values(self, i: int) -> ArrayLike: """ @@ -1371,7 +1369,9 @@ def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None self.blocks = tuple(blocks) self._clear_reference_block(blkno) - col_mgr = self.iget(loc) + # this manager is only created temporarily to mutate the values in place + # so don't track references, otherwise the `setitem` would perform CoW again + col_mgr = self.iget(loc, track_ref=False) new_mgr = col_mgr.setitem((idx,), value) self.iset(loc, new_mgr._block.values, inplace=True) @@ -1469,7 +1469,9 @@ def idelete(self, indexer) -> BlockManager: nbs, new_refs = self._slice_take_blocks_ax0(taker, only_slice=True) new_columns = self.items[~is_deleted] axes = [new_columns, self.axes[1]] - return type(self)(tuple(nbs), axes, new_refs, verify_integrity=False) + # TODO this might not be needed (can a delete ever be done in chained manner?) + parent = None if com.all_none(*new_refs) else self + return type(self)(tuple(nbs), axes, new_refs, parent, verify_integrity=False) # ---------------------------------------------------------------- # Block-wise Operation @@ -1875,24 +1877,17 @@ def __init__( block: Block, axis: Index, refs: list[weakref.ref | None] | None = None, + parent: object = None, verify_integrity: bool = False, - fastpath=lib.no_default, ) -> None: # Assertions disabled for performance # assert isinstance(block, Block), type(block) # assert isinstance(axis, Index), type(axis) - if fastpath is not lib.no_default: - warnings.warn( - "The `fastpath` keyword is deprecated and will be removed " - "in a future version.", - FutureWarning, - stacklevel=find_stack_level(), - ) - self.axes = [axis] self.blocks = (block,) self.refs = refs + self.parent = parent if _using_copy_on_write() else None @classmethod def from_blocks( @@ -1900,6 +1895,7 @@ def from_blocks( blocks: list[Block], axes: list[Index], refs: list[weakref.ref | None] | None = None, + parent: object = None, ) -> SingleBlockManager: """ Constructor for BlockManager and SingleBlockManager with same signature. 
@@ -1908,7 +1904,7 @@ def from_blocks( assert len(axes) == 1 if refs is not None: assert len(refs) == 1 - return cls(blocks[0], axes[0], refs, verify_integrity=False) + return cls(blocks[0], axes[0], refs, parent, verify_integrity=False) @classmethod def from_array(cls, array: ArrayLike, index: Index) -> SingleBlockManager: @@ -1928,7 +1924,10 @@ def to_2d_mgr(self, columns: Index) -> BlockManager: new_blk = type(blk)(arr, placement=bp, ndim=2) axes = [columns, self.axes[0]] refs: list[weakref.ref | None] = [weakref.ref(blk)] - return BlockManager([new_blk], axes=axes, refs=refs, verify_integrity=False) + parent = self if _using_copy_on_write() else None + return BlockManager( + [new_blk], axes=axes, refs=refs, parent=parent, verify_integrity=False + ) def _has_no_reference(self, i: int = 0) -> bool: """ @@ -2010,7 +2009,7 @@ def getitem_mgr(self, indexer: slice | npt.NDArray[np.bool_]) -> SingleBlockMana new_idx = self.index[indexer] # TODO(CoW) in theory only need to track reference if new_array is a view ref = weakref.ref(blk) - return type(self)(block, new_idx, [ref]) + return type(self)(block, new_idx, [ref], parent=self) def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleBlockManager: # Assertion disabled for performance @@ -2023,7 +2022,9 @@ def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleBlockManager: bp = BlockPlacement(slice(0, len(array))) block = type(blk)(array, placement=bp, ndim=1) new_index = self.index._getitem_slice(slobj) - return type(self)(block, new_index, [weakref.ref(blk)]) + # TODO this method is only used in groupby SeriesSplitter at the moment, + # so passing refs / parent is not yet covered by the tests + return type(self)(block, new_index, [weakref.ref(blk)], parent=self) @property def index(self) -> Index: @@ -2070,6 +2071,7 @@ def setitem_inplace(self, indexer, value) -> None: if _using_copy_on_write() and not self._has_no_reference(0): self.blocks = (self._block.copy(),) self.refs = None + self.parent = None self._cache.clear() super().setitem_inplace(indexer, value) @@ -2086,6 +2088,7 @@ def idelete(self, indexer) -> SingleBlockManager: self._cache.clear() # clear reference since delete always results in a new array self.refs = None + self.parent = None return self def fast_xs(self, loc): diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 6ad8403c62720..0d058ead9d22c 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -258,7 +258,6 @@ def interpolate_array_2d( fill_value=fill_value, **kwargs, ) - return def _interpolate_2d_with_fill( @@ -341,7 +340,6 @@ def func(yvalues: np.ndarray) -> None: # Sequence[Sequence[Sequence[_SupportsArray[dtype[]]]]], # Sequence[Sequence[Sequence[Sequence[_SupportsArray[dtype[]]]]]]]]" np.apply_along_axis(func, axis, data) # type: ignore[arg-type] - return def _index_to_interp_indices(index: Index, method: str) -> np.ndarray: @@ -762,8 +760,6 @@ def _interpolate_with_limit_area( values[invalid] = np.nan - return - def interpolate_2d( values: np.ndarray, diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 326e6c4251152..50ebf5c2032be 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -519,12 +519,12 @@ def nanany( Examples -------- - >>> import pandas.core.nanops as nanops + >>> from pandas.core import nanops >>> s = pd.Series([1, 2]) >>> nanops.nanany(s) True - >>> import pandas.core.nanops as nanops + >>> from pandas.core import nanops >>> s = pd.Series([np.nan]) >>> nanops.nanany(s) False @@ -565,12 +565,12 @@ def nanall( Examples -------- - 
>>> import pandas.core.nanops as nanops + >>> from pandas.core import nanops >>> s = pd.Series([1, 2, np.nan]) >>> nanops.nanall(s) True - >>> import pandas.core.nanops as nanops + >>> from pandas.core import nanops >>> s = pd.Series([1, 0]) >>> nanops.nanall(s) False @@ -616,7 +616,7 @@ def nansum( Examples -------- - >>> import pandas.core.nanops as nanops + >>> from pandas.core import nanops >>> s = pd.Series([1, 2, np.nan]) >>> nanops.nansum(s) 3.0 @@ -684,7 +684,7 @@ def nanmean( Examples -------- - >>> import pandas.core.nanops as nanops + >>> from pandas.core import nanops >>> s = pd.Series([1, 2, np.nan]) >>> nanops.nanmean(s) 1.5 @@ -740,7 +740,7 @@ def nanmedian(values, *, axis: AxisInt | None = None, skipna: bool = True, mask= Examples -------- - >>> import pandas.core.nanops as nanops + >>> from pandas.core import nanops >>> s = pd.Series([1, np.nan, 2, 2]) >>> nanops.nanmedian(s) 2.0 @@ -901,7 +901,7 @@ def nanstd( Examples -------- - >>> import pandas.core.nanops as nanops + >>> from pandas.core import nanops >>> s = pd.Series([1, np.nan, 2, 3]) >>> nanops.nanstd(s) 1.0 @@ -948,7 +948,7 @@ def nanvar( Examples -------- - >>> import pandas.core.nanops as nanops + >>> from pandas.core import nanops >>> s = pd.Series([1, np.nan, 2, 3]) >>> nanops.nanvar(s) 1.0 @@ -1023,7 +1023,7 @@ def nansem( Examples -------- - >>> import pandas.core.nanops as nanops + >>> from pandas.core import nanops >>> s = pd.Series([1, np.nan, 2, 3]) >>> nanops.nansem(s) 0.5773502691896258 @@ -1100,7 +1100,7 @@ def nanargmax( Examples -------- - >>> import pandas.core.nanops as nanops + >>> from pandas.core import nanops >>> arr = np.array([1, 2, 3, np.nan, 4]) >>> nanops.nanargmax(arr) 4 @@ -1146,7 +1146,7 @@ def nanargmin( Examples -------- - >>> import pandas.core.nanops as nanops + >>> from pandas.core import nanops >>> arr = np.array([1, 2, 3, np.nan, 4]) >>> nanops.nanargmin(arr) 0 @@ -1200,7 +1200,7 @@ def nanskew( Examples -------- - >>> import pandas.core.nanops as nanops + >>> from pandas.core import nanops >>> s = pd.Series([1, np.nan, 1, 2]) >>> nanops.nanskew(s) 1.7320508075688787 @@ -1288,7 +1288,7 @@ def nankurt( Examples -------- - >>> import pandas.core.nanops as nanops + >>> from pandas.core import nanops >>> s = pd.Series([1, np.nan, 1, 3, 2]) >>> nanops.nankurt(s) -1.2892561983471076 @@ -1380,7 +1380,7 @@ def nanprod( Examples -------- - >>> import pandas.core.nanops as nanops + >>> from pandas.core import nanops >>> s = pd.Series([1, 2, 3, np.nan]) >>> nanops.nanprod(s) 6.0 diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 6a1c586d90b6e..bc05e9a3d7c3f 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -47,7 +47,7 @@ notna, ) -import pandas.core.computation.expressions as expressions +from pandas.core.computation import expressions from pandas.core.construction import ensure_wrapped_if_datetimelike from pandas.core.ops import ( missing, @@ -97,8 +97,7 @@ def _masked_arith_op(x: np.ndarray, y, op): if len(x) != len(y): raise ValueError(x.shape, y.shape) - else: - ymask = notna(y) + ymask = notna(y) # NB: ravel() is only safe since y is ndarray; for e.g. 
PeriodIndex # we would get int64 dtype, see GH#19956 diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 9f9fdef089353..f0ffd694ff953 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -46,7 +46,6 @@ from pandas.util._decorators import ( Appender, Substitution, - deprecate_nonkeyword_arguments, doc, ) @@ -90,7 +89,6 @@ is_superperiod, ) from pandas.tseries.offsets import ( - DateOffset, Day, Nano, Tick, @@ -140,7 +138,6 @@ class Resampler(BaseGroupBy, PandasObject): "closed", "label", "convention", - "loffset", "kind", "origin", "offset", @@ -359,7 +356,6 @@ def aggregate(self, func=None, *args, **kwargs): how = func result = self._groupby_and_aggregate(how, *args, **kwargs) - result = self._apply_loffset(result) return result agg = aggregate @@ -476,38 +472,8 @@ def _groupby_and_aggregate(self, how, *args, **kwargs): # try to evaluate result = grouped.apply(how, *args, **kwargs) - result = self._apply_loffset(result) return self._wrap_result(result) - def _apply_loffset(self, result): - """ - If loffset is set, offset the result index. - - This is NOT an idempotent routine, it will be applied - exactly once to the result. - - Parameters - ---------- - result : Series or DataFrame - the result of resample - """ - # error: Cannot determine type of 'loffset' - needs_offset = ( - isinstance( - self.loffset, # type: ignore[has-type] - (DateOffset, timedelta, np.timedelta64), - ) - and isinstance(result.index, DatetimeIndex) - and len(result.index) > 0 - ) - - if needs_offset: - # error: Cannot determine type of 'loffset' - result.index = result.index + self.loffset # type: ignore[has-type] - - self.loffset = None - return result - def _get_resampler_for_grouping(self, groupby, key=None): """ Return the correct class for resampling with groupby. 
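# --- Illustrative example (editor's note, not part of the patch) ---
# With ``_apply_loffset`` removed above, a sketch of the documented
# replacement for the old ``loffset`` argument: shift the resampled
# index explicitly with ``to_offset`` after aggregating.
import pandas as pd
from pandas.tseries.frequencies import to_offset

ts = pd.Series(range(4), index=pd.date_range("2000-01-01", periods=4, freq="H"))
result = ts.resample("2H").mean()
result.index = result.index + to_offset("15min")  # was: resample("2H", loffset="15min")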
@@ -881,11 +847,11 @@ def fillna(self, method, limit=None): """ return self._upsample(method, limit=limit) - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "method"]) @doc(NDFrame.interpolate, **_shared_docs_kwargs) def interpolate( self, method: QuantileInterpolation = "linear", + *, axis: Axis = 0, limit=None, inplace: bool = False, @@ -1296,7 +1262,6 @@ def _downsample(self, how, **kwargs): # we want to call the actual grouper method here result = obj.groupby(self.grouper, axis=self.axis).aggregate(how, **kwargs) - result = self._apply_loffset(result) return self._wrap_result(result) def _adjust_binner_for_upsample(self, binner): @@ -1354,7 +1319,6 @@ def _upsample(self, method, limit=None, fill_value=None): res_index, method=method, limit=limit, fill_value=fill_value ) - result = self._apply_loffset(result) return self._wrap_result(result) def _wrap_result(self, result): @@ -1399,11 +1363,6 @@ def _convert_obj(self, obj: NDFrameT) -> NDFrameT: ) raise NotImplementedError(msg) - if self.loffset is not None: - # Cannot apply loffset/timedelta to PeriodIndex -> convert to - # timestamps - self.kind = "timestamp" - # convert to timestamp if self.kind == "timestamp": obj = obj.to_timestamp(how=self.convention) @@ -1564,7 +1523,6 @@ class TimeGrouper(Grouper): "closed", "label", "how", - "loffset", "kind", "convention", "origin", @@ -1582,10 +1540,8 @@ def __init__( axis: Axis = 0, fill_method=None, limit=None, - loffset=None, kind: str | None = None, convention: Literal["start", "end", "e", "s"] | None = None, - base: int | None = None, origin: Literal["epoch", "start", "start_day", "end", "end_day"] | TimestampConvertibleTypes = "start_day", offset: TimedeltaConvertibleTypes | None = None, @@ -1665,22 +1621,6 @@ def __init__( # always sort time groupers kwargs["sort"] = True - # Handle deprecated arguments since v1.1.0 of `base` and `loffset` (GH #31809) - if base is not None and offset is not None: - raise ValueError("'offset' and 'base' cannot be present at the same time") - - if base and isinstance(freq, Tick): - # this conversion handle the default behavior of base and the - # special case of GH #10530. Indeed in case when dealing with - # a TimedeltaIndex base was treated as a 'pure' offset even though - # the default behavior of base was equivalent of a modulo on - # freq_nanos. 
- self.offset = Timedelta(base * freq.nanos // freq.n) - - if isinstance(loffset, str): - loffset = to_offset(loffset) - self.loffset = loffset - super().__init__(freq=freq, axis=axis, **kwargs) def _get_resampler(self, obj, kind=None): @@ -1841,9 +1781,6 @@ def _get_time_delta_bins(self, ax: TimedeltaIndex): if self.offset: # GH 10530 & 31809 labels += self.offset - if self.loffset: - # GH 33498 - labels += self.loffset return binner, bins, labels @@ -2010,7 +1947,7 @@ def _get_timestamp_range_edges( index_tz = first.tz if isinstance(origin, Timestamp) and (origin.tz is None) != (index_tz is None): raise ValueError("The origin must have the same timezone as the index.") - elif origin == "epoch": + if origin == "epoch": # set the epoch based on the timezone to have similar bins results when # resampling on the same kind of indexes on different timezones origin = Timestamp("1970-01-01", tz=index_tz) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index e8fd8ed737fd6..5ce69d2c2ab4c 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -14,7 +14,6 @@ cast, overload, ) -import warnings import numpy as np @@ -23,11 +22,7 @@ AxisInt, HashableT, ) -from pandas.util._decorators import ( - cache_readonly, - deprecate_nonkeyword_arguments, -) -from pandas.util._exceptions import find_stack_level +from pandas.util._decorators import cache_readonly from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ( @@ -67,6 +62,7 @@ @overload def concat( objs: Iterable[DataFrame] | Mapping[HashableT, DataFrame], + *, axis: Literal[0, "index"] = ..., join: str = ..., ignore_index: bool = ..., @@ -83,6 +79,7 @@ def concat( @overload def concat( objs: Iterable[Series] | Mapping[HashableT, Series], + *, axis: Literal[0, "index"] = ..., join: str = ..., ignore_index: bool = ..., @@ -99,6 +96,7 @@ def concat( @overload def concat( objs: Iterable[NDFrame] | Mapping[HashableT, NDFrame], + *, axis: Literal[0, "index"] = ..., join: str = ..., ignore_index: bool = ..., @@ -115,6 +113,7 @@ def concat( @overload def concat( objs: Iterable[NDFrame] | Mapping[HashableT, NDFrame], + *, axis: Literal[1, "columns"], join: str = ..., ignore_index: bool = ..., @@ -131,6 +130,7 @@ def concat( @overload def concat( objs: Iterable[NDFrame] | Mapping[HashableT, NDFrame], + *, axis: Axis = ..., join: str = ..., ignore_index: bool = ..., @@ -144,9 +144,9 @@ def concat( ... -@deprecate_nonkeyword_arguments(version=None, allowed_args=["objs"]) def concat( objs: Iterable[NDFrame] | Mapping[HashableT, NDFrame], + *, axis: Axis = 0, join: str = "outer", ignore_index: bool = False, @@ -549,11 +549,8 @@ def __init__( self.levels = levels if not is_bool(sort): - warnings.warn( - "Passing non boolean values for sort is deprecated and " - "will error in a future version!", - FutureWarning, - stacklevel=find_stack_level(), + raise ValueError( + f"The 'sort' keyword only accepts boolean values; {sort} was passed." 
) self.sort = sort diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py index a39e3c1f10956..fa1d6783e8f41 100644 --- a/pandas/core/reshape/encoding.py +++ b/pandas/core/reshape/encoding.py @@ -21,7 +21,10 @@ from pandas.core.arrays import SparseArray from pandas.core.arrays.categorical import factorize_from_iterable from pandas.core.frame import DataFrame -from pandas.core.indexes.api import Index +from pandas.core.indexes.api import ( + Index, + default_index, +) from pandas.core.series import Series @@ -249,7 +252,7 @@ def get_empty_frame(data) -> DataFrame: if isinstance(data, Series): index = data.index else: - index = Index(range(len(data))) + index = default_index(len(data)) return DataFrame(index=index) # if all NaN @@ -508,7 +511,7 @@ def from_dummies( "Dummy DataFrame contains multi-assignment(s); " f"First instance in row: {assigned.idxmax()}" ) - elif any(assigned == 0): + if any(assigned == 0): if isinstance(default_category, dict): cats.append(default_category[prefix]) else: diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 539e585e01acc..300073d893c67 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -5,7 +5,6 @@ TYPE_CHECKING, Hashable, ) -import warnings import numpy as np @@ -13,7 +12,6 @@ Appender, deprecate_kwarg, ) -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( is_extension_array_dtype, @@ -56,13 +54,9 @@ def melt( cols = list(frame.columns) if value_name in frame.columns: - warnings.warn( - "This dataframe has a column name that matches the 'value_name' column " - "name of the resulting Dataframe. " - "In the future this will raise an error, please set the 'value_name' " - "parameter of DataFrame.melt to a unique name.", - FutureWarning, - stacklevel=find_stack_level(), + raise ValueError( + f"value_name ({value_name}) cannot match an element in " + "the DataFrame columns." ) if id_vars is not None: diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index f198db72460fd..f46cf6085b06d 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -84,6 +84,7 @@ import pandas.core.common as com from pandas.core.construction import extract_array from pandas.core.frame import _merge_doc +from pandas.core.indexes.api import default_index from pandas.core.sorting import is_int64_overflow_possible if TYPE_CHECKING: @@ -306,7 +307,7 @@ def _merger(x, y) -> DataFrame: if left_by is not None and right_by is not None: raise ValueError("Can only group either left or right frames") - elif left_by is not None: + if left_by is not None: if isinstance(left_by, str): left_by = [left_by] check = set(left_by).difference(left.columns) @@ -676,16 +677,14 @@ def __init__( f"right_index parameter must be of type bool, not {type(right_index)}" ) - # warn user when merging between different levels + # GH 40993: raise when merging between different levels; enforced in 2.0 if _left.columns.nlevels != _right.columns.nlevels: msg = ( - "merging between different levels is deprecated and will be removed " - f"in a future version. ({_left.columns.nlevels} levels on the left, " + "Not allowed to merge between different levels. 
" + f"({_left.columns.nlevels} levels on the left, " f"{_right.columns.nlevels} on the right)" ) - # stacklevel chosen to be correct when this is reached via pd.merge - # (and not DataFrame.join) - warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) + raise MergeError(msg) self.left_on, self.right_on = self._validate_left_right_on(left_on, right_on) @@ -1062,7 +1061,7 @@ def _get_join_info( else: join_index = self.left.index.take(left_indexer) else: - join_index = Index(np.arange(len(left_indexer))) + join_index = default_index(len(left_indexer)) if len(join_index) == 0: join_index = join_index.astype(object) @@ -1292,7 +1291,7 @@ def _maybe_coerce_merge_keys(self) -> None: continue # check whether ints and floats - elif is_integer_dtype(rk.dtype) and is_float_dtype(lk.dtype): + if is_integer_dtype(rk.dtype) and is_float_dtype(lk.dtype): # GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int with np.errstate(invalid="ignore"): # error: Argument 1 to "astype" of "ndarray" has incompatible @@ -1314,7 +1313,7 @@ def _maybe_coerce_merge_keys(self) -> None: ) continue - elif is_float_dtype(rk.dtype) and is_integer_dtype(lk.dtype): + if is_float_dtype(rk.dtype) and is_integer_dtype(lk.dtype): # GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int with np.errstate(invalid="ignore"): # error: Argument 1 to "astype" of "ndarray" has incompatible @@ -1337,7 +1336,7 @@ def _maybe_coerce_merge_keys(self) -> None: continue # let's infer and see if we are ok - elif lib.infer_dtype(lk, skipna=False) == lib.infer_dtype( + if lib.infer_dtype(lk, skipna=False) == lib.infer_dtype( rk, skipna=False ): continue @@ -1544,11 +1543,11 @@ def _validate(self, validate: str) -> None: "Merge keys are not unique in either left " "or right dataset; not a one-to-one merge" ) - elif not left_unique: + if not left_unique: raise MergeError( "Merge keys are not unique in left dataset; not a one-to-one merge" ) - elif not right_unique: + if not right_unique: raise MergeError( "Merge keys are not unique in right dataset; not a one-to-one merge" ) @@ -1570,7 +1569,18 @@ def _validate(self, validate: str) -> None: pass else: - raise ValueError("Not a valid argument for validate") + raise ValueError( + f'"{validate}" is not a valid argument. ' + "Valid arguments are:\n" + '- "1:1"\n' + '- "1:m"\n' + '- "m:1"\n' + '- "m:m"\n' + '- "one_to_one"\n' + '- "one_to_many"\n' + '- "many_to_one"\n' + '- "many_to_many"' + ) def get_join_indexers( @@ -2064,15 +2074,13 @@ def injection(obj): side = "left" if isna(left_values).any(): raise ValueError(f"Merge keys contain null values on {side} side") - else: - raise ValueError(f"{side} keys must be sorted") + raise ValueError(f"{side} keys must be sorted") if not Index(right_values).is_monotonic_increasing: side = "right" if isna(right_values).any(): raise ValueError(f"Merge keys contain null values on {side} side") - else: - raise ValueError(f"{side} keys must be sorted") + raise ValueError(f"{side} keys must be sorted") # initial type conversion as needed if needs_i8_conversion(left_values): @@ -2456,8 +2464,7 @@ def _validate_operand(obj: DataFrame | Series) -> DataFrame: elif isinstance(obj, ABCSeries): if obj.name is None: raise ValueError("Cannot merge a Series without a name") - else: - return obj.to_frame() + return obj.to_frame() else: raise TypeError( f"Can only merge Series or DataFrame objects, a {type(obj)} was passed" @@ -2475,13 +2482,10 @@ def _items_overlap_with_suffix( If corresponding suffix is empty, the entry is simply converted to string. 
""" - if not is_list_like(suffixes, allow_sets=False): - warnings.warn( - f"Passing 'suffixes' as a {type(suffixes)}, is not supported and may give " - "unexpected results. Provide 'suffixes' as a tuple instead. In the " - "future a 'TypeError' will be raised.", - FutureWarning, - stacklevel=find_stack_level(), + if not is_list_like(suffixes, allow_sets=False) or isinstance(suffixes, dict): + raise TypeError( + f"Passing 'suffixes' as a {type(suffixes)}, is not supported. " + "Provide 'suffixes' as a tuple instead." ) to_rename = left.intersection(right) @@ -2527,11 +2531,9 @@ def renamer(x, suffix): if not rlabels.is_unique: dups.extend(rlabels[(rlabels.duplicated()) & (~right.duplicated())].tolist()) if dups: - warnings.warn( - f"Passing 'suffixes' which cause duplicate columns {set(dups)} in the " - f"result is deprecated and will raise a MergeError in a future version.", - FutureWarning, - stacklevel=find_stack_level(), + raise MergeError( + f"Passing 'suffixes' which cause duplicate columns {set(dups)} is " + f"not allowed.", ) return llabels, rlabels diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 51ca0e90f8809..37e78c7dbf7a2 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -20,7 +20,6 @@ from pandas.util._decorators import ( Appender, Substitution, - deprecate_nonkeyword_arguments, ) from pandas.core.dtypes.cast import maybe_downcast_to_dtype @@ -412,7 +411,11 @@ def _all_key(key): table_pieces.append(transformed_piece) margin_keys.append(all_key) - result = concat(table_pieces, axis=cat_axis) + if not table_pieces: + # GH 49240 + return table + else: + result = concat(table_pieces, axis=cat_axis) if len(rows) == 0: return result @@ -493,9 +496,9 @@ def _convert_by(by): @Substitution("\ndata : DataFrame") @Appender(_shared_docs["pivot"], indents=1) -@deprecate_nonkeyword_arguments(version=None, allowed_args=["data"]) def pivot( data: DataFrame, + *, index: IndexLabel | lib.NoDefault = lib.NoDefault, columns: IndexLabel | lib.NoDefault = lib.NoDefault, values: IndexLabel | lib.NoDefault = lib.NoDefault, diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 94705790e40bd..243a7c547bbb5 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -43,8 +43,8 @@ to_datetime, to_timedelta, ) +from pandas.core import nanops import pandas.core.algorithms as algos -import pandas.core.nanops as nanops def cut( @@ -263,7 +263,7 @@ def cut( raise ValueError( "cannot specify integer `bins` when input data contains infinity" ) - elif mn == mx: # adjust end points before binning + if mn == mx: # adjust end points before binning mn -= 0.001 * abs(mn) if mn != 0 else 0.001 mx += 0.001 * abs(mx) if mx != 0 else 0.001 bins = np.linspace(mn, mx, bins + 1, endpoint=True) @@ -421,8 +421,7 @@ def _bins_to_cuts( f"Bin edges must be unique: {repr(bins)}.\n" f"You can drop duplicate edges by setting the 'duplicates' kwarg" ) - else: - bins = unique_bins + bins = unique_bins side: Literal["left", "right"] = "left" if right else "right" ids = ensure_platform_int(bins.searchsorted(x, side=side)) @@ -440,7 +439,7 @@ def _bins_to_cuts( "list-like argument" ) - elif labels is None: + if labels is None: labels = _format_labels( bins, precision, right=right, include_lowest=include_lowest, dtype=dtype ) diff --git a/pandas/core/series.py b/pandas/core/series.py index 2987858492a25..0c104111f9db4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -67,8 +67,6 @@ from pandas.util._decorators import ( 
Appender, Substitution, - deprecate_kwarg, - deprecate_nonkeyword_arguments, doc, ) from pandas.util._exceptions import find_stack_level @@ -119,14 +117,12 @@ from pandas.core.arrays.categorical import CategoricalAccessor from pandas.core.arrays.sparse import SparseAccessor from pandas.core.construction import ( - create_series_with_explicit_dtype, extract_array, - is_empty_data, sanitize_array, ) from pandas.core.generic import NDFrame from pandas.core.indexers import ( - deprecate_ndim_indexing, + disallow_ndim_indexing, unpack_1tuple, ) from pandas.core.indexes.accessors import CombinedDatetimelikeProperties @@ -143,7 +139,7 @@ from pandas.core.indexes.multi import maybe_droplevels from pandas.core.indexing import ( check_bool_indexer, - check_deprecated_indexers, + check_dict_or_set_indexers, ) from pandas.core.internals import ( SingleArrayManager, @@ -389,18 +385,6 @@ def __init__( name = ibase.maybe_extract_name(name, data, type(self)) - if is_empty_data(data) and dtype is None: - # gh-17261 - warnings.warn( - "The default dtype for empty Series will be 'object' instead " - "of 'float64' in a future version. Specify a dtype explicitly " - "to silence this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) - # uncomment the line below when removing the FutureWarning - # dtype = np.dtype(object) - if index is not None: index = ensure_index(index) @@ -413,7 +397,7 @@ def __init__( raise NotImplementedError( "initializing a Series from a MultiIndex is not supported" ) - elif isinstance(data, Index): + if isinstance(data, Index): if dtype is not None: # astype copies @@ -458,6 +442,9 @@ def __init__( pass else: data = com.maybe_iterable_to_list(data) + if is_list_like(data) and not len(data) and dtype is None: + # GH 29405: Pre-2.0, this defaulted to float. + dtype = np.dtype(object) if index is None: if not is_list_like(data): @@ -531,15 +518,10 @@ def _init_dict( # Input is now list-like, so rely on "standard" construction: - # TODO: passing np.float64 to not break anything yet. See GH-17261 - s = create_series_with_explicit_dtype( - # error: Argument "index" to "create_series_with_explicit_dtype" has - # incompatible type "Tuple[Any, ...]"; expected "Union[ExtensionArray, - # ndarray, Index, None]" + s = self._constructor( values, - index=keys, # type: ignore[arg-type] + index=keys, dtype=dtype, - dtype_if_empty=np.float64, ) # Now we just make sure the order is respected, if any @@ -722,13 +704,13 @@ def array(self) -> ExtensionArray: return self._mgr.array_values() # ops - def ravel(self, order: str = "C") -> np.ndarray: + def ravel(self, order: str = "C") -> ArrayLike: """ - Return the flattened underlying data as an ndarray. + Return the flattened underlying data as an ndarray or ExtensionArray. Returns ------- - numpy.ndarray or ndarray-like + numpy.ndarray or ExtensionArray Flattened data of the Series. 
See Also @@ -931,7 +913,7 @@ def _slice(self, slobj: slice, axis: Axis = 0) -> Series: return self._get_values(slobj) def __getitem__(self, key): - check_deprecated_indexers(key) + check_dict_or_set_indexers(key) key = com.apply_if_callable(key, self) if key is Ellipsis: @@ -1020,7 +1002,7 @@ def _get_values_tuple(self, key: tuple): # see tests.series.timeseries.test_mpl_compat_hack # the asarray is needed to avoid returning a 2D DatetimeArray result = np.asarray(self._values[key]) - deprecate_ndim_indexing(result, stacklevel=find_stack_level()) + disallow_ndim_indexing(result) return result if not isinstance(self.index, MultiIndex): @@ -1074,7 +1056,7 @@ def _get_value(self, label, takeable: bool = False): return self.iloc[loc] def __setitem__(self, key, value) -> None: - check_deprecated_indexers(key) + check_dict_or_set_indexers(key) key = com.apply_if_callable(key, self) cacher_needs_updating = self._check_is_chained_assignment_possible() @@ -1409,10 +1391,10 @@ def reset_index( ) -> None: ... - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "level"]) def reset_index( self, level: IndexLabel = None, + *, drop: bool = False, name: Level = lib.no_default, inplace: bool = False, @@ -2000,7 +1982,7 @@ def groupby( self, by=None, axis: Axis = 0, - level: Level = None, + level: IndexLabel = None, as_index: bool = True, sort: bool = True, group_keys: bool | lib.NoDefault = no_default, @@ -2186,23 +2168,22 @@ def unique(self) -> ArrayLike: @overload def drop_duplicates( - self, keep: DropKeep = ..., *, inplace: Literal[False] = ... + self, *, keep: DropKeep = ..., inplace: Literal[False] = ... ) -> Series: ... @overload - def drop_duplicates(self, keep: DropKeep = ..., *, inplace: Literal[True]) -> None: + def drop_duplicates(self, *, keep: DropKeep = ..., inplace: Literal[True]) -> None: ... @overload def drop_duplicates( - self, keep: DropKeep = ..., *, inplace: bool = ... + self, *, keep: DropKeep = ..., inplace: bool = ... ) -> Series | None: ... - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) def drop_duplicates( - self, keep: DropKeep = "first", inplace: bool = False + self, *, keep: DropKeep = "first", inplace: bool = False ) -> Series | None: """ Return Series with duplicate values removed. @@ -2994,92 +2975,6 @@ def searchsorted( # type: ignore[override] # ------------------------------------------------------------------- # Combination - def append( - self, to_append, ignore_index: bool = False, verify_integrity: bool = False - ) -> Series: - """ - Concatenate two or more Series. - - .. deprecated:: 1.4.0 - Use :func:`concat` instead. For further details see - :ref:`whatsnew_140.deprecations.frame_series_append` - - Parameters - ---------- - to_append : Series or list/tuple of Series - Series to append with self. - ignore_index : bool, default False - If True, the resulting axis will be labeled 0, 1, …, n - 1. - verify_integrity : bool, default False - If True, raise Exception on creating index with duplicates. - - Returns - ------- - Series - Concatenated Series. - - See Also - -------- - concat : General function to concatenate DataFrame or Series objects. - - Notes - ----- - Iteratively appending to a Series can be more computationally intensive - than a single concatenate. A better solution is to append values to a - list and then concatenate the list with the original Series all at - once. 
- - Examples - -------- - >>> s1 = pd.Series([1, 2, 3]) - >>> s2 = pd.Series([4, 5, 6]) - >>> s3 = pd.Series([4, 5, 6], index=[3, 4, 5]) - >>> s1.append(s2) - 0 1 - 1 2 - 2 3 - 0 4 - 1 5 - 2 6 - dtype: int64 - - >>> s1.append(s3) - 0 1 - 1 2 - 2 3 - 3 4 - 4 5 - 5 6 - dtype: int64 - - With `ignore_index` set to True: - - >>> s1.append(s2, ignore_index=True) - 0 1 - 1 2 - 2 3 - 3 4 - 4 5 - 5 6 - dtype: int64 - - With `verify_integrity` set to True: - - >>> s1.append(s2, verify_integrity=True) - Traceback (most recent call last): - ... - ValueError: Indexes have overlapping values: [0, 1, 2] - """ - warnings.warn( - "The series.append method is deprecated " - "and will be removed from pandas in a future version. " - "Use pandas.concat instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - - return self._append(to_append, ignore_index, verify_integrity) - def _append( self, to_append, ignore_index: bool = False, verify_integrity: bool = False ): @@ -3767,10 +3662,9 @@ def sort_index( ) -> Series | None: ... - # error: Signature of "sort_index" incompatible with supertype "NDFrame" - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) - def sort_index( # type: ignore[override] + def sort_index( self, + *, axis: Axis = 0, level: IndexLabel = None, ascending: bool | Sequence[bool] = True, @@ -4944,40 +4838,6 @@ def rename( else: return self._set_name(index, inplace=inplace) - @overload - def set_axis( - self, - labels, - *, - axis: Axis = ..., - inplace: Literal[False] | lib.NoDefault = ..., - copy: bool | lib.NoDefault = ..., - ) -> Series: - ... - - @overload - def set_axis( - self, - labels, - *, - axis: Axis = ..., - inplace: Literal[True], - copy: bool | lib.NoDefault = ..., - ) -> None: - ... - - @overload - def set_axis( - self, - labels, - *, - axis: Axis = ..., - inplace: bool | lib.NoDefault = ..., - copy: bool | lib.NoDefault = ..., - ) -> Series | None: - ... - - # error: Signature of "set_axis" incompatible with supertype "NDFrame" @Appender( """ Examples @@ -5003,15 +4863,14 @@ def set_axis( see_also_sub="", ) @Appender(NDFrame.set_axis.__doc__) - def set_axis( # type: ignore[override] + def set_axis( self, labels, *, axis: Axis = 0, - inplace: bool | lib.NoDefault = lib.no_default, - copy: bool | lib.NoDefault = lib.no_default, - ) -> Series | None: - return super().set_axis(labels, axis=axis, inplace=inplace, copy=copy) + copy: bool = True, + ) -> Series: + return super().set_axis(labels, axis=axis, copy=copy) # error: Cannot determine type of 'reindex' @doc( @@ -5075,12 +4934,10 @@ def drop( ) -> Series | None: ... - # error: Signature of "drop" incompatible with supertype "NDFrame" - # github.com/python/mypy/issues/12387 - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) - def drop( # type: ignore[override] + def drop( self, labels: IndexLabel = None, + *, axis: Axis = 0, index: IndexLabel = None, columns: IndexLabel = None, @@ -5224,11 +5081,11 @@ def fillna( ... # error: Signature of "fillna" incompatible with supertype "NDFrame" - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "value"]) @doc(NDFrame.fillna, **_shared_doc_kwargs) def fillna( # type: ignore[override] self, value: Hashable | Mapping | Series | DataFrame = None, + *, method: FillnaOptions | None = None, axis: Axis | None = None, inplace: bool = False, @@ -5688,9 +5545,9 @@ def dropna( ) -> None: ... 
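# --- Illustrative example (editor's note, not part of the patch) ---
# A minimal sketch of the keyword-only enforcement applied throughout this
# file (sort_index, drop, fillna, and the ``dropna`` definition that follows):
# positional use that previously only warned via
# ``deprecate_nonkeyword_arguments`` now raises a TypeError.
import pandas as pd

s = pd.Series([1.0, None, 3.0])
s.dropna(axis=0, inplace=False)  # keywords remain valid
# s.dropna(0)                    # TypeError: arguments after ``self`` are keyword-only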
- @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) def dropna( self, + *, axis: Axis = 0, inplace: bool = False, how: AnyAll | None = None, @@ -5811,8 +5668,6 @@ def resample( label: str | None = None, convention: str = "start", kind: str | None = None, - loffset=None, - base: int | None = None, on: Level = None, level: Level = None, origin: str | TimestampConvertibleTypes = "start_day", @@ -5826,8 +5681,6 @@ def resample( label=label, convention=convention, kind=kind, - loffset=loffset, - base=base, on=on, level=level, origin=origin, @@ -5993,10 +5846,10 @@ def clip( ) -> Series | None: return super().clip(lower, upper, axis=axis, inplace=inplace, **kwargs) - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "method"]) def interpolate( self: Series, method: str = "linear", + *, axis: Axis = 0, limit: int | None = None, inplace: bool = False, @@ -6006,13 +5859,13 @@ def interpolate( **kwargs, ) -> Series | None: return super().interpolate( - method, - axis, - limit, - inplace, - limit_direction, - limit_area, - downcast, + method=method, + axis=axis, + limit=limit, + inplace=inplace, + limit_direction=limit_direction, + limit_area=limit_area, + downcast=downcast, **kwargs, ) @@ -6025,7 +5878,6 @@ def where( inplace: Literal[False] = ..., axis: Axis | None = ..., level: Level = ..., - errors: IgnoreRaise | lib.NoDefault = ..., ) -> Series: ... @@ -6038,7 +5890,6 @@ def where( inplace: Literal[True], axis: Axis | None = ..., level: Level = ..., - errors: IgnoreRaise | lib.NoDefault = ..., ) -> None: ... @@ -6051,13 +5902,10 @@ def where( inplace: bool = ..., axis: Axis | None = ..., level: Level = ..., - errors: IgnoreRaise | lib.NoDefault = ..., ) -> Series | None: ... - # error: Signature of "where" incompatible with supertype "NDFrame" - @deprecate_kwarg(old_arg_name="errors", new_arg_name=None) - def where( # type: ignore[override] + def where( self, cond, other=lib.no_default, @@ -6065,7 +5913,6 @@ def where( # type: ignore[override] inplace: bool = False, axis: Axis | None = None, level: Level = None, - errors: IgnoreRaise | lib.NoDefault = lib.no_default, ) -> Series | None: return super().where( cond, @@ -6084,7 +5931,6 @@ def mask( inplace: Literal[False] = ..., axis: Axis | None = ..., level: Level = ..., - errors: IgnoreRaise | lib.NoDefault = ..., ) -> Series: ... @@ -6097,7 +5943,6 @@ def mask( inplace: Literal[True], axis: Axis | None = ..., level: Level = ..., - errors: IgnoreRaise | lib.NoDefault = ..., ) -> None: ... @@ -6110,13 +5955,10 @@ def mask( inplace: bool = ..., axis: Axis | None = ..., level: Level = ..., - errors: IgnoreRaise | lib.NoDefault = ..., ) -> Series | None: ... 
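# --- Illustrative example (editor's note, not part of the patch) ---
# Sketch of the ``errors`` removal from Series.where/Series.mask below:
# the keyword was already a deprecated no-op, so callers simply drop it.
import pandas as pd

s = pd.Series([-1, 0, 2])
masked = s.mask(s <= 0, other=0)  # was: s.mask(s <= 0, 0, errors="raise")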
- # error: Signature of "mask" incompatible with supertype "NDFrame" - @deprecate_kwarg(old_arg_name="errors", new_arg_name=None) - def mask( # type: ignore[override] + def mask( self, cond, other=lib.no_default, @@ -6124,7 +5966,6 @@ def mask( # type: ignore[override] inplace: bool = False, axis: Axis | None = None, level: Level = None, - errors: IgnoreRaise | lib.NoDefault = lib.no_default, ) -> Series | None: return super().mask( cond, diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 58eef2a39b37a..b28a9def8a7ea 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -11,7 +11,6 @@ Sequence, cast, ) -import warnings import numpy as np @@ -341,14 +340,7 @@ def lexsort_indexer( keys = [ensure_key_mapped(k, key) for k in keys] for k, order in zip(keys, orders): - with warnings.catch_warnings(): - # TODO(2.0): unnecessary once deprecation is enforced - # GH#45618 don't issue warning user can't do anything about - warnings.filterwarnings( - "ignore", ".*(SparseArray|SparseDtype).*", category=FutureWarning - ) - - cat = Categorical(k, ordered=True) + cat = Categorical(k, ordered=True) if na_position not in ["last", "first"]: raise ValueError(f"invalid na_position: {na_position}") diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index ff9c72b74e5eb..71a50c69bfee1 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -14,17 +14,14 @@ import numpy as np -import pandas._libs.lib as lib +from pandas._libs import lib from pandas._typing import ( AlignJoin, DtypeObj, F, Scalar, ) -from pandas.util._decorators import ( - Appender, - deprecate_nonkeyword_arguments, -) +from pandas.util._decorators import Appender from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( @@ -840,14 +837,13 @@ def cat( """, } ) - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "pat"]) @forbid_nonstring_types(["bytes"]) def split( self, pat: str | re.Pattern | None = None, + *, n=-1, expand: bool = False, - *, regex: bool | None = None, ): if regex is False and is_re(pat): @@ -872,9 +868,8 @@ def split( "regex_examples": "", } ) - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "pat"]) @forbid_nonstring_types(["bytes"]) - def rsplit(self, pat=None, n=-1, expand: bool = False): + def rsplit(self, pat=None, *, n=-1, expand: bool = False): result = self._data.array._str_rsplit(pat, n=n) return self._wrap_result(result, expand=expand, returns_string=expand) @@ -1328,7 +1323,7 @@ def replace( n: int = -1, case: bool | None = None, flags: int = 0, - regex: bool | None = None, + regex: bool = False, ): r""" Replace each occurrence of pattern/regex in the Series/Index. @@ -1356,7 +1351,7 @@ def replace( flags : int, default 0 (no flags) Regex module flags, e.g. re.IGNORECASE. Cannot be set if `pat` is a compiled regex. - regex : bool, default True + regex : bool, default False Determines if the passed-in pattern is a regular expression: - If True, assumes the passed-in pattern is a regular expression. @@ -1364,8 +1359,6 @@ def replace( - Cannot be set to False if `pat` is a compiled regex or `repl` is a callable. - .. 
versionadded:: 0.23.0 - Returns ------- Series or Index of object @@ -1449,20 +1442,6 @@ def replace( 2 NaN dtype: object """ - if regex is None: - if isinstance(pat, str) and any(c in pat for c in ".+*|^$?[](){}\\"): - # warn only in cases where regex behavior would differ from literal - msg = ( - "The default value of regex will change from True to False " - "in a future version." - ) - if len(pat) == 1: - msg += ( - " In addition, single character regular expressions will " - "*not* be treated as literal strings when regex=True." - ) - warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) - # Check whether repl is valid (GH 13438, GH 15055) if not (isinstance(repl, str) or callable(repl)): raise TypeError("repl must be a string or callable") @@ -1481,14 +1460,6 @@ def replace( elif callable(repl): raise ValueError("Cannot use a callable replacement when regex=False") - # The current behavior is to treat single character patterns as literal strings, - # even when ``regex`` is set to ``True``. - if isinstance(pat, str) and len(pat) == 1: - regex = False - - if regex is None: - regex = True - if case is None: case = True diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 3e8cdc12e7216..21e7ede3ed386 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -1,6 +1,7 @@ from __future__ import annotations from collections.abc import Callable # noqa: PDF001 +import functools import re import textwrap from typing import ( @@ -11,7 +12,7 @@ import numpy as np -import pandas._libs.lib as lib +from pandas._libs import lib import pandas._libs.missing as libmissing import pandas._libs.ops as libops from pandas._typing import ( @@ -380,9 +381,14 @@ def _str_get_dummies(self, sep: str = "|"): dummies = np.empty((len(arr), len(tags2)), dtype=np.int64) + def _isin(test_elements: str, element: str) -> bool: + return element in test_elements + for i, t in enumerate(tags2): pat = sep + t + sep - dummies[:, i] = lib.map_infer(arr.to_numpy(), lambda x: pat in x) + dummies[:, i] = lib.map_infer( + arr.to_numpy(), functools.partial(_isin, element=pat) + ) return dummies, tags2 def _str_upper(self): diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 7791ea804a52a..bb2f663dedb33 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -15,7 +15,6 @@ cast, overload, ) -import warnings import numpy as np @@ -42,7 +41,6 @@ Timezone, npt, ) -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( ensure_object, @@ -476,7 +474,7 @@ def _array_strptime_with_fallback( except OutOfBoundsDatetime: if errors == "raise": raise - elif errors == "coerce": + if errors == "coerce": result = np.empty(arg.shape, dtype="M8[ns]") iresult = result.view("i8") iresult.fill(iNaT) @@ -489,7 +487,7 @@ def _array_strptime_with_fallback( if not infer_datetime_format: if errors == "raise": raise - elif errors == "coerce": + if errors == "coerce": result = np.empty(arg.shape, dtype="M8[ns]") iresult = result.view("i8") iresult.fill(iNaT) @@ -782,15 +780,7 @@ def to_datetime( `strftime documentation `_ for more information on choices, though - note the following differences: - - - :const:`"%f"` will parse all the way - up to nanoseconds; - - - :const:`"%S"` without :const:`"%f"` will capture all the way - up to nanoseconds if present as decimal places, and will also handle - the case where the number of seconds is an integer. 
- + note that :const:`"%f"` will parse all the way up to nanoseconds. exact : bool, default True Control how `format` is used: @@ -969,13 +959,6 @@ def to_datetime( ... format='%Y-%m-%d %H:%M:%S.%f') Timestamp('2018-10-26 12:00:00.000000001') - :const:`"%S"` without :const:`"%f"` will capture all the way - up to nanoseconds if present as decimal places. - - >>> pd.to_datetime('2017-03-22 15:16:45.433502912', - ... format='%Y-%m-%d %H:%M:%S') - Timestamp('2017-03-22 15:16:45.433502912') - **Non-convertible date/times** If a date does not meet the `timestamp limitations @@ -1309,27 +1292,8 @@ def calc_with_mask(carg, mask): return None -def to_time( - arg, - format=None, - infer_time_format: bool = False, - errors: DateTimeErrorChoices = "raise", -): - # GH#34145 - warnings.warn( - "`to_time` has been moved, should be imported from pandas.core.tools.times. " - "This alias will be removed in a future version.", - FutureWarning, - stacklevel=find_stack_level(), - ) - from pandas.core.tools.times import to_time - - return to_time(arg, format, infer_time_format, errors) - - __all__ = [ "DateParseError", "should_cache", "to_datetime", - "to_time", ] diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 705c77090e168..784549b53bc32 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -222,7 +222,7 @@ def _coerce_scalar_to_timedelta_type( except ValueError: if errors == "raise": raise - elif errors == "ignore": + if errors == "ignore": return r # coerce diff --git a/pandas/core/tools/times.py b/pandas/core/tools/times.py index d0a4342254e16..edb6b97ad2e53 100644 --- a/pandas/core/tools/times.py +++ b/pandas/core/tools/times.py @@ -77,7 +77,7 @@ def _convert_listlike(arg, format): f"format {format}" ) raise ValueError(msg) from err - elif errors == "ignore": + if errors == "ignore": return arg else: times.append(None) diff --git a/pandas/core/window/doc.py b/pandas/core/window/doc.py index 835085d41cffa..950bc047a692a 100644 --- a/pandas/core/window/doc.py +++ b/pandas/core/window/doc.py @@ -10,7 +10,7 @@ def create_section_header(header: str) -> str: """Create numpydoc section header""" - return "\n".join((header, "-" * len(header))) + "\n" + return f"{header}\n{'-' * len(header)}\n" template_header = "\nCalculate the {window_method} {aggregation_description}.\n\n" diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index eaf165f168a58..c5c401d415ad0 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -4,7 +4,6 @@ from functools import partial from textwrap import dedent from typing import TYPE_CHECKING -import warnings import numpy as np @@ -21,7 +20,6 @@ from pandas.compat.numpy import function as nv from pandas.util._decorators import doc -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( is_datetime64_ns_dtype, @@ -29,7 +27,7 @@ ) from pandas.core.dtypes.missing import isna -import pandas.core.common as common # noqa: PDF018 +from pandas.core import common # noqa: PDF018 from pandas.core.indexers.objects import ( BaseIndexer, ExponentialMovingWindowIndexer, @@ -653,17 +651,6 @@ def std(self, bias: bool = False, numeric_only: bool = False, *args, **kwargs): ) return zsqrt(self.var(bias=bias, numeric_only=numeric_only, **kwargs)) - def vol(self, bias: bool = False, *args, **kwargs): - warnings.warn( - ( - "vol is deprecated will be removed in a future version. " - "Use std instead." 
- ), - FutureWarning, - stacklevel=find_stack_level(), - ) - return self.std(bias, *args, **kwargs) - @doc( template_header, create_section_header("Parameters"), @@ -1015,13 +1002,12 @@ def mean(self, *args, update=None, update_times=None, **kwargs): 1 0.75 5.75 """ result_kwargs = {} - is_frame = True if self._selected_obj.ndim == 2 else False + is_frame = self._selected_obj.ndim == 2 if update_times is not None: raise NotImplementedError("update_times is not implemented.") - else: - update_deltas = np.ones( - max(self._selected_obj.shape[self.axis - 1] - 1, 0), dtype=np.float64 - ) + update_deltas = np.ones( + max(self._selected_obj.shape[self.axis - 1] - 1, 0), dtype=np.float64 + ) if update is not None: if self._mean.last_ewm is None: raise ValueError( diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index cde39daaacab9..08c1285b24d23 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -171,9 +171,9 @@ def _validate(self) -> None: if self.min_periods is not None: if not is_integer(self.min_periods): raise ValueError("min_periods must be an integer") - elif self.min_periods < 0: + if self.min_periods < 0: raise ValueError("min_periods must be >= 0") - elif is_integer(self.window) and self.min_periods > self.window: + if is_integer(self.window) and self.min_periods > self.window: raise ValueError( f"min_periods {self.min_periods} must be <= window {self.window}" ) @@ -205,7 +205,7 @@ def _validate(self) -> None: if self.step is not None: if not is_integer(self.step): raise ValueError("step must be an integer") - elif self.step < 0: + if self.step < 0: raise ValueError("step must be >= 0") def _check_window_bounds( @@ -216,7 +216,7 @@ def _check_window_bounds( f"start ({len(start)}) and end ({len(end)}) bounds must be the " f"same length" ) - elif len(start) != (num_vals + (self.step or 1) - 1) // (self.step or 1): + if len(start) != (num_vals + (self.step or 1) - 1) // (self.step or 1): raise ValueError( f"start and end bounds ({len(start)}) must be the same length " f"as the object ({num_vals}) divided by the step ({self.step}) " @@ -363,16 +363,15 @@ def _prep_values(self, values: ArrayLike) -> np.ndarray: f"ops for {type(self).__name__} for this " f"dtype {values.dtype} are not implemented" ) - else: - # GH #12373 : rolling functions error on float32 data - # make sure the data is coerced to float64 - try: - if isinstance(values, ExtensionArray): - values = values.to_numpy(np.float64, na_value=np.nan) - else: - values = ensure_float64(values) - except (ValueError, TypeError) as err: - raise TypeError(f"cannot handle this type -> {values.dtype}") from err + # GH #12373 : rolling functions error on float32 data + # make sure the data is coerced to float64 + try: + if isinstance(values, ExtensionArray): + values = values.to_numpy(np.float64, na_value=np.nan) + else: + values = ensure_float64(values) + except (ValueError, TypeError) as err: + raise TypeError(f"cannot handle this type -> {values.dtype}") from err # Convert inf to nan for C funcs inf = np.isinf(values) @@ -418,7 +417,7 @@ def _resolve_output(self, out: DataFrame, obj: DataFrame) -> DataFrame: """Validate and finalize result.""" if out.shape[1] == 0 and obj.shape[1] > 0: raise DataError("No numeric types to aggregate") - elif out.shape[1] == 0: + if out.shape[1] == 0: return obj.astype("float64") self._insert_on_column(out, obj) @@ -964,7 +963,7 @@ class Window(BaseWindow): step : int, default None - ..versionadded:: 1.5.0 + .. 
versionadded:: 1.5.0 Evaluate the window at every ``step`` result, equivalent to slicing as ``[::step]``. ``window`` must be an integer. Using a step argument other @@ -1141,7 +1140,7 @@ def _validate(self): raise NotImplementedError( "BaseIndexer subclasses not implemented with win_types." ) - elif not is_integer(self.window) or self.window < 0: + if not is_integer(self.window) or self.window < 0: raise ValueError("window must be an integer 0 or greater") if self.method != "single": @@ -1559,10 +1558,9 @@ def std( if maybe_use_numba(engine): if self.method == "table": raise NotImplementedError("std not supported with method='table'") - else: - from pandas.core._numba.kernels import sliding_var + from pandas.core._numba.kernels import sliding_var - return zsqrt(self._numba_apply(sliding_var, engine_kwargs, ddof)) + return zsqrt(self._numba_apply(sliding_var, engine_kwargs, ddof)) window_func = window_aggregations.roll_var def zsqrt_func(values, begin, end, min_periods): @@ -1588,10 +1586,9 @@ def var( if maybe_use_numba(engine): if self.method == "table": raise NotImplementedError("var not supported with method='table'") - else: - from pandas.core._numba.kernels import sliding_var + from pandas.core._numba.kernels import sliding_var - return self._numba_apply(sliding_var, engine_kwargs, ddof) + return self._numba_apply(sliding_var, engine_kwargs, ddof) window_func = partial(window_aggregations.roll_var, ddof=ddof) return self._apply( window_func, diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 3e4f116953cb3..89ac1c10254cb 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -283,7 +283,7 @@ class SettingWithCopyError(ValueError): The ``mode.chained_assignment`` needs to be set to set to 'raise.' This can happen unintentionally when chained indexing. - For more information on eveluation order, + For more information on evaluation order, see :ref:`the user guide`. For more information on view vs. copy, @@ -306,7 +306,7 @@ class SettingWithCopyWarning(Warning): 'Warn' is the default option. This can happen unintentionally when chained indexing. - For more information on eveluation order, + For more information on evaluation order, see :ref:`the user guide`. For more information on view vs. 
copy, diff --git a/pandas/io/clipboard/__init__.py b/pandas/io/clipboard/__init__.py index 2867f1b067b2d..78289174b7e68 100644 --- a/pandas/io/clipboard/__init__.py +++ b/pandas/io/clipboard/__init__.py @@ -282,7 +282,7 @@ def copy_dev_clipboard(text): stacklevel=find_stack_level(), ) - with open("/dev/clipboard", "wt") as fd: + with open("/dev/clipboard", "w") as fd: fd.write(text) def paste_dev_clipboard() -> str: @@ -608,7 +608,7 @@ def set_clipboard(clipboard): } if clipboard not in clipboard_types: - allowed_clipboard_types = [repr(_) for _ in clipboard_types.keys()] + allowed_clipboard_types = [repr(_) for _ in clipboard_types] raise ValueError( f"Argument must be one of {', '.join(allowed_clipboard_types)}" ) diff --git a/pandas/io/common.py b/pandas/io/common.py index 64e703572f2bf..265de02dd5d6b 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -50,6 +50,7 @@ CompressionOptions, FilePath, ReadBuffer, + ReadCsvBuffer, StorageOptions, WriteBuffer, ) @@ -1106,6 +1107,9 @@ def _maybe_memory_map( if not memory_map: return handle, memory_map, handles + # mmap used by only read_csv + handle = cast(ReadCsvBuffer, handle) + # need to open the file first if isinstance(handle, str): handle = open(handle, "rb") diff --git a/pandas/io/excel/__init__.py b/pandas/io/excel/__init__.py index 854e2a1ec3a73..275cbf0148f94 100644 --- a/pandas/io/excel/__init__.py +++ b/pandas/io/excel/__init__.py @@ -7,17 +7,12 @@ from pandas.io.excel._openpyxl import OpenpyxlWriter as _OpenpyxlWriter from pandas.io.excel._util import register_writer from pandas.io.excel._xlsxwriter import XlsxWriter as _XlsxWriter -from pandas.io.excel._xlwt import XlwtWriter as _XlwtWriter __all__ = ["read_excel", "ExcelWriter", "ExcelFile"] register_writer(_OpenpyxlWriter) - -register_writer(_XlwtWriter) - - register_writer(_XlsxWriter) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 19edb9acf0aa4..6362e892f0012 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -893,7 +893,6 @@ class ExcelWriter(metaclass=abc.ABCMeta): Default is to use: - * `xlwt `__ for xls files * `xlsxwriter `__ for xlsx files if xlsxwriter is installed otherwise `openpyxl `__ * `odswriter `__ for ods files @@ -911,13 +910,6 @@ class ExcelWriter(metaclass=abc.ABCMeta): Engine to use for writing. If None, defaults to ``io.excel..writer``. NOTE: can only be passed as a keyword argument. - - .. deprecated:: 1.2.0 - - As the `xlwt `__ package is no longer - maintained, the ``xlwt`` engine will be removed in a future - version of pandas. - date_format : str, default None Format string for dates written into Excel files (e.g. 'YYYY-MM-DD'). datetime_format : str, default None @@ -955,12 +947,6 @@ class ExcelWriter(metaclass=abc.ABCMeta): * odswriter: ``odf.opendocument.OpenDocumentSpreadsheet(**engine_kwargs)`` .. versionadded:: 1.3.0 - **kwargs : dict, optional - Keyword arguments to be passed into the engine. - - .. deprecated:: 1.3.0 - - Use engine_kwargs instead. 
Notes ----- @@ -1101,17 +1087,7 @@ def __new__( storage_options: StorageOptions = None, if_sheet_exists: Literal["error", "new", "replace", "overlay"] | None = None, engine_kwargs: dict | None = None, - **kwargs, ) -> ExcelWriter: - if kwargs: - if engine_kwargs is not None: - raise ValueError("Cannot use both engine_kwargs and **kwargs") - warnings.warn( - "Use of **kwargs is deprecated, use engine_kwargs instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - # only switch class if generic(ExcelWriter) if cls is ExcelWriter: if engine is None or (isinstance(engine, str) and engine == "auto"): @@ -1127,25 +1103,6 @@ def __new__( except KeyError as err: raise ValueError(f"No engine for filetype: '{ext}'") from err - if engine == "xlwt": - xls_config_engine = config.get_option( - "io.excel.xls.writer", silent=True - ) - # Don't warn a 2nd time if user has changed the default engine for xls - if xls_config_engine != "xlwt": - warnings.warn( - "As the xlwt package is no longer maintained, the xlwt " - "engine will be removed in a future version of pandas. " - "This is the only engine in pandas that supports writing " - "in the xls format. Install openpyxl and write to an xlsx " - "file instead. You can set the option io.excel.xls.writer " - "to 'xlwt' to silence this warning. While this option is " - "deprecated and will also raise a warning, it can " - "be globally set and the warning suppressed.", - FutureWarning, - stacklevel=find_stack_level(), - ) - # for mypy assert engine is not None cls = get_writer(engine) @@ -1169,7 +1126,6 @@ def engine(self) -> str: @abc.abstractmethod def sheets(self) -> dict[str, Any]: """Mapping of sheet names to sheet objects.""" - pass @property @abc.abstractmethod @@ -1179,7 +1135,6 @@ def book(self): This attribute can be used to access engine-specific features. """ - pass @book.setter @abc.abstractmethod @@ -1187,7 +1142,6 @@ def book(self, other) -> None: """ Set book instance. Class type will depend on the engine used. """ - pass def write_cells( self, @@ -1239,7 +1193,6 @@ def _write_cells( freeze_panes: int tuple of length 2 contains the bottom-most row and right-most column to freeze """ - pass def save(self) -> None: """ @@ -1255,7 +1208,6 @@ def _save(self) -> None: """ Save workbook to disk. """ - pass def __init__( self, @@ -1267,7 +1219,6 @@ def __init__( storage_options: StorageOptions = None, if_sheet_exists: str | None = None, engine_kwargs: dict[str, Any] | None = None, - **kwargs, ) -> None: # validate that this engine can handle the extension if isinstance(path, str): @@ -1441,8 +1392,7 @@ def check_extension(cls, ext: str) -> Literal[True]: ext = ext[1:] if not any(ext in extension for extension in cls._supported_extensions): raise ValueError(f"Invalid extension for engine '{cls.engine}': '{ext}'") - else: - return True + return True # Allow use as a contextmanager def __enter__(self) -> ExcelWriter: @@ -1511,9 +1461,8 @@ def inspect_excel_format( buf = stream.read(PEEK_SIZE) if buf is None: raise ValueError("stream is empty") - else: - assert isinstance(buf, bytes) - peek = buf + assert isinstance(buf, bytes) + peek = buf stream.seek(0) if any(peek.startswith(sig) for sig in XLS_SIGNATURES): @@ -1577,8 +1526,6 @@ class ExcelFile: - Otherwise if `openpyxl `_ is installed, then ``openpyxl`` will be used. - Otherwise if ``xlrd >= 2.0`` is installed, a ``ValueError`` will be raised. - - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be raised. - This case will raise a ``ValueError`` in a future version of pandas. .. 
warning:: @@ -1624,9 +1571,9 @@ def __init__( xlrd_version = Version(get_version(xlrd)) - ext = None if engine is None: # Only determine ext if it is needed + ext: str | None if xlrd_version is not None and isinstance(path_or_buffer, xlrd.Book): ext = "xls" else: @@ -1643,32 +1590,6 @@ def __init__( if engine == "auto": engine = get_default_engine(ext, mode="reader") - if engine == "xlrd" and xlrd_version is not None: - if ext is None: - # Need ext to determine ext in order to raise/warn - if isinstance(path_or_buffer, xlrd.Book): - ext = "xls" - else: - ext = inspect_excel_format( - path_or_buffer, storage_options=storage_options - ) - - # Pass through if ext is None, otherwise check if ext valid for xlrd - if ext and ext != "xls" and xlrd_version >= Version("2"): - raise ValueError( - f"Your version of xlrd is {xlrd_version}. In xlrd >= 2.0, " - f"only the xls format is supported. Install openpyxl instead." - ) - elif ext and ext != "xls": - stacklevel = find_stack_level() - warnings.warn( - f"Your version of xlrd is {xlrd_version}. In xlrd >= 2.0, " - f"only the xls format is supported. Install " - f"openpyxl instead.", - FutureWarning, - stacklevel=stacklevel, - ) - assert engine is not None self.engine = engine self.storage_options = storage_options diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 5603c601e2c45..7c90178226408 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -10,7 +10,7 @@ cast, ) -import pandas._libs.json as json +from pandas._libs import json from pandas._typing import ( FilePath, StorageOptions, diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 5572116ca29fe..85f1e7fda8daa 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -139,8 +139,7 @@ def _convert_to_style_kwargs(cls, style_dict: dict) -> dict[str, Serialisable]: style_kwargs: dict[str, Serialisable] = {} for k, v in style_dict.items(): - if k in _style_key_map: - k = _style_key_map[k] + k = _style_key_map.get(k, k) _conv_to_x = getattr(cls, f"_convert_to_{k}", lambda x: None) new_v = _conv_to_x(v) if new_v: @@ -218,8 +217,7 @@ def _convert_to_font(cls, font_dict): font_kwargs = {} for k, v in font_dict.items(): - if k in _font_key_map: - k = _font_key_map[k] + k = _font_key_map.get(k, k) if k == "color": v = cls._convert_to_color(v) font_kwargs[k] = v @@ -288,11 +286,8 @@ def _convert_to_fill(cls, fill_dict: dict[str, Any]): pfill_kwargs = {} gfill_kwargs = {} for k, v in fill_dict.items(): - pk = gk = None - if k in _pattern_fill_key_map: - pk = _pattern_fill_key_map[k] - if k in _gradient_fill_key_map: - gk = _gradient_fill_key_map[k] + pk = _pattern_fill_key_map.get(k) + gk = _gradient_fill_key_map.get(k) if pk in ["start_color", "end_color"]: v = cls._convert_to_color(v) if gk == "stop": @@ -336,8 +331,7 @@ def _convert_to_side(cls, side_spec): side_kwargs = {} for k, v in side_spec.items(): - if k in _side_key_map: - k = _side_key_map[k] + k = _side_key_map.get(k, k) if k == "color": v = cls._convert_to_color(v) side_kwargs[k] = v @@ -375,8 +369,7 @@ def _convert_to_border(cls, border_dict): border_kwargs = {} for k, v in border_dict.items(): - if k in _border_key_map: - k = _border_key_map[k] + k = _border_key_map.get(k, k) if k == "color": v = cls._convert_to_color(v) if k in ["left", "right", "top", "bottom", "diagonal"]: diff --git a/pandas/io/excel/_util.py b/pandas/io/excel/_util.py index c315657170a97..72c64c5ec8939 100644 --- a/pandas/io/excel/_util.py +++ 
b/pandas/io/excel/_util.py @@ -73,7 +73,6 @@ def get_default_engine(ext: str, mode: Literal["reader", "writer"] = "reader") - "xlsx": "openpyxl", "xlsm": "openpyxl", "xlsb": "pyxlsb", - "xls": "xlwt", "ods": "odf", } assert mode in ["reader", "writer"] diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py index 171705dee6e59..c556e4c68c6c0 100644 --- a/pandas/io/excel/_xlrd.py +++ b/pandas/io/excel/_xlrd.py @@ -30,7 +30,7 @@ def __init__( Object to be parsed. {storage_options} """ - err_msg = "Install xlrd >= 1.0.0 for Excel support" + err_msg = "Install xlrd >= 2.0.1 for xls Excel support" import_optional_dependency("xlrd", extra=err_msg) super().__init__(filepath_or_buffer, storage_options=storage_options) diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py index 8d11896cb7374..dd583c41a90d0 100644 --- a/pandas/io/excel/_xlsxwriter.py +++ b/pandas/io/excel/_xlsxwriter.py @@ -5,7 +5,7 @@ Any, ) -import pandas._libs.json as json +from pandas._libs import json from pandas._typing import ( FilePath, StorageOptions, diff --git a/pandas/io/excel/_xlwt.py b/pandas/io/excel/_xlwt.py deleted file mode 100644 index f1455e472bb43..0000000000000 --- a/pandas/io/excel/_xlwt.py +++ /dev/null @@ -1,228 +0,0 @@ -from __future__ import annotations - -from typing import ( - TYPE_CHECKING, - Any, - Tuple, - cast, -) - -import pandas._libs.json as json -from pandas._typing import ( - FilePath, - StorageOptions, - WriteExcelBuffer, -) - -from pandas.io.excel._base import ExcelWriter -from pandas.io.excel._util import ( - combine_kwargs, - validate_freeze_panes, -) - -if TYPE_CHECKING: - from xlwt import ( - Workbook, - XFStyle, - ) - - -class XlwtWriter(ExcelWriter): - _engine = "xlwt" - _supported_extensions = (".xls",) - - def __init__( - self, - path: FilePath | WriteExcelBuffer | ExcelWriter, - engine: str | None = None, - date_format: str | None = None, - datetime_format: str | None = None, - encoding: str | None = None, - mode: str = "w", - storage_options: StorageOptions = None, - if_sheet_exists: str | None = None, - engine_kwargs: dict[str, Any] | None = None, - **kwargs, - ) -> None: - # Use the xlwt module as the Excel writer. - import xlwt - - engine_kwargs = combine_kwargs(engine_kwargs, kwargs) - - if mode == "a": - raise ValueError("Append mode is not supported with xlwt!") - - super().__init__( - path, - mode=mode, - storage_options=storage_options, - if_sheet_exists=if_sheet_exists, - engine_kwargs=engine_kwargs, - ) - - if encoding is None: - encoding = "ascii" - self._book = xlwt.Workbook(encoding=encoding, **engine_kwargs) - self._fm_datetime = xlwt.easyxf(num_format_str=self._datetime_format) - self._fm_date = xlwt.easyxf(num_format_str=self._date_format) - - @property - def book(self) -> Workbook: - """ - Book instance of class xlwt.Workbook. - - This attribute can be used to access engine-specific features. - """ - return self._book - - @book.setter - def book(self, other: Workbook) -> None: - """ - Set book instance. Class type will depend on the engine used. - """ - self._deprecate_set_book() - self._book = other - - @property - def sheets(self) -> dict[str, Any]: - """Mapping of sheet names to sheet objects.""" - result = {sheet.name: sheet for sheet in self.book._Workbook__worksheets} - return result - - @property - def fm_date(self): - """ - XFStyle formatter for dates. - """ - self._deprecate("fm_date") - return self._fm_date - - @property - def fm_datetime(self): - """ - XFStyle formatter for dates. 
- """ - self._deprecate("fm_datetime") - return self._fm_datetime - - def _save(self) -> None: - """ - Save workbook to disk. - """ - if self.sheets: - # fails when the ExcelWriter is just opened and then closed - self.book.save(self._handles.handle) - - def _write_cells( - self, - cells, - sheet_name: str | None = None, - startrow: int = 0, - startcol: int = 0, - freeze_panes: tuple[int, int] | None = None, - ) -> None: - - sheet_name = self._get_sheet_name(sheet_name) - - if sheet_name in self.sheets: - wks = self.sheets[sheet_name] - else: - wks = self.book.add_sheet(sheet_name) - self.sheets[sheet_name] = wks - - if validate_freeze_panes(freeze_panes): - freeze_panes = cast(Tuple[int, int], freeze_panes) - wks.set_panes_frozen(True) - wks.set_horz_split_pos(freeze_panes[0]) - wks.set_vert_split_pos(freeze_panes[1]) - - style_dict: dict[str, XFStyle] = {} - - for cell in cells: - val, fmt = self._value_with_fmt(cell.val) - - stylekey = json.dumps(cell.style) - if fmt: - stylekey += fmt - - if stylekey in style_dict: - style = style_dict[stylekey] - else: - style = self._convert_to_style(cell.style, fmt) - style_dict[stylekey] = style - - if cell.mergestart is not None and cell.mergeend is not None: - wks.write_merge( - startrow + cell.row, - startrow + cell.mergestart, - startcol + cell.col, - startcol + cell.mergeend, - val, - style, - ) - else: - wks.write(startrow + cell.row, startcol + cell.col, val, style) - - @classmethod - def _style_to_xlwt( - cls, item, firstlevel: bool = True, field_sep: str = ",", line_sep: str = ";" - ) -> str: - """ - helper which recursively generate an xlwt easy style string - for example: - - hstyle = {"font": {"bold": True}, - "border": {"top": "thin", - "right": "thin", - "bottom": "thin", - "left": "thin"}, - "align": {"horiz": "center"}} - will be converted to - font: bold on; \ - border: top thin, right thin, bottom thin, left thin; \ - align: horiz center; - """ - if hasattr(item, "items"): - if firstlevel: - it = [ - f"{key}: {cls._style_to_xlwt(value, False)}" - for key, value in item.items() - ] - out = f"{line_sep.join(it)} " - return out - else: - it = [ - f"{key} {cls._style_to_xlwt(value, False)}" - for key, value in item.items() - ] - out = f"{field_sep.join(it)} " - return out - else: - item = f"{item}" - item = item.replace("True", "on") - item = item.replace("False", "off") - return item - - @classmethod - def _convert_to_style( - cls, style_dict, num_format_str: str | None = None - ) -> XFStyle: - """ - converts a style_dict to an xlwt style object - - Parameters - ---------- - style_dict : style dictionary to convert - num_format_str : optional number format string - """ - import xlwt - - if style_dict: - xlwt_stylestr = cls._style_to_xlwt(style_dict) - style = xlwt.easyxf(xlwt_stylestr, field_sep=",", line_sep=";") - else: - style = xlwt.XFStyle() - if num_format_str is not None: - style.num_format_str = num_format_str - - return style diff --git a/pandas/io/formats/css.py b/pandas/io/formats/css.py index cfc95bc9d9569..4328683875223 100644 --- a/pandas/io/formats/css.py +++ b/pandas/io/formats/css.py @@ -194,11 +194,11 @@ class CSSResolver: CSS_EXPANSIONS = { **{ - "-".join(["border", prop] if prop else ["border"]): _border_expander(prop) + (f"border-{prop}" if prop else "border"): _border_expander(prop) for prop in ["", "top", "right", "bottom", "left"] }, **{ - "-".join(["border", prop]): _side_expander("border-{:s}-" + prop) + f"border-{prop}": _side_expander(f"border-{{:s}}-{prop}") for prop in ["color", "style", "width"] }, **{ 
diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 6ab57b0cce2a4..9f3d7d965f7c9 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -213,8 +213,7 @@ def write_cols(self) -> Sequence[Hashable]: raise ValueError( f"Writing {len(self.cols)} cols but got {len(self.header)} aliases" ) - else: - return self.header + return self.header else: # self.cols is an ndarray derived from Index._format_native_types, # so its entries are strings, i.e. hashable diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 466ffe5ac1b49..a26b85390fd49 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -658,8 +658,7 @@ def _format_header_regular(self) -> Iterable[ExcelCell]: f"Writing {len(self.columns)} cols " f"but got {len(self.header)} aliases" ) - else: - colnames = self.header + colnames = self.header for colindex, colname in enumerate(colnames): yield CssExcelCell( @@ -879,14 +878,8 @@ def write( is to be frozen engine : string, default None write engine to use if writer is a path - you can also set this - via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, - and ``io.excel.xlsm.writer``. - - .. deprecated:: 1.2.0 - - As the `xlwt `__ package is no longer - maintained, the ``xlwt`` engine will be removed in a future - version of pandas. + via the options ``io.excel.xlsx.writer``, + or ``io.excel.xlsm.writer``. {storage_options} diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index e0f6e01a65052..5e87db93cf56c 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -52,13 +52,6 @@ shows the counts, and False never shows the counts.""" ) -null_counts_sub = dedent( - """ - null_counts : bool, optional - .. deprecated:: 1.2.0 - Use show_counts instead.""" -) - frame_examples_sub = dedent( """\ @@ -159,7 +152,6 @@ "type_sub": " and columns", "max_cols_sub": frame_max_cols_sub, "show_counts_sub": show_counts_sub, - "null_counts_sub": null_counts_sub, "examples_sub": frame_examples_sub, "see_also_sub": frame_see_also_sub, "version_added_sub": "", @@ -240,7 +232,6 @@ "type_sub": "", "max_cols_sub": "", "show_counts_sub": show_counts_sub, - "null_counts_sub": "", "examples_sub": series_examples_sub, "see_also_sub": series_see_also_sub, "version_added_sub": "\n.. versionadded:: 1.4.0\n", @@ -280,7 +271,7 @@ at the cost of computational resources. See the :ref:`Frequently Asked Questions ` for more details. - {show_counts_sub}{null_counts_sub} + {show_counts_sub} Returns ------- @@ -500,10 +491,7 @@ def non_null_counts(self) -> Sequence[int]: @property def memory_usage_bytes(self) -> int: - if self.memory_usage == "deep": - deep = True - else: - deep = False + deep = self.memory_usage == "deep" return self.data.memory_usage(index=True, deep=deep).sum() def render( @@ -579,10 +567,7 @@ def memory_usage_bytes(self) -> int: memory_usage_bytes : int Object's total memory usage in bytes. 
""" - if self.memory_usage == "deep": - deep = True - else: - deep = False + deep = self.memory_usage == "deep" return self.data.memory_usage(index=True, deep=deep) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 0e7174c6ad765..06d0acfdf440c 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -16,13 +16,11 @@ Sequence, overload, ) -import warnings import numpy as np from pandas._config import get_option -from pandas._libs import lib from pandas._typing import ( Axis, AxisInt, @@ -39,7 +37,6 @@ Substitution, doc, ) -from pandas.util._exceptions import find_stack_level import pandas as pd from pandas import ( @@ -270,8 +267,6 @@ def __init__( formatter = formatter or get_option("styler.format.formatter") # precision is handled by superclass as default for performance - self.precision = precision # can be removed on set_precision depr cycle - self.na_rep = na_rep # can be removed on set_na_rep depr cycle self.format( formatter=formatter, precision=precision, @@ -390,72 +385,6 @@ def _repr_latex_(self) -> str | None: return self.to_latex() return None - def render( - self, - sparse_index: bool | None = None, - sparse_columns: bool | None = None, - **kwargs, - ) -> str: - """ - Render the ``Styler`` including all applied styles to HTML. - - .. deprecated:: 1.4.0 - - Parameters - ---------- - sparse_index : bool, optional - Whether to sparsify the display of a hierarchical index. Setting to False - will display each explicit level element in a hierarchical key for each row. - Defaults to ``pandas.options.styler.sparse.index`` value. - sparse_columns : bool, optional - Whether to sparsify the display of a hierarchical index. Setting to False - will display each explicit level element in a hierarchical key for each row. - Defaults to ``pandas.options.styler.sparse.columns`` value. - **kwargs - Any additional keyword arguments are passed - through to ``self.template.render``. - This is useful when you need to provide - additional variables for a custom template. - - Returns - ------- - rendered : str - The rendered HTML. - - Notes - ----- - This method is deprecated in favour of ``Styler.to_html``. - - Styler objects have defined the ``_repr_html_`` method - which automatically calls ``self.to_html()`` when it's the - last item in a Notebook cell. - - When calling ``Styler.render()`` directly, wrap the result in - ``IPython.display.HTML`` to view the rendered HTML in the notebook. - - Pandas uses the following keys in render. Arguments passed - in ``**kwargs`` take precedence, so think carefully if you want - to override them: - - * head - * cellstyle - * body - * uuid - * table_styles - * caption - * table_attributes - """ - warnings.warn( - "this method is deprecated in favour of `Styler.to_html()`", - FutureWarning, - stacklevel=find_stack_level(), - ) - if sparse_index is None: - sparse_index = get_option("styler.sparse.index") - if sparse_columns is None: - sparse_columns = get_option("styler.sparse.columns") - return self._render_html(sparse_index, sparse_columns, **kwargs) - def set_tooltips( self, ttips: DataFrame, @@ -2063,108 +1992,6 @@ def applymap( ) return self - @Substitution(subset=subset) - def where( - self, - cond: Callable, - value: str, - other: str | None = None, - subset: Subset | None = None, - **kwargs, - ) -> Styler: - """ - Apply CSS-styles based on a conditional function elementwise. - - .. 
deprecated:: 1.3.0 - - Updates the HTML representation with a style which is - selected in accordance with the return value of a function. - - Parameters - ---------- - cond : callable - ``cond`` should take a scalar, and optional keyword arguments, and return - a boolean. - value : str - Applied when ``cond`` returns true. - other : str - Applied when ``cond`` returns false. - %(subset)s - **kwargs : dict - Pass along to ``cond``. - - Returns - ------- - self : Styler - - See Also - -------- - Styler.applymap: Apply a CSS-styling function elementwise. - Styler.apply: Apply a CSS-styling function column-wise, row-wise, or table-wise. - - Notes - ----- - This method is deprecated. - - This method is a convenience wrapper for :meth:`Styler.applymap`, which we - recommend using instead. - - The example: - - >>> df = pd.DataFrame([[1, 2], [3, 4]]) - >>> def cond(v, limit=4): - ... return v > 1 and v != limit - >>> df.style.where(cond, value='color:green;', other='color:red;') - ... # doctest: +SKIP - - should be refactored to: - - >>> def style_func(v, value, other, limit=4): - ... cond = v > 1 and v != limit - ... return value if cond else other - >>> df.style.applymap(style_func, value='color:green;', other='color:red;') - ... # doctest: +SKIP - """ - warnings.warn( - "this method is deprecated in favour of `Styler.applymap()`", - FutureWarning, - stacklevel=find_stack_level(), - ) - - if other is None: - other = "" - - return self.applymap( - lambda val: value if cond(val, **kwargs) else other, - subset=subset, - ) - - def set_precision(self, precision: int) -> StylerRenderer: - """ - Set the precision used to display values. - - .. deprecated:: 1.3.0 - - Parameters - ---------- - precision : int - - Returns - ------- - self : Styler - - Notes - ----- - This method is deprecated see `Styler.format`. - """ - warnings.warn( - "this method is deprecated in favour of `Styler.format(precision=..)`", - FutureWarning, - stacklevel=find_stack_level(), - ) - self.precision = precision - return self.format(precision=precision, na_rep=self.na_rep) - def set_table_attributes(self, attributes: str) -> Styler: """ Set the table attributes added to the ```` HTML element. @@ -2649,140 +2476,6 @@ def set_table_styles( self.table_styles = table_styles return self - def set_na_rep(self, na_rep: str) -> StylerRenderer: - """ - Set the missing data representation on a ``Styler``. - - .. versionadded:: 1.0.0 - - .. deprecated:: 1.3.0 - - Parameters - ---------- - na_rep : str - - Returns - ------- - self : Styler - - Notes - ----- - This method is deprecated. See `Styler.format()` - """ - warnings.warn( - "this method is deprecated in favour of `Styler.format(na_rep=..)`", - FutureWarning, - stacklevel=find_stack_level(), - ) - self.na_rep = na_rep - return self.format(na_rep=na_rep, precision=self.precision) - - def hide_index( - self, - subset: Subset | None = None, - level: Level | list[Level] | None = None, - names: bool = False, - ) -> Styler: - """ - Hide the entire index, or specific keys in the index from rendering. - - This method has dual functionality: - - - if ``subset`` is ``None`` then the entire index, or specified levels, will - be hidden whilst displaying all data-rows. - - if a ``subset`` is given then those specific rows will be hidden whilst the - index itself remains visible. - - .. versionchanged:: 1.3.0 - - .. 
deprecated:: 1.4.0 - This method should be replaced by ``hide(axis="index", **kwargs)`` - - Parameters - ---------- - subset : label, array-like, IndexSlice, optional - A valid 1d input or single key along the index axis within - `DataFrame.loc[, :]`, to limit ``data`` to *before* applying - the function. - level : int, str, list - The level(s) to hide in a MultiIndex if hiding the entire index. Cannot be - used simultaneously with ``subset``. - - .. versionadded:: 1.4.0 - names : bool - Whether to hide the index name(s), in the case the index or part of it - remains visible. - - .. versionadded:: 1.4.0 - - Returns - ------- - self : Styler - - See Also - -------- - Styler.hide: Hide the entire index / columns, or specific rows / columns. - """ - warnings.warn( - 'this method is deprecated in favour of `Styler.hide(axis="index")`', - FutureWarning, - stacklevel=find_stack_level(), - ) - return self.hide(axis="index", level=level, subset=subset, names=names) - - def hide_columns( - self, - subset: Subset | None = None, - level: Level | list[Level] | None = None, - names: bool = False, - ) -> Styler: - """ - Hide the column headers or specific keys in the columns from rendering. - - This method has dual functionality: - - - if ``subset`` is ``None`` then the entire column headers row, or - specific levels, will be hidden whilst the data-values remain visible. - - if a ``subset`` is given then those specific columns, including the - data-values will be hidden, whilst the column headers row remains visible. - - .. versionchanged:: 1.3.0 - - ..deprecated:: 1.4.0 - This method should be replaced by ``hide(axis="columns", **kwargs)`` - - Parameters - ---------- - subset : label, array-like, IndexSlice, optional - A valid 1d input or single key along the columns axis within - `DataFrame.loc[:, ]`, to limit ``data`` to *before* applying - the function. - level : int, str, list - The level(s) to hide in a MultiIndex if hiding the entire column headers - row. Cannot be used simultaneously with ``subset``. - - .. versionadded:: 1.4.0 - names : bool - Whether to hide the column index name(s), in the case all column headers, - or some levels, are visible. - - .. versionadded:: 1.4.0 - - Returns - ------- - self : Styler - - See Also - -------- - Styler.hide: Hide the entire index / columns, or specific rows / columns. 
- """ - warnings.warn( - 'this method is deprecated in favour of `Styler.hide(axis="columns")`', - FutureWarning, - stacklevel=find_stack_level(), - ) - return self.hide(axis="columns", level=level, subset=subset, names=names) - def hide( self, subset: Subset | None = None, @@ -2958,10 +2651,7 @@ def hide( setattr( self, f"hide_{objs}_", - [ - True if lev in levels_ else False - for lev in range(getattr(self, objs).nlevels) - ], + [lev in levels_ for lev in range(getattr(self, objs).nlevels)], ) else: if axis == 0: @@ -3212,7 +2902,7 @@ def set_properties(self, subset: Subset | None = None, **kwargs) -> Styler: return self.applymap(lambda x: values, subset=subset) @Substitution(subset=subset) - def bar( + def bar( # pylint: disable=disallowed-name self, subset: Subset | None = None, axis: Axis | None = 0, @@ -3312,7 +3002,7 @@ def bar( if not 0 <= width <= 100: raise ValueError(f"`width` must be a value in [0, 100], got {width}") - elif not 0 <= height <= 100: + if not 0 <= height <= 100: raise ValueError(f"`height` must be a value in [0, 100], got {height}") if subset is None: @@ -3337,10 +3027,9 @@ def bar( @Substitution(subset=subset, props=props, color=color.format(default="red")) def highlight_null( self, - color: str | None = None, + color: str = "red", subset: Subset | None = None, props: str | None = None, - null_color: str | lib.NoDefault = lib.no_default, ) -> Styler: """ Highlight missing values with a style. @@ -3359,13 +3048,6 @@ def highlight_null( .. versionadded:: 1.3.0 - null_color : str, default None - The background color for highlighting. - - .. deprecated:: 1.5.0 - Use ``color`` instead. If ``color`` is given ``null_color`` is - not used. - Returns ------- self : Styler @@ -3381,17 +3063,6 @@ def highlight_null( def f(data: DataFrame, props: str) -> np.ndarray: return np.where(pd.isna(data).to_numpy(), props, "") - if null_color != lib.no_default: - warnings.warn( - "`null_color` is deprecated: use `color` instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - - if color is None and null_color == lib.no_default: - color = "red" - elif color is None and null_color != lib.no_default: - color = null_color if props is None: props = f"background-color: {color};" return self.apply(f, axis=None, subset=subset, props=props) @@ -3889,12 +3560,12 @@ def _validate_apply_axis_arg( f"'{arg_name}' is a Series but underlying data for operations " f"is a DataFrame since 'axis=None'" ) - elif isinstance(arg, DataFrame) and isinstance(data, Series): + if isinstance(arg, DataFrame) and isinstance(data, Series): raise ValueError( f"'{arg_name}' is a DataFrame but underlying data for " f"operations is a Series with 'axis in [0,1]'" ) - elif isinstance(arg, (Series, DataFrame)): # align indx / cols to data + if isinstance(arg, (Series, DataFrame)): # align indx / cols to data arg = arg.reindex_like(data, method=None).to_numpy(**dtype) else: arg = np.asarray(arg, **dtype) @@ -3933,15 +3604,11 @@ def _background_gradient( rng = smax - smin # extend lower / upper bounds, compresses color range norm = mpl.colors.Normalize(smin - (rng * low), smax + (rng * high)) - from pandas.plotting._matplotlib.compat import mpl_ge_3_6_0 - if mpl_ge_3_6_0(): - if cmap is None: - rgbas = mpl.colormaps[mpl.rcParams["image.cmap"]](norm(gmap)) - else: - rgbas = mpl.colormaps[cmap](norm(gmap)) + if cmap is None: + rgbas = mpl.colormaps[mpl.rcParams["image.cmap"]](norm(gmap)) else: - rgbas = plt.cm.get_cmap(cmap)(norm(gmap)) + rgbas = mpl.colormaps.get_cmap(cmap)(norm(gmap)) def relative_luminance(rgba) 
-> float: """ @@ -4220,10 +3887,8 @@ def css_calc(x, left: float, right: float, align: str, color: str | list | tuple if cmap is not None: # use the matplotlib colormap input with _mpl(Styler.bar) as (plt, mpl): - from pandas.plotting._matplotlib.compat import mpl_ge_3_6_0 - cmap = ( - (mpl.colormaps[cmap] if mpl_ge_3_6_0() else mpl.cm.get_cmap(cmap)) + mpl.colormaps[cmap] if isinstance(cmap, str) else cmap # assumed to be a Colormap instance as documented ) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 6d815b43a9bcd..0f93027f3f775 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -399,11 +399,11 @@ def _translate_header(self, sparsify_cols: bool, max_cols: int): for r, hide in enumerate(self.hide_columns_): if hide or not clabels: continue - else: - header_row = self._generate_col_header_row( - (r, clabels), max_cols, col_lengths - ) - head.append(header_row) + + header_row = self._generate_col_header_row( + (r, clabels), max_cols, col_lengths + ) + head.append(header_row) # 2) index names if ( @@ -904,7 +904,7 @@ def concatenated_visible_rows(obj, n, row_indices): f"`clines` value of {clines} is invalid. Should either be None or one " f"of 'all;data', 'all;index', 'skip-last;data', 'skip-last;index'." ) - elif clines is not None: + if clines is not None: data_len = len(row_body_cells) if "data" in clines and d["body"] else 0 d["clines"] = defaultdict(list) @@ -2284,7 +2284,7 @@ def color(value, user_arg, command, comm_arg): if isinstance(value, str) and "--latex" in value: # return the style without conversion but drop '--latex' latex_styles.append((attribute, value.replace("--latex", ""))) - if attribute in CONVERTED_ATTRIBUTES.keys(): + if attribute in CONVERTED_ATTRIBUTES: arg = "" for x in ["--wrap", "--nowrap", "--lwrap", "--dwrap", "--rwrap"]: if x in str(value): diff --git a/pandas/io/html.py b/pandas/io/html.py index a08b73d94250b..eceff2d2ec1f3 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -27,14 +27,13 @@ AbstractMethodError, EmptyDataError, ) -from pandas.util._decorators import deprecate_nonkeyword_arguments from pandas.core.dtypes.common import is_list_like from pandas import isna -from pandas.core.construction import create_series_with_explicit_dtype from pandas.core.indexes.base import Index from pandas.core.indexes.multi import MultiIndex +from pandas.core.series import Series from pandas.io.common import ( file_exists, @@ -858,7 +857,7 @@ def _parse_tfoot_tr(self, table): def _expand_elements(body) -> None: data = [len(elem) for elem in body] - lens = create_series_with_explicit_dtype(data, dtype_if_empty=object) + lens = Series(data) lens_max = lens.max() not_max = lens[lens != lens_max] @@ -1026,9 +1025,9 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, extract_links, ** return ret -@deprecate_nonkeyword_arguments(version="2.0") def read_html( io: FilePath | ReadBuffer[str], + *, match: str | Pattern = ".+", flavor: str | None = None, header: int | Sequence[int] | None = None, diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 4bf883a7214bf..d6c154962ad83 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -21,7 +21,7 @@ import numpy as np -import pandas._libs.json as json +from pandas._libs import json from pandas._libs.tslibs import iNaT from pandas._typing import ( CompressionOptions, @@ -34,10 +34,7 @@ WriteBuffer, ) from pandas.errors import AbstractMethodError -from pandas.util._decorators import ( - 
deprecate_nonkeyword_arguments, - doc, -) +from pandas.util._decorators import doc from pandas.core.dtypes.common import ( ensure_str, @@ -52,7 +49,6 @@ notna, to_datetime, ) -from pandas.core.construction import create_series_with_explicit_dtype from pandas.core.reshape.concat import concat from pandas.core.shared_docs import _shared_docs @@ -252,7 +248,6 @@ def write(self) -> str: @abstractmethod def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]: """Object to write in JSON format.""" - pass class SeriesWriter(Writer): @@ -452,6 +447,7 @@ def read_json( @overload def read_json( path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes], + *, orient: str | None = ..., typ: Literal["frame"] = ..., dtype: DtypeArg | None = ..., @@ -475,9 +471,9 @@ def read_json( storage_options=_shared_docs["storage_options"], decompression_options=_shared_docs["decompression_options"] % "path_or_buf", ) -@deprecate_nonkeyword_arguments(version="2.0", allowed_args=["path_or_buf"]) def read_json( path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes], + *, orient: str | None = None, typ: Literal["frame", "series"] = "frame", dtype: DtypeArg | None = None, @@ -1223,9 +1219,9 @@ def _parse(self) -> None: if self.orient == "split": decoded = {str(k): v for k, v in data.items()} self.check_keys_split(decoded) - self.obj = create_series_with_explicit_dtype(**decoded) + self.obj = Series(**decoded) else: - self.obj = create_series_with_explicit_dtype(data, dtype_if_empty=object) + self.obj = Series(data) def _try_convert_types(self) -> None: if self.obj is None: diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 03afdcda35021..3791dba6e36e3 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -112,9 +112,9 @@ def nested_to_record( v = new_d.pop(k) new_d[newkey] = v continue - else: - v = new_d.pop(k) - new_d.update(nested_to_record(v, newkey, sep, level + 1, max_level)) + + v = new_d.pop(k) + new_d.update(nested_to_record(v, newkey, sep, level + 1, max_level)) new_ds.append(new_d) if singleton: @@ -403,7 +403,7 @@ def _pull_field( f"Key {e} not found. If specifying a record_path, all elements of " f"data should have the path." ) from e - elif errors == "ignore": + if errors == "ignore": return np.nan else: raise KeyError( diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 0d6cab20f2a59..e1b8388788143 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -12,7 +12,7 @@ ) import warnings -import pandas._libs.json as json +from pandas._libs import json from pandas._typing import ( DtypeObj, JSONSerializable, diff --git a/pandas/io/meson.build b/pandas/io/meson.build index 5e7e68dacec49..04d7585ebc274 100644 --- a/pandas/io/meson.build +++ b/pandas/io/meson.build @@ -15,7 +15,6 @@ top_level_py_list = [ 'api.py', 'clipboards.py', 'common.py', - 'date_converters.py', 'feather_format.py', 'gbq.py', 'html.py', diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index df02a6fbca295..1c14722227124 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -305,7 +305,7 @@ def write( "Cannot use both partition_on and " "partition_cols. 
Use partition_cols for partitioning data" ) - elif "partition_on" in kwargs: + if "partition_on" in kwargs: partition_cols = kwargs.pop("partition_on") if partition_cols is not None: diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index 49b71efbfb6ec..68158a30f7fdf 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -1,16 +1,17 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from pandas._typing import ReadBuffer from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.inference import is_integer -from pandas.io.parsers.base_parser import ParserBase +from pandas import ( + DataFrame, + arrays, + get_option, +) -if TYPE_CHECKING: - from pandas import DataFrame +from pandas.io.parsers.base_parser import ParserBase class ArrowParserWrapper(ParserBase): @@ -77,7 +78,7 @@ def _get_pyarrow_options(self) -> None: else self.kwds["skiprows"], } - def _finalize_output(self, frame: DataFrame) -> DataFrame: + def _finalize_pandas_output(self, frame: DataFrame) -> DataFrame: """ Processes data read in based on kwargs. @@ -95,9 +96,7 @@ def _finalize_output(self, frame: DataFrame) -> DataFrame: multi_index_named = True if self.header is None: if self.names is None: - if self.prefix is not None: - self.names = [f"{self.prefix}{i}" for i in range(num_cols)] - elif self.header is None: + if self.header is None: self.names = range(num_cols) if len(self.names) != num_cols: # usecols is passed through to pyarrow, we only handle index col here @@ -150,6 +149,16 @@ def read(self) -> DataFrame: parse_options=pyarrow_csv.ParseOptions(**self.parse_options), convert_options=pyarrow_csv.ConvertOptions(**self.convert_options), ) - - frame = table.to_pandas() - return self._finalize_output(frame) + if ( + self.kwds["use_nullable_dtypes"] + and get_option("io.nullable_backend") == "pyarrow" + ): + frame = DataFrame( + { + col_name: arrays.ArrowExtensionArray(pa_col) + for col_name, pa_col in zip(table.column_names, table.itercolumns()) + } + ) + else: + frame = table.to_pandas() + return self._finalize_pandas_output(frame) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 44773f13276c0..dd4e801af5894 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -26,9 +26,11 @@ import numpy as np -import pandas._libs.lib as lib +from pandas._libs import ( + lib, + parsers, +) import pandas._libs.ops as libops -import pandas._libs.parsers as parsers from pandas._libs.parsers import STR_NA_VALUES from pandas._libs.tslibs import parsing from pandas._typing import ( @@ -95,7 +97,6 @@ def __init__(self, kwds) -> None: self.names = kwds.get("names") self.orig_names: list | None = None - self.prefix = kwds.pop("prefix", None) self.index_col = kwds.get("index_col", None) self.unnamed_cols: set = set() @@ -153,11 +154,6 @@ def __init__(self, kwds) -> None: "index_col must only contain row numbers " "when specifying a multi-index header" ) - elif self.header is not None and self.prefix is not None: - # GH 27394 - raise ValueError( - "Argument prefix must be None if argument header is not None" - ) self._name_processed = False @@ -775,7 +771,7 @@ def _infer_types( result = BooleanArray(result, bool_mask) elif result.dtype == np.object_ and use_nullable_dtypes: # read_excel sends array of datetime objects - inferred_type, _ = lib.infer_datetimelike_array(result) + inferred_type = 
lib.infer_datetimelike_array(result) if inferred_type != "datetime": result = StringDtype().construct_array_type()._from_sequence(values) @@ -1159,7 +1155,6 @@ def converter(*date_cols): "header": "infer", "index_col": None, "names": None, - "prefix": None, "skiprows": None, "skipfooter": 0, "nrows": None, @@ -1183,15 +1178,12 @@ def converter(*date_cols): "chunksize": None, "verbose": False, "encoding": None, - "squeeze": None, "compression": None, "mangle_dupe_cols": True, "infer_datetime_format": False, "skip_blank_lines": True, "encoding_errors": "strict", "on_bad_lines": ParserBase.BadLineHandleMethod.ERROR, - "error_bad_lines": None, - "warn_bad_lines": None, "use_nullable_dtypes": False, } diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py index 106f4df4370b3..c1f2e6ddb2388 100644 --- a/pandas/io/parsers/c_parser_wrapper.py +++ b/pandas/io/parsers/c_parser_wrapper.py @@ -11,7 +11,7 @@ import numpy as np -import pandas._libs.parsers as parsers +from pandas._libs import parsers from pandas._typing import ( ArrayLike, DtypeArg, @@ -71,8 +71,6 @@ def __init__(self, src: ReadCsvBuffer[str], **kwds) -> None: "encoding", "memory_map", "compression", - "error_bad_lines", - "warn_bad_lines", ): kwds.pop(key, None) @@ -102,16 +100,8 @@ def __init__(self, src: ReadCsvBuffer[str], **kwds) -> None: # error: Cannot determine type of 'names' if self.names is None: # type: ignore[has-type] - if self.prefix: - # error: Cannot determine type of 'names' - self.names = [ # type: ignore[has-type] - f"{self.prefix}{i}" for i in range(self._reader.table_width) - ] - else: - # error: Cannot determine type of 'names' - self.names = list( # type: ignore[has-type] - range(self._reader.table_width) - ) + # error: Cannot determine type of 'names' + self.names = list(range(self._reader.table_width)) # type: ignore[has-type] # gh-9755 # diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index ddd73375f8871..121c52ba1c323 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -20,11 +20,10 @@ Sequence, cast, ) -import warnings import numpy as np -import pandas._libs.lib as lib +from pandas._libs import lib from pandas._typing import ( ArrayLike, ReadCsvBuffer, @@ -34,7 +33,6 @@ EmptyDataError, ParserError, ) -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import is_integer from pandas.core.dtypes.inference import is_dict_like @@ -536,10 +534,7 @@ def _infer_columns( num_original_columns = ncols if not names: - if self.prefix: - columns = [[f"{self.prefix}{i}" for i in range(ncols)]] - else: - columns = [list(range(ncols))] + columns = [list(range(ncols))] columns = self._handle_usecols( columns, columns[0], num_original_columns ) @@ -595,11 +590,9 @@ def _handle_usecols( col for col in self.usecols if col >= num_original_columns ] if missing_usecols: - warnings.warn( - "Defining usecols with out of bounds indices is deprecated " - "and will raise a ParserError in a future version.", - FutureWarning, - stacklevel=find_stack_level(), + raise ParserError( "Defining usecols with out-of-bounds indices is not allowed. 
" + f"{missing_usecols} are out of bounds.", ) col_indices = self.usecols @@ -763,7 +756,7 @@ def _alert_malformed(self, msg: str, row_num: int) -> None: """ if self.on_bad_lines == self.BadLineHandleMethod.ERROR: raise ParserError(msg) - elif self.on_bad_lines == self.BadLineHandleMethod.WARN: + if self.on_bad_lines == self.BadLineHandleMethod.WARN: base = f"Skipping line {row_num}: " sys.stderr.write(base + msg + "\n") diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index abd1182214f5f..700a2b6ba964c 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -1,5 +1,7 @@ """ Module contains tools for processing files into DataFrames or other objects + +GH#48849 provides a convenient way of deprecating keyword arguments """ from __future__ import annotations @@ -22,7 +24,9 @@ import numpy as np -import pandas._libs.lib as lib +from pandas._config import get_option + +from pandas._libs import lib from pandas._libs.parsers import STR_NA_VALUES from pandas._typing import ( CompressionOptions, @@ -40,10 +44,8 @@ from pandas.util._decorators import ( Appender, deprecate_kwarg, - deprecate_nonkeyword_arguments, ) from pandas.util._exceptions import find_stack_level -from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.common import ( is_file_like, @@ -150,17 +152,6 @@ example of a valid callable argument would be ``lambda x: x.upper() in ['AAA', 'BBB', 'DDD']``. Using this parameter results in much faster parsing time and lower memory usage. -squeeze : bool, default False - If the parsed data only contains one column then return a Series. - - .. deprecated:: 1.4.0 - Append ``.squeeze("columns")`` to the call to ``{func_name}`` to squeeze - the data. -prefix : str, optional - Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ... - - .. deprecated:: 1.4.0 - Use a list comprehension on the DataFrame's columns after calling ``read_csv``. mangle_dupe_cols : bool, default True Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than 'X'...'X'. Passing in False will cause data to be overwritten if there @@ -362,22 +353,6 @@ `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to override values, a ParserWarning will be issued. See csv.Dialect documentation for more details. -error_bad_lines : bool, optional, default ``None`` - Lines with too many fields (e.g. a csv line with too many commas) will by - default cause an exception to be raised, and no DataFrame will be returned. - If False, then these "bad lines" will be dropped from the DataFrame that is - returned. - - .. deprecated:: 1.3.0 - The ``on_bad_lines`` parameter should be used instead to specify behavior upon - encountering a bad line instead. -warn_bad_lines : bool, optional, default ``None`` - If error_bad_lines is False, and warn_bad_lines is True, a warning for each - "bad line" will be output. - - .. deprecated:: 1.3.0 - The ``on_bad_lines`` parameter should be used instead to specify behavior upon - encountering a bad line instead. on_bad_lines : {{'error', 'warn', 'skip'}} or callable, default 'error' Specifies what to do upon encountering a bad line (a line with too many fields). 
Allowed values are : @@ -473,8 +448,6 @@ "thousands", "memory_map", "dialect", - "warn_bad_lines", - "error_bad_lines", "on_bad_lines", "delim_whitespace", "quoting", @@ -495,18 +468,6 @@ class _DeprecationConfig(NamedTuple): msg: str | None -_deprecated_defaults: dict[str, _DeprecationConfig] = { - "error_bad_lines": _DeprecationConfig(None, "Use on_bad_lines in the future."), - "warn_bad_lines": _DeprecationConfig(None, "Use on_bad_lines in the future."), - "squeeze": _DeprecationConfig( - None, 'Append .squeeze("columns") to the call to squeeze.' - ), - "prefix": _DeprecationConfig( - None, "Use a list comprehension on the column names in the future." - ), -} - - @overload def validate_integer(name, val: None, min_val: int = ...) -> None: ... @@ -601,6 +562,14 @@ def _read( raise ValueError( "The 'chunksize' option is not supported with the 'pyarrow' engine" ) + elif ( + kwds.get("use_nullable_dtypes", False) + and get_option("io.nullable_backend") == "pyarrow" + ): + raise NotImplementedError( + f"use_nullable_dtypes=True and engine={kwds['engine']} with " + "io.nullable_backend set to 'pyarrow' is not implemented." + ) else: chunksize = validate_integer("chunksize", chunksize, 1) @@ -630,8 +599,6 @@ def read_csv( names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., usecols=..., - squeeze: bool | None = ..., - prefix: str | lib.NoDefault = ..., mangle_dupe_cols: bool = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., @@ -667,8 +634,6 @@ def read_csv( encoding: str | None = ..., encoding_errors: str | None = ..., dialect: str | csv.Dialect | None = ..., - error_bad_lines: bool | None = ..., - warn_bad_lines: bool | None = ..., on_bad_lines=..., delim_whitespace: bool = ..., low_memory=..., @@ -691,8 +656,6 @@ def read_csv( names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., usecols=..., - squeeze: bool | None = ..., - prefix: str | lib.NoDefault = ..., mangle_dupe_cols: bool = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., @@ -728,8 +691,6 @@ def read_csv( encoding: str | None = ..., encoding_errors: str | None = ..., dialect: str | csv.Dialect | None = ..., - error_bad_lines: bool | None = ..., - warn_bad_lines: bool | None = ..., on_bad_lines=..., delim_whitespace: bool = ..., low_memory=..., @@ -752,8 +713,6 @@ def read_csv( names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., usecols=..., - squeeze: bool | None = ..., - prefix: str | lib.NoDefault = ..., mangle_dupe_cols: bool = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., @@ -789,8 +748,6 @@ def read_csv( encoding: str | None = ..., encoding_errors: str | None = ..., dialect: str | csv.Dialect | None = ..., - error_bad_lines: bool | None = ..., - warn_bad_lines: bool | None = ..., on_bad_lines=..., delim_whitespace: bool = ..., low_memory=..., @@ -813,8 +770,6 @@ def read_csv( names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., usecols=..., - squeeze: bool | None = ..., - prefix: str | lib.NoDefault = ..., mangle_dupe_cols: bool = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., @@ -850,8 +805,6 @@ def read_csv( encoding: str | None = ..., encoding_errors: str | None = ..., dialect: str | csv.Dialect | None = ..., - error_bad_lines: bool | None = ..., - warn_bad_lines: bool | None = ..., on_bad_lines=..., delim_whitespace: bool = 
..., low_memory=..., @@ -864,7 +817,6 @@ def read_csv( @deprecate_kwarg(old_arg_name="mangle_dupe_cols", new_arg_name=None) -@deprecate_nonkeyword_arguments(version=None, allowed_args=["filepath_or_buffer"]) @Appender( _doc_read_csv_and_table.format( func_name="read_csv", @@ -877,6 +829,7 @@ def read_csv( ) def read_csv( filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, sep: str | None | lib.NoDefault = lib.no_default, delimiter: str | None | lib.NoDefault = None, # Column and Index Locations and Names @@ -884,8 +837,6 @@ def read_csv( names: Sequence[Hashable] | None | lib.NoDefault = lib.no_default, index_col: IndexLabel | Literal[False] | None = None, usecols=None, - squeeze: bool | None = None, - prefix: str | lib.NoDefault = lib.no_default, mangle_dupe_cols: bool = True, # General Parsing Configuration dtype: DtypeArg | None = None, @@ -927,11 +878,7 @@ def read_csv( encoding_errors: str | None = "strict", dialect: str | csv.Dialect | None = None, # Error Handling - error_bad_lines: bool | None = None, - warn_bad_lines: bool | None = None, - # TODO(2.0): set on_bad_lines to "error". - # See _refine_defaults_read comment for why we do this. - on_bad_lines=None, + on_bad_lines: str = "error", # Internal delim_whitespace: bool = False, low_memory=_c_parser_defaults["low_memory"], @@ -951,11 +898,8 @@ def read_csv( delim_whitespace, engine, sep, - error_bad_lines, - warn_bad_lines, on_bad_lines, names, - prefix, defaults={"delimiter": ","}, ) kwds.update(kwds_defaults) @@ -974,8 +918,6 @@ def read_table( names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., usecols=..., - squeeze: bool | None = ..., - prefix: str | lib.NoDefault = ..., mangle_dupe_cols: bool = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., @@ -1011,8 +953,6 @@ def read_table( encoding: str | None = ..., encoding_errors: str | None = ..., dialect: str | csv.Dialect | None = ..., - error_bad_lines: bool | None = ..., - warn_bad_lines: bool | None = ..., on_bad_lines=..., delim_whitespace: bool = ..., low_memory=..., @@ -1035,8 +975,6 @@ def read_table( names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., usecols=..., - squeeze: bool | None = ..., - prefix: str | lib.NoDefault = ..., mangle_dupe_cols: bool = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., @@ -1072,8 +1010,6 @@ def read_table( encoding: str | None = ..., encoding_errors: str | None = ..., dialect: str | csv.Dialect | None = ..., - error_bad_lines: bool | None = ..., - warn_bad_lines: bool | None = ..., on_bad_lines=..., delim_whitespace: bool = ..., low_memory=..., @@ -1096,8 +1032,6 @@ def read_table( names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., usecols=..., - squeeze: bool | None = ..., - prefix: str | lib.NoDefault = ..., mangle_dupe_cols: bool = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., @@ -1133,8 +1067,6 @@ def read_table( encoding: str | None = ..., encoding_errors: str | None = ..., dialect: str | csv.Dialect | None = ..., - error_bad_lines: bool | None = ..., - warn_bad_lines: bool | None = ..., on_bad_lines=..., delim_whitespace: bool = ..., low_memory=..., @@ -1157,8 +1089,6 @@ def read_table( names: Sequence[Hashable] | None | lib.NoDefault = ..., index_col: IndexLabel | Literal[False] | None = ..., usecols=..., - squeeze: bool | None = ..., - prefix: str | lib.NoDefault = ..., 
mangle_dupe_cols: bool = ..., dtype: DtypeArg | None = ..., engine: CSVEngine | None = ..., @@ -1194,8 +1124,6 @@ def read_table( encoding: str | None = ..., encoding_errors: str | None = ..., dialect: str | csv.Dialect | None = ..., - error_bad_lines: bool | None = ..., - warn_bad_lines: bool | None = ..., on_bad_lines=..., delim_whitespace: bool = ..., low_memory=..., @@ -1208,7 +1136,6 @@ def read_table( @deprecate_kwarg(old_arg_name="mangle_dupe_cols", new_arg_name=None) -@deprecate_nonkeyword_arguments(version=None, allowed_args=["filepath_or_buffer"]) @Appender( _doc_read_csv_and_table.format( func_name="read_table", @@ -1221,6 +1148,7 @@ def read_table( ) def read_table( filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, sep: str | None | lib.NoDefault = lib.no_default, delimiter: str | None | lib.NoDefault = None, # Column and Index Locations and Names @@ -1228,8 +1156,6 @@ def read_table( names: Sequence[Hashable] | None | lib.NoDefault = lib.no_default, index_col: IndexLabel | Literal[False] | None = None, usecols=None, - squeeze: bool | None = None, - prefix: str | lib.NoDefault = lib.no_default, mangle_dupe_cols: bool = True, # General Parsing Configuration dtype: DtypeArg | None = None, @@ -1271,11 +1197,7 @@ def read_table( encoding_errors: str | None = "strict", dialect: str | csv.Dialect | None = None, # Error Handling - error_bad_lines: bool | None = None, - warn_bad_lines: bool | None = None, - # TODO(2.0): set on_bad_lines to "error". - # See _refine_defaults_read comment for why we do this. - on_bad_lines=None, + on_bad_lines: str = "error", # Internal delim_whitespace: bool = False, low_memory=_c_parser_defaults["low_memory"], @@ -1295,11 +1217,8 @@ def read_table( delim_whitespace, engine, sep, - error_bad_lines, - warn_bad_lines, on_bad_lines, names, - prefix, defaults={"delimiter": "\t"}, ) kwds.update(kwds_defaults) @@ -1307,9 +1226,9 @@ def read_table( return _read(filepath_or_buffer, kwds) -@deprecate_nonkeyword_arguments(version=None, allowed_args=["filepath_or_buffer"]) def read_fwf( filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, colspecs: Sequence[tuple[int, int]] | str | None = "infer", widths: Sequence[int] | None = None, infer_nrows: int = 100, @@ -1365,7 +1284,7 @@ def read_fwf( # Check input arguments. if colspecs is None and widths is None: raise ValueError("Must specify either colspecs or widths") - elif colspecs not in (None, "infer") and widths is not None: + if colspecs not in (None, "infer") and widths is not None: raise ValueError("You must specify only one of 'widths' and 'colspecs'") # Compute 'colspecs' from 'widths', if specified. 
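The hunk above ends with ``read_fwf``'s tightened ``colspecs``/``widths`` validation, and all three readers (``read_csv``, ``read_table``, ``read_fwf``) are now keyword-only after the path argument. A hedged usage sketch, with inline data invented for illustration:

import io

import pandas as pd

# Illustrative fixed-width data, not taken from the diff.
fixed = io.StringIO("1234AB\n5678CD\n")

# 'widths' and 'colspecs' are mutually exclusive; supplying both raises
# ValueError("You must specify only one of 'widths' and 'colspecs'").
df = pd.read_fwf(fixed, widths=[4, 2], header=None)

# Positional options are gone: pd.read_fwf(fixed, [(0, 4), (4, 6)])
# now raises TypeError; spell it colspecs=[(0, 4), (4, 6)] instead.
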
@@ -1451,8 +1370,6 @@ def __init__( self._check_file_or_buffer(f, engine) self.options, self.engine = self._clean_options(options, engine) - self.squeeze = self.options.pop("squeeze", False) - if "has_index_names" in kwds: self.options["has_index_names"] = kwds["has_index_names"] @@ -1480,25 +1397,14 @@ def _get_options_with_defaults(self, engine: CSVEngine) -> dict[str, Any]: and value != default and value != getattr(value, "value", default) ): - if ( - argname == "on_bad_lines" - and kwds.get("error_bad_lines") is not None - ): - argname = "error_bad_lines" - elif ( - argname == "on_bad_lines" and kwds.get("warn_bad_lines") is not None - ): - argname = "warn_bad_lines" - raise ValueError( f"The {repr(argname)} option is not supported with the " f"'pyarrow' engine" ) - elif argname == "mangle_dupe_cols" and value is False: + if argname == "mangle_dupe_cols" and value is False: # GH12935 raise ValueError("Setting mangle_dupe_cols=False is not supported yet") - else: - options[argname] = value + options[argname] = value for argname, default in _c_parser_defaults.items(): if argname in kwds: @@ -1507,22 +1413,13 @@ def _get_options_with_defaults(self, engine: CSVEngine) -> dict[str, Any]: if engine != "c" and value != default: if "python" in engine and argname not in _python_unsupported: pass - elif ( - value - == _deprecated_defaults.get( - argname, _DeprecationConfig(default, None) - ).default_value - ): - pass else: raise ValueError( f"The {repr(argname)} option is not supported with the " f"{repr(engine)} engine" ) else: - value = _deprecated_defaults.get( - argname, _DeprecationConfig(default, None) - ).default_value + value = default options[argname] = value if engine == "python-fwf": @@ -1646,17 +1543,6 @@ def _clean_options( validate_header_arg(options["header"]) - for arg, depr_default in _deprecated_defaults.items(): - parser_default = _c_parser_defaults.get(arg, parser_defaults[arg]) - if result.get(arg, depr_default) != depr_default.default_value: - msg = ( - f"The {arg} argument has been deprecated and will be " - f"removed in a future version. {depr_default.msg}\n\n" - ) - warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) - else: - result[arg] = parser_default - if index_col is True: raise ValueError("The value of index_col couldn't be 'True'") if is_index_col(index_col): @@ -1703,10 +1589,6 @@ def _clean_options( result["na_values"] = na_values result["na_fvalues"] = na_fvalues result["skiprows"] = skiprows - # Default for squeeze is none since we need to check - # if user sets it. We then set to False to preserve - # previous behavior. - result["squeeze"] = False if options["squeeze"] is None else options["squeeze"] return result, engine @@ -1812,9 +1694,6 @@ def read(self, nrows: int | None = None) -> DataFrame: df = DataFrame(col_dict, columns=columns, index=index) self._currow += new_rows - - if self.squeeze and len(df.columns) == 1: - return df.squeeze("columns").copy() return df def get_chunk(self, size: int | None = None) -> DataFrame: @@ -1880,8 +1759,6 @@ def TextParser(*args, **kwds) -> TextFileReader: transformed content. encoding : str, optional Encoding to use for UTF when reading/writing (ex. 'utf-8') - squeeze : bool, default False - returns Series if only one column. infer_datetime_format: bool, default False If True and `parse_dates` is True for a column, try to infer the datetime format based on the first datetime string. 
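The remaining readers.py hunks below collapse the ``error_bad_lines``/``warn_bad_lines`` compatibility shim in ``_refine_defaults_read`` into a plain ``on_bad_lines`` dispatch. A hedged sketch of the call styles that survive, with data invented for illustration:

import io

import pandas as pd

bad = "a,b\n1,2\n3,4,5\n6,7\n"  # second data row has too many fields

# String values map onto ParserBase.BadLineHandleMethod members.
skipped = pd.read_csv(io.StringIO(bad), on_bad_lines="skip")  # drops "3,4,5"

# A callable handler is only accepted with engine="python"; it receives the
# split bad line and returns the fields to keep (or None to drop the line).
repaired = pd.read_csv(
    io.StringIO(bad),
    engine="python",
    on_bad_lines=lambda fields: fields[:2],
)
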
If the format @@ -1979,11 +1856,8 @@ def _refine_defaults_read( delim_whitespace: bool, engine: CSVEngine | None, sep: str | None | lib.NoDefault, - error_bad_lines: bool | None, - warn_bad_lines: bool | None, - on_bad_lines: str | Callable | None, + on_bad_lines: str | Callable, names: Sequence[Hashable] | None | lib.NoDefault, - prefix: str | None | lib.NoDefault, defaults: dict[str, Any], ): """Validate/refine default values of input parameters of read_csv, read_table. @@ -2009,18 +1883,12 @@ def _refine_defaults_read( sep : str or object A delimiter provided by the user (str) or a sentinel value, i.e. pandas._libs.lib.no_default. - error_bad_lines : str or None - Whether to error on a bad line or not. - warn_bad_lines : str or None - Whether to warn on a bad line or not. - on_bad_lines : str, callable or None + on_bad_lines : str, callable An option for handling bad lines or a sentinel value(None). names : array-like, optional List of column names to use. If the file contains a header row, then you should explicitly pass ``header=0`` to override the column names. Duplicates in this list are not allowed. - prefix : str, optional - Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ... defaults: dict Default values of input parameters. @@ -2034,8 +1902,6 @@ def _refine_defaults_read( ValueError : If a delimiter was specified with ``sep`` (or ``delimiter``) and ``delim_whitespace=True``. - If on_bad_lines is specified(not ``None``) and ``error_bad_lines``/ - ``warn_bad_lines`` is True. """ # fix types for sep, delimiter to Union(str, Any) delim_default = defaults["delimiter"] @@ -2060,16 +1926,7 @@ def _refine_defaults_read( if delimiter and (sep is not lib.no_default): raise ValueError("Specified a sep and a delimiter; you can only specify one.") - if ( - names is not None - and names is not lib.no_default - and prefix is not None - and prefix is not lib.no_default - ): - raise ValueError("Specified named and prefix; you can only specify one.") - kwds["names"] = None if names is lib.no_default else names - kwds["prefix"] = None if prefix is lib.no_default else prefix # Alias sep -> delimiter. if delimiter is None: @@ -2100,53 +1957,20 @@ def _refine_defaults_read( kwds["engine"] = "c" kwds["engine_specified"] = False - # Ensure that on_bad_lines and error_bad_lines/warn_bad_lines - # aren't specified at the same time. If so, raise. Otherwise, - # alias on_bad_lines to "error" if error/warn_bad_lines not set - # and on_bad_lines is not set. on_bad_lines is defaulted to None - # so we can tell if it is set (this is why this hack exists). - if on_bad_lines is not None: - if error_bad_lines is not None or warn_bad_lines is not None: + if on_bad_lines == "error": + kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.ERROR + elif on_bad_lines == "warn": + kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.WARN + elif on_bad_lines == "skip": + kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.SKIP + elif callable(on_bad_lines): + if engine != "python": raise ValueError( - "Both on_bad_lines and error_bad_lines/warn_bad_lines are set. " - "Please only set on_bad_lines." 
+ "on_bad_line can only be a callable function if engine='python'" ) - if on_bad_lines == "error": - kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.ERROR - elif on_bad_lines == "warn": - kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.WARN - elif on_bad_lines == "skip": - kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.SKIP - elif callable(on_bad_lines): - if engine != "python": - raise ValueError( - "on_bad_line can only be a callable function if engine='python'" - ) - kwds["on_bad_lines"] = on_bad_lines - else: - raise ValueError(f"Argument {on_bad_lines} is invalid for on_bad_lines") + kwds["on_bad_lines"] = on_bad_lines else: - if error_bad_lines is not None: - # Must check is_bool, because other stuff(e.g. non-empty lists) eval to true - validate_bool_kwarg(error_bad_lines, "error_bad_lines") - if error_bad_lines: - kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.ERROR - else: - if warn_bad_lines is not None: - # This is the case where error_bad_lines is False - # We can only warn/skip if error_bad_lines is False - # None doesn't work because backwards-compatibility reasons - validate_bool_kwarg(warn_bad_lines, "warn_bad_lines") - if warn_bad_lines: - kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.WARN - else: - kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.SKIP - else: - # Backwards compat, when only error_bad_lines = false, we warn - kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.WARN - else: - # Everything None -> Error - kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.ERROR + raise ValueError(f"Argument {on_bad_lines} is invalid for on_bad_lines") return kwds diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 483c385fa32fc..5c5e9501c111f 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -684,18 +684,6 @@ def items(self) -> Iterator[tuple[str, list]]: for g in self.groups(): yield g._v_pathname, g - def iteritems(self): - """ - iterate on key->group - """ - warnings.warn( - "iteritems is deprecated and will be removed in a future version. 
" - "Use .items instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - yield from self.items() - def open(self, mode: str = "a", **kwargs) -> None: """ Open the file in the specified mode @@ -2593,8 +2581,6 @@ def get_atom_timedelta64(cls, shape): class GenericDataIndexableCol(DataIndexableCol): """represent a generic pytables data column""" - pass - class Fixed: """ @@ -2701,11 +2687,9 @@ def attrs(self): def set_attrs(self) -> None: """set our object attributes""" - pass def get_attrs(self) -> None: """get our object attributes""" - pass @property def storable(self): @@ -2728,7 +2712,6 @@ def validate(self, other) -> Literal[True] | None: def validate_version(self, where=None) -> None: """are we trying to operate on an old version?""" - pass def infer_axes(self) -> bool: """ @@ -4111,44 +4094,44 @@ def process_axes(self, obj, selection: Selection, columns=None) -> DataFrame: for axis, labels in self.non_index_axes: obj = _reindex_axis(obj, axis, labels, columns) - # apply the selection filters (but keep in the same order) - if selection.filter is not None: - for field, op, filt in selection.filter.format(): + def process_filter(field, filt, op): - def process_filter(field, filt): + for axis_name in obj._AXIS_ORDERS: + axis_number = obj._get_axis_number(axis_name) + axis_values = obj._get_axis(axis_name) + assert axis_number is not None - for axis_name in obj._AXIS_ORDERS: - axis_number = obj._get_axis_number(axis_name) - axis_values = obj._get_axis(axis_name) - assert axis_number is not None + # see if the field is the name of an axis + if field == axis_name: - # see if the field is the name of an axis - if field == axis_name: + # if we have a multi-index, then need to include + # the levels + if self.is_multi_index: + filt = filt.union(Index(self.levels)) - # if we have a multi-index, then need to include - # the levels - if self.is_multi_index: - filt = filt.union(Index(self.levels)) + takers = op(axis_values, filt) + return obj.loc(axis=axis_number)[takers] - takers = op(axis_values, filt) - return obj.loc(axis=axis_number)[takers] + # this might be the name of a file IN an axis + elif field in axis_values: - # this might be the name of a file IN an axis - elif field in axis_values: + # we need to filter on this dimension + values = ensure_index(getattr(obj, field).values) + filt = ensure_index(filt) - # we need to filter on this dimension - values = ensure_index(getattr(obj, field).values) - filt = ensure_index(filt) + # hack until we support reversed dim flags + if isinstance(obj, DataFrame): + axis_number = 1 - axis_number - # hack until we support reversed dim flags - if isinstance(obj, DataFrame): - axis_number = 1 - axis_number - takers = op(values, filt) - return obj.loc(axis=axis_number)[takers] + takers = op(values, filt) + return obj.loc(axis=axis_number)[takers] - raise ValueError(f"cannot find the field [{field}] for filtering!") + raise ValueError(f"cannot find the field [{field}] for filtering!") - obj = process_filter(field, filt) + # apply the selection filters (but keep in the same order) + if selection.filter is not None: + for field, op, filt in selection.filter.format(): + obj = process_filter(field, filt, op) return obj @@ -4981,14 +4964,14 @@ def _maybe_convert_for_string_atom( if inferred_type == "date": raise TypeError("[date] is not implemented as a table column") - elif inferred_type == "datetime": + if inferred_type == "datetime": # after GH#8260 # this only would be hit for a multi-timezone dtype which is an error raise TypeError( "too many 
timezones in this block, create separate data columns" ) - elif not (inferred_type == "string" or dtype_name == "object"): + if not (inferred_type == "string" or dtype_name == "object"): return bvalues mask = isna(bvalues) diff --git a/pandas/io/sas/byteswap.pyx b/pandas/io/sas/byteswap.pyx index 4620403910274..2a4d3f66a5d7d 100644 --- a/pandas/io/sas/byteswap.pyx +++ b/pandas/io/sas/byteswap.pyx @@ -1,5 +1,6 @@ """ -The following are faster versions of struct.unpack that avoid the overhead of Python function calls. +The following are faster versions of struct.unpack that avoid the overhead of Python +function calls. In the SAS7BDAT parser, they may be called up to (n_rows * n_cols) times. """ diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx index 9406900b69998..8c13566c656b7 100644 --- a/pandas/io/sas/sas.pyx +++ b/pandas/io/sas/sas.pyx @@ -69,6 +69,7 @@ cdef int rle_decompress(Buffer inbuff, Buffer outbuff) except? 0: int rpos = 0 int i, nbytes, end_of_first_byte size_t ipos = 0 + Py_ssize_t _ while ipos < inbuff.length: control_byte = buf_get(inbuff, ipos) & 0xF0 @@ -253,8 +254,16 @@ cdef: def _init_subheader_signatures(): - subheaders_32bit = [(sig, idx) for sig, idx in const.subheader_signature_to_index.items() if len(sig) == 4] - subheaders_64bit = [(sig, idx) for sig, idx in const.subheader_signature_to_index.items() if len(sig) == 8] + subheaders_32bit = [ + (sig, idx) + for sig, idx in const.subheader_signature_to_index.items() + if len(sig) == 4 + ] + subheaders_64bit = [ + (sig, idx) + for sig, idx in const.subheader_signature_to_index.items() + if len(sig) == 8 + ] assert len(subheaders_32bit) == 13 assert len(subheaders_64bit) == 17 assert len(const.subheader_signature_to_index) == 13 + 17 @@ -366,9 +375,9 @@ cdef class Parser: def read(self, int nrows): cdef: bint done - int i + Py_ssize_t i - for _ in range(nrows): + for i in range(nrows): done = self.readline() if done: break @@ -490,7 +499,8 @@ cdef class Parser: rpos = self.decompress(source, decompressed_source) if rpos != self.row_length: raise ValueError( - f"Expected decompressed line of length {self.row_length} bytes but decompressed {rpos} bytes" + f"Expected decompressed line of length {self.row_length} bytes " + f"but decompressed {rpos} bytes" ) source = decompressed_source diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 2b6472913a4d3..bcd342ddb9023 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -19,10 +19,7 @@ FilePath, ReadBuffer, ) -from pandas.util._decorators import ( - deprecate_nonkeyword_arguments, - doc, -) +from pandas.util._decorators import doc from pandas.core.shared_docs import _shared_docs @@ -61,6 +58,7 @@ def __exit__( @overload def read_sas( filepath_or_buffer: FilePath | ReadBuffer[bytes], + *, format: str | None = ..., index: Hashable | None = ..., encoding: str | None = ..., @@ -74,6 +72,7 @@ def read_sas( @overload def read_sas( filepath_or_buffer: FilePath | ReadBuffer[bytes], + *, format: str | None = ..., index: Hashable | None = ..., encoding: str | None = ..., @@ -84,10 +83,10 @@ def read_sas( ... 
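read_sas gets the same treatment: the overloads above add the bare *, and the decorator removal that follows makes the keyword-only contract the real signature. A sketch under those signatures, with example.sas7bdat and process() as hypothetical stand-ins:

import pandas as pd

# format, index, encoding, chunksize, etc. must now be named.
df = pd.read_sas("example.sas7bdat", format="sas7bdat", encoding="latin-1")

# With chunksize, the returned reader still works as a context manager
# (note the __exit__ overload kept in the hunk above).
with pd.read_sas("example.sas7bdat", format="sas7bdat", chunksize=10_000) as reader:
    for chunk in reader:
        process(chunk)  # hypothetical per-chunk handler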
-@deprecate_nonkeyword_arguments(version=None, allowed_args=["filepath_or_buffer"]) @doc(decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer") def read_sas( filepath_or_buffer: FilePath | ReadBuffer[bytes], + *, format: str | None = None, index: Hashable | None = None, encoding: str | None = None, diff --git a/pandas/io/spss.py b/pandas/io/spss.py index 1b83d339a2990..32efd6ca1180c 100644 --- a/pandas/io/spss.py +++ b/pandas/io/spss.py @@ -44,8 +44,7 @@ def read_spss( if usecols is not None: if not is_list_like(usecols): raise TypeError("usecols must be list-like.") - else: - usecols = list(usecols) # pyreadstat requires a list + usecols = list(usecols) # pyreadstat requires a list df, _ = pyreadstat.read_sav( stringify_path(path), usecols=usecols, apply_value_formats=convert_categoricals diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 0f24e3f31cc4b..591fa25bd36d1 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -25,7 +25,7 @@ import numpy as np -import pandas._libs.lib as lib +from pandas._libs import lib from pandas._typing import ( DateTimeErrorChoices, DtypeArg, @@ -751,8 +751,7 @@ def pandasSQL_builder(con, schema: str | None = None) -> SQLDatabase | SQLiteDat if isinstance(con, str): if sqlalchemy is None: raise ImportError("Using URI string without sqlalchemy installed.") - else: - con = sqlalchemy.create_engine(con) + con = sqlalchemy.create_engine(con) if sqlalchemy is not None and isinstance(con, sqlalchemy.engine.Connectable): return SQLDatabase(con, schema=schema) @@ -828,7 +827,7 @@ def create(self) -> None: if self.exists(): if self.if_exists == "fail": raise ValueError(f"Table '{self.name}' already exists.") - elif self.if_exists == "replace": + if self.if_exists == "replace": self.pd_sql.drop_table(self.name, self.schema) self._execute_create() elif self.if_exists == "append": @@ -1038,8 +1037,7 @@ def _index_name(self, index, index_label): "Length of 'index_label' should match number of " f"levels, which is {nlevels}" ) - else: - return index_label + return index_label # return the used column labels for the index columns if ( nlevels == 1 @@ -1332,8 +1330,7 @@ def insert_records( err_text = str(err.orig) if re.search(msg, err_text): raise ValueError("inf cannot be used with MySQL") from err - else: - raise err + raise err def get_engine(engine: str) -> BaseEngine: @@ -1362,7 +1359,7 @@ def get_engine(engine: str) -> BaseEngine: f"{error_msgs}" ) - elif engine == "sqlalchemy": + if engine == "sqlalchemy": return SQLAlchemyEngine() raise ValueError("engine must be one of 'auto', 'sqlalchemy'") diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 5e9b70aeb2a82..5860aa4ae7c3e 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -50,7 +50,6 @@ ) from pandas.util._decorators import ( Appender, - deprecate_nonkeyword_arguments, doc, ) from pandas.util._exceptions import find_stack_level @@ -2009,9 +2008,9 @@ def value_labels(self) -> dict[str, dict[float, str]]: @Appender(_read_stata_doc) -@deprecate_nonkeyword_arguments(version=None, allowed_args=["filepath_or_buffer"]) def read_stata( filepath_or_buffer: FilePath | ReadBuffer[bytes], + *, convert_dates: bool = True, convert_categoricals: bool = True, index_col: str | None = None, @@ -2418,7 +2417,6 @@ def _replace_nans(self, data: DataFrame) -> DataFrame: def _update_strl_names(self) -> None: """No-op, forward compatibility""" - pass def _validate_variable_name(self, name: str) -> str: """ @@ -2702,19 +2700,15 @@ def _close(self) -> None: def _write_map(self) -> None: 
"""No-op, future compatibility""" - pass def _write_file_close_tag(self) -> None: """No-op, future compatibility""" - pass def _write_characteristics(self) -> None: """No-op, future compatibility""" - pass def _write_strls(self) -> None: """No-op, future compatibility""" - pass def _write_expansion_fields(self) -> None: """Write 5 zeros for expansion fields""" @@ -3439,7 +3433,6 @@ def _write_strls(self) -> None: def _write_expansion_fields(self) -> None: """No-op in dta 117+""" - pass def _write_value_labels(self) -> None: self._update_map("value_labels") diff --git a/pandas/io/xml.py b/pandas/io/xml.py index 71d19b7861fc2..c44972e9b1a4a 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -27,10 +27,7 @@ AbstractMethodError, ParserError, ) -from pandas.util._decorators import ( - deprecate_nonkeyword_arguments, - doc, -) +from pandas.util._decorators import doc from pandas.core.dtypes.common import is_list_like @@ -212,7 +209,7 @@ def _parse_nodes(self, elems: list[Any]) -> list[dict[str, str | None]]: if self.elems_only and self.attrs_only: raise ValueError("Either element or attributes can be parsed not both.") - elif self.elems_only: + if self.elems_only: if self.names: dicts = [ { @@ -485,9 +482,9 @@ def _validate_path(self) -> list[Any]: if elems is not None: if self.elems_only and children == []: raise ValueError(msg) - elif self.attrs_only and attrs == {}: + if self.attrs_only and attrs == {}: raise ValueError(msg) - elif children == [] and attrs == {}: + if children == [] and attrs == {}: raise ValueError(msg) except (KeyError, SyntaxError): @@ -598,9 +595,9 @@ def _validate_path(self) -> list[Any]: if elems != []: if self.elems_only and children == []: raise ValueError(msg) - elif self.attrs_only and attrs == {}: + if self.attrs_only and attrs == {}: raise ValueError(msg) - elif children == [] and attrs == {}: + if children == [] and attrs == {}: raise ValueError(msg) return elems @@ -712,10 +709,7 @@ def get_data_from_filepath( storage_options=storage_options, ) as handle_obj: filepath_or_buffer = ( - # error: Incompatible types in assignment (expression has type - # "Union[str, IO[str]]", variable has type "Union[Union[str, - # PathLike[str]], bytes, ReadBuffer[bytes], ReadBuffer[str]]") - handle_obj.handle.read() # type: ignore[assignment] + handle_obj.handle.read() if hasattr(handle_obj.handle, "read") else handle_obj.handle ) @@ -853,13 +847,13 @@ def _parse( ) -@deprecate_nonkeyword_arguments(version=None, allowed_args=["path_or_buffer"]) @doc( storage_options=_shared_docs["storage_options"], decompression_options=_shared_docs["decompression_options"] % "path_or_buffer", ) def read_xml( path_or_buffer: FilePath | ReadBuffer[bytes] | ReadBuffer[str], + *, xpath: str = "./*", namespaces: dict[str, str] | None = None, elems_only: bool = False, diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index e340ea31deef4..84947c4cfa4fc 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1,13 +1,11 @@ from __future__ import annotations import importlib -import itertools import types from typing import ( TYPE_CHECKING, Sequence, ) -import warnings from pandas._config import get_option @@ -16,7 +14,6 @@ Appender, Substitution, ) -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( is_integer, @@ -760,13 +757,6 @@ class PlotAccessor(PandasObject): Equivalent to yerr. stacked : bool, default False in line and bar plots, and True in area plot If True, create stacked plot. 
- sort_columns : bool, default False - Sort column names to determine plot ordering. - - .. deprecated:: 1.5.0 - The `sort_columns` arguments is deprecated and will be removed in a - future version. - secondary_y : bool or sequence, default False Whether to plot on the secondary y-axis if a list/tuple, which columns to plot on secondary y-axis. @@ -877,7 +867,6 @@ def _get_call_args(backend_name, data, args, kwargs): ("yerr", None), ("xerr", None), ("secondary_y", False), - ("sort_columns", False), ("xlabel", None), ("ylabel", None), ] @@ -887,14 +876,6 @@ def _get_call_args(backend_name, data, args, kwargs): "expected Series or DataFrame" ) - if "sort_columns" in itertools.chain(args, kwargs.keys()): - warnings.warn( - "`sort_columns` is deprecated and will be removed in a future " - "version.", - FutureWarning, - stacklevel=find_stack_level(), - ) - if args and isinstance(data, ABCSeries): positional_args = str(args)[1:-1] keyword_args = ", ".join( @@ -955,7 +936,7 @@ def __call__(self, *args, **kwargs): raise ValueError( f"{kind} requires either y column or 'subplots=True'" ) - elif y is not None: + if y is not None: if is_integer(y) and not data.columns.holds_integer(): y = data.columns[y] # converted to series actually. copy to not modify @@ -1150,7 +1131,9 @@ def line(self, x=None, y=None, **kwargs) -> PlotAccessor: ) @Substitution(kind="bar") @Appender(_bar_or_line_doc) - def bar(self, x=None, y=None, **kwargs) -> PlotAccessor: + def bar( # pylint: disable=disallowed-name + self, x=None, y=None, **kwargs + ) -> PlotAccessor: """ Vertical bar plot. @@ -1710,13 +1693,13 @@ def scatter(self, x, y, s=None, c=None, **kwargs) -> PlotAccessor: size = kwargs.pop("size", None) if s is not None and size is not None: raise TypeError("Specify exactly one of `s` and `size`") - elif s is not None or size is not None: + if s is not None or size is not None: kwargs["s"] = s if s is not None else size color = kwargs.pop("color", None) if c is not None and color is not None: raise TypeError("Specify exactly one of `c` and `color`") - elif c is not None or color is not None: + if c is not None or color is not None: kwargs["c"] = c if c is not None else color return self(kind="scatter", x=x, y=y, **kwargs) diff --git a/pandas/plotting/_matplotlib/compat.py b/pandas/plotting/_matplotlib/compat.py index 86b218db4ebe6..7314f05e9f19c 100644 --- a/pandas/plotting/_matplotlib/compat.py +++ b/pandas/plotting/_matplotlib/compat.py @@ -1,8 +1,6 @@ # being a bit too dynamic from __future__ import annotations -import operator - from pandas.util.version import Version @@ -15,8 +13,3 @@ def inner(): return op(Version(mpl.__version__), Version(version)) return inner - - -mpl_ge_3_4_0 = _mpl_version("3.4.0", operator.ge) -mpl_ge_3_5_0 = _mpl_version("3.5.0", operator.ge) -mpl_ge_3_6_0 = _mpl_version("3.6.0", operator.ge) diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py index 4d5feafb5ebd2..28279ff4ee710 100644 --- a/pandas/plotting/_matplotlib/converter.py +++ b/pandas/plotting/_matplotlib/converter.py @@ -17,14 +17,16 @@ ) from dateutil.relativedelta import relativedelta -import matplotlib.dates as dates +from matplotlib import ( + dates, + units, +) from matplotlib.ticker import ( AutoLocator, Formatter, Locator, ) from matplotlib.transforms import nonsingular -import matplotlib.units as units import numpy as np from pandas._libs import lib diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 5c3a79927ec2f..64b5bafa97849 100644 
--- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -55,7 +55,6 @@ from pandas.core.frame import DataFrame from pandas.io.formats.printing import pprint_thing -from pandas.plotting._matplotlib.compat import mpl_ge_3_6_0 from pandas.plotting._matplotlib.converter import register_pandas_matplotlib_converters from pandas.plotting._matplotlib.groupby import reconstruct_data_with_by from pandas.plotting._matplotlib.misc import unpack_single_str_list @@ -138,7 +137,6 @@ def __init__( yticks=None, xlabel: Hashable | None = None, ylabel: Hashable | None = None, - sort_columns: bool = False, fontsize=None, secondary_y: bool | tuple | list | np.ndarray = False, colormap=None, @@ -184,7 +182,6 @@ def __init__( self.kind = kind - self.sort_columns = sort_columns self.subplots = self._validate_subplots_kwarg(subplots) if sharex is None: @@ -259,7 +256,7 @@ def __init__( # Probably better to accept either. if "cmap" in kwds and colormap: raise TypeError("Only specify one of `cmap` and `colormap`.") - elif "cmap" in kwds: + if "cmap" in kwds: self.colormap = kwds.pop("cmap") else: self.colormap = colormap @@ -360,16 +357,15 @@ def _validate_subplots_kwarg( raise ValueError( f"Column label(s) {list(bad_labels)} not found in the DataFrame." ) - else: - unique_columns = set(group) - duplicates = seen_columns.intersection(unique_columns) - if duplicates: - raise ValueError( - "Each column should be in only one subplot. " - f"Columns {duplicates} were found in multiple subplots." - ) - seen_columns = seen_columns.union(unique_columns) - out.append(tuple(idx_locs)) + unique_columns = set(group) + duplicates = seen_columns.intersection(unique_columns) + if duplicates: + raise ValueError( + "Each column should be in only one subplot. " + f"Columns {duplicates} were found in multiple subplots." + ) + seen_columns = seen_columns.union(unique_columns) + out.append(tuple(idx_locs)) unseen_columns = columns.difference(seen_columns) for column in unseen_columns: @@ -670,7 +666,6 @@ def _post_plot_logic_common(self, ax, data): def _post_plot_logic(self, ax, data) -> None: """Post process for each axes. Overridden in child classes""" - pass def _adorn_subplots(self): """Common post process unrelated to data""" @@ -1220,7 +1215,7 @@ def _make_plot(self): color = self.kwds.pop("color", None) if c is not None and color is not None: raise TypeError("Specify exactly one of `c` and `color`") - elif c is None and color is None: + if c is None and color is None: c_values = self.plt.rcParams["patch.facecolor"] elif color is not None: c_values = color @@ -1232,19 +1227,13 @@ def _make_plot(self): c_values = c if self.colormap is not None: - if mpl_ge_3_6_0(): - cmap = mpl.colormaps[self.colormap] - else: - cmap = self.plt.cm.get_cmap(self.colormap) + cmap = mpl.colormaps.get_cmap(self.colormap) else: # cmap is only used if c_values are integers, otherwise UserWarning if is_integer_dtype(c_values): # pandas uses colormap, matplotlib uses cmap. cmap = "Greys" - if mpl_ge_3_6_0(): - cmap = mpl.colormaps[cmap] - else: - cmap = self.plt.cm.get_cmap(cmap) + cmap = mpl.colormaps[cmap] else: cmap = None @@ -1312,10 +1301,7 @@ def _make_plot(self) -> None: ax = self.axes[0] # pandas uses colormap, matplotlib uses cmap. 
cmap = self.colormap or "BuGn" - if mpl_ge_3_6_0(): - cmap = mpl.colormaps[cmap] - else: - cmap = self.plt.cm.get_cmap(cmap) + cmap = mpl.colormaps.get_cmap(cmap) cb = self.kwds.pop("colorbar", True) if C is None: diff --git a/pandas/plotting/_matplotlib/misc.py b/pandas/plotting/_matplotlib/misc.py index 633cb63664823..2f482c7d86571 100644 --- a/pandas/plotting/_matplotlib/misc.py +++ b/pandas/plotting/_matplotlib/misc.py @@ -6,8 +6,8 @@ Hashable, ) +from matplotlib import patches import matplotlib.lines as mlines -import matplotlib.patches as patches import numpy as np from pandas.core.dtypes.missing import notna @@ -387,7 +387,7 @@ def parallel_coordinates( elif xticks is not None: if not np.all(np.isreal(xticks)): raise ValueError("xticks specified must be numeric") - elif len(xticks) != ncols: + if len(xticks) != ncols: raise ValueError("Length of xticks must match number of columns") x = xticks else: diff --git a/pandas/plotting/_matplotlib/style.py b/pandas/plotting/_matplotlib/style.py index 2878f4dbf279c..839da35a8ae83 100644 --- a/pandas/plotting/_matplotlib/style.py +++ b/pandas/plotting/_matplotlib/style.py @@ -10,7 +10,6 @@ import warnings import matplotlib as mpl -import matplotlib.cm as cm import matplotlib.colors import numpy as np @@ -21,8 +20,6 @@ import pandas.core.common as com -from pandas.plotting._matplotlib.compat import mpl_ge_3_6_0 - if TYPE_CHECKING: from matplotlib.colors import Colormap @@ -153,10 +150,7 @@ def _get_cmap_instance(colormap: str | Colormap) -> Colormap: """Get instance of matplotlib colormap.""" if isinstance(colormap, str): cmap = colormap - if mpl_ge_3_6_0(): - colormap = mpl.colormaps[colormap] - else: - colormap = cm.get_cmap(colormap) + colormap = mpl.colormaps[colormap] if colormap is None: raise ValueError(f"Colormap {cmap} is not recognized") return colormap diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py index d3c16a6d53916..eecfcbc72f489 100644 --- a/pandas/plotting/_matplotlib/tools.py +++ b/pandas/plotting/_matplotlib/tools.py @@ -9,8 +9,8 @@ ) import warnings +from matplotlib import ticker import matplotlib.table -import matplotlib.ticker as ticker import numpy as np from pandas.util._exceptions import find_stack_level @@ -22,8 +22,6 @@ ABCSeries, ) -from pandas.plotting._matplotlib import compat - if TYPE_CHECKING: from matplotlib.axes import Axes from matplotlib.axis import Axis @@ -396,10 +394,7 @@ def handle_shared_axes( row_num = lambda x: x.get_subplotspec().rowspan.start col_num = lambda x: x.get_subplotspec().colspan.start - if compat.mpl_ge_3_4_0(): - is_first_col = lambda x: x.get_subplotspec().is_first_col() - else: - is_first_col = lambda x: x.is_first_col() + is_first_col = lambda x: x.get_subplotspec().is_first_col() if nrows > 1: try: @@ -421,10 +416,7 @@ def handle_shared_axes( except IndexError: # if gridspec is used, ax.rowNum and ax.colNum may different # from layout shape. 
in this case, use last_row logic - if compat.mpl_ge_3_4_0(): - is_last_row = lambda x: x.get_subplotspec().is_last_row() - else: - is_last_row = lambda x: x.is_last_row() + is_last_row = lambda x: x.get_subplotspec().is_last_row() for ax in axarr: if is_last_row(ax): continue diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index a7aefc9b1eaa0..068ce32b5e7aa 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -347,7 +347,7 @@ def test_apply_yield_list(float_frame): def test_apply_reduce_Series(float_frame): - float_frame["A"].iloc[::2] = np.nan + float_frame.iloc[::2, float_frame.columns.get_loc("A")] = np.nan expected = float_frame.mean(1) result = float_frame.apply(np.mean, axis=1) tm.assert_series_equal(result, expected) @@ -1141,14 +1141,13 @@ def test_agg_with_name_as_column_name(): tm.assert_series_equal(result, expected) -def test_agg_multiple_mixed_no_warning(): +def test_agg_multiple_mixed(): # GH 20909 mdf = DataFrame( { "A": [1, 2, 3], "B": [1.0, 2.0, 3.0], "C": ["foo", "bar", "baz"], - "D": date_range("20130101", periods=3), } ) expected = DataFrame( @@ -1156,29 +1155,41 @@ def test_agg_multiple_mixed_no_warning(): "A": [1, 6], "B": [1.0, 6.0], "C": ["bar", "foobarbaz"], - "D": [Timestamp("2013-01-01"), pd.NaT], }, index=["min", "sum"], ) # sorted index - with tm.assert_produces_warning( - FutureWarning, match=r"\['D'\] did not aggregate successfully" - ): - result = mdf.agg(["min", "sum"]) - + result = mdf.agg(["min", "sum"]) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning( - FutureWarning, match=r"\['D'\] did not aggregate successfully" - ): - result = mdf[["D", "C", "B", "A"]].agg(["sum", "min"]) - + result = mdf[["C", "B", "A"]].agg(["sum", "min"]) # GH40420: the result of .agg should have an index that is sorted # according to the arguments provided to agg. - expected = expected[["D", "C", "B", "A"]].reindex(["sum", "min"]) + expected = expected[["C", "B", "A"]].reindex(["sum", "min"]) tm.assert_frame_equal(result, expected) +def test_agg_multiple_mixed_raises(): + # GH 20909 + mdf = DataFrame( + { + "A": [1, 2, 3], + "B": [1.0, 2.0, 3.0], + "C": ["foo", "bar", "baz"], + "D": date_range("20130101", periods=3), + } + ) + + # sorted index + # TODO: GH#49399 will fix error message + msg = "DataFrame constructor called with" + with pytest.raises(TypeError, match=msg): + mdf.agg(["min", "sum"]) + + with pytest.raises(TypeError, match=msg): + mdf[["D", "C", "B", "A"]].agg(["sum", "min"]) + + def test_agg_reduce(axis, float_frame): other_axis = 1 if axis in {0, "index"} else 0 name1, name2 = float_frame.axes[other_axis].unique()[:2].sort_values() @@ -1277,14 +1288,10 @@ def test_nuiscance_columns(): expected = Series([6, 6.0, "foobarbaz"], index=["A", "B", "C"]) tm.assert_series_equal(result, expected) - with tm.assert_produces_warning( - FutureWarning, match=r"\['D'\] did not aggregate successfully" - ): - result = df.agg(["sum"]) - expected = DataFrame( - [[6, 6.0, "foobarbaz"]], index=["sum"], columns=["A", "B", "C"] - ) - tm.assert_frame_equal(result, expected) + # TODO: GH#49399 will fix error message + msg = "DataFrame constructor called with" + with pytest.raises(TypeError, match=msg): + df.agg(["sum"]) @pytest.mark.parametrize("how", ["agg", "apply"]) @@ -1499,32 +1506,28 @@ def test_aggregation_func_column_order(): # according to the arguments provided to agg. 
df = DataFrame( [ - ("1", 1, 0, 0), - ("2", 2, 0, 0), - ("3", 3, 0, 0), - ("4", 4, 5, 4), - ("5", 5, 6, 6), - ("6", 6, 7, 7), + (1, 0, 0), + (2, 0, 0), + (3, 0, 0), + (4, 5, 4), + (5, 6, 6), + (6, 7, 7), ], - columns=("item", "att1", "att2", "att3"), + columns=("att1", "att2", "att3"), ) - def foo(s): + def sum_div2(s): return s.sum() / 2 - aggs = ["sum", foo, "count", "min"] - with tm.assert_produces_warning( - FutureWarning, match=r"\['item'\] did not aggregate successfully" - ): - result = df.agg(aggs) + aggs = ["sum", sum_div2, "count", "min"] + result = df.agg(aggs) expected = DataFrame( { - "item": ["123456", np.nan, 6, "1"], "att1": [21.0, 10.5, 6.0, 1.0], "att2": [18.0, 9.0, 6.0, 0.0], "att3": [17.0, 8.5, 6.0, 0.0], }, - index=["sum", "foo", "count", "min"], + index=["sum", "sum_div2", "count", "min"], ) tm.assert_frame_equal(result, expected) @@ -1545,13 +1548,13 @@ def test_nuisance_depr_passes_through_warnings(): # sure if some other warnings were raised, they get passed through to # the user. - def foo(x): + def expected_warning(x): warnings.warn("Hello, World!") return x.sum() df = DataFrame({"a": [1, 2, 3]}) with tm.assert_produces_warning(UserWarning, match="Hello, World!"): - df.agg([foo]) + df.agg([expected_warning]) def test_apply_type(): @@ -1606,28 +1609,6 @@ def test_unique_agg_type_is_series(test, constant): tm.assert_series_equal(result, expected) -def test_any_non_keyword_deprecation(): - df = DataFrame({"A": [1, 2], "B": [0, 2], "C": [0, 0]}) - msg = ( - "In a future version of pandas all arguments of " - "DataFrame.any and Series.any will be keyword-only." - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.any("index", None) - expected = Series({"A": True, "B": True, "C": False}) - tm.assert_series_equal(result, expected) - - s = Series([False, False, False]) - msg = ( - "In a future version of pandas all arguments of " - "DataFrame.any and Series.any will be keyword-only." 
- ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = s.any("index") - expected = False - tm.assert_equal(result, expected) - - def test_any_apply_keyword_non_zero_axis_regression(): # https://github.com/pandas-dev/pandas/issues/48656 df = DataFrame({"A": [1, 2, 0], "B": [0, 2, 0], "C": [0, 0, 0]}) diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py index f884e8a7daf67..4749cec018fe6 100644 --- a/pandas/tests/apply/test_frame_transform.py +++ b/pandas/tests/apply/test_frame_transform.py @@ -133,32 +133,37 @@ def func(x): @pytest.mark.parametrize("op", [*frame_kernels_raise, lambda x: x + 1]) def test_transform_bad_dtype(op, frame_or_series, request): # GH 35964 - if op == "rank": - request.node.add_marker( - pytest.mark.xfail( - raises=ValueError, reason="GH 40418: rank does not raise a TypeError" - ) - ) - elif op == "ngroup": + if op == "ngroup": request.node.add_marker( pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame") ) obj = DataFrame({"A": 3 * [object]}) # DataFrame that will fail on most transforms obj = tm.get_obj(obj, frame_or_series) + if op == "rank": + error = ValueError + msg = "Transform function failed" + else: + error = TypeError + msg = "|".join( + [ + "not supported between instances of 'type' and 'type'", + "unsupported operand type", + ] + ) - with pytest.raises(TypeError, match="unsupported operand|not supported"): + with pytest.raises(error, match=msg): obj.transform(op) - with pytest.raises(TypeError, match="Transform function failed"): + with pytest.raises(error, match=msg): obj.transform([op]) - with pytest.raises(TypeError, match="Transform function failed"): + with pytest.raises(error, match=msg): obj.transform({"A": op}) - with pytest.raises(TypeError, match="Transform function failed"): + with pytest.raises(error, match=msg): obj.transform({"A": [op]}) @pytest.mark.parametrize("op", frame_kernels_raise) -def test_transform_partial_failure_typeerror(request, op): +def test_transform_failure_typeerror(request, op): # GH 35964 if op == "ngroup": @@ -168,62 +173,52 @@ def test_transform_partial_failure_typeerror(request, op): # Using object makes most transform kernels fail df = DataFrame({"A": 3 * [object], "B": [1, 2, 3]}) + if op == "rank": + error = ValueError + msg = "Transform function failed" + else: + error = TypeError + msg = "|".join( + [ + "not supported between instances of 'type' and 'type'", + "unsupported operand type", + ] + ) - expected = df[["B"]].transform([op]) - match = r"\['A'\] did not transform successfully" - with tm.assert_produces_warning(FutureWarning, match=match): - result = df.transform([op]) - tm.assert_equal(result, expected) + with pytest.raises(error, match=msg): + df.transform([op]) - expected = df[["B"]].transform({"B": op}) - match = r"\['A'\] did not transform successfully" - with tm.assert_produces_warning(FutureWarning, match=match): - result = df.transform({"A": op, "B": op}) - tm.assert_equal(result, expected) + with pytest.raises(error, match=msg): + df.transform({"A": op, "B": op}) - expected = df[["B"]].transform({"B": [op]}) - match = r"\['A'\] did not transform successfully" - with tm.assert_produces_warning(FutureWarning, match=match): - result = df.transform({"A": [op], "B": [op]}) - tm.assert_equal(result, expected) + with pytest.raises(error, match=msg): + df.transform({"A": [op], "B": [op]}) - expected = df.transform({"A": ["shift"], "B": [op]}) - match = rf"\['{op}'\] did not transform successfully" - with 
tm.assert_produces_warning(FutureWarning, match=match): - result = df.transform({"A": [op, "shift"], "B": [op]}) - tm.assert_equal(result, expected) + with pytest.raises(error, match=msg): + df.transform({"A": [op, "shift"], "B": [op]}) -def test_transform_partial_failure_valueerror(): +def test_transform_failure_valueerror(): # GH 40211 - match = ".*did not transform successfully" - def op(x): if np.sum(np.sum(x)) < 10: raise ValueError return x df = DataFrame({"A": [1, 2, 3], "B": [400, 500, 600]}) + msg = "Transform function failed" - expected = df[["B"]].transform([op]) - with tm.assert_produces_warning(FutureWarning, match=match): - result = df.transform([op]) - tm.assert_equal(result, expected) + with pytest.raises(ValueError, match=msg): + df.transform([op]) - expected = df[["B"]].transform({"B": op}) - with tm.assert_produces_warning(FutureWarning, match=match): - result = df.transform({"A": op, "B": op}) - tm.assert_equal(result, expected) + with pytest.raises(ValueError, match=msg): + df.transform({"A": op, "B": op}) - expected = df[["B"]].transform({"B": [op]}) - with tm.assert_produces_warning(FutureWarning, match=match): - result = df.transform({"A": [op], "B": [op]}) - tm.assert_equal(result, expected) + with pytest.raises(ValueError, match=msg): + df.transform({"A": [op], "B": [op]}) - expected = df.transform({"A": ["shift"], "B": [op]}) - with tm.assert_produces_warning(FutureWarning, match=match): - result = df.transform({"A": [op, "shift"], "B": [op]}) - tm.assert_equal(result, expected) + with pytest.raises(ValueError, match=msg): + df.transform({"A": [op, "shift"], "B": [op]}) @pytest.mark.parametrize("use_apply", [True, False]) diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index bf7f3abc04aa5..6ed962c8f68e6 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -283,7 +283,7 @@ def test_agg_none_to_type(): def test_transform_none_to_type(): # GH#34377 df = DataFrame({"a": [None]}) - msg = "Transform function failed" + msg = "argument must be a" with pytest.raises(TypeError, match=msg): df.transform({"a": int}) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 5221c41ce35d5..9b51ea7fef5f8 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -280,45 +280,39 @@ def test_transform_partial_failure(op, request): # GH 35964 if op in ("ffill", "bfill", "pad", "backfill", "shift"): request.node.add_marker( - pytest.mark.xfail( - raises=AssertionError, reason=f"{op} is successful on any dtype" - ) + pytest.mark.xfail(reason=f"{op} is successful on any dtype") ) # Using object makes most transform kernels fail ser = Series(3 * [object]) - expected = ser.transform(["shift"]) - match = rf"\['{op}'\] did not transform successfully" - with tm.assert_produces_warning(FutureWarning, match=match): - result = ser.transform([op, "shift"]) - tm.assert_equal(result, expected) - - expected = ser.transform({"B": "shift"}) - match = r"\['A'\] did not transform successfully" - with tm.assert_produces_warning(FutureWarning, match=match): - result = ser.transform({"A": op, "B": "shift"}) - tm.assert_equal(result, expected) - - expected = ser.transform({"B": ["shift"]}) - match = r"\['A'\] did not transform successfully" - with tm.assert_produces_warning(FutureWarning, match=match): - result = ser.transform({"A": [op], "B": ["shift"]}) - tm.assert_equal(result, expected) - - match = r"\['B'\] did not 
transform successfully" - with tm.assert_produces_warning(FutureWarning, match=match): - expected = ser.transform({"A": ["shift"], "B": [op]}) - match = rf"\['{op}'\] did not transform successfully" - with tm.assert_produces_warning(FutureWarning, match=match): - result = ser.transform({"A": [op, "shift"], "B": [op]}) - tm.assert_equal(result, expected) + if op in ("fillna", "ngroup", "rank"): + error = ValueError + msg = "Transform function failed" + else: + error = TypeError + msg = "|".join( + [ + "not supported between instances of 'type' and 'type'", + "unsupported operand type", + ] + ) + + with pytest.raises(error, match=msg): + ser.transform([op, "shift"]) + + with pytest.raises(error, match=msg): + ser.transform({"A": op, "B": "shift"}) + + with pytest.raises(error, match=msg): + ser.transform({"A": [op], "B": ["shift"]}) + + with pytest.raises(error, match=msg): + ser.transform({"A": [op, "shift"], "B": [op]}) def test_transform_partial_failure_valueerror(): # GH 40211 - match = ".*did not transform successfully" - def noop(x): return x @@ -326,26 +320,19 @@ def raising_op(_): raise ValueError ser = Series(3 * [object]) + msg = "Transform function failed" - expected = ser.transform([noop]) - with tm.assert_produces_warning(FutureWarning, match=match): - result = ser.transform([noop, raising_op]) - tm.assert_equal(result, expected) + with pytest.raises(ValueError, match=msg): + ser.transform([noop, raising_op]) - expected = ser.transform({"B": noop}) - with tm.assert_produces_warning(FutureWarning, match=match): - result = ser.transform({"A": raising_op, "B": noop}) - tm.assert_equal(result, expected) + with pytest.raises(ValueError, match=msg): + ser.transform({"A": raising_op, "B": noop}) - expected = ser.transform({"B": [noop]}) - with tm.assert_produces_warning(FutureWarning, match=match): - result = ser.transform({"A": [raising_op], "B": [noop]}) - tm.assert_equal(result, expected) + with pytest.raises(ValueError, match=msg): + ser.transform({"A": [raising_op], "B": [noop]}) - expected = ser.transform({"A": [noop], "B": [noop]}) - with tm.assert_produces_warning(FutureWarning, match=match): - result = ser.transform({"A": [noop, raising_op], "B": [noop]}) - tm.assert_equal(result, expected) + with pytest.raises(ValueError, match=msg): + ser.transform({"A": [noop, raising_op], "B": [noop]}) def test_demo(): @@ -859,8 +846,7 @@ def test_apply_to_timedelta(): list_of_strings = ["00:00:01", np.nan, pd.NaT, pd.NaT] a = pd.to_timedelta(list_of_strings) - with tm.assert_produces_warning(FutureWarning, match="Inferring timedelta64"): - ser = Series(list_of_strings) + ser = Series(list_of_strings) b = ser.apply(pd.to_timedelta) tm.assert_series_equal(Series(a), b) diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py index 36182c46bfd67..61c879fb2b20f 100644 --- a/pandas/tests/apply/test_str.py +++ b/pandas/tests/apply/test_str.py @@ -12,7 +12,6 @@ Series, ) import pandas._testing as tm -from pandas.core.groupby.base import maybe_normalize_deprecated_kernels from pandas.tests.apply.common import ( frame_transform_kernels, series_transform_kernels, @@ -251,8 +250,6 @@ def test_transform_groupby_kernel_series(request, string_series, op): request.node.add_marker( pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame") ) - # TODO(2.0) Remove after pad/backfill deprecation enforced - op = maybe_normalize_deprecated_kernels(op) args = [0.0] if op == "fillna" else [] ones = np.ones(string_series.shape[0]) expected = 
string_series.groupby(ones).transform(op, *args) @@ -262,8 +259,6 @@ def test_transform_groupby_kernel_series(request, string_series, op): @pytest.mark.parametrize("op", frame_transform_kernels) def test_transform_groupby_kernel_frame(request, axis, float_frame, op): - # TODO(2.0) Remove after pad/backfill deprecation enforced - op = maybe_normalize_deprecated_kernels(op) if op == "ngroup": request.node.add_marker( diff --git a/pandas/tests/arithmetic/conftest.py b/pandas/tests/arithmetic/conftest.py index e847f31cd3f9c..b734344d25174 100644 --- a/pandas/tests/arithmetic/conftest.py +++ b/pandas/tests/arithmetic/conftest.py @@ -12,9 +12,7 @@ from pandas.core.computation import expressions as expr -@pytest.fixture( - autouse=True, scope="module", params=[0, 1000000], ids=["numexpr", "python"] -) +@pytest.fixture(autouse=True, params=[0, 1000000], ids=["numexpr", "python"]) def switch_numexpr_min_elements(request): _MIN_ELEMENTS = expr._MIN_ELEMENTS expr._MIN_ELEMENTS = request.param diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 95d4d6629f608..529dd6baa70c0 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -28,7 +28,6 @@ Int64Index, UInt64Index, ) -from pandas.core.arrays import TimedeltaArray from pandas.core.computation import expressions as expr from pandas.tests.arithmetic.common import ( assert_invalid_addsub_type, @@ -210,15 +209,10 @@ def test_numeric_arr_mul_tdscalar(self, scalar_td, numeric_idx, box_with_array): index = numeric_idx expected = TimedeltaIndex([Timedelta(days=n) for n in range(len(index))]) if isinstance(scalar_td, np.timedelta64): - # TODO(2.0): once TDA.astype converts to m8, just do expected.astype - tda = expected._data dtype = scalar_td.dtype - expected = type(tda)._simple_new(tda._ndarray.astype(dtype), dtype=dtype) + expected = expected.astype(dtype) elif type(scalar_td) is timedelta: - # TODO(2.0): once TDA.astype converts to m8, just do expected.astype - tda = expected._data - dtype = np.dtype("m8[us]") - expected = type(tda)._simple_new(tda._ndarray.astype(dtype), dtype=dtype) + expected = expected.astype("m8[us]") index = tm.box_expected(index, box) expected = tm.box_expected(expected, box) @@ -251,11 +245,7 @@ def test_numeric_arr_mul_tdscalar_numexpr_path( expected = arr_i8.view("timedelta64[D]").astype("timedelta64[ns]") if type(scalar_td) is timedelta: - # TODO(2.0): this shouldn't depend on 'box' expected = expected.astype("timedelta64[us]") - # TODO(2.0): won't be necessary to construct TimedeltaArray - # explicitly. - expected = TimedeltaArray._simple_new(expected, dtype=expected.dtype) expected = tm.box_expected(expected, box, transpose=False) @@ -272,18 +262,13 @@ def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box_with_array expected = TimedeltaIndex(["3 Days", "36 Hours"]) if isinstance(three_days, np.timedelta64): - # TODO(2.0): just use expected.astype - tda = expected._data dtype = three_days.dtype if dtype < np.dtype("m8[s]"): # i.e. 
resolution is lower -> use lowest supported resolution dtype = np.dtype("m8[s]") - expected = type(tda)._simple_new(tda._ndarray.astype(dtype), dtype=dtype) + expected = expected.astype(dtype) elif type(three_days) is timedelta: - # TODO(2.0): just use expected.astype - tda = expected._data - dtype = np.dtype("m8[us]") - expected = type(tda)._simple_new(tda._ndarray.astype(dtype), dtype=dtype) + expected = expected.astype("m8[us]") index = tm.box_expected(index, box) expected = tm.box_expected(expected, box) diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index e9f4be11ee4b7..55d39cf84eb30 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -320,53 +320,6 @@ def test_validate_inplace_raises(self, value): 'For argument "inplace" expected type bool, ' f"received type {type(value).__name__}" ) - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning( - FutureWarning, match="Use rename_categories" - ): - cat.set_ordered(value=True, inplace=value) - - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning( - FutureWarning, match="Use rename_categories" - ): - cat.as_ordered(inplace=value) - - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning( - FutureWarning, match="Use rename_categories" - ): - cat.as_unordered(inplace=value) - - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - cat.set_categories(["X", "Y", "Z"], rename=True, inplace=value) - - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - cat.rename_categories(["X", "Y", "Z"], inplace=value) - - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - cat.reorder_categories(["X", "Y", "Z"], ordered=True, inplace=value) - - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - cat.add_categories(new_categories=["D", "E", "F"], inplace=value) - - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - cat.remove_categories(removals=["D", "E", "F"], inplace=value) - - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - cat.remove_unused_categories(inplace=value) with pytest.raises(ValueError, match=msg): cat.sort_values(inplace=value) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index 03bd1c522838d..450581f89d735 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -37,31 +37,14 @@ def test_ordered_api(self): assert cat4.ordered def test_set_ordered(self): - msg = ( - "The `inplace` parameter in pandas.Categorical.set_ordered is " - "deprecated and will be removed in a future version. 
setting " - "ordered-ness on categories will always return a new Categorical object" - ) cat = Categorical(["a", "b", "c", "a"], ordered=True) cat2 = cat.as_unordered() assert not cat2.ordered cat2 = cat.as_ordered() assert cat2.ordered - with tm.assert_produces_warning(FutureWarning, match=msg): - cat2.as_unordered(inplace=True) - assert not cat2.ordered - with tm.assert_produces_warning(FutureWarning, match=msg): - cat2.as_ordered(inplace=True) - assert cat2.ordered assert cat2.set_ordered(True).ordered assert not cat2.set_ordered(False).ordered - with tm.assert_produces_warning(FutureWarning, match=msg): - cat2.set_ordered(True, inplace=True) - assert cat2.ordered - with tm.assert_produces_warning(FutureWarning, match=msg): - cat2.set_ordered(False, inplace=True) - assert not cat2.ordered # removed in 0.19.0 msg = ( @@ -95,17 +78,6 @@ def test_rename_categories(self): expected = Categorical(["A", "B", "C", "A"]) tm.assert_categorical_equal(result, expected) - # and now inplace - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - res = cat.rename_categories([1, 2, 3], inplace=True) - - assert res is None - tm.assert_numpy_array_equal( - cat.__array__(), np.array([1, 2, 3, 1], dtype=np.int64) - ) - tm.assert_index_equal(cat.categories, Index([1, 2, 3])) - @pytest.mark.parametrize("new_categories", [[1, 2, 3, 4], [1, 2]]) def test_rename_categories_wrong_length_raises(self, new_categories): cat = Categorical(["a", "b", "c", "a"]) @@ -130,14 +102,6 @@ def test_rename_categories_dict(self): expected = Index([4, 3, 2, 1]) tm.assert_index_equal(res.categories, expected) - # Test for inplace - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - res = cat.rename_categories({"a": 4, "b": 3, "c": 2, "d": 1}, inplace=True) - - assert res is None - tm.assert_index_equal(cat.categories, expected) - # Test for dicts of smaller length cat = Categorical(["a", "b", "c", "d"]) res = cat.rename_categories({"a": 1, "c": 3}) @@ -165,21 +129,12 @@ def test_reorder_categories(self): ["a", "b", "c", "a"], categories=["c", "b", "a"], ordered=True ) - # first inplace == False res = cat.reorder_categories(["c", "b", "a"]) # cat must be the same as before tm.assert_categorical_equal(cat, old) # only res is changed tm.assert_categorical_equal(res, new) - # inplace == True - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - res = cat.reorder_categories(["c", "b", "a"], inplace=True) - - assert res is None - tm.assert_categorical_equal(cat, new) - @pytest.mark.parametrize( "new_categories", [ @@ -201,7 +156,6 @@ def test_add_categories(self): ["a", "b", "c", "a"], categories=["a", "b", "c", "d"], ordered=True ) - # first inplace == False res = cat.add_categories("d") tm.assert_categorical_equal(cat, old) tm.assert_categorical_equal(res, new) @@ -210,14 +164,6 @@ def test_add_categories(self): tm.assert_categorical_equal(cat, old) tm.assert_categorical_equal(res, new) - # inplace == True - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - res = cat.add_categories("d", inplace=True) - - tm.assert_categorical_equal(cat, new) - assert res is None - # GH 9927 cat = Categorical(list("abc"), ordered=True) expected = Categorical(list("abc"), categories=list("abcde"), ordered=True) @@ -262,14 +208,7 @@ def test_set_categories(self): exp_categories = Index(["c", "b", "a"]) exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_) - with 
tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - res = cat.set_categories(["c", "b", "a"], inplace=True) - - tm.assert_index_equal(cat.categories, exp_categories) - tm.assert_numpy_array_equal(cat.__array__(), exp_values) - assert res is None - + cat = cat.set_categories(["c", "b", "a"]) res = cat.set_categories(["a", "b", "c"]) # cat must be the same as before tm.assert_index_equal(cat.categories, exp_categories) @@ -386,7 +325,6 @@ def test_remove_categories(self): old = cat.copy() new = Categorical(["a", "b", np.nan, "a"], categories=["a", "b"], ordered=True) - # first inplace == False res = cat.remove_categories("c") tm.assert_categorical_equal(cat, old) tm.assert_categorical_equal(res, new) @@ -395,14 +333,6 @@ def test_remove_categories(self): tm.assert_categorical_equal(cat, old) tm.assert_categorical_equal(res, new) - # inplace == True - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - res = cat.remove_categories("c", inplace=True) - - tm.assert_categorical_equal(cat, new) - assert res is None - @pytest.mark.parametrize("removals", [["c"], ["c", np.nan], "c", ["c", "c"]]) def test_remove_categories_raises(self, removals): cat = Categorical(["a", "b", "a"]) @@ -422,13 +352,6 @@ def test_remove_unused_categories(self): tm.assert_index_equal(res.categories, exp_categories_dropped) tm.assert_index_equal(c.categories, exp_categories_all) - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - res = c.remove_unused_categories(inplace=True) - - tm.assert_index_equal(c.categories, exp_categories_dropped) - assert res is None - # with NaN values (GH11599) c = Categorical(["a", "b", "c", np.nan], categories=["a", "b", "c", "d", "e"]) res = c.remove_unused_categories() @@ -469,11 +392,7 @@ def test_describe(self, factor): # check unused categories cat = factor.copy() - - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - cat.set_categories(["a", "b", "c", "d"], inplace=True) - + cat = cat.set_categories(["a", "b", "c", "d"]) desc = cat.describe() exp_index = CategoricalIndex( @@ -507,15 +426,6 @@ def test_describe(self, factor): ) tm.assert_frame_equal(desc, expected) - def test_set_categories_inplace(self, factor): - cat = factor.copy() - - with tm.assert_produces_warning(FutureWarning): - # issue #37643 inplace kwarg deprecated - cat.set_categories(["a", "b", "c", "d"], inplace=True) - - tm.assert_index_equal(cat.categories, Index(["a", "b", "c", "d"])) - class TestPrivateCategoricalAPI: def test_codes_immutable(self): diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index d11f4648ec632..570f04fae2c33 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -33,9 +33,9 @@ class TestCategoricalConstructors: - def test_categorical_scalar_deprecated(self): + def test_categorical_disallows_scalar(self): # GH#38433 - with tm.assert_produces_warning(FutureWarning): + with pytest.raises(TypeError, match="Categorical input must be list-like"): Categorical("A", categories=["A", "B"]) def test_categorical_1d_only(self): @@ -220,13 +220,6 @@ def test_constructor(self): assert len(cat.codes) == 1 assert cat.codes[0] == 0 - with tm.assert_produces_warning(FutureWarning): - # GH#38433 - cat = Categorical(1) - assert len(cat.categories) == 1 - assert cat.categories[0] == 1 - assert len(cat.codes) == 1 - 
assert cat.codes[0] == 0 # two arrays # - when the first is an integer dtype and the second is not # - when the resulting codes are all -1/NaN diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index 94e966642b925..d42b73b7c0020 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -191,14 +191,6 @@ def test_periodindex(self): tm.assert_numpy_array_equal(cat3._codes, exp_arr) tm.assert_index_equal(cat3.categories, exp_idx) - def test_categories_assignments(self): - cat = Categorical(["a", "b", "c", "a"]) - exp = np.array([1, 2, 3, 1], dtype=np.int64) - with tm.assert_produces_warning(FutureWarning, match="Use rename_categories"): - cat.categories = [1, 2, 3] - tm.assert_numpy_array_equal(cat.__array__(), exp) - tm.assert_index_equal(cat.categories, Index([1, 2, 3])) - @pytest.mark.parametrize( "null_val", [None, np.nan, NaT, NA, math.nan, "NaT", "nat", "NAT", "nan", "NaN", "NAN"], @@ -217,9 +209,8 @@ def test_categories_assignments_wrong_length_raises(self, new_categories): "new categories need to have the same number of items " "as the old categories!" ) - with tm.assert_produces_warning(FutureWarning, match="Use rename_categories"): - with pytest.raises(ValueError, match=msg): - cat.categories = new_categories + with pytest.raises(ValueError, match=msg): + cat.rename_categories(new_categories) # Combinations of sorted/unique: @pytest.mark.parametrize( diff --git a/pandas/tests/arrays/integer/test_arithmetic.py b/pandas/tests/arrays/integer/test_arithmetic.py index 5b9780e390775..092cbc6c7997f 100644 --- a/pandas/tests/arrays/integer/test_arithmetic.py +++ b/pandas/tests/arrays/integer/test_arithmetic.py @@ -5,8 +5,8 @@ import pandas as pd import pandas._testing as tm +from pandas.core import ops from pandas.core.arrays import FloatingArray -import pandas.core.ops as ops # Basic test for the arithmetic array ops # ----------------------------------------------------------------------------- diff --git a/pandas/tests/arrays/sparse/test_astype.py b/pandas/tests/arrays/sparse/test_astype.py index 6761040d444a5..8751b9bb294ae 100644 --- a/pandas/tests/arrays/sparse/test_astype.py +++ b/pandas/tests/arrays/sparse/test_astype.py @@ -39,12 +39,9 @@ def test_astype(self): def test_astype_bool(self): a = SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0)) - with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"): - result = a.astype(bool) - expected = SparseArray( - [True, False, False, True], dtype=SparseDtype(bool, False) - ) - tm.assert_sp_array_equal(result, expected) + result = a.astype(bool) + expected = np.array([1, 0, 0, 1], dtype=bool) + tm.assert_numpy_array_equal(result, expected) # update fill value result = a.astype(SparseDtype(bool, False)) @@ -57,12 +54,8 @@ def test_astype_all(self, any_real_numpy_dtype): vals = np.array([1, 2, 3]) arr = SparseArray(vals, fill_value=1) typ = np.dtype(any_real_numpy_dtype) - with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"): - res = arr.astype(typ) - assert res.dtype == SparseDtype(typ, 1) - assert res.sp_values.dtype == typ - - tm.assert_numpy_array_equal(np.asarray(res.to_dense()), vals.astype(typ)) + res = arr.astype(typ) + tm.assert_numpy_array_equal(res, vals.astype(any_real_numpy_dtype)) @pytest.mark.parametrize( "arr, dtype, expected", @@ -100,22 +93,13 @@ def test_astype_all(self, any_real_numpy_dtype): ], ) def test_astype_more(self, arr, dtype, expected): - - if 
isinstance(dtype, SparseDtype): - warn = None - else: - warn = FutureWarning - - with tm.assert_produces_warning(warn, match="astype from SparseDtype"): - result = arr.astype(dtype) + result = arr.astype(arr.dtype.update_dtype(dtype)) tm.assert_sp_array_equal(result, expected) def test_astype_nan_raises(self): arr = SparseArray([1.0, np.nan]) with pytest.raises(ValueError, match="Cannot convert non-finite"): - msg = "astype from SparseDtype" - with tm.assert_produces_warning(FutureWarning, match=msg): - arr.astype(int) + arr.astype(int) def test_astype_copy_false(self): # GH#34456 bug caused by using .view instead of .astype in astype_nansafe diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 8f9d38044e7ef..3f310d0efa2ca 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -292,19 +292,7 @@ def test_searchsorted(self): assert result == 10 @pytest.mark.parametrize("box", [None, "index", "series"]) - def test_searchsorted_castable_strings(self, arr1d, box, request, string_storage): - if isinstance(arr1d, DatetimeArray): - tz = arr1d.tz - ts1, ts2 = arr1d[1:3] - if tz is not None and ts1.tz.tzname(ts1) != ts2.tz.tzname(ts2): - # If we have e.g. tzutc(), when we cast to string and parse - # back we get pytz.UTC, and then consider them different timezones - # so incorrectly raise. - mark = pytest.mark.xfail( - raises=TypeError, reason="timezone comparisons inconsistent" - ) - request.node.add_marker(mark) - + def test_searchsorted_castable_strings(self, arr1d, box, string_storage): arr = arr1d if box is None: pass @@ -461,19 +449,8 @@ def test_setitem_object_dtype(self, box, arr1d): tm.assert_equal(arr1d, expected) - def test_setitem_strs(self, arr1d, request): + def test_setitem_strs(self, arr1d): # Check that we parse strs in both scalar and listlike - if isinstance(arr1d, DatetimeArray): - tz = arr1d.tz - ts1, ts2 = arr1d[-2:] - if tz is not None and ts1.tz.tzname(ts1) != ts2.tz.tzname(ts2): - # If we have e.g. tzutc(), when we cast to string and parse - # back we get pytz.UTC, and then consider them different timezones - # so incorrectly raise. 
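Stepping back to the SparseArray.astype hunks earlier in this file's diff: the rewritten tests encode the 2.0 rule that astype with a non-sparse dtype returns a dense result of exactly that dtype (pre-2.0 it kept a SparseDtype and emitted a FutureWarning). A minimal sketch of the new behavior, assuming pandas >= 2.0 (this example is not part of the patch):

    import numpy as np
    from pandas.arrays import SparseArray

    arr = SparseArray([1, 0, 0, 1], fill_value=0)
    res = arr.astype(np.float64)
    # dense ndarray of the requested dtype; no Sparse[float64], no warning
    assert isinstance(res, np.ndarray) and res.dtype == np.float64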
-            mark = pytest.mark.xfail(
-                raises=TypeError, reason="timezone comparisons inconsistent"
-            )
-            request.node.add_marker(mark)
 
         # Setting list-like of strs
         expected = arr1d.copy()
@@ -554,22 +531,12 @@ def test_inplace_arithmetic(self):
         tm.assert_equal(arr, expected)
 
     def test_shift_fill_int_deprecated(self):
-        # GH#31971
+        # GH#31971, enforced in 2.0
         data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
         arr = self.array_cls(data, freq="D")
-        msg = "Passing <class 'int'> to shift"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            result = arr.shift(1, fill_value=1)
-
-        expected = arr.copy()
-        if self.array_cls is PeriodArray:
-            fill_val = arr._scalar_type._from_ordinal(1, freq=arr.freq)
-        else:
-            fill_val = arr._scalar_type(1)
-        expected[0] = fill_val
-        expected[1:] = arr[:-1]
-        tm.assert_equal(result, expected)
+        with pytest.raises(TypeError, match="value should be a"):
+            arr.shift(1, fill_value=1)
 
     def test_median(self, arr1d):
         arr = arr1d
@@ -815,18 +782,11 @@ def test_bool_properties(self, arr1d, propname):
 
     @pytest.mark.parametrize("propname", DatetimeArray._field_ops)
     def test_int_properties(self, arr1d, propname):
-        warn = None
-        msg = "weekofyear and week have been deprecated, please use"
-        if propname in ["week", "weekofyear"]:
-            # GH#33595 Deprecate week and weekofyear
-            warn = FutureWarning
-
         dti = self.index_cls(arr1d)
         arr = arr1d
 
-        with tm.assert_produces_warning(warn, match=msg):
-            result = getattr(arr, propname)
-        expected = np.array(getattr(dti, propname), dtype=result.dtype)
+        result = getattr(arr, propname)
+        expected = np.array(getattr(dti, propname), dtype=result.dtype)
 
         tm.assert_numpy_array_equal(result, expected)
 
@@ -869,18 +829,14 @@ def test_take_fill_valid(self, arr1d, fixed_now_ts):
            # GH#37356
            # Assuming here that arr1d fixture does not include Australia/Melbourne
            value = fixed_now_ts.tz_localize("Australia/Melbourne")
-            msg = "Timezones don't match. .* != 'Australia/Melbourne'"
-            with pytest.raises(ValueError, match=msg):
-                # require tz match, not just tzawareness match
-                with tm.assert_produces_warning(
-                    FutureWarning, match="mismatched timezone"
-                ):
-                    result = arr.take([-1, 1], allow_fill=True, fill_value=value)
-
-            # once deprecation is enforced
-            # expected = arr.take([-1, 1], allow_fill=True,
-            #                     fill_value=value.tz_convert(arr.dtype.tz))
-            # tm.assert_equal(result, expected)
+            result = arr.take([-1, 1], allow_fill=True, fill_value=value)
+
+            expected = arr.take(
+                [-1, 1],
+                allow_fill=True,
+                fill_value=value.tz_convert(arr.dtype.tz),
+            )
+            tm.assert_equal(result, expected)
 
     def test_concat_same_type_invalid(self, arr1d):
         # different timezones
diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py
index 24779c6e0c89d..37a9c19627ada 100644
--- a/pandas/tests/arrays/test_datetimes.py
+++ b/pandas/tests/arrays/test_datetimes.py
@@ -75,9 +75,6 @@ def test_non_nano(self, unit, reso, dtype):
         assert tz_compare(dta.tz, dta[0].tz)
         assert (dta[0] == dta[:1]).all()
 
-    @pytest.mark.filterwarnings(
-        "ignore:weekofyear and week have been deprecated:FutureWarning"
-    )
     @pytest.mark.parametrize(
         "field", DatetimeArray._field_ops + DatetimeArray._bool_ops
     )
@@ -432,19 +429,16 @@ def test_setitem_str_impute_tz(self, tz_naive_fixture):
         tm.assert_equal(arr, expected)
 
     def test_setitem_different_tz_raises(self):
+        # pre-2.0 we required exact tz match, in 2.0 we require only
+        # tzawareness-match
         data = np.array([1, 2, 3], dtype="M8[ns]")
         arr = DatetimeArray(data, copy=False, dtype=DatetimeTZDtype(tz="US/Central"))
         with pytest.raises(TypeError, match="Cannot compare tz-naive and tz-aware"):
             arr[0] = pd.Timestamp("2000")
 
         ts = pd.Timestamp("2000", tz="US/Eastern")
-        with pytest.raises(ValueError, match="US/Central"):
-            with tm.assert_produces_warning(
-                FutureWarning, match="mismatched timezones"
-            ):
-                arr[0] = ts
-        # once deprecation is enforced
-        # assert arr[0] == ts.tz_convert("US/Central")
+        arr[0] = ts
+        assert arr[0] == ts.tz_convert("US/Central")
 
     def test_setitem_clears_freq(self):
         a = DatetimeArray(pd.date_range("2000", periods=2, freq="D", tz="US/Central"))
@@ -691,23 +685,16 @@ def test_shift_value_tzawareness_mismatch(self):
             dta.shift(1, fill_value=invalid)
 
     def test_shift_requires_tzmatch(self):
-        # since filling is setitem-like, we require a matching timezone,
-        #  not just matching tzawawreness
+        # pre-2.0 we required exact tz match, in 2.0 we require just
+        #  matching tzawareness
         dti = pd.date_range("2016-01-01", periods=3, tz="UTC")
         dta = dti._data
 
         fill_value = pd.Timestamp("2020-10-18 18:44", tz="US/Pacific")
 
-        msg = "Timezones don't match. 'UTC' != 'US/Pacific'"
-        with pytest.raises(ValueError, match=msg):
-            with tm.assert_produces_warning(
-                FutureWarning, match="mismatched timezones"
-            ):
-                dta.shift(1, fill_value=fill_value)
-
-        # once deprecation is enforced
-        # expected = dta.shift(1, fill_value=fill_value.tz_convert("UTC"))
-        # tm.assert_equal(result, expected)
+        result = dta.shift(1, fill_value=fill_value)
+        expected = dta.shift(1, fill_value=fill_value.tz_convert("UTC"))
+        tm.assert_equal(result, expected)
 
     def test_tz_localize_t2d(self):
         dti = pd.date_range("1994-05-12", periods=12, tz="US/Pacific")
diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py
index 75dff66a91365..f5d50465fee10 100644
--- a/pandas/tests/arrays/test_timedeltas.py
+++ b/pandas/tests/arrays/test_timedeltas.py
@@ -173,8 +173,7 @@ def test_div_td_array(self, tda):
         tm.assert_numpy_array_equal(result, expected)
 
     def test_add_timedeltaarraylike(self, tda):
-        # TODO(2.0): just do `tda_nano = tda.astype("m8[ns]")`
-        tda_nano = TimedeltaArray(tda._ndarray.astype("m8[ns]"))
+        tda_nano = tda.astype("m8[ns]")
         expected = tda_nano * 2
 
         res = tda_nano + tda
diff --git a/pandas/tests/base/test_constructors.py b/pandas/tests/base/test_constructors.py
index c8b923031b9e8..9576bf57c8503 100644
--- a/pandas/tests/base/test_constructors.py
+++ b/pandas/tests/base/test_constructors.py
@@ -43,20 +43,19 @@ def constructor(request):
 class TestPandasDelegate:
     class Delegator:
-        _properties = ["foo"]
-        _methods = ["bar"]
+        _properties = ["prop"]
+        _methods = ["test_method"]
 
-        def _set_foo(self, value):
-            self.foo = value
+        def _set_prop(self, value):
+            self.prop = value
 
-        def _get_foo(self):
-            return self.foo
+        def _get_prop(self):
+            return self.prop
 
-        foo = property(_get_foo, _set_foo, doc="foo property")
+        prop = property(_get_prop, _set_prop, doc="foo property")
 
-        def bar(self, *args, **kwargs):
-            """a test bar method"""
-            pass
+        def test_method(self, *args, **kwargs):
+            """a test method"""
 
     class Delegate(PandasDelegate, PandasObject):
         def __init__(self, obj) -> None:
@@ -78,17 +77,17 @@ def test_invalid_delegation(self):
 
         delegate = self.Delegate(self.Delegator())
 
-        msg = "You cannot access the property foo"
+        msg = "You cannot access the property prop"
         with pytest.raises(TypeError, match=msg):
-            delegate.foo
+            delegate.prop
 
-        msg = "The property foo cannot be set"
+        msg = "The property prop cannot be set"
         with pytest.raises(TypeError, match=msg):
-            delegate.foo = 5
+            delegate.prop = 5
 
-        msg = "You cannot access the property foo"
+        msg = "You cannot access the property prop"
         with pytest.raises(TypeError, match=msg):
-            delegate.foo()
+            delegate.prop()
 
     @pytest.mark.skipif(PYPY, reason="not relevant for PyPy")
     def test_memory_usage(self):
diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py
index 599aaae4d3527..703ac6c89fca8 100644
--- a/pandas/tests/base/test_conversion.py
+++ b/pandas/tests/base/test_conversion.py
@@ -334,10 +334,7 @@ def test_array_multiindex_raises():
 def test_to_numpy(arr, expected, index_or_series_or_array, request):
     box = index_or_series_or_array
 
-    warn = None
-    if index_or_series_or_array is pd.Index and isinstance(arr, SparseArray):
-        warn = FutureWarning
-    with tm.assert_produces_warning(warn):
+    with tm.assert_produces_warning(None):
         thing = box(arr)
 
     if arr.dtype.name == "int64" and box is pd.array:
diff --git a/pandas/tests/computation/test_compat.py b/pandas/tests/computation/test_compat.py
index cfc08426f84e3..f3566e040dc85 100644
--- 
a/pandas/tests/computation/test_compat.py +++ b/pandas/tests/computation/test_compat.py @@ -3,8 +3,8 @@ from pandas.compat._optional import VERSIONS import pandas as pd +from pandas.core.computation import expr from pandas.core.computation.engines import ENGINES -import pandas.core.computation.expr as expr from pandas.util.version import Version diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 7fce4e9d9c38e..b0a182ffe4933 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -30,9 +30,11 @@ date_range, ) import pandas._testing as tm -from pandas.core.computation import pytables +from pandas.core.computation import ( + expr, + pytables, +) from pandas.core.computation.engines import ENGINES -import pandas.core.computation.expr as expr from pandas.core.computation.expr import ( BaseExprVisitor, PandasExprVisitor, diff --git a/pandas/tests/config/test_config.py b/pandas/tests/config/test_config.py index 005fd902afc0e..8a2d53313702d 100644 --- a/pandas/tests/config/test_config.py +++ b/pandas/tests/config/test_config.py @@ -227,6 +227,7 @@ def test_validation(self): validator = cf.is_one_of_factory([None, cf.is_callable]) cf.register_option("b", lambda: None, "doc", validator=validator) + # pylint: disable-next=consider-using-f-string cf.set_option("b", "%.1f".format) # Formatter is callable cf.set_option("b", None) # Formatter is none (default) with pytest.raises(ValueError, match="Value must be a callable"): diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index d917a3c79aa97..b8028fd28f8f8 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -339,10 +339,9 @@ def test_subset_set_column_with_loc(using_copy_on_write, using_array_manager, dt with pd.option_context("chained_assignment", "warn"): # The (i)loc[:, col] inplace deprecation gets triggered here, ignore those # warnings and only assert the SettingWithCopyWarning - raise_on_extra_warnings = False if using_array_manager else True with tm.assert_produces_warning( SettingWithCopyWarning, - raise_on_extra_warnings=raise_on_extra_warnings, + raise_on_extra_warnings=not using_array_manager, ): subset.loc[:, "a"] = np.array([10, 11], dtype="int64") @@ -376,10 +375,9 @@ def test_subset_set_column_with_loc2(using_copy_on_write, using_array_manager): with pd.option_context("chained_assignment", "warn"): # The (i)loc[:, col] inplace deprecation gets triggered here, ignore those # warnings and only assert the SettingWithCopyWarning - raise_on_extra_warnings = False if using_array_manager else True with tm.assert_produces_warning( SettingWithCopyWarning, - raise_on_extra_warnings=raise_on_extra_warnings, + raise_on_extra_warnings=not using_array_manager, ): subset.loc[:, "a"] = 0 @@ -462,6 +460,158 @@ def test_subset_set_with_column_indexer( tm.assert_frame_equal(df, df_orig) +@pytest.mark.parametrize( + "method", + [ + lambda df: df[["a", "b"]][0:2], + lambda df: df[0:2][["a", "b"]], + lambda df: df[["a", "b"]].iloc[0:2], + lambda df: df[["a", "b"]].loc[0:1], + lambda df: df[0:2].iloc[:, 0:2], + lambda df: df[0:2].loc[:, "a":"b"], # type: ignore[misc] + ], + ids=[ + "row-getitem-slice", + "column-getitem", + "row-iloc-slice", + "row-loc-slice", + "column-iloc-slice", + "column-loc-slice", + ], +) +@pytest.mark.parametrize( + "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] +) +def test_subset_chained_getitem( + request, method, dtype, 
using_copy_on_write, using_array_manager +): + # Case: creating a subset using multiple, chained getitem calls using views + # still needs to guarantee proper CoW behaviour + df = DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)} + ) + df_orig = df.copy() + + # when not using CoW, it depends on whether we have a single block or not + # and whether we are slicing the columns -> in that case we have a view + subset_is_view = request.node.callspec.id in ( + "single-block-column-iloc-slice", + "single-block-column-loc-slice", + ) or ( + request.node.callspec.id + in ("mixed-block-column-iloc-slice", "mixed-block-column-loc-slice") + and using_array_manager + ) + + # modify subset -> don't modify parent + subset = method(df) + subset.iloc[0, 0] = 0 + if using_copy_on_write or (not subset_is_view): + tm.assert_frame_equal(df, df_orig) + else: + assert df.iloc[0, 0] == 0 + + # modify parent -> don't modify subset + subset = method(df) + df.iloc[0, 0] = 0 + expected = DataFrame({"a": [1, 2], "b": [4, 5]}) + if using_copy_on_write or not subset_is_view: + tm.assert_frame_equal(subset, expected) + else: + assert subset.iloc[0, 0] == 0 + + +@pytest.mark.parametrize( + "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] +) +def test_subset_chained_getitem_column(dtype, using_copy_on_write): + # Case: creating a subset using multiple, chained getitem calls using views + # still needs to guarantee proper CoW behaviour + df = DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)} + ) + df_orig = df.copy() + + # modify subset -> don't modify parent + subset = df[:]["a"][0:2] + df._clear_item_cache() + subset.iloc[0] = 0 + if using_copy_on_write: + tm.assert_frame_equal(df, df_orig) + else: + assert df.iloc[0, 0] == 0 + + # modify parent -> don't modify subset + subset = df[:]["a"][0:2] + df._clear_item_cache() + df.iloc[0, 0] = 0 + expected = Series([1, 2], name="a") + if using_copy_on_write: + tm.assert_series_equal(subset, expected) + else: + assert subset.iloc[0] == 0 + + +@pytest.mark.parametrize( + "method", + [ + lambda s: s["a":"c"]["a":"b"], # type: ignore[misc] + lambda s: s.iloc[0:3].iloc[0:2], + lambda s: s.loc["a":"c"].loc["a":"b"], # type: ignore[misc] + lambda s: s.loc["a":"c"] # type: ignore[misc] + .iloc[0:3] + .iloc[0:2] + .loc["a":"b"] # type: ignore[misc] + .iloc[0:1], + ], + ids=["getitem", "iloc", "loc", "long-chain"], +) +def test_subset_chained_getitem_series(method, using_copy_on_write): + # Case: creating a subset using multiple, chained getitem calls using views + # still needs to guarantee proper CoW behaviour + s = Series([1, 2, 3], index=["a", "b", "c"]) + s_orig = s.copy() + + # modify subset -> don't modify parent + subset = method(s) + subset.iloc[0] = 0 + if using_copy_on_write: + tm.assert_series_equal(s, s_orig) + else: + assert s.iloc[0] == 0 + + # modify parent -> don't modify subset + subset = s.iloc[0:3].iloc[0:2] + s.iloc[0] = 0 + expected = Series([1, 2], index=["a", "b"]) + if using_copy_on_write: + tm.assert_series_equal(subset, expected) + else: + assert subset.iloc[0] == 0 + + +def test_subset_chained_single_block_row(using_copy_on_write, using_array_manager): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + df_orig = df.copy() + + # modify subset -> don't modify parent + subset = df[:].iloc[0].iloc[0:2] + subset.iloc[0] = 0 + if using_copy_on_write or using_array_manager: + tm.assert_frame_equal(df, df_orig) + else: + assert df.iloc[0, 0] == 0 + + # modify 
parent -> don't modify subset + subset = df[:].iloc[0].iloc[0:2] + df.iloc[0, 0] = 0 + expected = Series([1, 4], index=["a", "b"], name=0) + if using_copy_on_write or using_array_manager: + tm.assert_series_equal(subset, expected) + else: + assert subset.iloc[0] == 0 + + # TODO add more tests modifying the parent diff --git a/pandas/tests/copy_view/test_internals.py b/pandas/tests/copy_view/test_internals.py index 2191fc1b33218..edfa7f843f17f 100644 --- a/pandas/tests/copy_view/test_internals.py +++ b/pandas/tests/copy_view/test_internals.py @@ -1,4 +1,5 @@ import numpy as np +import pytest import pandas.util._test_decorators as td @@ -43,3 +44,21 @@ def test_consolidate(using_copy_on_write): subset.iloc[0, 1] = 0.0 assert df._mgr._has_no_reference(1) assert df.loc[0, "b"] == 0.1 + + +@td.skip_array_manager_invalid_test +def test_clear_parent(using_copy_on_write): + # ensure to clear parent reference if we are no longer viewing data from parent + if not using_copy_on_write: + pytest.skip("test only relevant when using copy-on-write") + + df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) + subset = df[:] + assert subset._mgr.parent is not None + + # replacing existing columns loses the references to the parent df + subset["a"] = 0 + assert subset._mgr.parent is not None + # when losing the last reference, also the parent should be reset + subset["b"] = 0 + assert subset._mgr.parent is None diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index df723808ce06b..956e2cf98c9b6 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -1,4 +1,5 @@ import numpy as np +import pytest from pandas import ( DataFrame, @@ -156,7 +157,7 @@ def test_to_frame(using_copy_on_write): ser = Series([1, 2, 3]) ser_orig = ser.copy() - df = ser.to_frame() + df = ser[:].to_frame() # currently this always returns a "view" assert np.shares_memory(ser.values, get_array(df, 0)) @@ -169,5 +170,47 @@ def test_to_frame(using_copy_on_write): tm.assert_series_equal(ser, ser_orig) else: # but currently select_dtypes() actually returns a view -> mutates parent - ser_orig.iloc[0] = 0 - tm.assert_series_equal(ser, ser_orig) + expected = ser_orig.copy() + expected.iloc[0] = 0 + tm.assert_series_equal(ser, expected) + + # modify original series -> don't modify dataframe + df = ser[:].to_frame() + ser.iloc[0] = 0 + + if using_copy_on_write: + tm.assert_frame_equal(df, ser_orig.to_frame()) + else: + expected = ser_orig.copy().to_frame() + expected.iloc[0, 0] = 0 + tm.assert_frame_equal(df, expected) + + +@pytest.mark.parametrize( + "method, idx", + [ + (lambda df: df.copy(deep=False).copy(deep=False), 0), + (lambda df: df.reset_index().reset_index(), 2), + (lambda df: df.rename(columns=str.upper).rename(columns=str.lower), 0), + (lambda df: df.copy(deep=False).select_dtypes(include="number"), 0), + ], + ids=["shallow-copy", "reset_index", "rename", "select_dtypes"], +) +def test_chained_methods(request, method, idx, using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_orig = df.copy() + + # when not using CoW, only the copy() variant actually gives a view + df2_is_view = not using_copy_on_write and request.node.callspec.id == "shallow-copy" + + # modify df2 -> don't modify df + df2 = method(df) + df2.iloc[0, idx] = 0 + if not df2_is_view: + tm.assert_frame_equal(df, df_orig) + + # modify df -> don't modify df2 + df2 = method(df) + df.iloc[0, 0] = 0 + if not df2_is_view: + 
tm.assert_frame_equal(df2.iloc[:, idx:], df_orig)
diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py
index 589e2e04d668a..c8a3c992248ad 100644
--- a/pandas/tests/dtypes/test_common.py
+++ b/pandas/tests/dtypes/test_common.py
@@ -290,6 +290,15 @@ def test_is_string_dtype():
     assert com.is_string_dtype(pd.StringDtype())
 
+@pytest.mark.parametrize(
+    "data",
+    [[(0, 1), (1, 1)], pd.Categorical([1, 2, 3]), np.array([1, 2], dtype=object)],
+)
+def test_is_string_dtype_arraylike_with_object_elements_not_strings(data):
+    # GH 15585
+    assert not com.is_string_dtype(pd.Series(data))
+
+
 def test_is_string_dtype_nullable(nullable_string_dtype):
     assert com.is_string_dtype(pd.array(["a", "b"], dtype=nullable_string_dtype))
 
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
index 56c97ac7a4dc5..0353d9e23c86e 100644
--- a/pandas/tests/dtypes/test_inference.py
+++ b/pandas/tests/dtypes/test_inference.py
@@ -203,16 +203,16 @@ def test_is_list_like_disallow_sets(maybe_list_like):
 def test_is_list_like_recursion():
     # GH 33721
     # interpreter would crash with SIGABRT
-    def foo():
+    def list_like():
         inference.is_list_like([])
-        foo()
+        list_like()
 
     rec_limit = sys.getrecursionlimit()
     try:
         # Limit to avoid stack overflow on Windows CI
         sys.setrecursionlimit(100)
         with tm.external_error_raised(RecursionError):
-            foo()
+            list_like()
     finally:
         sys.setrecursionlimit(rec_limit)
 
@@ -1340,29 +1340,36 @@ def test_infer_dtype_period_with_na(self, na_value):
                 Timestamp("20170612", tz="US/Eastern"),
                 Timestamp("20170311", tz="US/Eastern"),
             ],
-            [date(2017, 6, 12), Timestamp("20170311", tz="US/Eastern")],
             [np.datetime64("2017-06-12"), np.datetime64("2017-03-11")],
             [np.datetime64("2017-06-12"), datetime(2017, 3, 11, 1, 15)],
         ],
     )
     def test_infer_datetimelike_array_datetime(self, data):
-        assert lib.infer_datetimelike_array(data) == ("datetime", False)
+        assert lib.infer_datetimelike_array(data) == "datetime"
+
+    def test_infer_datetimelike_array_date_mixed(self):
+        # GH49341 pre-2.0 these were inferred as "datetime" and "timedelta",
+        #  respectively
+        data = [date(2017, 6, 12), Timestamp("20170311", tz="US/Eastern")]
+        assert lib.infer_datetimelike_array(data) == "mixed"
+
+        data = [timedelta(2017, 6, 12), date(2017, 3, 11)]
+        assert lib.infer_datetimelike_array(data) == "mixed"
 
     @pytest.mark.parametrize(
         "data",
         [
             [timedelta(2017, 6, 12), timedelta(2017, 3, 11)],
-            [timedelta(2017, 6, 12), date(2017, 3, 11)],
             [np.timedelta64(2017, "D"), np.timedelta64(6, "s")],
             [np.timedelta64(2017, "D"), timedelta(2017, 3, 11)],
         ],
     )
     def test_infer_datetimelike_array_timedelta(self, data):
-        assert lib.infer_datetimelike_array(data) == ("timedelta", False)
+        assert lib.infer_datetimelike_array(data) == "timedelta"
 
     def test_infer_datetimelike_array_date(self):
         arr = [date(2017, 6, 12), date(2017, 3, 11)]
-        assert lib.infer_datetimelike_array(arr) == ("date", False)
+        assert lib.infer_datetimelike_array(arr) == "date"
 
     @pytest.mark.parametrize(
         "data",
         [
@@ -1377,7 +1384,7 @@ def test_infer_datetimelike_array_date(self):
         ],
     )
     def test_infer_datetimelike_array_mixed(self, data):
-        assert lib.infer_datetimelike_array(data)[0] == "mixed"
+        assert lib.infer_datetimelike_array(data) == "mixed"
 
     @pytest.mark.parametrize(
         "first, expected",
         [
@@ -1395,7 +1402,7 @@ def test_infer_datetimelike_array_mixed(self, data):
     @pytest.mark.parametrize("second", [None, np.nan])
     def test_infer_datetimelike_array_nan_nat_like(self, first, second, expected):
         first.append(second)
-        assert 
lib.infer_datetimelike_array(first) == (expected, False) + assert lib.infer_datetimelike_array(first) == expected def test_infer_dtype_all_nan_nat_like(self): arr = np.array([np.nan, np.nan]) @@ -1861,8 +1868,8 @@ def test_is_timedelta(self): assert is_timedelta64_ns_dtype(tdi.astype("timedelta64[ns]")) # Conversion to Int64Index: - assert not is_timedelta64_ns_dtype(tdi.astype("timedelta64")) - assert not is_timedelta64_ns_dtype(tdi.astype("timedelta64[h]")) + assert not is_timedelta64_ns_dtype(Index([], dtype=np.float64)) + assert not is_timedelta64_ns_dtype(Index([], dtype=np.int64)) class TestIsScalar: diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index e1dd182a5ae30..cc365bef2b183 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -426,11 +426,9 @@ def test_array_equivalent(dtype_equal): dtype_equal=dtype_equal, ) - msg = "will be interpreted as nanosecond UTC timestamps instead of wall-times" - with tm.assert_produces_warning(FutureWarning, match=msg): - dti1 = DatetimeIndex([0, np.nan], tz="US/Eastern") - dti2 = DatetimeIndex([0, np.nan], tz="CET") - dti3 = DatetimeIndex([1, np.nan], tz="US/Eastern") + dti1 = DatetimeIndex([0, np.nan], tz="US/Eastern") + dti2 = DatetimeIndex([0, np.nan], tz="CET") + dti3 = DatetimeIndex([1, np.nan], tz="US/Eastern") assert array_equivalent( dti1, @@ -444,7 +442,7 @@ def test_array_equivalent(dtype_equal): ) # The rest are not dtype_equal assert not array_equivalent(DatetimeIndex([0, np.nan]), dti1) - assert not array_equivalent( + assert array_equivalent( dti2, dti1, ) diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py index ea4443010c6a6..32a9246264d69 100644 --- a/pandas/tests/extension/base/dtype.py +++ b/pandas/tests/extension/base/dtype.py @@ -45,10 +45,10 @@ def test_is_dtype_other_input(self, dtype): assert dtype.is_dtype([1, 2, 3]) is False def test_is_not_string_type(self, dtype): - return not is_string_dtype(dtype) + assert not is_string_dtype(dtype) def test_is_not_object_type(self, dtype): - return not is_object_dtype(dtype) + assert not is_object_dtype(dtype) def test_eq_with_str(self, dtype): assert dtype == dtype.name diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index babb2868a4421..cc970c690529d 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -4,7 +4,6 @@ import pytest import pandas as pd -import pandas._testing as tm from pandas.api.extensions import ExtensionArray from pandas.core.internals.blocks import EABackedBlock from pandas.tests.extension.base.base import BaseExtensionTests @@ -319,23 +318,13 @@ def test_unstack(self, data, index, obj): alt = df.unstack(level=level).droplevel(0, axis=1) self.assert_frame_equal(result, alt) - if obj == "series": - is_sparse = isinstance(ser.dtype, pd.SparseDtype) - else: - is_sparse = isinstance(ser.dtypes.iat[0], pd.SparseDtype) - warn = None if not is_sparse else FutureWarning - with tm.assert_produces_warning(warn, match="astype from Sparse"): - obj_ser = ser.astype(object) + obj_ser = ser.astype(object) expected = obj_ser.unstack(level=level, fill_value=data.dtype.na_value) - if obj == "series" and not is_sparse: - # GH#34457 SparseArray.astype(object) gives Sparse[object] - # instead of np.dtype(object) + if obj == "series": assert (expected.dtypes == object).all() - with tm.assert_produces_warning(warn, match="astype from Sparse"): - result = 
result.astype(object) - + result = result.astype(object) self.assert_frame_equal(result, expected) def test_ravel(self, data): diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 8979c145a223c..f68e38be44811 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -46,6 +46,10 @@ from pandas.core.arrays.arrow.dtype import ArrowDtype # isort:skip +pytestmark = pytest.mark.filterwarnings( + "ignore:.* may decrease performance. Upgrade to pyarrow >=7 to possibly" +) + @pytest.fixture(params=tm.ALL_PYARROW_DTYPES, ids=str) def dtype(request): diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 148059a6a16f3..d6a5557c89f14 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -26,6 +26,7 @@ import pandas as pd import pandas._testing as tm +from pandas.api.types import is_object_dtype from pandas.core.arrays.numpy_ import PandasArray from pandas.core.internals import blocks from pandas.tests.extension import base @@ -218,6 +219,14 @@ def test_check_dtype(self, data, request): ) super().test_check_dtype(data) + def test_is_not_object_type(self, dtype, request): + if dtype.numpy_dtype == "object": + # Different from BaseDtypeTests.test_is_not_object_type + # because PandasDtype(object) is an object type + assert is_object_dtype(dtype) + else: + super().test_is_not_object_type(dtype) + class TestGetitem(BaseNumPyTests, base.BaseGetitemTests): @skip_nested diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 86a523404ef8b..f82d3c6c06fca 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -19,8 +19,6 @@ from pandas.errors import PerformanceWarning -from pandas.core.dtypes.common import is_object_dtype - import pandas as pd from pandas import SparseDtype import pandas._testing as tm @@ -159,10 +157,7 @@ def test_concat_mixed_dtypes(self, data): ], ) def test_stack(self, data, columns): - with tm.assert_produces_warning( - FutureWarning, check_stacklevel=False, match="astype from Sparse" - ): - super().test_stack(data, columns) + super().test_stack(data, columns) def test_concat_columns(self, data, na_value): self._check_unsupported(data) @@ -211,24 +206,7 @@ def test_reindex(self, data, na_value): class TestIndex(base.BaseIndexTests): - def test_index_from_array(self, data): - msg = "will store that array directly" - with tm.assert_produces_warning(FutureWarning, match=msg): - idx = pd.Index(data) - - if data.dtype.subtype == "f": - assert idx.dtype == np.float64 - elif data.dtype.subtype == "i": - assert idx.dtype == np.int64 - else: - assert idx.dtype == data.dtype.subtype - - # TODO(2.0): should pass once SparseArray is stored directly in Index. 
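The TestIndex simplification here rests on another 2.0 change: pd.Index can now store a SparseArray directly instead of densifying it (the removed test_index_from_array asserted the old float64/int64 fallback, and the removed xfail noted that Index could not yet hold a sparse dtype). A minimal sketch, assuming pandas >= 2.0 (this example is not part of the patch):

    import pandas as pd
    from pandas.arrays import SparseArray

    idx = pd.Index(SparseArray([1, 0, 0, 1], fill_value=0))
    # the sparse values are stored as-is rather than cast to int64
    assert isinstance(idx.dtype, pd.SparseDtype)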
- @pytest.mark.xfail(reason="Index cannot yet store sparse dtype") - def test_index_from_listlike_with_dtype(self, data): - msg = "passing a SparseArray to pd.Index" - with tm.assert_produces_warning(FutureWarning, match=msg): - super().test_index_from_listlike_with_dtype(data) + pass class TestMissing(BaseSparseTests, base.BaseMissingTests): @@ -396,33 +374,11 @@ def test_equals(self, data, na_value, as_series, box): class TestCasting(BaseSparseTests, base.BaseCastingTests): - def test_astype_object_series(self, all_data): - # Unlike the base class, we do not expect the resulting Block - # to be ObjectBlock / resulting array to be np.dtype("object") - ser = pd.Series(all_data, name="A") - with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"): - result = ser.astype(object) - assert is_object_dtype(result.dtype) - assert is_object_dtype(result._mgr.array.dtype) - - def test_astype_object_frame(self, all_data): - # Unlike the base class, we do not expect the resulting Block - # to be ObjectBlock / resulting array to be np.dtype("object") - df = pd.DataFrame({"A": all_data}) - - with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"): - result = df.astype(object) - assert is_object_dtype(result._mgr.arrays[0].dtype) - - # check that we can compare the dtypes - comp = result.dtypes == df.dtypes - assert not comp.any() - def test_astype_str(self, data): - with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"): - result = pd.Series(data[:5]).astype(str) - expected_dtype = SparseDtype(str, str(data.fill_value)) - expected = pd.Series([str(x) for x in data[:5]], dtype=expected_dtype) + # pre-2.0 this would give a SparseDtype even if the user asked + # for a non-sparse dtype. + result = pd.Series(data[:5]).astype(str) + expected = pd.Series([str(x) for x in data[:5]], dtype=object) self.assert_series_equal(result, expected) @pytest.mark.xfail(raises=TypeError, reason="no sparse StringDtype") diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index c5aebb282bafa..8cbd4342ea13f 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -26,6 +26,7 @@ import pandas as pd import pandas._testing as tm +from pandas.api.types import is_string_dtype from pandas.core.arrays import ArrowStringArray from pandas.core.arrays.string_ import StringDtype from pandas.tests.extension import base @@ -106,6 +107,11 @@ def test_eq_with_str(self, dtype): assert dtype == f"string[{dtype.storage}]" super().test_eq_with_str(dtype) + def test_is_not_string_type(self, dtype): + # Different from BaseDtypeTests.test_is_not_string_type + # because StringDtype is a string type + assert is_string_dtype(dtype) + class TestInterface(base.BaseInterfaceTests): def test_view(self, data, request): diff --git a/pandas/tests/frame/constructors/test_from_dict.py b/pandas/tests/frame/constructors/test_from_dict.py index 7c2b009673bb7..6cba95e42463d 100644 --- a/pandas/tests/frame/constructors/test_from_dict.py +++ b/pandas/tests/frame/constructors/test_from_dict.py @@ -10,7 +10,6 @@ Series, ) import pandas._testing as tm -from pandas.core.construction import create_series_with_explicit_dtype class TestFromDict: @@ -79,9 +78,7 @@ def test_constructor_list_of_series(self): OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]), OrderedDict([["b", 3], ["c", 4], ["d", 6]]), ] - data = [ - create_series_with_explicit_dtype(d, dtype_if_empty=object) for d in data - ] + data = [Series(d) for d in data] result = 
DataFrame(data)
 
         sdict = OrderedDict(zip(range(len(data)), data))
diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py
index b2efa0713b513..7c4ed68dfd0ef 100644
--- a/pandas/tests/frame/constructors/test_from_records.py
+++ b/pandas/tests/frame/constructors/test_from_records.py
@@ -44,7 +44,8 @@ def test_from_records_with_datetimes(self):
         dtypes = [("EXPIRY", "<M8[ns]")]
-    mask = obj > 0
-
-    with tm.assert_produces_warning(FutureWarning):
-        obj.where(mask, -1, errors="raise")
-
-    with tm.assert_produces_warning(FutureWarning):
-        obj.mask(mask, -1, errors="raise")
-
-
 def test_where_producing_ea_cond_for_np_dtype():
     # GH#44014
     df = DataFrame({"a": Series([1, pd.NA, 2], dtype="Int64"), "b": [1, 2, 3]})
diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py
index b02870d5d1247..e4a92ecc5dac1 100644
--- a/pandas/tests/frame/indexing/test_xs.py
+++ b/pandas/tests/frame/indexing/test_xs.py
@@ -54,7 +54,7 @@ def test_xs(self, float_frame, datetime_frame):
         assert xs["B"] == "1"
 
         with pytest.raises(
-            KeyError, match=re.escape("Timestamp('1999-12-31 00:00:00', freq='B')")
+            KeyError, match=re.escape("Timestamp('1999-12-31 00:00:00')")
         ):
             datetime_frame.xs(datetime_frame.index[0] - BDay())
 
@@ -107,8 +107,7 @@ def test_xs_keep_level(self):
         expected = df[:1]
         tm.assert_frame_equal(result, expected)
 
-        with tm.assert_produces_warning(FutureWarning):
-            result = df.xs([2008, "sat"], level=["year", "day"], drop_level=False)
+        result = df.xs((2008, "sat"), level=["year", "day"], drop_level=False)
         tm.assert_frame_equal(result, expected)
 
     def test_xs_view(self, using_array_manager, using_copy_on_write):
@@ -225,8 +224,7 @@ def test_xs_with_duplicates(self, key, level, multiindex_dataframe_random_data):
         expected = concat([frame.xs("one", level="second")] * 2)
 
         if isinstance(key, list):
-            with tm.assert_produces_warning(FutureWarning):
-                result = df.xs(key, level=level)
+            result = df.xs(tuple(key), level=level)
         else:
             result = df.xs(key, level=level)
         tm.assert_frame_equal(result, expected)
@@ -412,6 +410,5 @@ def test_xs_list_indexer_droplevel_false(self):
         # GH#41760
         mi = MultiIndex.from_tuples([("x", "m", "a"), ("x", "n", "b"), ("y", "o", "c")])
         df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=mi)
-        with tm.assert_produces_warning(FutureWarning):
-            with pytest.raises(KeyError, match="y"):
-                df.xs(["x", "y"], drop_level=False, axis=1)
+        with pytest.raises(KeyError, match="y"):
+            df.xs(("x", "y"), drop_level=False, axis=1)
diff --git a/pandas/tests/frame/methods/test_append.py b/pandas/tests/frame/methods/test_append.py
deleted file mode 100644
index f07ffee20a55f..0000000000000
--- a/pandas/tests/frame/methods/test_append.py
+++ /dev/null
@@ -1,292 +0,0 @@
-import numpy as np
-import pytest
-
-import pandas as pd
-from pandas import (
-    DataFrame,
-    Series,
-    Timestamp,
-    date_range,
-    timedelta_range,
-)
-import pandas._testing as tm
-
-
-class TestDataFrameAppend:
-    @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning")
-    def test_append_multiindex(self, multiindex_dataframe_random_data, frame_or_series):
-        obj = multiindex_dataframe_random_data
-        obj = tm.get_obj(obj, frame_or_series)
-
-        a = obj[:5]
-        b = obj[5:]
-
-        result = a.append(b)
-        tm.assert_equal(result, obj)
-
-    def test_append_empty_list(self):
-        # GH 28769
-        df = DataFrame()
-        result = df._append([])
-        expected = df
-        tm.assert_frame_equal(result, expected)
-        assert result is not df
-
-        df = DataFrame(np.random.randn(5, 4), columns=["foo", 
"bar", "baz", "qux"]) - result = df._append([]) - expected = df - tm.assert_frame_equal(result, expected) - assert result is not df # ._append() should return a new object - - def test_append_series_dict(self): - df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) - - series = df.loc[4] - msg = "Indexes have overlapping values" - with pytest.raises(ValueError, match=msg): - df._append(series, verify_integrity=True) - - series.name = None - msg = "Can only append a Series if ignore_index=True" - with pytest.raises(TypeError, match=msg): - df._append(series, verify_integrity=True) - - result = df._append(series[::-1], ignore_index=True) - expected = df._append( - DataFrame({0: series[::-1]}, index=df.columns).T, ignore_index=True - ) - tm.assert_frame_equal(result, expected) - - # dict - result = df._append(series.to_dict(), ignore_index=True) - tm.assert_frame_equal(result, expected) - - result = df._append(series[::-1][:3], ignore_index=True) - expected = df._append( - DataFrame({0: series[::-1][:3]}).T, ignore_index=True, sort=True - ) - tm.assert_frame_equal(result, expected.loc[:, result.columns]) - - msg = "Can only append a dict if ignore_index=True" - with pytest.raises(TypeError, match=msg): - df._append(series.to_dict()) - - # can append when name set - row = df.loc[4] - row.name = 5 - result = df._append(row) - expected = df._append(df[-1:], ignore_index=True) - tm.assert_frame_equal(result, expected) - - def test_append_list_of_series_dicts(self): - df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) - - dicts = [x.to_dict() for idx, x in df.iterrows()] - - result = df._append(dicts, ignore_index=True) - expected = df._append(df, ignore_index=True) - tm.assert_frame_equal(result, expected) - - # different columns - dicts = [ - {"foo": 1, "bar": 2, "baz": 3, "peekaboo": 4}, - {"foo": 5, "bar": 6, "baz": 7, "peekaboo": 8}, - ] - result = df._append(dicts, ignore_index=True, sort=True) - expected = df._append(DataFrame(dicts), ignore_index=True, sort=True) - tm.assert_frame_equal(result, expected) - - def test_append_list_retain_index_name(self): - df = DataFrame( - [[1, 2], [3, 4]], index=pd.Index(["a", "b"], name="keepthisname") - ) - - serc = Series([5, 6], name="c") - - expected = DataFrame( - [[1, 2], [3, 4], [5, 6]], - index=pd.Index(["a", "b", "c"], name="keepthisname"), - ) - - # append series - result = df._append(serc) - tm.assert_frame_equal(result, expected) - - # append list of series - result = df._append([serc]) - tm.assert_frame_equal(result, expected) - - def test_append_missing_cols(self): - # GH22252 - # exercise the conditional branch in append method where the data - # to be appended is a list and does not contain all columns that are in - # the target DataFrame - df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) - - dicts = [{"foo": 9}, {"bar": 10}] - result = df._append(dicts, ignore_index=True, sort=True) - - expected = df._append(DataFrame(dicts), ignore_index=True, sort=True) - tm.assert_frame_equal(result, expected) - - def test_append_empty_dataframe(self): - - # Empty df append empty df - df1 = DataFrame() - df2 = DataFrame() - result = df1._append(df2) - expected = df1.copy() - tm.assert_frame_equal(result, expected) - - # Non-empty df append empty df - df1 = DataFrame(np.random.randn(5, 2)) - df2 = DataFrame() - result = df1._append(df2) - expected = df1.copy() - tm.assert_frame_equal(result, expected) - - # Empty df with columns append empty df - df1 = DataFrame(columns=["bar", 
"foo"]) - df2 = DataFrame() - result = df1._append(df2) - expected = df1.copy() - tm.assert_frame_equal(result, expected) - - # Non-Empty df with columns append empty df - df1 = DataFrame(np.random.randn(5, 2), columns=["bar", "foo"]) - df2 = DataFrame() - result = df1._append(df2) - expected = df1.copy() - tm.assert_frame_equal(result, expected) - - def test_append_dtypes(self, using_array_manager): - - # GH 5754 - # row appends of different dtypes (so need to do by-item) - # can sometimes infer the correct type - - df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(5)) - df2 = DataFrame() - result = df1._append(df2) - expected = df1.copy() - tm.assert_frame_equal(result, expected) - - df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) - df2 = DataFrame({"bar": "foo"}, index=range(1, 2)) - result = df1._append(df2) - expected = DataFrame({"bar": [Timestamp("20130101"), "foo"]}) - tm.assert_frame_equal(result, expected) - - df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) - df2 = DataFrame({"bar": np.nan}, index=range(1, 2)) - result = df1._append(df2) - expected = DataFrame( - {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} - ) - if using_array_manager: - # TODO(ArrayManager) decide on exact casting rules in concat - # With ArrayManager, all-NaN float is not ignored - expected = expected.astype(object) - tm.assert_frame_equal(result, expected) - - df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) - df2 = DataFrame({"bar": np.nan}, index=range(1, 2), dtype=object) - result = df1._append(df2) - expected = DataFrame( - {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} - ) - if using_array_manager: - # With ArrayManager, all-NaN float is not ignored - expected = expected.astype(object) - tm.assert_frame_equal(result, expected) - - df1 = DataFrame({"bar": np.nan}, index=range(1)) - df2 = DataFrame({"bar": Timestamp("20130101")}, index=range(1, 2)) - result = df1._append(df2) - expected = DataFrame( - {"bar": Series([np.nan, Timestamp("20130101")], dtype="M8[ns]")} - ) - if using_array_manager: - # With ArrayManager, all-NaN float is not ignored - expected = expected.astype(object) - tm.assert_frame_equal(result, expected) - - df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) - df2 = DataFrame({"bar": 1}, index=range(1, 2), dtype=object) - result = df1._append(df2) - expected = DataFrame({"bar": Series([Timestamp("20130101"), 1])}) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( - "timestamp", ["2019-07-19 07:04:57+0100", "2019-07-19 07:04:57"] - ) - def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp): - # GH 30238 - tz = tz_naive_fixture - df = DataFrame([Timestamp(timestamp, tz=tz)]) - result = df._append(df.iloc[0]).iloc[-1] - expected = Series(Timestamp(timestamp, tz=tz), name=0) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize( - "data, dtype", - [ - ([1], pd.Int64Dtype()), - ([1], pd.CategoricalDtype()), - ([pd.Interval(left=0, right=5)], pd.IntervalDtype()), - ([pd.Period("2000-03", freq="M")], pd.PeriodDtype("M")), - ([1], pd.SparseDtype()), - ], - ) - def test_other_dtypes(self, data, dtype, using_array_manager): - df = DataFrame(data, dtype=dtype) - - warn = None - if using_array_manager and isinstance(dtype, pd.SparseDtype): - warn = FutureWarning - - with tm.assert_produces_warning(warn, match="astype from SparseDtype"): - result = df._append(df.iloc[0]).iloc[-1] - - expected = Series(data, name=0, dtype=dtype) - 
tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"]) - def test_append_numpy_bug_1681(self, dtype): - # another datetime64 bug - if dtype == "datetime64[ns]": - index = date_range("2011/1/1", "2012/1/1", freq="W-FRI") - else: - index = timedelta_range("1 days", "10 days", freq="2D") - - df = DataFrame() - other = DataFrame({"A": "foo", "B": index}, index=index) - - result = df._append(other) - assert (result["B"] == index).all() - - @pytest.mark.filterwarnings("ignore:The values in the array:RuntimeWarning") - def test_multiindex_column_append_multiple(self): - # GH 29699 - df = DataFrame( - [[1, 11], [2, 12], [3, 13]], - columns=pd.MultiIndex.from_tuples( - [("multi", "col1"), ("multi", "col2")], names=["level1", None] - ), - ) - df2 = df.copy() - for i in range(1, 10): - df[i, "colA"] = 10 - df = df._append(df2, ignore_index=True) - result = df["multi"] - expected = DataFrame( - {"col1": [1, 2, 3] * (i + 1), "col2": [11, 12, 13] * (i + 1)} - ) - tm.assert_frame_equal(result, expected) - - def test_append_raises_future_warning(self): - # GH#35407 - df1 = DataFrame([[1, 2], [3, 4]]) - df2 = DataFrame([[5, 6], [7, 8]]) - with tm.assert_produces_warning(FutureWarning): - df1.append(df2) diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 8e6aa43ff434c..96ef49acdcb21 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -418,51 +418,45 @@ def test_astype_to_datetime_unit(self, unit): idx = pd.Index(ser) dta = ser._values - result = df.astype(dtype) - if unit in ["ns", "us", "ms", "s"]: # GH#48928 - exp_dtype = dtype + result = df.astype(dtype) else: # we use the nearest supported dtype (i.e. M8[s]) - exp_dtype = "M8[s]" - # TODO(2.0): once DataFrame constructor doesn't cast ndarray inputs. - # can simplify this - exp_values = arr.astype(exp_dtype) - exp_dta = pd.core.arrays.DatetimeArray._simple_new( - exp_values, dtype=exp_values.dtype - ) - exp_df = DataFrame(exp_dta) - assert (exp_df.dtypes == exp_dtype).all() + msg = rf"Cannot cast DatetimeArray to dtype datetime64\[{unit}\]" + with pytest.raises(TypeError, match=msg): + df.astype(dtype) + + with pytest.raises(TypeError, match=msg): + ser.astype(dtype) + with pytest.raises(TypeError, match=msg.replace("Array", "Index")): + idx.astype(dtype) + + with pytest.raises(TypeError, match=msg): + dta.astype(dtype) + + return + + exp_df = DataFrame(arr.astype(dtype)) + assert (exp_df.dtypes == dtype).all() tm.assert_frame_equal(result, exp_df) - # TODO(2.0): make Series/DataFrame raise like Index and DTA? 
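The astype rewrite in the surrounding hunk pins down the 2.0 unit-casting rule: casting datetime64 data to the supported resolutions "s"/"ms"/"us"/"ns" succeeds, while coarser units such as "D" now raise a TypeError (pre-2.0 the nearest supported unit was silently used). A minimal sketch, assuming pandas >= 2.0 (this example is not part of the patch):

    import pandas as pd

    ser = pd.Series(pd.to_datetime(["2016-01-01", "2016-01-02"]))
    ser.astype("datetime64[s]")  # supported resolution: cast succeeds
    try:
        ser.astype("datetime64[D]")  # unsupported resolution
    except TypeError as err:
        assert "Cannot cast" in str(err)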
res_ser = ser.astype(dtype) exp_ser = exp_df.iloc[:, 0] - assert exp_ser.dtype == exp_dtype + assert exp_ser.dtype == dtype tm.assert_series_equal(res_ser, exp_ser) - if unit in ["ns", "us", "ms", "s"]: - exp_dta = exp_ser._values + exp_dta = exp_ser._values - res_index = idx.astype(dtype) - # TODO(2.0): should be able to just call pd.Index(exp_ser) - exp_index = pd.DatetimeIndex._simple_new(exp_dta, name=idx.name) - assert exp_index.dtype == exp_dtype - tm.assert_index_equal(res_index, exp_index) + res_index = idx.astype(dtype) + exp_index = pd.Index(exp_ser) + assert exp_index.dtype == dtype + tm.assert_index_equal(res_index, exp_index) - res_dta = dta.astype(dtype) - assert exp_dta.dtype == exp_dtype - tm.assert_extension_array_equal(res_dta, exp_dta) - else: - msg = rf"Cannot cast DatetimeIndex to dtype datetime64\[{unit}\]" - with pytest.raises(TypeError, match=msg): - idx.astype(dtype) - - msg = rf"Cannot cast DatetimeArray to dtype datetime64\[{unit}\]" - with pytest.raises(TypeError, match=msg): - dta.astype(dtype) + res_dta = dta.astype(dtype) + assert exp_dta.dtype == dtype + tm.assert_extension_array_equal(res_dta, exp_dta) @pytest.mark.parametrize("unit", ["ns"]) def test_astype_to_timedelta_unit_ns(self, unit): @@ -483,22 +477,35 @@ def test_astype_to_timedelta_unit(self, unit): dtype = f"m8[{unit}]" arr = np.array([[1, 2, 3]], dtype=dtype) df = DataFrame(arr) + ser = df.iloc[:, 0] + tdi = pd.Index(ser) + tda = tdi._values + if unit in ["us", "ms", "s"]: assert (df.dtypes == dtype).all() + result = df.astype(dtype) else: # We get the nearest supported unit, i.e. "s" assert (df.dtypes == "m8[s]").all() - result = df.astype(dtype) - if unit in ["m", "h", "D"]: - # We don't support these, so we use the pre-2.0 logic to convert to float - # (xref GH#48979) - - expected = DataFrame(df.values.astype(dtype).astype(float)) - else: - # The conversion is a no-op, so we just get a copy - expected = df + msg = ( + rf"Cannot convert from timedelta64\[s\] to timedelta64\[{unit}\]. 
" + "Supported resolutions are 's', 'ms', 'us', 'ns'" + ) + with pytest.raises(ValueError, match=msg): + df.astype(dtype) + with pytest.raises(ValueError, match=msg): + ser.astype(dtype) + with pytest.raises(ValueError, match=msg): + tdi.astype(dtype) + with pytest.raises(ValueError, match=msg): + tda.astype(dtype) + + return + result = df.astype(dtype) + # The conversion is a no-op, so we just get a copy + expected = df tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py index 25ef49718fbe7..445b90327ed2c 100644 --- a/pandas/tests/frame/methods/test_cov_corr.py +++ b/pandas/tests/frame/methods/test_cov_corr.py @@ -107,9 +107,9 @@ class TestDataFrameCorr: @pytest.mark.parametrize("method", ["pearson", "kendall", "spearman"]) @td.skip_if_no_scipy def test_corr_scipy_method(self, float_frame, method): - float_frame["A"][:5] = np.nan - float_frame["B"][5:10] = np.nan - float_frame["A"][:10] = float_frame["A"][10:20] + float_frame.loc[float_frame.index[:5], "A"] = np.nan + float_frame.loc[float_frame.index[5:10], "B"] = np.nan + float_frame.loc[float_frame.index[:10], "A"] = float_frame["A"][10:20] correls = float_frame.corr(method=method) expected = float_frame["A"].corr(float_frame["C"], method=method) diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py index 24d327a101143..e2b8a0f63c31a 100644 --- a/pandas/tests/frame/methods/test_describe.py +++ b/pandas/tests/frame/methods/test_describe.py @@ -274,12 +274,12 @@ def test_describe_tz_values(self, tz_naive_fixture): }, index=["count", "mean", "min", "25%", "50%", "75%", "max", "std"], ) - result = df.describe(include="all", datetime_is_numeric=True) + result = df.describe(include="all") tm.assert_frame_equal(result, expected) def test_datetime_is_numeric_includes_datetime(self): df = DataFrame({"a": date_range("2012", periods=3), "b": [1, 2, 3]}) - result = df.describe(datetime_is_numeric=True) + result = df.describe() expected = DataFrame( { "a": [ @@ -307,36 +307,22 @@ def test_describe_tz_values2(self): df = DataFrame({"s1": s1, "s2": s2}) s1_ = s1.describe() - s2_ = Series( - [ - 5, - 5, - s2.value_counts().index[0], - 1, - start.tz_localize(tz), - end.tz_localize(tz), - ], - index=["count", "unique", "top", "freq", "first", "last"], - ) + s2_ = s2.describe() idx = [ "count", - "unique", - "top", - "freq", - "first", - "last", "mean", - "std", "min", "25%", "50%", "75%", "max", + "std", ] - expected = pd.concat([s1_, s2_], axis=1, keys=["s1", "s2"]).loc[idx] + expected = pd.concat([s1_, s2_], axis=1, keys=["s1", "s2"]).reindex( + idx, copy=False + ) - with tm.assert_produces_warning(FutureWarning): - result = df.describe(include="all") + result = df.describe(include="all") tm.assert_frame_equal(result, expected) def test_describe_percentiles_integer_idx(self): diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py index 6e5b97af7c297..e6db7ec8ed3d7 100644 --- a/pandas/tests/frame/methods/test_drop.py +++ b/pandas/tests/frame/methods/test_drop.py @@ -510,18 +510,6 @@ def test_drop_with_duplicate_columns2(self): result = df2.drop("C", axis=1) tm.assert_frame_equal(result, expected) - def test_drop_pos_args_deprecation(self): - # https://github.com/pandas-dev/pandas/issues/41485 - df = DataFrame({"a": [1, 2, 3]}) - msg = ( - r"In a future version of pandas all arguments of DataFrame\.drop " 
- r"except for the argument 'labels' will be keyword-only" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.drop("a", 1) - expected = DataFrame(index=[0, 1, 2]) - tm.assert_frame_equal(result, expected) - def test_drop_inplace_no_leftover_column_reference(self): # GH 13934 df = DataFrame({"a": [1, 2, 3]}) diff --git a/pandas/tests/frame/methods/test_drop_duplicates.py b/pandas/tests/frame/methods/test_drop_duplicates.py index cd61f59a85d1e..988d8e3b6f13f 100644 --- a/pandas/tests/frame/methods/test_drop_duplicates.py +++ b/pandas/tests/frame/methods/test_drop_duplicates.py @@ -472,17 +472,3 @@ def test_drop_duplicates_non_boolean_ignore_index(arg): msg = '^For argument "ignore_index" expected type bool, received type .*.$' with pytest.raises(ValueError, match=msg): df.drop_duplicates(ignore_index=arg) - - -def test_drop_duplicates_pos_args_deprecation(): - # GH#41485 - df = DataFrame({"a": [1, 1, 2], "b": [1, 1, 3], "c": [1, 1, 3]}) - msg = ( - "In a future version of pandas all arguments of " - "DataFrame.drop_duplicates except for the argument 'subset' " - "will be keyword-only" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.drop_duplicates(["b", "c"], "last") - expected = DataFrame({"a": [1, 2], "b": [1, 3], "c": [1, 3]}, index=[1, 2]) - tm.assert_frame_equal(expected, result) diff --git a/pandas/tests/frame/methods/test_dropna.py b/pandas/tests/frame/methods/test_dropna.py index 62351aa89c914..8c4d9499e3676 100644 --- a/pandas/tests/frame/methods/test_dropna.py +++ b/pandas/tests/frame/methods/test_dropna.py @@ -231,18 +231,6 @@ def test_dropna_with_duplicate_columns(self): result = df.dropna(subset=["A", "C"], how="all") tm.assert_frame_equal(result, expected) - def test_dropna_pos_args_deprecation(self): - # https://github.com/pandas-dev/pandas/issues/41485 - df = DataFrame({"a": [1, 2, 3]}) - msg = ( - r"In a future version of pandas all arguments of DataFrame\.dropna " - r"will be keyword-only" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.dropna(1) - expected = DataFrame({"a": [1, 2, 3]}) - tm.assert_frame_equal(result, expected) - def test_set_single_column_subset(self): # GH 41021 df = DataFrame({"A": [1, 2, 3], "B": list("abc"), "C": [4, np.NaN, 5]}) diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py index 8716a181120f6..6d9874dc58c17 100644 --- a/pandas/tests/frame/methods/test_explode.py +++ b/pandas/tests/frame/methods/test_explode.py @@ -1,3 +1,5 @@ +import re + import numpy as np import pytest @@ -18,7 +20,10 @@ def test_error(): df.explode(list("AA")) df.columns = list("AA") - with pytest.raises(ValueError, match="columns must be unique"): + with pytest.raises( + ValueError, + match=re.escape("DataFrame columns must be unique. 
Duplicate columns: ['A']"), + ): df.explode("A") diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 869cd32aa9ef9..a3424f09f334c 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -392,16 +392,16 @@ def test_fillna_datetime_columns(self): tm.assert_frame_equal(result, expected) def test_ffill(self, datetime_frame): - datetime_frame["A"][:5] = np.nan - datetime_frame["A"][-5:] = np.nan + datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan + datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan tm.assert_frame_equal( datetime_frame.ffill(), datetime_frame.fillna(method="ffill") ) def test_bfill(self, datetime_frame): - datetime_frame["A"][:5] = np.nan - datetime_frame["A"][-5:] = np.nan + datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan + datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan tm.assert_frame_equal( datetime_frame.bfill(), datetime_frame.fillna(method="bfill") @@ -467,8 +467,8 @@ def test_fillna_integer_limit(self, type): def test_fillna_inplace(self): df = DataFrame(np.random.randn(10, 4)) - df[1][:4] = np.nan - df[3][-4:] = np.nan + df.loc[:4, 1] = np.nan + df.loc[-4:, 3] = np.nan expected = df.fillna(value=0) assert expected is not df @@ -479,8 +479,8 @@ def test_fillna_inplace(self): expected = df.fillna(value={0: 0}, inplace=True) assert expected is None - df[1][:4] = np.nan - df[3][-4:] = np.nan + df.loc[:4, 1] = np.nan + df.loc[-4:, 3] = np.nan expected = df.fillna(method="ffill") assert expected is not df @@ -611,18 +611,6 @@ def test_fillna_downcast_dict(self): expected = DataFrame({"col1": [1, 2]}) tm.assert_frame_equal(result, expected) - def test_fillna_pos_args_deprecation(self): - # https://github.com/pandas-dev/pandas/issues/41485 - df = DataFrame({"a": [1, 2, 3, np.nan]}, dtype=float) - msg = ( - r"In a future version of pandas all arguments of DataFrame.fillna " - r"except for the argument 'value' will be keyword-only" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.fillna(0, None, None) - expected = DataFrame({"a": [1, 2, 3, 0]}, dtype=float) - tm.assert_frame_equal(result, expected) - def test_fillna_with_columns_and_limit(self): # GH40989 df = DataFrame( diff --git a/pandas/tests/frame/methods/test_first_valid_index.py b/pandas/tests/frame/methods/test_first_valid_index.py index e4cbd892de38e..6009851bab643 100644 --- a/pandas/tests/frame/methods/test_first_valid_index.py +++ b/pandas/tests/frame/methods/test_first_valid_index.py @@ -7,7 +7,6 @@ from pandas import ( DataFrame, Series, - date_range, ) import pandas._testing as tm @@ -73,22 +72,3 @@ def test_first_last_valid_all_nan(self, index_func): ser = frame["foo"] assert ser.first_valid_index() is None assert ser.last_valid_index() is None - - @pytest.mark.filterwarnings("ignore:Timestamp.freq is deprecated:FutureWarning") - def test_first_last_valid_preserves_freq(self): - # GH#20499: its preserves freq with holes - index = date_range("20110101", periods=30, freq="B") - frame = DataFrame(np.nan, columns=["foo"], index=index) - - frame.iloc[1] = 1 - frame.iloc[-2] = 1 - assert frame.first_valid_index() == frame.index[1] - assert frame.last_valid_index() == frame.index[-2] - assert frame.first_valid_index().freq == frame.index.freq - assert frame.last_valid_index().freq == frame.index.freq - - ts = frame["foo"] - assert ts.first_valid_index() == ts.index[1] - assert ts.last_valid_index() == ts.index[-2] - assert 
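The rewritten test_ffill/test_bfill above assert an equivalence worth spelling out on its own; a small self-contained version:

import numpy as np
import pandas as pd

df = pd.DataFrame({"A": [np.nan, 1.0, np.nan, 3.0]})
pd.testing.assert_frame_equal(df.ffill(), df.fillna(method="ffill"))
pd.testing.assert_frame_equal(df.bfill(), df.fillna(method="bfill"))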
ts.first_valid_index().freq == ts.index.freq - assert ts.last_valid_index().freq == ts.index.freq diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index 6543fd8efdf1b..00fdfe373f1d8 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -397,15 +397,3 @@ def test_interp_fillna_methods(self, request, axis, method, using_array_manager) expected = df.fillna(axis=axis, method=method) result = df.interpolate(method=method, axis=axis) tm.assert_frame_equal(result, expected) - - def test_interpolate_pos_args_deprecation(self): - # https://github.com/pandas-dev/pandas/issues/41485 - df = DataFrame({"a": [1, 2, 3]}) - msg = ( - r"In a future version of pandas all arguments of DataFrame.interpolate " - r"except for the argument 'method' will be keyword-only" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.interpolate("pad", 0) - expected = DataFrame({"a": [1, 2, 3]}) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py index 7db26f7eb570b..9a4837939aceb 100644 --- a/pandas/tests/frame/methods/test_join.py +++ b/pandas/tests/frame/methods/test_join.py @@ -143,7 +143,18 @@ def test_suffix_on_list_join(): def test_join_invalid_validate(left_no_dup, right_no_dup): # GH 46622 # Check invalid arguments - msg = "Not a valid argument for validate" + msg = ( + '"invalid" is not a valid argument. ' + "Valid arguments are:\n" + '- "1:1"\n' + '- "1:m"\n' + '- "m:1"\n' + '- "m:m"\n' + '- "one_to_one"\n' + '- "one_to_many"\n' + '- "many_to_one"\n' + '- "many_to_many"' + ) with pytest.raises(ValueError, match=msg): left_no_dup.merge(right_no_dup, on="a", validate="invalid") @@ -516,8 +527,9 @@ def test_join_multiindex_dates(self): tm.assert_equal(result, expected) - def test_merge_join_different_levels(self): + def test_merge_join_different_levels_raises(self): # GH#9455 + # GH 40993: For raising, enforced in 2.0 # first dataframe df1 = DataFrame(columns=["a", "b"], data=[[1, 11], [0, 22]]) @@ -527,20 +539,16 @@ def test_merge_join_different_levels(self): df2 = DataFrame(columns=columns, data=[[1, 33], [0, 44]]) # merge - columns = ["a", "b", ("c", "c1")] - expected = DataFrame(columns=columns, data=[[1, 11, 33], [0, 22, 44]]) - with tm.assert_produces_warning(FutureWarning): - result = pd.merge(df1, df2, on="a") - tm.assert_frame_equal(result, expected) + with pytest.raises( + MergeError, match="Not allowed to merge between different levels" + ): + pd.merge(df1, df2, on="a") # join, see discussion in GH#12219 - columns = ["a", "b", ("a", ""), ("c", "c1")] - expected = DataFrame(columns=columns, data=[[1, 11, 0, 44], [0, 22, 1, 33]]) - msg = "merging between different levels is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - # stacklevel is chosen to be correct for pd.merge, not DataFrame.join - result = df1.join(df2, on="a") - tm.assert_frame_equal(result, expected) + with pytest.raises( + MergeError, match="Not allowed to merge between different levels" + ): + df1.join(df2, on="a") def test_frame_join_tzaware(self): test1 = DataFrame( diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py index 7b2f7908673e3..1f5cb95885004 100644 --- a/pandas/tests/frame/methods/test_rank.py +++ b/pandas/tests/frame/methods/test_rank.py @@ -43,10 +43,10 @@ def test_rank(self, float_frame): import scipy.stats # noqa:F401 from 
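test_merge_join_different_levels_raises above enforces GH#40993: mixing flat and MultiIndex columns in a merge now raises MergeError instead of warning. A condensed sketch of the same setup:

import pandas as pd
import pytest
from pandas.errors import MergeError

df1 = pd.DataFrame(columns=["a", "b"], data=[[1, 11], [0, 22]])
df2 = pd.DataFrame(
    columns=pd.MultiIndex.from_tuples([("a", ""), ("c", "c1")]),
    data=[[1, 33], [0, 44]],
)

with pytest.raises(MergeError, match="Not allowed to merge between different levels"):
    pd.merge(df1, df2, on="a")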
scipy.stats import rankdata - float_frame["A"][::2] = np.nan - float_frame["B"][::3] = np.nan - float_frame["C"][::4] = np.nan - float_frame["D"][::5] = np.nan + float_frame.loc[::2, "A"] = np.nan + float_frame.loc[::3, "B"] = np.nan + float_frame.loc[::4, "C"] = np.nan + float_frame.loc[::5, "D"] = np.nan ranks0 = float_frame.rank() ranks1 = float_frame.rank(1) @@ -148,10 +148,10 @@ def test_rank_na_option(self, float_frame): import scipy.stats # noqa:F401 from scipy.stats import rankdata - float_frame["A"][::2] = np.nan - float_frame["B"][::3] = np.nan - float_frame["C"][::4] = np.nan - float_frame["D"][::5] = np.nan + float_frame.loc[::2, "A"] = np.nan + float_frame.loc[::3, "B"] = np.nan + float_frame.loc[::4, "C"] = np.nan + float_frame.loc[::5, "D"] = np.nan # bottom ranks0 = float_frame.rank(na_option="bottom") diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 39d6bedf8d5ec..626bc658b199c 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -29,8 +29,8 @@ def mix_abc() -> dict[str, list[float | str]]: class TestDataFrameReplace: def test_replace_inplace(self, datetime_frame, float_string_frame): - datetime_frame["A"][:5] = np.nan - datetime_frame["A"][-5:] = np.nan + datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan + datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan tsframe = datetime_frame.copy() return_value = tsframe.replace(np.nan, 0, inplace=True) @@ -420,16 +420,16 @@ def test_regex_replace_string_types( tm.assert_equal(result, expected) def test_replace(self, datetime_frame): - datetime_frame["A"][:5] = np.nan - datetime_frame["A"][-5:] = np.nan + datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan + datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan zero_filled = datetime_frame.replace(np.nan, -1e8) tm.assert_frame_equal(zero_filled, datetime_frame.fillna(-1e8)) tm.assert_frame_equal(zero_filled.replace(-1e8, np.nan), datetime_frame) - datetime_frame["A"][:5] = np.nan - datetime_frame["A"][-5:] = np.nan - datetime_frame["B"][:5] = -1e8 + datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan + datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan + datetime_frame.loc[datetime_frame.index[:5], "B"] = -1e8 # empty df = DataFrame(index=["a", "b"]) @@ -716,16 +716,16 @@ def test_replace_for_new_dtypes(self, datetime_frame): # dtypes tsframe = datetime_frame.copy().astype(np.float32) - tsframe["A"][:5] = np.nan - tsframe["A"][-5:] = np.nan + tsframe.loc[tsframe.index[:5], "A"] = np.nan + tsframe.loc[tsframe.index[-5:], "A"] = np.nan zero_filled = tsframe.replace(np.nan, -1e8) tm.assert_frame_equal(zero_filled, tsframe.fillna(-1e8)) tm.assert_frame_equal(zero_filled.replace(-1e8, np.nan), tsframe) - tsframe["A"][:5] = np.nan - tsframe["A"][-5:] = np.nan - tsframe["B"][:5] = -1e8 + tsframe.loc[tsframe.index[:5], "A"] = np.nan + tsframe.loc[tsframe.index[-5:], "A"] = np.nan + tsframe.loc[tsframe.index[:5], "B"] = -1e8 b = tsframe["B"] b[b == -1e8] = np.nan @@ -1162,20 +1162,15 @@ def test_replace_datetimetz(self): result = result.replace({"A": pd.NaT}, Timestamp("20130104", tz="US/Eastern")) tm.assert_frame_equal(result, expected) - # coerce to object + # pre-2.0 this would coerce to object with mismatched tzs result = df.copy() result.iloc[1, 0] = np.nan - with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): - result = result.replace( - {"A": pd.NaT}, Timestamp("20130104", tz="US/Pacific") - ) + result 
= result.replace({"A": pd.NaT}, Timestamp("20130104", tz="US/Pacific")) expected = DataFrame( { "A": [ Timestamp("20130101", tz="US/Eastern"), - Timestamp("20130104", tz="US/Pacific"), - # once deprecation is enforced - # Timestamp("20130104", tz="US/Pacific").tz_convert("US/Eastern"), + Timestamp("20130104", tz="US/Pacific").tz_convert("US/Eastern"), Timestamp("20130103", tz="US/Eastern"), ], "B": [0, np.nan, 2], @@ -1496,6 +1491,18 @@ def test_replace_list_with_mixed_type( result = obj.replace(box(to_replace), value) tm.assert_equal(result, expected) + @pytest.mark.parametrize("val", [2, np.nan, 2.0]) + def test_replace_value_none_dtype_numeric(self, val): + # GH#48231 + df = DataFrame({"a": [1, val]}) + result = df.replace(val, None) + expected = DataFrame({"a": [1, None]}, dtype=object) + tm.assert_frame_equal(result, expected) + + df = DataFrame({"a": [1, val]}) + result = df.replace({val: None}) + tm.assert_frame_equal(result, expected) + class TestDataFrameReplaceRegex: @pytest.mark.parametrize( diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 37431bc291b76..e03fa317fded9 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -479,7 +479,6 @@ def test_reset_index_allow_duplicates_check(self, multiindex_df, allow_duplicate with pytest.raises(ValueError, match="expected type bool"): multiindex_df.reset_index(allow_duplicates=allow_duplicates) - @pytest.mark.filterwarnings("ignore:Timestamp.freq is deprecated:FutureWarning") def test_reset_index_datetime(self, tz_naive_fixture): # GH#3950 tz = tz_naive_fixture @@ -730,19 +729,6 @@ def test_reset_index_multiindex_nat(): tm.assert_frame_equal(result, expected) -def test_drop_pos_args_deprecation(): - # https://github.com/pandas-dev/pandas/issues/41485 - df = DataFrame({"a": [1, 2, 3]}).set_index("a") - msg = ( - r"In a future version of pandas all arguments of DataFrame\.reset_index " - r"except for the argument 'level' will be keyword-only" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.reset_index("a", False) - expected = DataFrame({"a": [1, 2, 3]}) - tm.assert_frame_equal(result, expected) - - def test_reset_index_interval_columns_object_cast(): # GH 19136 df = DataFrame( diff --git a/pandas/tests/frame/methods/test_set_axis.py b/pandas/tests/frame/methods/test_set_axis.py index 8e597e1e9fa69..7efd4434f8412 100644 --- a/pandas/tests/frame/methods/test_set_axis.py +++ b/pandas/tests/frame/methods/test_set_axis.py @@ -16,14 +16,9 @@ def obj(self): def test_set_axis(self, obj): # GH14636; this tests setting index for both Series and DataFrame new_index = list("abcd")[: len(obj)] - expected = obj.copy() expected.index = new_index - - # inplace=False - msg = "set_axis 'inplace' keyword is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = obj.set_axis(new_index, axis=0, inplace=False) + result = obj.set_axis(new_index, axis=0) tm.assert_equal(expected, result) def test_set_axis_copy(self, obj): @@ -34,12 +29,6 @@ def test_set_axis_copy(self, obj): expected = obj.copy() expected.index = new_index - with pytest.raises( - ValueError, match="Cannot specify both inplace=True and copy=True" - ): - with tm.assert_produces_warning(FutureWarning): - obj.set_axis(new_index, axis=0, inplace=True, copy=True) - result = obj.set_axis(new_index, axis=0, copy=True) tm.assert_equal(expected, result) assert result is not obj @@ -77,40 +66,17 @@ def 
test_set_axis_copy(self, obj): for i in range(obj.shape[1]) ) - # Do this last since it alters obj inplace - with tm.assert_produces_warning(FutureWarning): - res = obj.set_axis(new_index, inplace=True, copy=False) - assert res is None - tm.assert_equal(expected, obj) + res = obj.set_axis(new_index, copy=False) + tm.assert_equal(expected, res) # check we did NOT make a copy - if obj.ndim == 1: - assert tm.shares_memory(obj, orig) + if res.ndim == 1: + assert tm.shares_memory(res, orig) else: assert all( - tm.shares_memory(obj.iloc[:, i], orig.iloc[:, i]) - for i in range(obj.shape[1]) + tm.shares_memory(res.iloc[:, i], orig.iloc[:, i]) + for i in range(res.shape[1]) ) - @pytest.mark.parametrize("axis", [0, "index", 1, "columns"]) - def test_set_axis_inplace_axis(self, axis, obj): - # GH#14636 - if obj.ndim == 1 and axis in [1, "columns"]: - # Series only has [0, "index"] - return - - new_index = list("abcd")[: len(obj)] - - expected = obj.copy() - if axis in [0, "index"]: - expected.index = new_index - else: - expected.columns = new_index - - result = obj.copy() - with tm.assert_produces_warning(FutureWarning): - result.set_axis(new_index, axis=axis, inplace=True) - tm.assert_equal(result, expected) - def test_set_axis_unnamed_kwarg_warns(self, obj): # omitting the "axis" parameter new_index = list("abcd")[: len(obj)] @@ -118,10 +84,7 @@ def test_set_axis_unnamed_kwarg_warns(self, obj): expected = obj.copy() expected.index = new_index - with tm.assert_produces_warning( - FutureWarning, match="set_axis 'inplace' keyword" - ): - result = obj.set_axis(new_index, inplace=False) + result = obj.set_axis(new_index) tm.assert_equal(result, expected) @pytest.mark.parametrize("axis", [3, "foo"]) diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py index 4c39cf99f18ff..8e5f11840fbe5 100644 --- a/pandas/tests/frame/methods/test_set_index.py +++ b/pandas/tests/frame/methods/test_set_index.py @@ -704,15 +704,3 @@ def test_set_index_periodindex(self): tm.assert_index_equal(df.index, idx1) df = df.set_index(idx2) tm.assert_index_equal(df.index, idx2) - - def test_drop_pos_args_deprecation(self): - # https://github.com/pandas-dev/pandas/issues/41485 - df = DataFrame({"a": [1, 2, 3]}) - msg = ( - r"In a future version of pandas all arguments of DataFrame\.set_index " - r"except for the argument 'keys' will be keyword-only" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.set_index("a", True) - expected = DataFrame(index=Index([1, 2, 3], name="a")) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index f76deca9048be..3b33d0cc80445 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -505,26 +505,19 @@ def test_shift_dt64values_int_fill_deprecated(self): # GH#31971 ser = Series([pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02")]) - with tm.assert_produces_warning(FutureWarning): - result = ser.shift(1, fill_value=0) - expected = Series([pd.Timestamp(0), ser[0]]) - tm.assert_series_equal(result, expected) + with pytest.raises(TypeError, match="value should be a"): + ser.shift(1, fill_value=0) df = ser.to_frame() - with tm.assert_produces_warning(FutureWarning): - result = df.shift(1, fill_value=0) - expected = expected.to_frame() - tm.assert_frame_equal(result, expected) + with pytest.raises(TypeError, match="value should be a"): + df.shift(1, fill_value=0) # axis = 1 df2 = 
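With the 'inplace' keyword removed from set_axis in the hunks above, the method always returns a new object; copy=False still avoids copying the underlying data. A minimal sketch:

import pandas as pd

ser = pd.Series([1, 2, 3, 4])
result = ser.set_axis(list("abcd"))  # returns a relabeled Series

assert list(result.index) == list("abcd")
assert list(ser.index) == [0, 1, 2, 3]  # the original is untouched

shallow = ser.set_axis(list("abcd"), copy=False)  # shares memory with ser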
DataFrame({"A": ser, "B": ser}) df2._consolidate_inplace() - with tm.assert_produces_warning(FutureWarning): - result = df2.shift(1, axis=1, fill_value=0) - - expected = DataFrame({"A": [pd.Timestamp(0), pd.Timestamp(0)], "B": df2["A"]}) - tm.assert_frame_equal(result, expected) + with pytest.raises(TypeError, match="value should be a"): + df2.shift(1, axis=1, fill_value=0) # same thing but not consolidated # This isn't great that we get different behavior, but diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index 5d1cc3d4ecee5..806abc1e7c012 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -913,15 +913,3 @@ def test_sort_index_multiindex_sparse_column(self): result = expected.sort_index(level=0) tm.assert_frame_equal(result, expected) - - def test_sort_index_pos_args_deprecation(self): - # https://github.com/pandas-dev/pandas/issues/41485 - df = DataFrame({"a": [1, 2, 3]}) - msg = ( - r"In a future version of pandas all arguments of DataFrame.sort_index " - r"will be keyword-only" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.sort_index(1) - expected = DataFrame({"a": [1, 2, 3]}) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 1933278efb443..7487b2c70a264 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -35,7 +35,7 @@ def read_csv(self, path, **kwargs): def test_to_csv_from_csv1(self, float_frame, datetime_frame): with tm.ensure_clean("__tmp_to_csv_from_csv1__") as path: - float_frame["A"][:5] = np.nan + float_frame.iloc[:5, float_frame.columns.get_loc("A")] = np.nan float_frame.to_csv(path) float_frame.to_csv(path, columns=["A", "B"]) diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py index 613f7147a4a7d..521f6ead2e69e 100644 --- a/pandas/tests/frame/methods/test_to_dict.py +++ b/pandas/tests/frame/methods/test_to_dict.py @@ -80,11 +80,10 @@ def test_to_dict_invalid_orient(self): df.to_dict(orient="xinvalid") @pytest.mark.parametrize("orient", ["d", "l", "r", "sp", "s", "i"]) - def test_to_dict_short_orient_warns(self, orient): + def test_to_dict_short_orient_raises(self, orient): # GH#32515 df = DataFrame({"A": [0, 1]}) - msg = "Using short name for 'orient' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with pytest.raises(ValueError, match="not understood"): df.to_dict(orient=orient) @pytest.mark.parametrize("mapping", [dict, defaultdict(list), OrderedDict]) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 9fc3161a7b004..822e761f63ae8 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -31,9 +31,7 @@ ) -@pytest.fixture( - autouse=True, scope="module", params=[0, 1000000], ids=["numexpr", "python"] -) +@pytest.fixture(autouse=True, params=[0, 1000000], ids=["numexpr", "python"]) def switch_numexpr_min_elements(request): _MIN_ELEMENTS = expr._MIN_ELEMENTS expr._MIN_ELEMENTS = request.param diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 7d33e8b6b0fd1..57e498defccc1 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -16,7 +16,6 @@ DataFrame, Series, Timestamp, - compat, date_range, 
option_context, ) @@ -259,16 +258,15 @@ def f(dtype): with pytest.raises(NotImplementedError, match=msg): f([("A", "datetime64[h]"), ("B", "str"), ("C", "int32")]) - # these work (though results may be unexpected) - depr_msg = "either all columns will be cast to that dtype, or a TypeError will" - with tm.assert_produces_warning(FutureWarning, match=depr_msg): + # pre-2.0 these used to work (though results may be unexpected) + with pytest.raises(TypeError, match="argument must be"): f("int64") - with tm.assert_produces_warning(FutureWarning, match=depr_msg): + with pytest.raises(TypeError, match="argument must be"): f("float64") # 10822 - # invalid error message on dt inference - if not compat.is_platform_windows(): + msg = "Unknown string format: aa present at position 0" + with pytest.raises(ValueError, match=msg): f("M8[ns]") def test_pickle(self, float_string_frame, timezone_frame): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index f9cfb0b81a7bd..d14f419888023 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -14,8 +14,8 @@ import warnings import numpy as np -import numpy.ma as ma -import numpy.ma.mrecords as mrecords +from numpy import ma +from numpy.ma import mrecords import pytest import pytz @@ -141,13 +141,8 @@ def test_array_of_dt64_nat_with_td64dtype_raises(self, frame_or_series): if frame_or_series is DataFrame: arr = arr.reshape(1, 1) - msg = "|".join( - [ - "Could not convert object to NumPy timedelta", - "Invalid type for timedelta scalar: ", - ] - ) - with pytest.raises(ValueError, match=msg): + msg = "Invalid type for timedelta scalar: " + with pytest.raises(TypeError, match=msg): frame_or_series(arr, dtype="m8[ns]") @pytest.mark.parametrize("kind", ["m", "M"]) @@ -245,10 +240,11 @@ def test_constructor_mixed(self, float_string_frame): assert float_string_frame["foo"].dtype == np.object_ def test_constructor_cast_failure(self): - msg = "either all columns will be cast to that dtype, or a TypeError will" - with tm.assert_produces_warning(FutureWarning, match=msg): - foo = DataFrame({"a": ["a", "b", "c"]}, dtype=np.float64) - assert foo["a"].dtype == object + # as of 2.0, we raise if we can't respect "dtype", previously we + # silently ignored + msg = "could not convert string to float" + with pytest.raises(ValueError, match=msg): + DataFrame({"a": ["a", "b", "c"]}, dtype=np.float64) # GH 3010, constructing with odd arrays df = DataFrame(np.ones((4, 2))) @@ -717,7 +713,8 @@ def test_constructor_defaultdict(self, float_frame): from collections import defaultdict data = {} - float_frame["B"][:10] = np.nan + float_frame.loc[: float_frame.index[10], "B"] = np.nan + for k, v in float_frame.items(): dct = defaultdict(dict) dct.update(v.to_dict()) @@ -753,13 +750,8 @@ def test_constructor_dict_cast2(self): "A": dict(zip(range(20), tm.makeStringIndex(20))), "B": dict(zip(range(15), np.random.randn(15))), } - msg = "either all columns will be cast to that dtype, or a TypeError will" - with tm.assert_produces_warning(FutureWarning, match=msg): - frame = DataFrame(test_data, dtype=float) - - assert len(frame) == 20 - assert frame["A"].dtype == np.object_ - assert frame["B"].dtype == np.float64 + with pytest.raises(ValueError, match="could not convert string"): + DataFrame(test_data, dtype=float) def test_constructor_dict_dont_upcast(self): d = {"Col1": {"Row1": "A String", "Row2": np.nan}} @@ -2207,7 +2199,9 @@ def test_constructor_series_copy(self, float_frame): series = 
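The rewritten test_constructor_cast_failure makes the 2.0 contract explicit: a dtype that cannot be honored raises instead of being silently dropped. Standalone:

import numpy as np
import pandas as pd
import pytest

with pytest.raises(ValueError, match="could not convert string to float"):
    pd.DataFrame({"a": ["a", "b", "c"]}, dtype=np.float64)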
float_frame._series df = DataFrame({"A": series["A"]}, copy=True) - df["A"][:] = 5 + # TODO can be replaced with `df.loc[:, "A"] = 5` after deprecation about + # inplace mutation is enforced + df.loc[df.index[0] : df.index[-1], "A"] = 5 assert not (series["A"] == 5).all() @@ -2788,13 +2782,14 @@ def test_floating_values_integer_dtype(self): arr = np.random.randn(10, 5) - msg = "if they cannot be cast losslessly" - with tm.assert_produces_warning(FutureWarning, match=msg): - DataFrame(arr, dtype="i8") + # as of 2.0, we match Series behavior by retaining float dtype instead + # of doing a lossy conversion here. Below we _do_ do the conversion + # since it is lossless. + df = DataFrame(arr, dtype="i8") + assert (df.dtypes == "f8").all() - with tm.assert_produces_warning(None): - # if they can be cast losslessly, no warning - DataFrame(arr.round(), dtype="i8") + df = DataFrame(arr.round(), dtype="i8") + assert (df.dtypes == "i8").all() # with NaNs, we go through a different path with a different warning arr[0, 0] = np.nan diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 4da57fc177712..e81837898c927 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -1112,7 +1112,7 @@ def test_invalid_type_for_operator_raises(self, parser, engine, op): class TestDataFrameQueryBacktickQuoting: - @pytest.fixture(scope="class") + @pytest.fixture def df(self): """ Yields a dataframe with strings that may or may not need escaping diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 744d06d6cf339..8d4d705296f35 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -28,8 +28,10 @@ to_timedelta, ) import pandas._testing as tm -import pandas.core.algorithms as algorithms -import pandas.core.nanops as nanops +from pandas.core import ( + algorithms, + nanops, +) def assert_stat_op_calc( @@ -72,14 +74,13 @@ def assert_stat_op_calc( f = getattr(frame, opname) if check_dates: - expected_warning = FutureWarning if opname in ["mean", "median"] else None df = DataFrame({"b": date_range("1/1/2001", periods=2)}) - with tm.assert_produces_warning(expected_warning): + with tm.assert_produces_warning(None): result = getattr(df, opname)() assert isinstance(result, Series) df["a"] = range(len(df)) - with tm.assert_produces_warning(expected_warning): + with tm.assert_produces_warning(None): result = getattr(df, opname)() assert isinstance(result, Series) assert len(result) @@ -168,7 +169,15 @@ class TestDataFrameAnalytics: ], ) def test_stat_op_api_float_string_frame(self, float_string_frame, axis, opname): - getattr(float_string_frame, opname)(axis=axis) + if opname in ["sum", "min", "max"] and axis == 0: + warn = None + elif opname not in ["count", "nunique"]: + warn = FutureWarning + else: + warn = None + msg = "nuisance columns|default value of numeric_only" + with tm.assert_produces_warning(warn, match=msg): + getattr(float_string_frame, opname)(axis=axis) if opname != "nunique": getattr(float_string_frame, opname)(axis=axis, numeric_only=True) @@ -219,8 +228,7 @@ def sem(x): check_dates=True, ) - # GH#32571 check_less_precise is needed on apparently-random - # py37-npdev builds and OSX-PY36-min_version builds + # GH#32571: rol needed for flaky CI builds # mixed types (with upcasting happening) assert_stat_op_calc( "sum", @@ -374,21 +382,19 @@ def test_nunique(self): def test_mean_mixed_datetime_numeric(self, tz): # 
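test_floating_values_integer_dtype above now encodes the Series-matching rule: a lossy float-to-int request keeps the float dtype, while a lossless one converts. In isolation:

import numpy as np
import pandas as pd

arr = np.random.randn(10, 5)

df = pd.DataFrame(arr, dtype="i8")  # would be lossy, so float64 is retained
assert (df.dtypes == "f8").all()

df = pd.DataFrame(arr.round(), dtype="i8")  # lossless, so it converts
assert (df.dtypes == "i8").all()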
https://github.com/pandas-dev/pandas/issues/24752 df = DataFrame({"A": [1, 1], "B": [Timestamp("2000", tz=tz)] * 2}) - with tm.assert_produces_warning(FutureWarning): - result = df.mean() - expected = Series([1.0], index=["A"]) + result = df.mean() + expected = Series([1.0, Timestamp("2000", tz=tz)], index=["A", "B"]) tm.assert_series_equal(result, expected) @pytest.mark.parametrize("tz", [None, "UTC"]) - def test_mean_excludes_datetimes(self, tz): + def test_mean_includes_datetimes(self, tz): # https://github.com/pandas-dev/pandas/issues/24752 - # Our long-term desired behavior is unclear, but the behavior in - # 0.24.0rc1 was buggy. + # Behavior in 0.24.0rc1 was buggy. + # As of 2.0 with numeric_only=None we do *not* drop datetime columns df = DataFrame({"A": [Timestamp("2000", tz=tz)] * 2}) - with tm.assert_produces_warning(FutureWarning): - result = df.mean() + result = df.mean() - expected = Series(dtype=np.float64) + expected = Series([Timestamp("2000", tz=tz)], index=["A"]) tm.assert_series_equal(result, expected) def test_mean_mixed_string_decimal(self): @@ -841,6 +847,7 @@ def test_mean_corner(self, float_frame, float_string_frame): def test_mean_datetimelike(self): # GH#24757 check that datetimelike are excluded by default, handled # correctly with numeric_only=True + # As of 2.0, datetimelike are *not* excluded with numeric_only=None df = DataFrame( { @@ -854,10 +861,9 @@ def test_mean_datetimelike(self): expected = Series({"A": 1.0}) tm.assert_series_equal(result, expected) - with tm.assert_produces_warning(FutureWarning): - # in the future datetime columns will be included + with tm.assert_produces_warning(FutureWarning, match="Select only valid"): result = df.mean() - expected = Series({"A": 1.0, "C": df.loc[1, "C"]}) + expected = Series({"A": 1.0, "B": df.loc[1, "B"], "C": df.loc[1, "C"]}) tm.assert_series_equal(result, expected) def test_mean_datetimelike_numeric_only_false(self): @@ -1272,7 +1278,6 @@ def test_any_all_object(self): assert result is False def test_any_all_object_bool_only(self): - msg = "object-dtype columns with all-bool values" df = DataFrame({"A": ["foo", 2], "B": [True, False]}).astype(object) df._consolidate_inplace() @@ -1283,36 +1288,29 @@ def test_any_all_object_bool_only(self): # The underlying bug is in DataFrame._get_bool_data, so we check # that while we're here - with tm.assert_produces_warning(FutureWarning, match=msg): - res = df._get_bool_data() - expected = df[["B", "C"]] + res = df._get_bool_data() + expected = df[["C"]] tm.assert_frame_equal(res, expected) - with tm.assert_produces_warning(FutureWarning, match=msg): - res = df.all(bool_only=True, axis=0) - expected = Series([False, True], index=["B", "C"]) + res = df.all(bool_only=True, axis=0) + expected = Series([True], index=["C"]) tm.assert_series_equal(res, expected) # operating on a subset of columns should not produce a _larger_ Series - with tm.assert_produces_warning(FutureWarning, match=msg): - res = df[["B", "C"]].all(bool_only=True, axis=0) + res = df[["B", "C"]].all(bool_only=True, axis=0) tm.assert_series_equal(res, expected) - with tm.assert_produces_warning(FutureWarning, match=msg): - assert not df.all(bool_only=True, axis=None) + assert df.all(bool_only=True, axis=None) - with tm.assert_produces_warning(FutureWarning, match=msg): - res = df.any(bool_only=True, axis=0) - expected = Series([True, True], index=["B", "C"]) + res = df.any(bool_only=True, axis=0) + expected = Series([True], index=["C"]) tm.assert_series_equal(res, expected) # operating on a subset of columns 
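test_mean_includes_datetimes captures the headline change in this file: datetime columns are no longer dropped from mean() by default. A tz-naive sketch of what the parametrized test asserts:

import pandas as pd

df = pd.DataFrame({"A": [1, 1], "B": [pd.Timestamp("2000")] * 2})
result = df.mean()

expected = pd.Series([1.0, pd.Timestamp("2000")], index=["A", "B"])
pd.testing.assert_series_equal(result, expected)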
should not produce a _larger_ Series - with tm.assert_produces_warning(FutureWarning, match=msg): - res = df[["B", "C"]].any(bool_only=True, axis=0) + res = df[["C"]].any(bool_only=True, axis=0) tm.assert_series_equal(res, expected) - with tm.assert_produces_warning(FutureWarning, match=msg): - assert df.any(bool_only=True, axis=None) + assert df.any(bool_only=True, axis=None) @pytest.mark.parametrize("method", ["any", "all"]) def test_any_all_level_axis_none_raises(self, method): diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index d5331b1060b23..b385091c9ff51 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -138,11 +138,11 @@ def test_subclass_attr_err_propagation(self): # GH 11808 class A(DataFrame): @property - def bar(self): + def nonexistence(self): return self.i_dont_exist with pytest.raises(AttributeError, match=".*i_dont_exist.*"): - A().bar + A().nonexistence def test_subclass_align(self): # GH 12983 diff --git a/pandas/tests/generic/test_duplicate_labels.py b/pandas/tests/generic/test_duplicate_labels.py index c9036958cbd74..d6d5c29e6d888 100644 --- a/pandas/tests/generic/test_duplicate_labels.py +++ b/pandas/tests/generic/test_duplicate_labels.py @@ -414,7 +414,6 @@ def test_dataframe_insert_raises(): "method, frame_only", [ (operator.methodcaller("set_index", "A", inplace=True), True), - (operator.methodcaller("set_axis", ["A", "B"], inplace=True), False), (operator.methodcaller("reset_index", inplace=True), True), (operator.methodcaller("rename", lambda x: x, inplace=True), False), ], @@ -427,19 +426,11 @@ def test_inplace_raises(method, frame_only): s.flags.allows_duplicate_labels = False msg = "Cannot specify" - warn_msg = "Series.set_axis 'inplace' keyword" - if "set_axis" in str(method): - warn = FutureWarning - else: - warn = None - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(warn, match=warn_msg): - method(df) + method(df) if not frame_only: with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(warn, match=warn_msg): - method(s) + method(s) def test_pickle(): diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 7634f783117d6..689caffe98a2d 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -161,26 +161,6 @@ pytest.param( (pd.DataFrame, frame_data, operator.methodcaller("applymap", lambda x: x)) ), - pytest.param( - ( - pd.DataFrame, - frame_data, - operator.methodcaller("append", pd.DataFrame({"A": [1]})), - ), - marks=pytest.mark.filterwarnings( - "ignore:.*append method is deprecated.*:FutureWarning" - ), - ), - pytest.param( - ( - pd.DataFrame, - frame_data, - operator.methodcaller("append", pd.DataFrame({"B": [1]})), - ), - marks=pytest.mark.filterwarnings( - "ignore:.*append method is deprecated.*:FutureWarning" - ), - ), pytest.param( ( pd.DataFrame, diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index ad7368a69c0f5..8ffc49cd25915 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -283,15 +283,15 @@ def test_aggregate_item_by_item(df): aggfun_0 = lambda ser: ser.size result = grouped.agg(aggfun_0) - foo = (df.A == "foo").sum() - bar = (df.A == "bar").sum() + foosum = (df.A == "foo").sum() + barsum = (df.A == "bar").sum() K = len(result.columns) # GH5782 - exp = Series(np.array([foo] * K), 
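The any/all hunk above tightens bool_only=True to actual bool-dtype columns; object columns that merely hold bools no longer count. A sketch that rebuilds the fixture by hand ("C" is assumed to be a genuine bool column, as in the full test):

import pandas as pd

df = pd.DataFrame({"A": ["foo", 2], "B": [True, False]}).astype(object)
df["C"] = pd.Series([True, True])  # bool dtype, unlike object columns A and B

# only "C" is considered, and it is all-True
assert df.all(bool_only=True, axis=None)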
index=list("BCD"), name="foo") + exp = Series(np.array([foosum] * K), index=list("BCD"), name="foo") tm.assert_series_equal(result.xs("foo"), exp) - exp = Series(np.array([bar] * K), index=list("BCD"), name="bar") + exp = Series(np.array([barsum] * K), index=list("BCD"), name="bar") tm.assert_almost_equal(result.xs("bar"), exp) def aggfun_1(ser): @@ -308,8 +308,7 @@ def test_wrap_agg_out(three_group): def func(ser): if ser.dtype == object: raise TypeError - else: - return ser.sum() + return ser.sum() with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"): result = grouped.aggregate(func) @@ -383,21 +382,18 @@ def test_agg_multiple_functions_same_name_with_ohlc_present(): def test_multiple_functions_tuples_and_non_tuples(df): # #1359 + # Columns B and C would cause partial failure + df = df.drop(columns=["B", "C"]) + funcs = [("foo", "mean"), "std"] ex_funcs = [("foo", "mean"), ("std", "std")] - result = df.groupby("A")["C"].agg(funcs) - expected = df.groupby("A")["C"].agg(ex_funcs) + result = df.groupby("A")["D"].agg(funcs) + expected = df.groupby("A")["D"].agg(ex_funcs) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning( - FutureWarning, match=r"\['B'\] did not aggregate successfully" - ): - result = df.groupby("A").agg(funcs) - with tm.assert_produces_warning( - FutureWarning, match=r"\['B'\] did not aggregate successfully" - ): - expected = df.groupby("A").agg(ex_funcs) + result = df.groupby("A").agg(funcs) + expected = df.groupby("A").agg(ex_funcs) tm.assert_frame_equal(result, expected) @@ -420,10 +416,10 @@ def test_more_flexible_frame_multi_function(df): expected = grouped.aggregate({"C": np.mean, "D": [np.mean, np.std]}) tm.assert_frame_equal(result, expected) - def foo(x): + def numpymean(x): return np.mean(x) - def bar(x): + def numpystd(x): return np.std(x, ddof=1) # this uses column selection & renaming @@ -433,7 +429,7 @@ def bar(x): grouped.aggregate(d) # But without renaming, these functions are OK - d = {"C": [np.mean], "D": [foo, bar]} + d = {"C": [np.mean], "D": [numpymean, numpystd]} grouped.aggregate(d) diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index f84abecea37da..6740729d038a7 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -25,10 +25,8 @@ from pandas.io.formats.printing import pprint_thing -def test_agg_api(): - # GH 6337 - # https://stackoverflow.com/questions/21706030/pandas-groupby-agg-function-column-dtype-error - # different api for agg when passed custom function with mixed frame +def test_agg_partial_failure_raises(): + # GH#43741 df = DataFrame( { @@ -43,19 +41,11 @@ def test_agg_api(): def peak_to_peak(arr): return arr.max() - arr.min() - with tm.assert_produces_warning( - FutureWarning, - match=r"\['key2'\] did not aggregate successfully", - ): - expected = grouped.agg([peak_to_peak]) - expected.columns = ["data1", "data2"] - - with tm.assert_produces_warning( - FutureWarning, - match=r"\['key2'\] did not aggregate successfully", - ): - result = grouped.agg(peak_to_peak) - tm.assert_frame_equal(result, expected) + with pytest.raises(TypeError, match="unsupported operand type"): + grouped.agg([peak_to_peak]) + + with pytest.raises(TypeError, match="unsupported operand type"): + grouped.agg(peak_to_peak) def test_agg_datetimes_mixed(): diff --git a/pandas/tests/groupby/test_allowlist.py b/pandas/tests/groupby/test_allowlist.py index 38de589d0c60c..034514cb0bcfb 100644 --- 
a/pandas/tests/groupby/test_allowlist.py +++ b/pandas/tests/groupby/test_allowlist.py @@ -74,6 +74,8 @@ def raw_frame(multiindex_dataframe_random_data): @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.filterwarnings("ignore:Using the level keyword:FutureWarning") +@pytest.mark.filterwarnings("ignore:The default value of numeric_only:FutureWarning") def test_regression_allowlist_methods(raw_frame, op, level, axis, skipna, sort): # GH6944 # GH 17537 diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 0cd89a205bb82..935c39af8af3a 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -151,7 +151,6 @@ def f_scalar(group): def f_none(group): # GH10519, GH12155, GH21417 names.append(group.name) - return None def f_constant_df(group): # GH2936, GH20084 diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index b40514568452c..c8aaf71fa419e 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -109,8 +109,7 @@ def test_filter_condition_raises(): def raise_if_sum_is_zero(x): if x.sum() == 0: raise ValueError - else: - return x.sum() > 0 + return x.sum() > 0 s = Series([-1, 0, 1, 2]) grouper = s.apply(lambda x: x % 2) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 2b583431dcd71..7a9d540ae08c4 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -17,7 +17,7 @@ date_range, ) import pandas._testing as tm -import pandas.core.nanops as nanops +from pandas.core import nanops from pandas.tests.groupby import get_groupby_method_args from pandas.util import _test_decorators as td diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 26f269d3d4384..392910bd9e598 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -28,7 +28,6 @@ import pandas._testing as tm from pandas.core.arrays import BooleanArray import pandas.core.common as com -from pandas.core.groupby.base import maybe_normalize_deprecated_kernels from pandas.tests.groupby import get_groupby_method_args @@ -502,6 +501,54 @@ def test_multi_key_multiple_functions(df): def test_frame_multi_key_function_list(): + data = DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "D": np.random.randn(11), + "E": np.random.randn(11), + "F": np.random.randn(11), + } + ) + + grouped = data.groupby(["A", "B"]) + funcs = [np.mean, np.std] + agged = grouped.agg(funcs) + expected = pd.concat( + [grouped["D"].agg(funcs), grouped["E"].agg(funcs), grouped["F"].agg(funcs)], + keys=["D", "E", "F"], + axis=1, + ) + assert isinstance(agged.index, MultiIndex) + assert isinstance(expected.index, MultiIndex) + tm.assert_frame_equal(agged, expected) + + +def test_frame_multi_key_function_list_partial_failure(): data = DataFrame( { "A": [ @@ -551,18 +598,8 @@ def test_frame_multi_key_function_list(): grouped = data.groupby(["A", "B"]) funcs = [np.mean, np.std] - with tm.assert_produces_warning( - FutureWarning, match=r"\['C'\] did not aggregate successfully" - ): - agged = grouped.agg(funcs) - expected = pd.concat( - [grouped["D"].agg(funcs), grouped["E"].agg(funcs), 
grouped["F"].agg(funcs)], - keys=["D", "E", "F"], - axis=1, - ) - assert isinstance(agged.index, MultiIndex) - assert isinstance(expected.index, MultiIndex) - tm.assert_frame_equal(agged, expected) + with pytest.raises(TypeError, match="Could not convert dullshinyshiny to numeric"): + grouped.agg(funcs) @pytest.mark.parametrize("op", [lambda x: x.sum(), lambda x: x.mean()]) @@ -981,8 +1018,7 @@ def test_wrap_aggregated_output_multindex(mframe): def aggfun(ser): if ser.name == ("foo", "one"): raise TypeError - else: - return ser.sum() + return ser.sum() with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"): agged2 = df.groupby(keys).aggregate(aggfun) @@ -1579,7 +1615,7 @@ def freduce(group): assert group.name is not None return group.sum() - def foo(x): + def freducex(x): return freduce(x) grouped = df.groupby(grouper, group_keys=False) @@ -1592,7 +1628,7 @@ def foo(x): grouped["C"].apply(f) grouped["C"].aggregate(freduce) - grouped["C"].aggregate([freduce, foo]) + grouped["C"].aggregate([freduce, freducex]) grouped["C"].transform(f) @@ -1873,6 +1909,7 @@ def test_pivot_table_values_key_error(): @pytest.mark.parametrize( "op", ["idxmax", "idxmin", "min", "max", "sum", "prod", "skew"] ) +@pytest.mark.filterwarnings("ignore:The default value of numeric_only:FutureWarning") @pytest.mark.filterwarnings("ignore:Dropping invalid columns:FutureWarning") @pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") def test_empty_groupby(columns, keys, values, method, op, request, using_array_manager): @@ -2316,8 +2353,6 @@ def test_group_on_empty_multiindex(transformation_func, request): def test_dup_labels_output_shape(groupby_func, idx): if groupby_func in {"size", "ngroup", "cumcount"}: pytest.skip(f"Not applicable for {groupby_func}") - # TODO(2.0) Remove after pad/backfill deprecation enforced - groupby_func = maybe_normalize_deprecated_kernels(groupby_func) df = DataFrame([[1, 1]], columns=idx) grp_by = df.groupby([0]) diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py index b8aa2a1c9656d..eeedb3d6bb1d0 100644 --- a/pandas/tests/groupby/test_groupby_subclass.py +++ b/pandas/tests/groupby/test_groupby_subclass.py @@ -9,7 +9,6 @@ Series, ) import pandas._testing as tm -from pandas.core.groupby.base import maybe_normalize_deprecated_kernels from pandas.tests.groupby import get_groupby_method_args @@ -25,8 +24,6 @@ def test_groupby_preserves_subclass(obj, groupby_func): if isinstance(obj, Series) and groupby_func in {"corrwith"}: pytest.skip(f"Not applicable for Series and {groupby_func}") - # TODO(2.0) Remove after pad/backfill deprecation enforced - groupby_func = maybe_normalize_deprecated_kernels(groupby_func) grouped = obj.groupby(np.arange(0, 10)) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 728575a80f32f..1c8b8e3d33ecf 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -102,13 +102,13 @@ def test_getitem_numeric_column_names(self): tm.assert_frame_equal(result, expected) tm.assert_frame_equal(result2, expected) - # per GH 23566 this should raise a FutureWarning - with tm.assert_produces_warning(FutureWarning): + # per GH 23566 enforced deprecation raises a ValueError + with pytest.raises(ValueError, match="Cannot subset columns with a tuple"): df.groupby(0)[2, 4].mean() - def test_getitem_single_list_of_columns(self, df): - # per GH 23566 this should raise a FutureWarning - with 
tm.assert_produces_warning(FutureWarning): + def test_getitem_single_tuple_of_columns_raises(self, df): + # per GH 23566 enforced deprecation raises a ValueError + with pytest.raises(ValueError, match="Cannot subset columns with a tuple"): df.groupby("A")["C", "D"].mean() def test_getitem_single_column(self): diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 2b4eba539ec82..119b9929eea22 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -20,7 +20,6 @@ date_range, ) import pandas._testing as tm -from pandas.core.groupby.base import maybe_normalize_deprecated_kernels from pandas.core.groupby.generic import DataFrameGroupBy from pandas.tests.groupby import get_groupby_method_args @@ -166,9 +165,6 @@ def test_transform_broadcast(tsframe, ts): def test_transform_axis_1(request, transformation_func): # GH 36308 - # TODO(2.0) Remove after pad/backfill deprecation enforced - transformation_func = maybe_normalize_deprecated_kernels(transformation_func) - if transformation_func == "ngroup": msg = "ngroup fails with axis=1: #45986" request.node.add_marker(pytest.mark.xfail(reason=msg)) @@ -373,8 +369,6 @@ def test_transform_transformation_func(request, transformation_func): }, index=date_range("2020-01-01", "2020-01-07"), ) - # TODO(2.0) Remove after pad/backfill deprecation enforced - transformation_func = maybe_normalize_deprecated_kernels(transformation_func) if transformation_func == "cumcount": test_op = lambda x: x.transform("cumcount") mock_op = lambda x: Series(range(len(x)), x.index) diff --git a/pandas/tests/indexes/base_class/test_constructors.py b/pandas/tests/indexes/base_class/test_constructors.py index df04502a01f99..cf8b7214f3b91 100644 --- a/pandas/tests/indexes/base_class/test_constructors.py +++ b/pandas/tests/indexes/base_class/test_constructors.py @@ -30,12 +30,6 @@ def test_construction_list_mixed_tuples(self, index_vals): assert isinstance(index, Index) assert not isinstance(index, MultiIndex) - def test_constructor_wrong_kwargs(self): - # GH #19348 - with pytest.raises(TypeError, match="Unexpected keyword arguments {'foo'}"): - with tm.assert_produces_warning(FutureWarning): - Index([], foo="bar") - def test_constructor_cast(self): msg = "could not convert string to float" with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 06c00123566ba..e375af797f409 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -38,11 +38,6 @@ def test_can_hold_identifiers(self): key = idx[0] assert idx._can_hold_identifiers_and_holds_name(key) is True - def test_pickle_compat_construction(self): - # Once the deprecation is enforced, we can use the parent class's test - with tm.assert_produces_warning(FutureWarning, match="without passing data"): - self._index_cls() - def test_insert(self, simple_index): ci = simple_index diff --git a/pandas/tests/indexes/categorical/test_constructors.py b/pandas/tests/indexes/categorical/test_constructors.py index 98da8038401e7..19e8ec19db641 100644 --- a/pandas/tests/indexes/categorical/test_constructors.py +++ b/pandas/tests/indexes/categorical/test_constructors.py @@ -11,17 +11,12 @@ class TestCategoricalIndexConstructors: - def test_construction_without_data_deprecated(self): - # Once the deprecation is enforced, we can add this case to - # 
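Folding the data-less construction into test_construction_disallows_scalar works because both paths now hit the same check:

import pandas as pd
import pytest

msg = "must be called with a collection of some kind"
with pytest.raises(TypeError, match=msg):
    pd.CategoricalIndex(categories=list("abcd"), ordered=False)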
test_construction_disallows_scalar - msg = "without passing data" - with tm.assert_produces_warning(FutureWarning, match=msg): - CategoricalIndex(categories=list("abcd"), ordered=False) - def test_construction_disallows_scalar(self): msg = "must be called with a collection of some kind" with pytest.raises(TypeError, match=msg): CategoricalIndex(data=1, categories=list("abcd"), ordered=False) + with pytest.raises(TypeError, match=msg): + CategoricalIndex(categories=list("abcd"), ordered=False) def test_construction(self): @@ -145,15 +140,5 @@ def test_construction_with_categorical_dtype(self): with pytest.raises(ValueError, match=msg): CategoricalIndex(data, categories=cats, dtype=dtype) - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning): - # passing subclass-specific kwargs to pd.Index - Index(data, categories=cats, dtype=dtype) - with pytest.raises(ValueError, match=msg): CategoricalIndex(data, ordered=ordered, dtype=dtype) - - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning): - # passing subclass-specific kwargs to pd.Index - Index(data, ordered=ordered, dtype=dtype) diff --git a/pandas/tests/indexes/categorical/test_reindex.py b/pandas/tests/indexes/categorical/test_reindex.py index 1337eff1f1c2f..8ca5c6099b4e7 100644 --- a/pandas/tests/indexes/categorical/test_reindex.py +++ b/pandas/tests/indexes/categorical/test_reindex.py @@ -1,4 +1,5 @@ import numpy as np +import pytest from pandas import ( Categorical, @@ -12,37 +13,28 @@ class TestReindex: def test_reindex_list_non_unique(self): # GH#11586 + msg = "cannot reindex on an axis with duplicate labels" ci = CategoricalIndex(["a", "b", "c", "a"]) - with tm.assert_produces_warning(FutureWarning, match="non-unique"): - res, indexer = ci.reindex(["a", "c"]) - - tm.assert_index_equal(res, Index(["a", "a", "c"]), exact=True) - tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + with pytest.raises(ValueError, match=msg): + ci.reindex(["a", "c"]) def test_reindex_categorical_non_unique(self): + msg = "cannot reindex on an axis with duplicate labels" ci = CategoricalIndex(["a", "b", "c", "a"]) - with tm.assert_produces_warning(FutureWarning, match="non-unique"): - res, indexer = ci.reindex(Categorical(["a", "c"])) - - exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"]) - tm.assert_index_equal(res, exp, exact=True) - tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + with pytest.raises(ValueError, match=msg): + ci.reindex(Categorical(["a", "c"])) def test_reindex_list_non_unique_unused_category(self): + msg = "cannot reindex on an axis with duplicate labels" ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) - with tm.assert_produces_warning(FutureWarning, match="non-unique"): - res, indexer = ci.reindex(["a", "c"]) - exp = Index(["a", "a", "c"], dtype="object") - tm.assert_index_equal(res, exp, exact=True) - tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + with pytest.raises(ValueError, match=msg): + ci.reindex(["a", "c"]) def test_reindex_categorical_non_unique_unused_category(self): + msg = "cannot reindex on an axis with duplicate labels" ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) - with tm.assert_produces_warning(FutureWarning, match="non-unique"): - res, indexer = ci.reindex(Categorical(["a", "c"])) - exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"]) - tm.assert_index_equal(res, exp, exact=True) - 
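All four reindex tests in this file now assert the same enforced rule; the shared core is simply:

import pandas as pd
import pytest

ci = pd.CategoricalIndex(["a", "b", "c", "a"])  # note the duplicate "a"
msg = "cannot reindex on an axis with duplicate labels"
with pytest.raises(ValueError, match=msg):
    ci.reindex(["a", "c"])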
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + with pytest.raises(ValueError, match=msg): + ci.reindex(Categorical(["a", "c"])) def test_reindex_duplicate_target(self): # See GH25459 diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 85833224fea10..f2141b0b74ac6 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -699,20 +699,16 @@ def test_engine_reference_cycle(self, simple_index): def test_getitem_2d_deprecated(self, simple_index): # GH#30588, GH#31479 idx = simple_index - msg = "Support for multi-dimensional indexing" - with tm.assert_produces_warning(FutureWarning, match=msg): - res = idx[:, None] - - assert isinstance(res, np.ndarray), type(res) + msg = "Multi-dimensional indexing" + with pytest.raises(ValueError, match=msg): + idx[:, None] if not isinstance(idx, RangeIndex): - # GH#44051 RangeIndex already raises - with tm.assert_produces_warning(FutureWarning, match=msg): - res = idx[True] - assert isinstance(res, np.ndarray), type(res) - with tm.assert_produces_warning(FutureWarning, match=msg): - res = idx[False] - assert isinstance(res, np.ndarray), type(res) + # GH#44051 RangeIndex already raised pre-2.0 with a different message + with pytest.raises(ValueError, match=msg): + idx[True] + with pytest.raises(ValueError, match=msg): + idx[False] else: msg = "only integers, slices" with pytest.raises(IndexError, match=msg): diff --git a/pandas/tests/indexes/datetimes/methods/test_astype.py b/pandas/tests/indexes/datetimes/methods/test_astype.py index a9a35f26d58a3..ccbfd9217373b 100644 --- a/pandas/tests/indexes/datetimes/methods/test_astype.py +++ b/pandas/tests/indexes/datetimes/methods/test_astype.py @@ -214,6 +214,8 @@ def test_astype_raises(self, dtype): # GH 13149, GH 13209 idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN]) msg = "Cannot cast DatetimeIndex to dtype" + if dtype == "datetime64": + msg = "Casting to unit-less dtype 'datetime64' is not supported" with pytest.raises(TypeError, match=msg): idx.astype(dtype) diff --git a/pandas/tests/indexes/datetimes/methods/test_insert.py b/pandas/tests/indexes/datetimes/methods/test_insert.py index 592f4240ee750..2478a3ba799ad 100644 --- a/pandas/tests/indexes/datetimes/methods/test_insert.py +++ b/pandas/tests/indexes/datetimes/methods/test_insert.py @@ -193,36 +193,26 @@ def test_insert_mismatched_tzawareness(self): # TODO: also changes DataFrame.__setitem__ with expansion def test_insert_mismatched_tz(self): # see GH#7299 + # pre-2.0 with mismatched tzs we would cast to object idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx") # mismatched tz -> cast to object (could reasonably cast to same tz or UTC) item = Timestamp("2000-01-04", tz="US/Eastern") - with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): - result = idx.insert(3, item) + result = idx.insert(3, item) expected = Index( - list(idx[:3]) + [item] + list(idx[3:]), - dtype=object, - # once deprecation is enforced - # list(idx[:3]) + [item.tz_convert(idx.tz)] + list(idx[3:]), + list(idx[:3]) + [item.tz_convert(idx.tz)] + list(idx[3:]), name="idx", ) - # once deprecation is enforced - # assert expected.dtype == idx.dtype + assert expected.dtype == idx.dtype tm.assert_index_equal(result, expected) - # mismatched tz -> cast to object (could reasonably cast to same tz) item = datetime(2000, 1, 4, tzinfo=pytz.timezone("US/Eastern")) - with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): - result = 
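test_getitem_2d_deprecated above keeps its old name but now checks the enforced error (GH#30588, GH#31479): multi-dimensional indexing on an Index raises. A standalone sketch with a hand-built integer Index (any non-RangeIndex should behave the same way):

import pandas as pd
import pytest

idx = pd.Index([1, 2, 3])
with pytest.raises(ValueError, match="Multi-dimensional indexing"):
    idx[:, None]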
idx.insert(3, item) + result = idx.insert(3, item) expected = Index( - list(idx[:3]) + [item] + list(idx[3:]), - dtype=object, - # once deprecation is enforced - # list(idx[:3]) + [item.astimezone(idx.tzinfo)] + list(idx[3:]), + list(idx[:3]) + [item.astimezone(idx.tzinfo)] + list(idx[3:]), name="idx", ) - # once deprecation is enforced - # assert expected.dtype == idx.dtype + assert expected.dtype == idx.dtype tm.assert_index_equal(result, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/indexes/datetimes/methods/test_to_frame.py b/pandas/tests/indexes/datetimes/methods/test_to_frame.py index fa5cca1c3e78b..c829109d4e06c 100644 --- a/pandas/tests/indexes/datetimes/methods/test_to_frame.py +++ b/pandas/tests/indexes/datetimes/methods/test_to_frame.py @@ -19,13 +19,10 @@ def test_to_frame_respects_none_name(self): # not changed to 0 # GH-45448 this is first deprecated to only change in the future idx = date_range(start="2019-01-01", end="2019-01-30", freq="D", tz="UTC") - with tm.assert_produces_warning(FutureWarning): - result = idx.to_frame(name=None) - # exp_idx = Index([None], dtype=object) - exp_idx = Index([0]) + result = idx.to_frame(name=None) + exp_idx = Index([None], dtype=object) tm.assert_index_equal(exp_idx, result.columns) - with tm.assert_produces_warning(FutureWarning): - result = idx.rename("foo").to_frame(name=None) - exp_idx = Index(["foo"], dtype=object) + result = idx.rename("foo").to_frame(name=None) + exp_idx = Index([None], dtype=object) tm.assert_index_equal(exp_idx, result.columns) diff --git a/pandas/tests/indexes/datetimes/methods/test_to_period.py b/pandas/tests/indexes/datetimes/methods/test_to_period.py index f6a598bd2a1ed..e8048e63afbf7 100644 --- a/pandas/tests/indexes/datetimes/methods/test_to_period.py +++ b/pandas/tests/indexes/datetimes/methods/test_to_period.py @@ -148,10 +148,9 @@ def test_to_period_tz(self, tz): with tm.assert_produces_warning(UserWarning): # GH#21333 warning that timezone info will be lost # filter warning about freq deprecation - warnings.filterwarnings("ignore", category=FutureWarning) result = ts.to_period()[0] - expected = ts[0].to_period() + expected = ts[0].to_period(ts.freq) assert result == expected @@ -159,7 +158,7 @@ def test_to_period_tz(self, tz): with tm.assert_produces_warning(UserWarning): # GH#21333 warning that timezone info will be lost - result = ts.to_period() + result = ts.to_period(ts.freq) tm.assert_index_equal(result, expected) @@ -168,10 +167,9 @@ def test_to_period_tz_utc_offset_consistency(self, tz): # GH#22905 ts = date_range("1/1/2000", "2/1/2000", tz="Etc/GMT-1") with tm.assert_produces_warning(UserWarning): - warnings.filterwarnings("ignore", category=FutureWarning) result = ts.to_period()[0] - expected = ts[0].to_period() + expected = ts[0].to_period(ts.freq) assert result == expected def test_to_period_nofreq(self): diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 9914f4357cee4..4aaa2b694102d 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -142,11 +142,9 @@ def test_constructor_from_sparse_array(self): Timestamp("2016-05-01T01:00:00.000000"), ] arr = pd.arrays.SparseArray(values) - msg = "will store that array directly" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = Index(arr) - expected = DatetimeIndex(values) - tm.assert_index_equal(result, expected) + result = Index(arr) + assert type(result) is Index + assert 
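The insert hunk above settles the GH#7299 question: a mismatched-tz item is converted to the index's tz instead of forcing object dtype. A minimal version:

import pandas as pd

idx = pd.date_range("2000-01-01", periods=3, freq="D", tz="Asia/Tokyo")
item = pd.Timestamp("2000-01-04", tz="US/Eastern")

result = idx.insert(3, item)
assert result.dtype == idx.dtype  # still datetime64[ns, Asia/Tokyo]
assert result[3] == item  # same instant, now expressed in the index's tz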
     def test_construction_caching(self):
@@ -411,17 +409,6 @@ def test_construction_index_with_mixed_timezones_with_NaT(self):
         assert isinstance(result, DatetimeIndex)
         assert result.tz is None

-        # all NaT with tz
-        with tm.assert_produces_warning(FutureWarning):
-            # subclass-specific kwargs to pd.Index
-            result = Index([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")
-        exp = DatetimeIndex([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")
-
-        tm.assert_index_equal(result, exp, exact=True)
-        assert isinstance(result, DatetimeIndex)
-        assert result.tz is not None
-        assert result.tz == exp.tz
-
     def test_construction_dti_with_mixed_timezones(self):
         # GH 11488 (not changed, added explicit tests)
@@ -479,41 +466,57 @@ def test_construction_dti_with_mixed_timezones(self):
             name="idx",
         )

-        with pytest.raises(ValueError, match=msg):
-            DatetimeIndex(
-                [
-                    Timestamp("2011-01-01 10:00"),
-                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),
-                ],
-                tz="Asia/Tokyo",
-                name="idx",
-            )
+        # pre-2.0 this raised bc of awareness mismatch. in 2.0 with a tz
+        # specified we behave as if this was called pointwise, so
+        # the naive Timestamp is treated as a wall time.
+        dti = DatetimeIndex(
+            [
+                Timestamp("2011-01-01 10:00"),
+                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
+            ],
+            tz="Asia/Tokyo",
+            name="idx",
+        )
+        expected = DatetimeIndex(
+            [
+                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
+                Timestamp("2011-01-02 10:00", tz="US/Eastern").tz_convert("Asia/Tokyo"),
+            ],
+            tz="Asia/Tokyo",
+            name="idx",
+        )
+        tm.assert_index_equal(dti, expected)

-        with pytest.raises(ValueError, match=msg):
-            DatetimeIndex(
-                [
-                    Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
-                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),
-                ],
-                tz="US/Eastern",
-                name="idx",
-            )
+        # pre-2.0 mixed-tz scalars raised even if a tz/dtype was specified.
+        # as of 2.0 we successfully return the requested tz/dtype
+        dti = DatetimeIndex(
+            [
+                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
+                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
+            ],
+            tz="US/Eastern",
+            name="idx",
+        )
+        expected = DatetimeIndex(
+            [
+                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo").tz_convert("US/Eastern"),
+                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
+            ],
+            tz="US/Eastern",
+            name="idx",
+        )
+        tm.assert_index_equal(dti, expected)

-        with pytest.raises(ValueError, match=msg):
-            # passing tz should results in DatetimeIndex, then mismatch raises
-            # TypeError
-            with tm.assert_produces_warning(FutureWarning):
-                # subclass-specific kwargs to pd.Index
-                Index(
-                    [
-                        pd.NaT,
-                        Timestamp("2011-01-01 10:00"),
-                        pd.NaT,
-                        Timestamp("2011-01-02 10:00", tz="US/Eastern"),
-                    ],
-                    tz="Asia/Tokyo",
-                    name="idx",
-                )
+        # same thing but pass dtype instead of tz
+        dti = DatetimeIndex(
+            [
+                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
+                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
+            ],
+            dtype="M8[ns, US/Eastern]",
+            name="idx",
+        )
+        tm.assert_index_equal(dti, expected)

     def test_construction_base_constructor(self):
         arr = [Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-03")]
diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py
index 87bf0199b2528..c5b135880ee8c 100644
--- a/pandas/tests/indexes/datetimes/test_indexing.py
+++ b/pandas/tests/indexes/datetimes/test_indexing.py
@@ -98,11 +98,9 @@ def test_dti_business_getitem(self, freq):
     @pytest.mark.parametrize("freq", ["B", "C"])
     def test_dti_business_getitem_matplotlib_hackaround(self, freq):
         rng = bdate_range(START, END, freq=freq)
-        with tm.assert_produces_warning(FutureWarning):
+        with pytest.raises(ValueError, match="Multi-dimensional indexing"):
             # GH#30588 multi-dimensional indexing deprecated
-            values = rng[:, None]
-        expected = rng.values[:, None]
-        tm.assert_numpy_array_equal(values, expected)
+            rng[:, None]

     def test_getitem_int_list(self):
         dti = date_range(start="1/1/2005", end="12/1/2005", freq="M")
@@ -670,7 +668,6 @@ def test_get_indexer_mixed_dtypes(self, target):
             ([date(9999, 1, 1), date(9999, 1, 1)], [-1, -1]),
         ],
     )
-    @pytest.mark.filterwarnings("ignore:Comparison of Timestamp.*:FutureWarning")
    def test_get_indexer_out_of_bounds_date(self, target, positions):
         values = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")])
diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py
index c69c35ee46307..1dc01a3d7f937 100644
--- a/pandas/tests/indexes/datetimes/test_misc.py
+++ b/pandas/tests/indexes/datetimes/test_misc.py
@@ -18,6 +18,8 @@
 import pandas._testing as tm
 from pandas.core.arrays import DatetimeArray

+from pandas.tseries.frequencies import to_offset
+

 class TestDatetime64:
     def test_no_millisecond_field(self):
@@ -98,9 +100,6 @@ def test_datetimeindex_accessors(self):

         # non boolean accessors -> return Index
         for accessor in DatetimeArray._field_ops:
-            if accessor in ["week", "weekofyear"]:
-                # GH#33595 Deprecate week and weekofyear
-                continue
             res = getattr(dti, accessor)
             assert len(res) == 365
             assert isinstance(res, Index)
@@ -142,41 +141,48 @@ def test_datetimeindex_accessors4(self):
         assert dti.is_month_start[0] == 1

     def test_datetimeindex_accessors5(self):
-        with tm.assert_produces_warning(FutureWarning, match="The 'freq' argument"):
-            tests = [
-                (Timestamp("2013-06-01", freq="M").is_month_start, 1),
-                (Timestamp("2013-06-01", freq="BM").is_month_start, 0),
(Timestamp("2013-06-03", freq="M").is_month_start, 0), - (Timestamp("2013-06-03", freq="BM").is_month_start, 1), - (Timestamp("2013-02-28", freq="Q-FEB").is_month_end, 1), - (Timestamp("2013-02-28", freq="Q-FEB").is_quarter_end, 1), - (Timestamp("2013-02-28", freq="Q-FEB").is_year_end, 1), - (Timestamp("2013-03-01", freq="Q-FEB").is_month_start, 1), - (Timestamp("2013-03-01", freq="Q-FEB").is_quarter_start, 1), - (Timestamp("2013-03-01", freq="Q-FEB").is_year_start, 1), - (Timestamp("2013-03-31", freq="QS-FEB").is_month_end, 1), - (Timestamp("2013-03-31", freq="QS-FEB").is_quarter_end, 0), - (Timestamp("2013-03-31", freq="QS-FEB").is_year_end, 0), - (Timestamp("2013-02-01", freq="QS-FEB").is_month_start, 1), - (Timestamp("2013-02-01", freq="QS-FEB").is_quarter_start, 1), - (Timestamp("2013-02-01", freq="QS-FEB").is_year_start, 1), - (Timestamp("2013-06-30", freq="BQ").is_month_end, 0), - (Timestamp("2013-06-30", freq="BQ").is_quarter_end, 0), - (Timestamp("2013-06-30", freq="BQ").is_year_end, 0), - (Timestamp("2013-06-28", freq="BQ").is_month_end, 1), - (Timestamp("2013-06-28", freq="BQ").is_quarter_end, 1), - (Timestamp("2013-06-28", freq="BQ").is_year_end, 0), - (Timestamp("2013-06-30", freq="BQS-APR").is_month_end, 0), - (Timestamp("2013-06-30", freq="BQS-APR").is_quarter_end, 0), - (Timestamp("2013-06-30", freq="BQS-APR").is_year_end, 0), - (Timestamp("2013-06-28", freq="BQS-APR").is_month_end, 1), - (Timestamp("2013-06-28", freq="BQS-APR").is_quarter_end, 1), - (Timestamp("2013-03-29", freq="BQS-APR").is_year_end, 1), - (Timestamp("2013-11-01", freq="AS-NOV").is_year_start, 1), - (Timestamp("2013-10-31", freq="AS-NOV").is_year_end, 1), - (Timestamp("2012-02-01").days_in_month, 29), - (Timestamp("2013-02-01").days_in_month, 28), - ] + freq_m = to_offset("M") + bm = to_offset("BM") + qfeb = to_offset("Q-FEB") + qsfeb = to_offset("QS-FEB") + bq = to_offset("BQ") + bqs_apr = to_offset("BQS-APR") + as_nov = to_offset("AS-NOV") + + tests = [ + (freq_m.is_month_start(Timestamp("2013-06-01")), 1), + (bm.is_month_start(Timestamp("2013-06-01")), 0), + (freq_m.is_month_start(Timestamp("2013-06-03")), 0), + (bm.is_month_start(Timestamp("2013-06-03")), 1), + (qfeb.is_month_end(Timestamp("2013-02-28")), 1), + (qfeb.is_quarter_end(Timestamp("2013-02-28")), 1), + (qfeb.is_year_end(Timestamp("2013-02-28")), 1), + (qfeb.is_month_start(Timestamp("2013-03-01")), 1), + (qfeb.is_quarter_start(Timestamp("2013-03-01")), 1), + (qfeb.is_year_start(Timestamp("2013-03-01")), 1), + (qsfeb.is_month_end(Timestamp("2013-03-31")), 1), + (qsfeb.is_quarter_end(Timestamp("2013-03-31")), 0), + (qsfeb.is_year_end(Timestamp("2013-03-31")), 0), + (qsfeb.is_month_start(Timestamp("2013-02-01")), 1), + (qsfeb.is_quarter_start(Timestamp("2013-02-01")), 1), + (qsfeb.is_year_start(Timestamp("2013-02-01")), 1), + (bq.is_month_end(Timestamp("2013-06-30")), 0), + (bq.is_quarter_end(Timestamp("2013-06-30")), 0), + (bq.is_year_end(Timestamp("2013-06-30")), 0), + (bq.is_month_end(Timestamp("2013-06-28")), 1), + (bq.is_quarter_end(Timestamp("2013-06-28")), 1), + (bq.is_year_end(Timestamp("2013-06-28")), 0), + (bqs_apr.is_month_end(Timestamp("2013-06-30")), 0), + (bqs_apr.is_quarter_end(Timestamp("2013-06-30")), 0), + (bqs_apr.is_year_end(Timestamp("2013-06-30")), 0), + (bqs_apr.is_month_end(Timestamp("2013-06-28")), 1), + (bqs_apr.is_quarter_end(Timestamp("2013-06-28")), 1), + (bqs_apr.is_year_end(Timestamp("2013-03-29")), 1), + (as_nov.is_year_start(Timestamp("2013-11-01")), 1), + (as_nov.is_year_end(Timestamp("2013-10-31")), 1), + 
(Timestamp("2012-02-01").days_in_month, 29), + (Timestamp("2013-02-01").days_in_month, 28), + ] for ts, value in tests: assert ts == value @@ -287,15 +293,6 @@ def test_iter_readonly(): list(dti) -def test_week_and_weekofyear_are_deprecated(): - # GH#33595 Deprecate week and weekofyear - idx = date_range(start="2019-12-29", freq="D", periods=4) - with tm.assert_produces_warning(FutureWarning): - idx.week - with tm.assert_produces_warning(FutureWarning): - idx.weekofyear - - def test_add_timedelta_preserves_freq(): # GH#37295 should hold for any DTI with freq=None or Tick freq tz = "Canada/Eastern" diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 8ddcd6a453080..cdf78c97c45b5 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -295,12 +295,10 @@ def test_partial_slicing_dataframe(self): expected = df["a"][theslice] tm.assert_series_equal(result, expected) - # Frame should return slice as well - with tm.assert_produces_warning(FutureWarning): - # GH#36179 deprecated this indexing - result = df[ts_string] - expected = df[theslice] - tm.assert_frame_equal(result, expected) + # pre-2.0 df[ts_string] was overloaded to interpret this + # as slicing along index + with pytest.raises(KeyError, match=ts_string): + df[ts_string] # Timestamp with resolution more precise than index # Compatible with existing key diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index 890590094094a..42aba136f378d 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -64,9 +64,7 @@ def test_dti_timestamp_fields(self, field): idx = tm.makeDateIndex(100) expected = getattr(idx, field)[-1] - warn = FutureWarning if field.startswith("is_") else None - with tm.assert_produces_warning(warn, match="Timestamp.freq is deprecated"): - result = getattr(Timestamp(idx[-1]), field) + result = getattr(Timestamp(idx[-1]), field) assert result == expected def test_dti_timestamp_isocalendar_fields(self): @@ -75,22 +73,6 @@ def test_dti_timestamp_isocalendar_fields(self): result = idx[-1].isocalendar() assert result == expected - def test_dti_timestamp_freq_fields(self): - # extra fields from DatetimeIndex like quarter and week - idx = tm.makeDateIndex(100) - - msg = "The 'freq' argument in Timestamp is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - ts = Timestamp(idx[-1], idx.freq) - - msg2 = "Timestamp.freq is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg2): - assert idx.freq == ts.freq - - msg3 = "Timestamp.freqstr is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg3): - assert idx.freqstr == ts.freqstr - # ---------------------------------------------------------------- # DatetimeIndex.round diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index a07f21f785828..0bc2862e55021 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -1155,19 +1155,21 @@ def test_dti_convert_tz_aware_datetime_datetime(self, tz): @pytest.mark.parametrize("setop", ["union", "intersection", "symmetric_difference"]) def test_dti_setop_aware(self, setop): # non-overlapping + # GH#39328 as of 2.0 we cast these to UTC instead of object rng = date_range("2012-11-15 
00:00:00", periods=6, freq="H", tz="US/Central") rng2 = date_range("2012-11-15 12:00:00", periods=6, freq="H", tz="US/Eastern") - with tm.assert_produces_warning(FutureWarning): - # # GH#39328 will cast both to UTC - result = getattr(rng, setop)(rng2) + result = getattr(rng, setop)(rng2) - expected = getattr(rng.astype("O"), setop)(rng2.astype("O")) + left = rng.tz_convert("UTC") + right = rng2.tz_convert("UTC") + expected = getattr(left, setop)(right) tm.assert_index_equal(result, expected) + assert result.tz == left.tz if len(result): - assert result[0].tz.zone == "US/Central" - assert result[-1].tz.zone == "US/Eastern" + assert result[0].tz.zone == "UTC" + assert result[-1].tz.zone == "UTC" def test_dti_union_mixed(self): # GH 21671 diff --git a/pandas/tests/indexes/interval/test_constructors.py b/pandas/tests/indexes/interval/test_constructors.py index a71a8f9e34ea9..f8c6042c5007d 100644 --- a/pandas/tests/indexes/interval/test_constructors.py +++ b/pandas/tests/indexes/interval/test_constructors.py @@ -38,7 +38,6 @@ class ConstructorTests: get_kwargs_from_breaks to the expected format. """ - @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") @pytest.mark.parametrize( "breaks", [ @@ -96,22 +95,16 @@ def test_constructor_dtype(self, constructor, breaks, subtype): ) def test_constructor_pass_closed(self, constructor, breaks): # not passing closed to IntervalDtype, but to IntervalArray constructor - warn = None - if isinstance(constructor, partial) and constructor.func is Index: - # passing kwargs to Index is deprecated - warn = FutureWarning - iv_dtype = IntervalDtype(breaks.dtype) result_kwargs = self.get_kwargs_from_breaks(breaks) for dtype in (iv_dtype, str(iv_dtype)): - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(None): result = constructor(dtype=dtype, closed="left", **result_kwargs) assert result.dtype.closed == "left" - @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") @pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50]) def test_constructor_nan(self, constructor, breaks, closed): # GH 18421 @@ -125,7 +118,6 @@ def test_constructor_nan(self, constructor, breaks, closed): assert result.dtype.subtype == expected_subtype tm.assert_numpy_array_equal(np.array(result), expected_values) - @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") @pytest.mark.parametrize( "breaks", [ @@ -353,9 +345,14 @@ class TestClassConstructors(ConstructorTests): params=[IntervalIndex, partial(Index, dtype="interval")], ids=["IntervalIndex", "Index"], ) - def constructor(self, request): + def klass(self, request): + # We use a separate fixture here to include Index.__new__ with dtype kwarg return request.param + @pytest.fixture + def constructor(self): + return IntervalIndex + def get_kwargs_from_breaks(self, breaks, closed="right"): """ converts intervals in breaks format to a dictionary of kwargs to @@ -380,7 +377,6 @@ def test_generic_errors(self, constructor): override the base class implementation since errors are handled differently; checks unnecessary since caught at the Interval level """ - pass def test_constructor_string(self): # GH23013 @@ -388,12 +384,12 @@ def test_constructor_string(self): # the interval of strings is already forbidden. 
         pass

-    def test_constructor_errors(self, constructor):
+    def test_constructor_errors(self, klass):
         # mismatched closed within intervals with no constructor override
         ivs = [Interval(0, 1, closed="right"), Interval(2, 3, closed="left")]
         msg = "intervals must all be closed on the same side"
         with pytest.raises(ValueError, match=msg):
-            constructor(ivs)
+            klass(ivs)

         # scalar
         msg = (
@@ -401,14 +397,13 @@
             "some kind, 5 was passed"
         )
         with pytest.raises(TypeError, match=msg):
-            constructor(5)
+            klass(5)

         # not an interval; dtype depends on 32bit/windows builds
         msg = "type with value 0 is not an interval"
         with pytest.raises(TypeError, match=msg):
-            constructor([0, 1])
+            klass([0, 1])

-    @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
     @pytest.mark.parametrize(
         "data, closed",
         [
diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py
index 37c13c37d070b..5d077f014dc89 100644
--- a/pandas/tests/indexes/interval/test_interval.py
+++ b/pandas/tests/indexes/interval/test_interval.py
@@ -22,7 +22,7 @@
 import pandas.core.common as com


-@pytest.fixture(scope="class", params=[None, "foo"])
+@pytest.fixture(params=[None, "foo"])
 def name(request):
     return request.param
diff --git a/pandas/tests/indexes/interval/test_interval_range.py b/pandas/tests/indexes/interval/test_interval_range.py
index 2f28c33a3bbc6..18b5af00c8d5d 100644
--- a/pandas/tests/indexes/interval/test_interval_range.py
+++ b/pandas/tests/indexes/interval/test_interval_range.py
@@ -20,7 +20,7 @@
 from pandas.tseries.offsets import Day


-@pytest.fixture(scope="class", params=[None, "foo"])
+@pytest.fixture(params=[None, "foo"])
 def name(request):
     return request.param
diff --git a/pandas/tests/indexes/interval/test_interval_tree.py b/pandas/tests/indexes/interval/test_interval_tree.py
index 3b9de8d9e45d9..45b25f2533afd 100644
--- a/pandas/tests/indexes/interval/test_interval_tree.py
+++ b/pandas/tests/indexes/interval/test_interval_tree.py
@@ -18,7 +18,7 @@ def skipif_32bit(param):
     return pytest.param(param, marks=marks)


-@pytest.fixture(scope="class", params=["int64", "float64", "uint64"])
+@pytest.fixture(params=["int64", "float64", "uint64"])
 def dtype(request):
     return request.param
diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py
index 509daff1262b4..6c31caac4b42d 100644
--- a/pandas/tests/indexes/multi/test_duplicates.py
+++ b/pandas/tests/indexes/multi/test_duplicates.py
@@ -326,19 +326,6 @@ def test_duplicated_series_complex_numbers(dtype):
     tm.assert_series_equal(result, expected)


-def test_multi_drop_duplicates_pos_args_deprecation():
-    # GH#41485
-    idx = MultiIndex.from_arrays([[1, 2, 3, 1], [1, 2, 3, 1]])
-    msg = (
-        "In a future version of pandas all arguments of "
-        "Index.drop_duplicates will be keyword-only"
-    )
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        result = idx.drop_duplicates("last")
-    expected = MultiIndex.from_arrays([[2, 3, 1], [2, 3, 1]])
-    tm.assert_index_equal(expected, result)
-
-
 def test_midx_unique_ea_dtype():
     # GH#48335
     vals_a = Series([1, 2, NA, NA], dtype="Int64")
diff --git a/pandas/tests/indexes/multi/test_equivalence.py b/pandas/tests/indexes/multi/test_equivalence.py
index 18ff73dcb2221..c51b9386d7ec6 100644
--- a/pandas/tests/indexes/multi/test_equivalence.py
+++ b/pandas/tests/indexes/multi/test_equivalence.py
@@ -191,18 +191,7 @@ def test_identical(idx):
     mi2 = mi2.set_names(["new1", "new2"])
     assert mi.identical(mi2)
-    with tm.assert_produces_warning(FutureWarning):
-        # subclass-specific keywords to pd.Index
-        mi3 = Index(mi.tolist(), names=mi.names)
-
-    msg = r"Unexpected keyword arguments {'names'}"
-    with pytest.raises(TypeError, match=msg):
-        with tm.assert_produces_warning(FutureWarning):
-            # subclass-specific keywords to pd.Index
-            Index(mi.tolist(), names=mi.names, tupleize_cols=False)
-
     mi4 = Index(mi.tolist(), tupleize_cols=False)
-    assert mi.identical(mi3)
     assert not mi.identical(mi4)
     assert mi.equals(mi4)
diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py
index 4fff4ca961cf7..70350f0df821b 100644
--- a/pandas/tests/indexes/multi/test_get_set.py
+++ b/pandas/tests/indexes/multi/test_get_set.py
@@ -299,23 +299,6 @@ def test_set_names_with_nlevel_1(inplace):
     tm.assert_index_equal(result, expected)


-def test_multi_set_names_pos_args_deprecation():
-    # GH#41485
-    idx = MultiIndex.from_product([["python", "cobra"], [2018, 2019]])
-    msg = (
-        "In a future version of pandas all arguments of MultiIndex.set_names "
-        "except for the argument 'names' will be keyword-only"
-    )
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        result = idx.set_names(["kind", "year"], None)
-    expected = MultiIndex(
-        levels=[["python", "cobra"], [2018, 2019]],
-        codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
-        names=["kind", "year"],
-    )
-    tm.assert_index_equal(result, expected)
-
-
 @pytest.mark.parametrize("ordered", [True, False])
 def test_set_levels_categorical(ordered):
     # GH13854
diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py
index ef72f1f3ffde8..e2d59e5511a52 100644
--- a/pandas/tests/indexes/multi/test_integrity.py
+++ b/pandas/tests/indexes/multi/test_integrity.py
@@ -233,14 +233,14 @@ def test_level_setting_resets_attributes():

 def test_rangeindex_fallback_coercion_bug():
     # GH 12893
-    foo = pd.DataFrame(np.arange(100).reshape((10, 10)))
-    bar = pd.DataFrame(np.arange(100).reshape((10, 10)))
-    df = pd.concat({"foo": foo.stack(), "bar": bar.stack()}, axis=1)
+    df1 = pd.DataFrame(np.arange(100).reshape((10, 10)))
+    df2 = pd.DataFrame(np.arange(100).reshape((10, 10)))
+    df = pd.concat({"df1": df1.stack(), "df2": df2.stack()}, axis=1)
     df.index.names = ["fizz", "buzz"]

     str(df)
     expected = pd.DataFrame(
-        {"bar": np.arange(100), "foo": np.arange(100)},
+        {"df2": np.arange(100), "df1": np.arange(100)},
         index=MultiIndex.from_product([range(10), range(10)], names=["fizz", "buzz"]),
     )
     tm.assert_frame_equal(df, expected, check_like=True)
diff --git a/pandas/tests/indexes/multi/test_names.py b/pandas/tests/indexes/multi/test_names.py
index cfbc90d1b36bb..5e3a427bc75ba 100644
--- a/pandas/tests/indexes/multi/test_names.py
+++ b/pandas/tests/indexes/multi/test_names.py
@@ -56,9 +56,7 @@ def test_take_preserve_name(idx):
 def test_copy_names():
     # Check that adding a "names" parameter to the copy is honored
     # GH14302
-    with tm.assert_produces_warning(FutureWarning):
-        # subclass-specific kwargs to pd.Index
-        multi_idx = pd.Index([(1, 2), (3, 4)], names=["MyName1", "MyName2"])
+    multi_idx = MultiIndex.from_tuples([(1, 2), (3, 4)], names=["MyName1", "MyName2"])
     multi_idx1 = multi_idx.copy()

     assert multi_idx.equals(multi_idx1)
diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py
index dd62ad8b31fae..4a6fc3a42b3ee 100644
--- a/pandas/tests/indexes/numeric/test_numeric.py
+++ b/pandas/tests/indexes/numeric/test_numeric.py
@@ -139,8 +139,8 @@ def test_constructor_coerce(self, mixed_index, float_index):
         self.check_coerce(mixed_index, Index([1.5, 2, 3, 4, 5]))
         self.check_coerce(float_index, Index(np.arange(5) * 2.5))
-        with tm.assert_produces_warning(FutureWarning, match="will not infer"):
-            result = Index(np.array(np.arange(5) * 2.5, dtype=object))
+        result = Index(np.array(np.arange(5) * 2.5, dtype=object))
+        assert result.dtype == object  # as of 2.0 to match Series
         self.check_coerce(float_index, result.astype("float64"))

     def test_constructor_explicit(self, mixed_index, float_index):
@@ -479,12 +479,13 @@ def test_constructor_corner(self, dtype):
         assert index.values.dtype == index.dtype

         if dtype == np.int64:
-            msg = "will not infer"
-            with tm.assert_produces_warning(FutureWarning, match=msg):
-                without_dtype = Index(arr)
+            without_dtype = Index(arr)
+            # as of 2.0 we do not infer a dtype when we get an object-dtype
+            # ndarray of numbers, matching Series behavior
+            assert without_dtype.dtype == object

             exact = True if index_cls is Int64Index else "equiv"
-            tm.assert_index_equal(index, without_dtype, exact=exact)
+            tm.assert_index_equal(index, without_dtype.astype(np.int64), exact=exact)

         # preventing casting
         arr = np.array([1, "2", 3, "4"], dtype=object)
diff --git a/pandas/tests/indexes/object/test_astype.py b/pandas/tests/indexes/object/test_astype.py
index 91e266e805868..33e45a707df63 100644
--- a/pandas/tests/indexes/object/test_astype.py
+++ b/pandas/tests/indexes/object/test_astype.py
@@ -19,6 +19,6 @@ def test_astype_invalid_nas_to_tdt64_raises():
     # GH#45722 don't cast np.datetime64 NaTs to timedelta64 NaT
     idx = Index([NaT.asm8] * 2, dtype=object)

-    msg = r"Cannot cast Index to dtype timedelta64\[ns\]"
+    msg = r"Invalid type for timedelta scalar: "
     with pytest.raises(TypeError, match=msg):
         idx.astype("m8[ns]")
diff --git a/pandas/tests/indexes/ranges/test_constructors.py b/pandas/tests/indexes/ranges/test_constructors.py
index c4f26220f87d1..74bcaa8529ffc 100644
--- a/pandas/tests/indexes/ranges/test_constructors.py
+++ b/pandas/tests/indexes/ranges/test_constructors.py
@@ -148,8 +148,7 @@ def test_constructor_corner(self):
         arr = np.array([1, 2, 3, 4], dtype=object)
         index = RangeIndex(1, 5)
         assert index.values.dtype == np.int64
-        with tm.assert_produces_warning(FutureWarning, match="will not infer"):
-            expected = Index(arr).astype("int64")
+        expected = Index(arr).astype("int64")
         tm.assert_index_equal(index, expected, exact="equiv")
diff --git a/pandas/tests/indexes/test_any_index.py b/pandas/tests/indexes/test_any_index.py
index 6868279776a91..6d4e7caacc5e4 100644
--- a/pandas/tests/indexes/test_any_index.py
+++ b/pandas/tests/indexes/test_any_index.py
@@ -61,10 +61,10 @@ def test_view_preserves_name(index):
     assert index.view().name == index.name


-def test_ravel_deprecation(index):
-    # GH#19956 ravel returning ndarray is deprecated
-    with tm.assert_produces_warning(FutureWarning):
-        index.ravel()
+def test_ravel(index):
+    # GH#19956 ravel returning ndarray is deprecated, in 2.0 returns a view on self
+    res = index.ravel()
+    tm.assert_index_equal(res, index)


 class TestConversion:
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index bfe462cdf6c15..ef041d7f9e119 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -62,11 +62,11 @@ def test_can_hold_identifiers(self, simple_index):

     @pytest.mark.parametrize("index", ["datetime"], indirect=True)
     def test_new_axis(self, index):
-        with tm.assert_produces_warning(FutureWarning):
+        # TODO: a bunch of scattered tests check this deprecation is enforced.
+        # de-duplicate/centralize them.
+        with pytest.raises(ValueError, match="Multi-dimensional indexing"):
             # GH#30588 multi-dimensional indexing deprecated
-            new_index = index[None, :]
-        assert new_index.ndim == 2
-        assert isinstance(new_index, np.ndarray)
+            index[None, :]

     def test_argsort(self, index):
         with tm.maybe_produces_warning(
@@ -250,9 +250,13 @@ def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, klass):

             if attr == "asi8":
                 result = DatetimeIndex(arg).tz_localize(tz_naive_fixture)
+                tm.assert_index_equal(result, index)
+            elif klass is Index:
+                with pytest.raises(TypeError, match="unexpected keyword"):
+                    klass(arg, tz=tz_naive_fixture)
             else:
                 result = klass(arg, tz=tz_naive_fixture)
-            tm.assert_index_equal(result, index)
+                tm.assert_index_equal(result, index)

             if attr == "asi8":
                 if err:
@@ -267,9 +271,13 @@ def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, klass):

             if attr == "asi8":
                 result = DatetimeIndex(list(arg)).tz_localize(tz_naive_fixture)
+                tm.assert_index_equal(result, index)
+            elif klass is Index:
+                with pytest.raises(TypeError, match="unexpected keyword"):
+                    klass(arg, tz=tz_naive_fixture)
             else:
                 result = klass(list(arg), tz=tz_naive_fixture)
-            tm.assert_index_equal(result, index)
+                tm.assert_index_equal(result, index)

             if attr == "asi8":
                 if err:
@@ -656,23 +664,6 @@ def test_is_numeric(self, index, expected):
     def test_is_object(self, index, expected):
         assert index.is_object() is expected

-    @pytest.mark.parametrize(
-        "index, expected",
-        [
-            ("string", False),
-            ("bool-object", False),
-            ("bool-dtype", False),
-            ("categorical", False),
-            ("int", False),
-            ("datetime", True),
-            ("float", False),
-        ],
-        indirect=["index"],
-    )
-    def test_is_all_dates(self, index, expected):
-        with tm.assert_produces_warning(FutureWarning):
-            assert index.is_all_dates is expected
-
     def test_summary(self, index):
         index._summary()
@@ -1524,15 +1515,15 @@ def test_deprecated_fastpath():


 def test_shape_of_invalid_index():
-    # Currently, it is possible to create "invalid" index objects backed by
+    # Pre-2.0, it was possible to create "invalid" index objects backed by
     # a multi-dimensional array (see https://github.com/pandas-dev/pandas/issues/27125
     # about this). However, as long as this is not solved in general, this test ensures
     # that the returned shape is consistent with this underlying array for
     # compat with matplotlib (see https://github.com/pandas-dev/pandas/issues/27775)
     idx = Index([0, 1, 2, 3])
-    with tm.assert_produces_warning(FutureWarning):
+    with pytest.raises(ValueError, match="Multi-dimensional indexing"):
         # GH#30588 multi-dimensional indexing deprecated
-        assert idx[:, None].shape == (4, 1)
+        idx[:, None]


 def test_validate_1d_input():
@@ -1582,33 +1573,6 @@ def test_construct_from_memoryview(klass, extra_kwargs):
     tm.assert_index_equal(result, expected, exact=True)


-def test_index_set_names_pos_args_deprecation():
-    # GH#41485
-    idx = Index([1, 2, 3, 4])
-    msg = (
-        "In a future version of pandas all arguments of Index.set_names "
-        "except for the argument 'names' will be keyword-only"
-    )
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        result = idx.set_names("quarter", None)
-    expected = Index([1, 2, 3, 4], name="quarter")
-    tm.assert_index_equal(result, expected)
-
-
-def test_drop_duplicates_pos_args_deprecation():
-    # GH#41485
-    idx = Index([1, 2, 3, 1])
-    msg = (
-        "In a future version of pandas all arguments of "
-        "Index.drop_duplicates will be keyword-only"
-    )
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        idx.drop_duplicates("last")
-    result = idx.drop_duplicates("last")
-    expected = Index([2, 3, 1])
-    tm.assert_index_equal(expected, result)
-
-
 def test_get_attributes_dict_deprecated():
     # https://github.com/pandas-dev/pandas/pull/44028
     idx = Index([1, 2, 3, 1])
diff --git a/pandas/tests/indexes/test_indexing.py b/pandas/tests/indexes/test_indexing.py
index 2b7c5745e0c67..57268c07024f7 100644
--- a/pandas/tests/indexes/test_indexing.py
+++ b/pandas/tests/indexes/test_indexing.py
@@ -290,11 +290,9 @@ def test_putmask_with_wrong_mask(self, index):

 def test_getitem_deprecated_float(idx):
     # https://github.com/pandas-dev/pandas/issues/34191
-    with tm.assert_produces_warning(FutureWarning):
-        result = idx[1.0]
-
-    expected = idx[1]
-    assert result == expected
+    msg = "Indexing with a float is no longer supported"
+    with pytest.raises(IndexError, match=msg):
+        idx[1.0]


 @pytest.mark.parametrize(
diff --git a/pandas/tests/indexes/timedeltas/methods/test_astype.py b/pandas/tests/indexes/timedeltas/methods/test_astype.py
index aa2f7b7af8d98..6302f8784e29b 100644
--- a/pandas/tests/indexes/timedeltas/methods/test_astype.py
+++ b/pandas/tests/indexes/timedeltas/methods/test_astype.py
@@ -13,7 +13,6 @@
 )
 import pandas._testing as tm
 from pandas.core.api import (
-    Float64Index,
     Int64Index,
     UInt64Index,
 )
@@ -89,9 +88,12 @@ def test_astype_timedelta64(self):
         # GH 13149, GH 13209
         idx = TimedeltaIndex([1e14, "NaT", NaT, np.NaN])

-        result = idx.astype("timedelta64")
-        expected = Float64Index([1e14] + [np.NaN] * 3, dtype="float64")
-        tm.assert_index_equal(result, expected)
+        msg = (
+            r"Cannot convert from timedelta64\[ns\] to timedelta64. "
" + "Supported resolutions are 's', 'ms', 'us', 'ns'" + ) + with pytest.raises(ValueError, match=msg): + idx.astype("timedelta64") result = idx.astype("timedelta64[ns]") tm.assert_index_equal(result, idx) diff --git a/pandas/tests/indexes/timedeltas/test_constructors.py b/pandas/tests/indexes/timedeltas/test_constructors.py index af932f0f20695..1447e9080313f 100644 --- a/pandas/tests/indexes/timedeltas/test_constructors.py +++ b/pandas/tests/indexes/timedeltas/test_constructors.py @@ -23,17 +23,19 @@ def test_array_of_dt64_nat_raises(self): nat = np.datetime64("NaT", "ns") arr = np.array([nat], dtype=object) - # TODO: should be TypeError? msg = "Invalid type for timedelta scalar" - with pytest.raises(ValueError, match=msg): + with pytest.raises(TypeError, match=msg): TimedeltaIndex(arr) - with pytest.raises(ValueError, match=msg): + with pytest.raises(TypeError, match=msg): TimedeltaArray._from_sequence(arr) - with pytest.raises(ValueError, match=msg): + with pytest.raises(TypeError, match=msg): sequence_to_td64ns(arr) + with pytest.raises(TypeError, match=msg): + to_timedelta(arr) + @pytest.mark.parametrize("unit", ["Y", "y", "M"]) def test_unit_m_y_raises(self, unit): msg = "Units 'M', 'Y', and 'y' are no longer supported" diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index ff4b8564f86ca..4b7140b112bd9 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -82,14 +82,9 @@ def test_get_loc_key_unit_mismatch(self): assert loc == 1 def test_get_loc_key_unit_mismatch_not_castable(self): - # TODO(2.0): once TDA.astype supports m8[s] directly, tdi - # can be constructed directly - tda = to_timedelta(["0 days", "1 days", "2 days"])._data - arr = np.array(tda).astype("m8[s]") - tda2 = type(tda)._simple_new(arr, dtype=arr.dtype) - tdi = TimedeltaIndex(tda2) + tdi = to_timedelta(["0 days", "1 days", "2 days"]).astype("m8[s]") assert tdi.dtype == "m8[s]" - key = tda[0]._as_unit("ns") + Timedelta(1) + key = tdi[0]._as_unit("ns") + Timedelta(1) with pytest.raises(KeyError, match=r"Timedelta\('0 days 00:00:00.000000001'\)"): tdi.get_loc(key) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 701d737535116..01efbfb9ae0c0 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -141,9 +141,12 @@ def test_freq_conversion(self, index_or_series): # We don't support "D" reso, so we use the pre-2.0 behavior # casting to float64 - result = td.astype("timedelta64[D]") - expected = index_or_series([31, 31, 31, np.nan]) - tm.assert_equal(result, expected) + msg = ( + r"Cannot convert from timedelta64\[ns\] to timedelta64\[D\]. 
" + "Supported resolutions are 's', 'ms', 'us', 'ns'" + ) + with pytest.raises(ValueError, match=msg): + td.astype("timedelta64[D]") result = td / np.timedelta64(1, "s") expected = index_or_series( diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index d4354766a203b..c25866c4f09e2 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -342,8 +342,10 @@ def convert_nested_indexer(indexer_type, keys): for indexer_type, k in zip(types, keys) ) if indexer_type_1 is set or indexer_type_2 is set: - with tm.assert_produces_warning(FutureWarning): - result = df.loc[indexer, "Data"] + with pytest.raises(TypeError, match="as an indexer is not supported"): + df.loc[indexer, "Data"] + + return else: result = df.loc[indexer, "Data"] expected = Series( diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index 0cc1e116aa2de..c81473cb945bc 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -71,8 +71,7 @@ def test_xs_partial( ) df = DataFrame(np.random.randn(8, 4), index=index, columns=list("abcd")) - with tm.assert_produces_warning(FutureWarning): - result = df.xs(["foo", "one"]) + result = df.xs(("foo", "one")) expected = df.loc["foo", "one"] tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 2d54a9ba370ca..ee2c06150bf53 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -287,14 +287,11 @@ def test_insert_index_datetimes(self, fill_val, exp_dtype, insert_value): assert expected.dtype == object tm.assert_index_equal(result, expected) - # mismatched tz --> cast to object (could reasonably cast to common tz) ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo") - with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): - result = obj.insert(1, ts) + result = obj.insert(1, ts) # once deprecation is enforced: - # expected = obj.insert(1, ts.tz_convert(obj.dtype.tz)) - # assert expected.dtype == obj.dtype - expected = obj.astype(object).insert(1, ts) + expected = obj.insert(1, ts.tz_convert(obj.dtype.tz)) + assert expected.dtype == obj.dtype tm.assert_index_equal(result, expected) else: @@ -368,12 +365,6 @@ def test_insert_index_period(self, insert, coerced_val, coerced_dtype): expected = obj.astype(object).insert(0, str(insert)) tm.assert_index_equal(result, expected) - msg = r"Unexpected keyword arguments {'freq'}" - with pytest.raises(TypeError, match=msg): - with tm.assert_produces_warning(FutureWarning): - # passing keywords to pd.Index - pd.Index(data, freq="M") - @pytest.mark.xfail(reason="Test not implemented") def test_insert_index_complex128(self): raise NotImplementedError @@ -658,7 +649,8 @@ def test_fillna_datetime(self, index_or_series, fill_val, fill_dtype): [ (pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"), (pd.Timestamp("2012-01-01"), object), - (pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), object), + # pre-2.0 with a mismatched tz we would get object result + (pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), "datetime64[ns, US/Eastern]"), (1, object), ("x", object), ], @@ -677,22 +669,19 @@ def test_fillna_datetime64tz(self, index_or_series, fill_val, fill_dtype): ) assert obj.dtype == "datetime64[ns, US/Eastern]" + if getattr(fill_val, "tz", None) is None: + fv = fill_val + else: 
+            fv = fill_val.tz_convert(tz)
         exp = klass(
             [
                 pd.Timestamp("2011-01-01", tz=tz),
-                fill_val,
-                # Once deprecation is enforced, this becomes:
-                # fill_val.tz_convert(tz) if getattr(fill_val, "tz", None)
-                # is not None else fill_val,
+                fv,
                 pd.Timestamp("2011-01-03", tz=tz),
                 pd.Timestamp("2011-01-04", tz=tz),
             ]
         )
-        warn = None
-        if getattr(fill_val, "tz", None) is not None and fill_val.tz != obj[0].tz:
-            warn = FutureWarning
-        with tm.assert_produces_warning(warn, match="mismatched timezone"):
-            self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
+        self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)

     @pytest.mark.parametrize(
         "fill_val",
@@ -920,23 +909,16 @@ def test_replace_series_datetime_datetime(self, how, to_key, from_key, replacer):
         obj = pd.Series(self.rep[from_key], index=index, name="yyy")
         assert obj.dtype == from_key

-        warn = None
-        rep_ser = pd.Series(replacer)
-        if (
-            isinstance(obj.dtype, pd.DatetimeTZDtype)
-            and isinstance(rep_ser.dtype, pd.DatetimeTZDtype)
-            and obj.dtype != rep_ser.dtype
-        ):
-            # mismatched tz DatetimeArray behavior will change to cast
-            # for setitem-like methods with mismatched tzs GH#44940
-            warn = FutureWarning
-
-        msg = "explicitly cast to object"
-        with tm.assert_produces_warning(warn, match=msg):
-            result = obj.replace(replacer)
+        result = obj.replace(replacer)

         exp = pd.Series(self.rep[to_key], index=index, name="yyy")
-        assert exp.dtype == to_key
+        if isinstance(obj.dtype, pd.DatetimeTZDtype) and isinstance(
+            exp.dtype, pd.DatetimeTZDtype
+        ):
+            # with mismatched tzs, we retain the original dtype as of 2.0
+            exp = exp.astype(obj.dtype)
+        else:
+            assert exp.dtype == to_key

         tm.assert_series_equal(result, exp)
diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index db2fe45faf6de..f7e6665aad253 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -1271,7 +1271,8 @@ def test_iloc_frame_indexer(self):
         # GH#39004
         df = DataFrame({"a": [1, 2, 3]})
         indexer = DataFrame({"a": [True, False, True]})
-        with tm.assert_produces_warning(FutureWarning):
+        msg = "DataFrame indexer for .iloc is not supported. Consider using .loc"
Consider using .loc" + with pytest.raises(TypeError, match=msg): df.iloc[indexer] = 1 msg = ( diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 0502fd445e66e..b3e59da4b0130 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -706,7 +706,7 @@ def run_tests(df, rhs, right_loc, right_iloc): # make frames multi-type & re-run tests for frame in [df, rhs, right_loc, right_iloc]: frame["joe"] = frame["joe"].astype("float64") - frame["jolie"] = frame["jolie"].map("@{}".format) + frame["jolie"] = frame["jolie"].map(lambda x: f"@{x}") right_iloc["joe"] = [1.0, "@-28", "@-20", "@-12", 17.0] right_iloc["jolie"] = ["@2", -26.0, -18.0, -10.0, "@18"] run_tests(df, rhs, right_loc, right_iloc) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 3490d05f13e9d..3b75f9d7ce1be 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -672,11 +672,11 @@ def test_loc_setitem_consistency_slice_column_len(self): ) with tm.assert_produces_warning(None, match=msg): - # timedelta64[m] -> float64, so this cannot be done inplace, so + # timedelta64[m] -> float, so this cannot be done inplace, so # no warning df.loc[:, ("Respondent", "Duration")] = df.loc[ :, ("Respondent", "Duration") - ].astype("timedelta64[m]") + ] / Timedelta(60_000_000_000) expected = Series( [23.0, 12.0, 14.0, 36.0], index=df.index, name=("Respondent", "Duration") @@ -1300,10 +1300,6 @@ def test_loc_getitem_time_object(self, frame_or_series): @pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"]) @pytest.mark.parametrize("dtype", [np.int64, np.float64, complex]) @td.skip_if_no_scipy - @pytest.mark.filterwarnings( - # TODO(2.0): remove filtering; note only needed for using_array_manager - "ignore:The behavior of .astype from SparseDtype.*FutureWarning" - ) def test_loc_getitem_range_from_spmatrix(self, spmatrix_t, dtype): import scipy.sparse @@ -1462,9 +1458,6 @@ def test_loc_setitem_datetime_coercion(self): assert Timestamp("2008-08-08") == df.loc[0, "c"] assert Timestamp("2008-08-08") == df.loc[1, "c"] df.loc[2, "c"] = date(2005, 5, 5) - with tm.assert_produces_warning(FutureWarning): - # Comparing Timestamp to date obj is deprecated - assert Timestamp("2005-05-05") == df.loc[2, "c"] assert Timestamp("2005-05-05").date() == df.loc[2, "c"] @pytest.mark.parametrize("idxer", ["var", ["var"]]) @@ -2072,13 +2065,12 @@ def test_setitem_with_expansion(self): df.time = df.set_index("time").index.tz_localize("UTC") v = df[df.new_col == "new"].set_index("time").index.tz_convert("US/Pacific") - # trying to set a single element on a part of a different timezone - # this converts to object + # pre-2.0 trying to set a single element on a part of a different + # timezone converted to object; in 2.0 it retains dtype df2 = df.copy() - with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): - df2.loc[df2.new_col == "new", "time"] = v + df2.loc[df2.new_col == "new", "time"] = v - expected = Series([v[0], df.loc[1, "time"]], name="time") + expected = Series([v[0].tz_convert("UTC"), df.loc[1, "time"]], name="time") tm.assert_series_equal(df2.time, expected) v = df.loc[df.new_col == "new", "time"] + Timedelta("1s") @@ -2795,16 +2787,13 @@ def test_loc_mixed_int_float(): assert result == 0 -def test_loc_with_positional_slice_deprecation(): +def test_loc_with_positional_slice_raises(): # GH#31840 ser = Series(range(4), index=["A", "B", "C", "D"]) - with 
+    with pytest.raises(TypeError, match="Slicing a positional slice with .loc"):
         ser.loc[:3] = 2

-    expected = Series([2, 2, 2, 3], index=["A", "B", "C", "D"])
-    tm.assert_series_equal(ser, expected)
-

 def test_loc_slice_disallows_positional():
     # GH#16121, GH#24612, GH#31810
@@ -2822,15 +2811,15 @@
     with pytest.raises(TypeError, match=msg):
         obj.loc[1:3]

-    with tm.assert_produces_warning(FutureWarning):
-        # GH#31840 deprecated incorrect behavior
+    with pytest.raises(TypeError, match="Slicing a positional slice with .loc"):
+        # GH#31840 enforce incorrect behavior
         obj.loc[1:3] = 1

     with pytest.raises(TypeError, match=msg):
         df.loc[1:3, 1]

-    with tm.assert_produces_warning(FutureWarning):
-        # GH#31840 deprecated incorrect behavior
+    with pytest.raises(TypeError, match="Slicing a positional slice with .loc"):
+        # GH#31840 enforce incorrect behavior
         df.loc[1:3, 1] = 2
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
index b30b27f5bae1a..ecf247efd74bf 100644
--- a/pandas/tests/internals/test_internals.py
+++ b/pandas/tests/internals/test_internals.py
@@ -356,12 +356,6 @@ def test_split(self):
         for res, exp in zip(result, expected):
             assert_block_equal(res, exp)

-    def test_is_categorical_deprecated(self, fblock):
-        # GH#40571
-        blk = fblock
-        with tm.assert_produces_warning(DeprecationWarning):
-            blk.is_categorical
-

 class TestBlockManager:
     def test_attrs(self):
@@ -795,7 +789,6 @@ def test_get_numeric_data(self, using_copy_on_write):
     )

     def test_get_bool_data(self, using_copy_on_write):
-        msg = "object-dtype columns with all-bool values"
         mgr = create_mgr(
             "int: int; float: float; complex: complex;"
             "str: object; bool: bool; obj: object; dt: datetime",
@@ -803,9 +796,8 @@
         )
         mgr.iset(6, np.array([True, False, True], dtype=np.object_))

-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            bools = mgr.get_bool_data()
-        tm.assert_index_equal(bools.items, Index(["bool", "dt"]))
+        bools = mgr.get_bool_data()
+        tm.assert_index_equal(bools.items, Index(["bool"]))

         tm.assert_almost_equal(
             mgr.iget(mgr.items.get_loc("bool")).internal_values(),
             bools.iget(bools.items.get_loc("bool")).internal_values(),
@@ -824,8 +816,7 @@
         )

         # Check sharing
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            bools2 = mgr.get_bool_data(copy=True)
+        bools2 = mgr.get_bool_data(copy=True)
         bools2.iset(0, np.array([False, True, False]))

         if using_copy_on_write:
             tm.assert_numpy_array_equal(
@@ -1435,11 +1426,3 @@ def test_make_block_no_pandas_array(block_maker):
     )
     assert result.dtype.kind in ["i", "u"]
     assert result.is_extension is False
-
-
-def test_single_block_manager_fastpath_deprecated():
-    # GH#33092
-    ser = Series(range(3))
-    blk = ser._data.blocks[0]
-    with tm.assert_produces_warning(FutureWarning):
-        SingleBlockManager(blk, ser.index, fastpath=True)
diff --git a/pandas/tests/io/__init__.py b/pandas/tests/io/__init__.py
index 3231e38b985af..15294fd0cabbc 100644
--- a/pandas/tests/io/__init__.py
+++ b/pandas/tests/io/__init__.py
@@ -5,9 +5,6 @@
     pytest.mark.filterwarnings(
         "ignore:PY_SSIZE_T_CLEAN will be required.*:DeprecationWarning"
     ),
-    pytest.mark.filterwarnings(
-        "ignore:Block.is_categorical is deprecated:DeprecationWarning"
-    ),
     pytest.mark.filterwarnings(
         r"ignore:`np\.bool` is a deprecated alias:DeprecationWarning"
     ),
@@ -20,8 +17,4 @@
         r"Use 'tree.iter\(\)' or 'list\(tree.iter\(\)\)' instead."
'list\(tree.iter\(\)\)' instead." ":PendingDeprecationWarning" ), - # GH 26552 - pytest.mark.filterwarnings( - "ignore:As the xlwt package is no longer maintained:FutureWarning" - ), ] diff --git a/pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_3.5.2.pickle b/pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_3.5.2.pickle deleted file mode 100644 index 9777319465de6..0000000000000 Binary files a/pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_3.5.2.pickle and /dev/null differ diff --git a/pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_3.5.6.pickle b/pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_3.5.6.pickle deleted file mode 100644 index 88bb6989f5b08..0000000000000 Binary files a/pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_3.5.6.pickle and /dev/null differ diff --git a/pandas/tests/io/data/pickle/sparseframe-0.20.3.pickle.gz b/pandas/tests/io/data/pickle/sparseframe-0.20.3.pickle.gz deleted file mode 100644 index f4ff0dbaa1ff9..0000000000000 Binary files a/pandas/tests/io/data/pickle/sparseframe-0.20.3.pickle.gz and /dev/null differ diff --git a/pandas/tests/io/data/pickle/sparseseries-0.20.3.pickle.gz b/pandas/tests/io/data/pickle/sparseseries-0.20.3.pickle.gz deleted file mode 100644 index b299e7d85808e..0000000000000 Binary files a/pandas/tests/io/data/pickle/sparseseries-0.20.3.pickle.gz and /dev/null differ diff --git a/pandas/tests/io/excel/__init__.py b/pandas/tests/io/excel/__init__.py index e7a182ea63178..419761cbe1d6d 100644 --- a/pandas/tests/io/excel/__init__.py +++ b/pandas/tests/io/excel/__init__.py @@ -9,12 +9,4 @@ pytest.mark.filterwarnings( "ignore:This method will be removed in future versions:DeprecationWarning" ), - # GH 26552 - pytest.mark.filterwarnings( - "ignore:As the xlwt package is no longer maintained:FutureWarning" - ), - # GH 38571 - pytest.mark.filterwarnings( - "ignore:.*In xlrd >= 2.0, only the xls format is supported:FutureWarning" - ), ] diff --git a/pandas/tests/io/excel/test_odswriter.py b/pandas/tests/io/excel/test_odswriter.py index e9dad0c7fedc9..21d31ec8a7fb5 100644 --- a/pandas/tests/io/excel/test_odswriter.py +++ b/pandas/tests/io/excel/test_odswriter.py @@ -19,25 +19,6 @@ def test_write_append_mode_raises(ext): ExcelWriter(f, engine="odf", mode="a") -def test_kwargs(ext): - # GH 42286 - # GH 43445 - # test for error: OpenDocumentSpreadsheet does not accept any arguments - kwargs = {"kwarg": 1} - with tm.ensure_clean(ext) as f: - msg = re.escape("Use of **kwargs is deprecated") - error = re.escape( - "OpenDocumentSpreadsheet() got an unexpected keyword argument 'kwarg'" - ) - with pytest.raises( - TypeError, - match=error, - ): - with tm.assert_produces_warning(FutureWarning, match=msg): - with ExcelWriter(f, engine="odf", **kwargs) as _: - pass - - @pytest.mark.parametrize("engine_kwargs", [None, {"kwarg": 1}]) def test_engine_kwargs(ext, engine_kwargs): # GH 42286 diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index 3b122c8572751..be0428a2b0fce 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -86,19 +86,6 @@ def test_write_cells_merge_styled(ext): assert xcell_a2.font == openpyxl_sty_merged -@pytest.mark.parametrize("iso_dates", [True, False]) -def test_kwargs(ext, iso_dates): - # GH 42286 GH 43445 - kwargs = {"iso_dates": iso_dates} - with tm.ensure_clean(ext) as f: - msg = re.escape("Use of **kwargs is deprecated") - with 
-            with ExcelWriter(f, engine="openpyxl", **kwargs) as writer:
-                assert writer.book.iso_dates == iso_dates
-                # ExcelWriter won't allow us to close without writing something
-                DataFrame().to_excel(writer)
-
-
 @pytest.mark.parametrize("iso_dates", [True, False])
 def test_engine_kwargs_write(ext, iso_dates):
     # GH 42286 GH 43445
@@ -269,7 +256,7 @@ def test_to_excel_with_openpyxl_engine(ext):
         df2 = DataFrame({"B": np.linspace(1, 20, 10)})
         df = pd.concat([df1, df2], axis=1)
         styled = df.style.applymap(
-            lambda val: "color: %s" % ("red" if val < 0 else "black")
+            lambda val: f"color: {'red' if val < 0 else 'black'}"
        ).highlight_max()

         styled.to_excel(filename, engine="openpyxl")
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index 62c93de4d44aa..ee2a8f518cd56 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -538,8 +538,8 @@ def test_reader_dtype_str(self, read_ext, dtype, expected):

     def test_use_nullable_dtypes(self, read_ext):
         # GH#36712
-        if read_ext == ".xlsb":
-            pytest.skip("No engine for filetype: 'xlsb'")
+        if read_ext in (".xlsb", ".xls"):
+            pytest.skip(f"No engine for filetype: '{read_ext}'")

         df = DataFrame(
             {
@@ -564,8 +564,8 @@ def test_use_nullable_dtypes(self, read_ext):

     def test_use_nullabla_dtypes_and_dtype(self, read_ext):
         # GH#36712
-        if read_ext == ".xlsb":
-            pytest.skip("No engine for filetype: 'xlsb'")
+        if read_ext in (".xlsb", ".xls"):
+            pytest.skip(f"No engine for filetype: '{read_ext}'")

         df = DataFrame({"a": [np.nan, 1.0], "b": [2.5, np.nan]})
         with tm.ensure_clean(read_ext) as file_path:
@@ -577,10 +577,10 @@ def test_use_nullabla_dtypes_and_dtype(self, read_ext):

     @td.skip_if_no("pyarrow")
     @pytest.mark.parametrize("storage", ["pyarrow", "python"])
-    def test_use_nullabla_dtypes_string(self, read_ext, storage):
+    def test_use_nullable_dtypes_string(self, read_ext, storage):
         # GH#36712
-        if read_ext == ".xlsb":
-            pytest.skip("No engine for filetype: 'xlsb'")
+        if read_ext in (".xlsb", ".xls"):
+            pytest.skip(f"No engine for filetype: '{read_ext}'")

         import pyarrow as pa
@@ -1308,31 +1308,6 @@ def test_read_excel_nrows_params(
         )
         tm.assert_frame_equal(actual, expected)

-    def test_read_excel_squeeze(self, read_ext):
-        # GH 12157
-        f = "test_squeeze" + read_ext
-
-        with tm.assert_produces_warning(
-            FutureWarning,
-            match="The squeeze argument has been deprecated "
-            "and will be removed in a future version. "
" - 'Append .squeeze\\("columns"\\) to the call to squeeze.\n\n', - ): - actual = pd.read_excel( - f, sheet_name="two_columns", index_col=0, squeeze=True - ) - expected = Series([2, 3, 4], [4, 5, 6], name="b") - expected.index.name = "a" - tm.assert_series_equal(actual, expected) - - actual = pd.read_excel(f, sheet_name="two_columns", squeeze=True) - expected = DataFrame({"a": [4, 5, 6], "b": [2, 3, 4]}) - tm.assert_frame_equal(actual, expected) - - actual = pd.read_excel(f, sheet_name="one_column", squeeze=True) - expected = Series([1, 2, 3], name="a") - tm.assert_series_equal(actual, expected) - def test_deprecated_kwargs(self, read_ext): with pytest.raises(TypeError, match="but 3 positional arguments"): pd.read_excel("test1" + read_ext, "Sheet1", 0) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 897d6969ea6ae..60ee8943c9916 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -27,7 +27,6 @@ ExcelWriter, _OpenpyxlWriter, _XlsxWriter, - _XlwtWriter, register_writer, ) @@ -61,7 +60,6 @@ def set_engine(engine, ext): [ pytest.param(".xlsx", marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")]), pytest.param(".xlsm", marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")]), - pytest.param(".xls", marks=[td.skip_if_no("xlwt"), td.skip_if_no("xlrd")]), pytest.param( ".xlsx", marks=[td.skip_if_no("xlsxwriter"), td.skip_if_no("xlrd")] ), @@ -319,9 +317,6 @@ def test_multiindex_interval_datetimes(self, ext): ".xlsm", marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")], ), - pytest.param( - "xlwt", ".xls", marks=[td.skip_if_no("xlwt"), td.skip_if_no("xlrd")] - ), pytest.param( "xlsxwriter", ".xlsx", @@ -379,7 +374,7 @@ def test_excel_writer_context_manager(self, frame, path): def test_roundtrip(self, frame, path): frame = frame.copy() - frame["A"][:5] = np.nan + frame.iloc[:5, frame.columns.get_loc("A")] = np.nan frame.to_excel(path, "test1") frame.to_excel(path, "test1", columns=["A", "B"]) @@ -449,7 +444,7 @@ def test_ts_frame(self, tsframe, path): def test_basics_with_nan(self, frame, path): frame = frame.copy() - frame["A"][:5] = np.nan + frame.iloc[:5, frame.columns.get_loc("A")] = np.nan frame.to_excel(path, "test1") frame.to_excel(path, "test1", columns=["A", "B"]) frame.to_excel(path, "test1", header=False) @@ -513,7 +508,7 @@ def test_sheets(self, frame, tsframe, path): tsframe.index = index frame = frame.copy() - frame["A"][:5] = np.nan + frame.iloc[:5, frame.columns.get_loc("A")] = np.nan frame.to_excel(path, "test1") frame.to_excel(path, "test1", columns=["A", "B"]) @@ -535,7 +530,7 @@ def test_sheets(self, frame, tsframe, path): def test_colaliases(self, frame, path): frame = frame.copy() - frame["A"][:5] = np.nan + frame.iloc[:5, frame.columns.get_loc("A")] = np.nan frame.to_excel(path, "test1") frame.to_excel(path, "test1", columns=["A", "B"]) @@ -553,7 +548,7 @@ def test_colaliases(self, frame, path): def test_roundtrip_indexlabels(self, merge_cells, frame, path): frame = frame.copy() - frame["A"][:5] = np.nan + frame.iloc[:5, frame.columns.get_loc("A")] = np.nan frame.to_excel(path, "test1") frame.to_excel(path, "test1", columns=["A", "B"]) @@ -1283,7 +1278,6 @@ class TestExcelWriterEngineTests: [ pytest.param(_XlsxWriter, ".xlsx", marks=td.skip_if_no("xlsxwriter")), pytest.param(_OpenpyxlWriter, ".xlsx", marks=td.skip_if_no("openpyxl")), - pytest.param(_XlwtWriter, ".xls", marks=td.skip_if_no("xlwt")), ], ) def test_ExcelWriter_dispatch(self, klass, ext): @@ -1343,21 +1337,6 @@ 
def assert_called_and_reset(cls): df.to_excel(filepath, engine="dummy") DummyClass.assert_called_and_reset() - @pytest.mark.parametrize( - "ext", - [ - pytest.param(".xlsx", marks=td.skip_if_no("xlsxwriter")), - pytest.param(".xlsx", marks=td.skip_if_no("openpyxl")), - pytest.param(".ods", marks=td.skip_if_no("odf")), - ], - ) - def test_engine_kwargs_and_kwargs_raises(self, ext): - # GH 40430 - msg = re.escape("Cannot use both engine_kwargs and **kwargs") - with pytest.raises(ValueError, match=msg): - with ExcelWriter("", engine_kwargs={"a": 1}, b=2): - pass - @td.skip_if_no("xlrd") @td.skip_if_no("openpyxl") diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py index 86141f08f5f2d..1f8fb4b801356 100644 --- a/pandas/tests/io/excel/test_xlrd.py +++ b/pandas/tests/io/excel/test_xlrd.py @@ -2,8 +2,6 @@ import pytest -from pandas.compat._optional import import_optional_dependency - import pandas as pd import pandas._testing as tm @@ -11,12 +9,6 @@ from pandas.io.excel._base import inspect_excel_format xlrd = pytest.importorskip("xlrd") -xlwt = pytest.importorskip("xlwt") - -pytestmark = pytest.mark.filterwarnings( - "ignore:As the xlwt package is no longer maintained:FutureWarning" -) - exts = [".xls"] @@ -31,54 +23,27 @@ def read_ext_xlrd(request): return request.param -def test_read_xlrd_book(read_ext_xlrd, frame): - df = frame - +def test_read_xlrd_book(read_ext_xlrd, datapath): engine = "xlrd" - sheet_name = "SheetA" + sheet_name = "Sheet1" + pth = datapath("io", "data", "excel", "test1.xls") + with xlrd.open_workbook(pth) as book: + with ExcelFile(book, engine=engine) as xl: + result = pd.read_excel(xl, sheet_name=sheet_name, index_col=0) - with tm.ensure_clean(read_ext_xlrd) as pth: - df.to_excel(pth, sheet_name) - with xlrd.open_workbook(pth) as book: - with ExcelFile(book, engine=engine) as xl: - result = pd.read_excel(xl, sheet_name=sheet_name, index_col=0) - tm.assert_frame_equal(df, result) + expected = pd.read_excel( + book, sheet_name=sheet_name, engine=engine, index_col=0 + ) + tm.assert_frame_equal(result, expected) - result = pd.read_excel( - book, sheet_name=sheet_name, engine=engine, index_col=0 - ) - tm.assert_frame_equal(df, result) - -def test_excel_file_warning_with_xlsx_file(datapath): - # GH 29375 - path = datapath("io", "data", "excel", "test1.xlsx") - has_openpyxl = import_optional_dependency("openpyxl", errors="ignore") is not None - if not has_openpyxl: - with tm.assert_produces_warning( - FutureWarning, - raise_on_extra_warnings=False, - match="The xlrd engine is no longer maintained", - ): - ExcelFile(path, engine=None) - else: - with tm.assert_produces_warning(None): - pd.read_excel(path, "Sheet1", engine=None) - - -def test_read_excel_warning_with_xlsx_file(datapath): +def test_read_xlsx_fails(datapath): # GH 29375 + from xlrd.biffh import XLRDError + path = datapath("io", "data", "excel", "test1.xlsx") - has_openpyxl = import_optional_dependency("openpyxl", errors="ignore") is not None - if not has_openpyxl: - with pytest.raises( - ValueError, - match="Your version of xlrd is ", - ): - pd.read_excel(path, "Sheet1", engine=None) - else: - with tm.assert_produces_warning(None): - pd.read_excel(path, "Sheet1", engine=None) + with pytest.raises(XLRDError, match="Excel xlsx file; not supported"): + pd.read_excel(path, engine="xlrd") @pytest.mark.parametrize( diff --git a/pandas/tests/io/excel/test_xlsxwriter.py b/pandas/tests/io/excel/test_xlsxwriter.py index 82d47a13aefbc..477d3b05c0a74 100644 --- 
a/pandas/tests/io/excel/test_xlsxwriter.py +++ b/pandas/tests/io/excel/test_xlsxwriter.py @@ -1,5 +1,4 @@ import contextlib -import re import warnings import pytest @@ -65,17 +64,6 @@ def test_write_append_mode_raises(ext): ExcelWriter(f, engine="xlsxwriter", mode="a") -@pytest.mark.parametrize("nan_inf_to_errors", [True, False]) -def test_kwargs(ext, nan_inf_to_errors): - # GH 42286 - kwargs = {"options": {"nan_inf_to_errors": nan_inf_to_errors}} - with tm.ensure_clean(ext) as f: - msg = re.escape("Use of **kwargs is deprecated") - with tm.assert_produces_warning(FutureWarning, match=msg): - with ExcelWriter(f, engine="xlsxwriter", **kwargs) as writer: - assert writer.book.nan_inf_to_errors == nan_inf_to_errors - - @pytest.mark.parametrize("nan_inf_to_errors", [True, False]) def test_engine_kwargs(ext, nan_inf_to_errors): # GH 42286 diff --git a/pandas/tests/io/excel/test_xlwt.py b/pandas/tests/io/excel/test_xlwt.py deleted file mode 100644 index 3aa405eb1e275..0000000000000 --- a/pandas/tests/io/excel/test_xlwt.py +++ /dev/null @@ -1,146 +0,0 @@ -import re - -import numpy as np -import pytest - -from pandas import ( - DataFrame, - MultiIndex, - options, -) -import pandas._testing as tm - -from pandas.io.excel import ( - ExcelWriter, - _XlwtWriter, -) - -xlwt = pytest.importorskip("xlwt") - -pytestmark = pytest.mark.parametrize("ext,", [".xls"]) - - -def test_excel_raise_error_on_multiindex_columns_and_no_index(ext): - # MultiIndex as columns is not yet implemented 9794 - cols = MultiIndex.from_tuples( - [("site", ""), ("2014", "height"), ("2014", "weight")] - ) - df = DataFrame(np.random.randn(10, 3), columns=cols) - - msg = ( - "Writing to Excel with MultiIndex columns and no index " - "\\('index'=False\\) is not yet implemented." - ) - with pytest.raises(NotImplementedError, match=msg): - with tm.ensure_clean(ext) as path: - df.to_excel(path, index=False) - - -def test_excel_multiindex_columns_and_index_true(ext): - cols = MultiIndex.from_tuples( - [("site", ""), ("2014", "height"), ("2014", "weight")] - ) - df = DataFrame(np.random.randn(10, 3), columns=cols) - with tm.ensure_clean(ext) as path: - df.to_excel(path, index=True) - - -def test_excel_multiindex_index(ext): - # MultiIndex as index works so assert no error #9794 - cols = MultiIndex.from_tuples( - [("site", ""), ("2014", "height"), ("2014", "weight")] - ) - df = DataFrame(np.random.randn(3, 10), index=cols) - with tm.ensure_clean(ext) as path: - df.to_excel(path, index=False) - - -def test_to_excel_styleconverter(ext): - hstyle = { - "font": {"bold": True}, - "borders": {"top": "thin", "right": "thin", "bottom": "thin", "left": "thin"}, - "alignment": {"horizontal": "center", "vertical": "top"}, - } - - xls_style = _XlwtWriter._convert_to_style(hstyle) - assert xls_style.font.bold - assert xlwt.Borders.THIN == xls_style.borders.top - assert xlwt.Borders.THIN == xls_style.borders.right - assert xlwt.Borders.THIN == xls_style.borders.bottom - assert xlwt.Borders.THIN == xls_style.borders.left - assert xlwt.Alignment.HORZ_CENTER == xls_style.alignment.horz - assert xlwt.Alignment.VERT_TOP == xls_style.alignment.vert - - -def test_write_append_mode_raises(ext): - msg = "Append mode is not supported with xlwt!" 
- - with tm.ensure_clean(ext) as f: - with pytest.raises(ValueError, match=msg): - ExcelWriter(f, engine="xlwt", mode="a") - - -def test_to_excel_xlwt_warning(ext): - # GH 26552 - df = DataFrame(np.random.randn(3, 10)) - with tm.ensure_clean(ext) as path: - with tm.assert_produces_warning( - FutureWarning, - match="As the xlwt package is no longer maintained", - ): - df.to_excel(path) - - -def test_option_xls_writer_deprecated(ext): - # GH 26552 - with tm.assert_produces_warning( - FutureWarning, - match="As the xlwt package is no longer maintained", - check_stacklevel=False, - ): - options.io.excel.xls.writer = "xlwt" - - -@pytest.mark.parametrize("style_compression", [0, 2]) -def test_kwargs(ext, style_compression): - # GH 42286 - kwargs = {"style_compression": style_compression} - with tm.ensure_clean(ext) as f: - msg = re.escape("Use of **kwargs is deprecated") - with tm.assert_produces_warning(FutureWarning, match=msg): - with ExcelWriter(f, engine="xlwt", **kwargs) as writer: - assert ( - writer.book._Workbook__styles.style_compression == style_compression - ) - # xlwt won't allow us to close without writing something - DataFrame().to_excel(writer) - - -@pytest.mark.parametrize("style_compression", [0, 2]) -def test_engine_kwargs(ext, style_compression): - # GH 42286 - engine_kwargs = {"style_compression": style_compression} - with tm.ensure_clean(ext) as f: - with ExcelWriter(f, engine="xlwt", engine_kwargs=engine_kwargs) as writer: - assert writer.book._Workbook__styles.style_compression == style_compression - # xlwt won't allow us to close without writing something - DataFrame().to_excel(writer) - - -def test_book_and_sheets_consistent(ext): - # GH#45687 - Ensure sheets is updated if user modifies book - with tm.ensure_clean(ext) as f: - with ExcelWriter(f) as writer: - assert writer.sheets == {} - sheet = writer.book.add_sheet("test_name") - assert writer.sheets == {"test_name": sheet} - - -@pytest.mark.parametrize("attr", ["fm_date", "fm_datetime"]) -def test_deprecated_attr(ext, attr): - # GH#45572 - with tm.ensure_clean(ext) as path: - with ExcelWriter(path, engine="xlwt") as writer: - msg = f"{attr} is not part of the public API" - with tm.assert_produces_warning(FutureWarning, match=msg): - getattr(writer, attr) diff --git a/pandas/tests/io/formats/style/test_deprecated.py b/pandas/tests/io/formats/style/test_deprecated.py deleted file mode 100644 index 863c31ed3cccd..0000000000000 --- a/pandas/tests/io/formats/style/test_deprecated.py +++ /dev/null @@ -1,170 +0,0 @@ -""" -modules collects tests for Styler methods which have been deprecated -""" -import numpy as np -import pytest - -jinja2 = pytest.importorskip("jinja2") - -from pandas import ( - DataFrame, - IndexSlice, - NaT, - Timestamp, -) -import pandas._testing as tm - - -@pytest.fixture -def df(): - return DataFrame({"A": [0, 1], "B": np.random.randn(2)}) - - -@pytest.mark.parametrize("axis", ["index", "columns"]) -def test_hide_index_columns(df, axis): - with tm.assert_produces_warning(FutureWarning): - getattr(df.style, "hide_" + axis)() - - -def test_set_non_numeric_na(): - # GH 21527 28358 - df = DataFrame( - { - "object": [None, np.nan, "foo"], - "datetime": [None, NaT, Timestamp("20120101")], - } - ) - - with tm.assert_produces_warning(FutureWarning): - ctx = df.style.set_na_rep("NA")._translate(True, True) - assert ctx["body"][0][1]["display_value"] == "NA" - assert ctx["body"][0][2]["display_value"] == "NA" - assert ctx["body"][1][1]["display_value"] == "NA" - assert ctx["body"][1][2]["display_value"] == "NA" 
-
-
-def test_where_with_one_style(df):
-    # GH 17474
-    def f(x):
-        return x > 0.5
-
-    style1 = "foo: bar"
-
-    with tm.assert_produces_warning(FutureWarning):
-        result = df.style.where(f, style1)._compute().ctx
-    expected = {
-        (r, c): [("foo", "bar")]
-        for r, row in enumerate(df.index)
-        for c, col in enumerate(df.columns)
-        if f(df.loc[row, col])
-    }
-    assert result == expected
-
-
-@pytest.mark.parametrize(
-    "slice_",
-    [
-        IndexSlice[:],
-        IndexSlice[:, ["A"]],
-        IndexSlice[[1], :],
-        IndexSlice[[1], ["A"]],
-        IndexSlice[:2, ["A", "B"]],
-    ],
-)
-def test_where_subset(df, slice_):
-    # GH 17474
-    def f(x):
-        return x > 0.5
-
-    style1 = "foo: bar"
-    style2 = "baz: foo"
-
-    with tm.assert_produces_warning(FutureWarning):
-        res = df.style.where(f, style1, style2, subset=slice_)._compute().ctx
-    expected = {
-        (r, c): [("foo", "bar") if f(df.loc[row, col]) else ("baz", "foo")]
-        for r, row in enumerate(df.index)
-        for c, col in enumerate(df.columns)
-        if row in df.loc[slice_].index and col in df.loc[slice_].columns
-    }
-    assert res == expected
-
-
-def test_where_subset_compare_with_applymap(df):
-    # GH 17474
-    def f(x):
-        return x > 0.5
-
-    style1 = "foo: bar"
-    style2 = "baz: foo"
-
-    def g(x):
-        return style1 if f(x) else style2
-
-    slices = [
-        IndexSlice[:],
-        IndexSlice[:, ["A"]],
-        IndexSlice[[1], :],
-        IndexSlice[[1], ["A"]],
-        IndexSlice[:2, ["A", "B"]],
-    ]
-
-    for slice_ in slices:
-        with tm.assert_produces_warning(FutureWarning):
-            result = df.style.where(f, style1, style2, subset=slice_)._compute().ctx
-        expected = df.style.applymap(g, subset=slice_)._compute().ctx
-        assert result == expected
-
-
-def test_where_kwargs():
-    df = DataFrame([[1, 2], [3, 4]])
-
-    def f(x, val):
-        return x > val
-
-    with tm.assert_produces_warning(FutureWarning):
-        res = df.style.where(f, "color:green;", "color:red;", val=2)._compute().ctx
-    expected = {
-        (0, 0): [("color", "red")],
-        (0, 1): [("color", "red")],
-        (1, 0): [("color", "green")],
-        (1, 1): [("color", "green")],
-    }
-    assert res == expected
-
-
-def test_set_na_rep():
-    # GH 21527 28358
-    df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"])
-
-    with tm.assert_produces_warning(FutureWarning):
-        ctx = df.style.set_na_rep("NA")._translate(True, True)
-    assert ctx["body"][0][1]["display_value"] == "NA"
-    assert ctx["body"][0][2]["display_value"] == "NA"
-
-    with tm.assert_produces_warning(FutureWarning):
-        ctx = (
-            df.style.set_na_rep("NA")
-            .format(None, na_rep="-", subset=["B"])
-            ._translate(True, True)
-        )
-    assert ctx["body"][0][1]["display_value"] == "NA"
-    assert ctx["body"][0][2]["display_value"] == "-"
-
-
-def test_precision(df):
-    styler = df.style
-    with tm.assert_produces_warning(FutureWarning):
-        s2 = styler.set_precision(1)
-    assert styler is s2
-    assert styler.precision == 1
-
-
-def test_render(df):
-    with tm.assert_produces_warning(FutureWarning):
-        df.style.render()
-
-
-def test_null_color(df):
-    with tm.assert_produces_warning(FutureWarning):
-        df.style.highlight_null(null_color="blue")
diff --git a/pandas/tests/io/formats/style/test_html.py b/pandas/tests/io/formats/style/test_html.py
index 46891863975ea..43eb4cb0502a1 100644
--- a/pandas/tests/io/formats/style/test_html.py
+++ b/pandas/tests/io/formats/style/test_html.py
@@ -802,7 +802,7 @@ def test_rendered_links(type, text, exp, found):
         df = DataFrame([0], index=[text])
         styler = df.style.format_index(hyperlinks="html")
 
-    rendered = '<a href="{0}" target="_blank">{0}</a>'.format(found)
+    rendered = f'<a href="{found}" target="_blank">{found}</a>'
     result = styler.to_html()
     assert (rendered in result) is exp
     assert (text in result) is not exp  # test conversion done when expected and not
@@ -810,6 +810,7 @@ def test_rendered_links(type, text, exp, found):
 
 def test_multiple_rendered_links():
     links = ("www.a.b", "http://a.c", "https://a.d", "ftp://a.e")
+    # pylint: disable-next=consider-using-f-string
     df = DataFrame(["text {} {} text {} {}".format(*links)])
     result = df.style.format(hyperlinks="html").to_html()
     href = '<a href="{0}" target="_blank">{0}</a>'
diff --git a/pandas/tests/io/formats/style/test_matplotlib.py b/pandas/tests/io/formats/style/test_matplotlib.py
index c5b05b4e0d0c1..c19f27dc064d1 100644
--- a/pandas/tests/io/formats/style/test_matplotlib.py
+++ b/pandas/tests/io/formats/style/test_matplotlib.py
@@ -13,7 +13,6 @@
 import matplotlib as mpl
 
 from pandas.io.formats.style import Styler
-from pandas.plotting._matplotlib.compat import mpl_ge_3_6_0
 
 
 @pytest.fixture
@@ -263,7 +262,7 @@ def test_background_gradient_gmap_wrong_series(styler_blank):
 
 @pytest.mark.parametrize(
     "cmap",
-    ["PuBu", mpl.colormaps["PuBu"] if mpl_ge_3_6_0() else mpl.cm.get_cmap("PuBu")],
+    ["PuBu", mpl.colormaps["PuBu"]],
 )
 def test_bar_colormap(cmap):
     data = DataFrame([[1, 2], [3, 4]])
@@ -288,3 +287,17 @@ def test_bar_color_raises(df):
     msg = "`color` and `cmap` cannot both be given"
     with pytest.raises(ValueError, match=msg):
         df.style.bar(color="something", cmap="something else").to_html()
+
+
+@pytest.mark.parametrize(
+    "plot_method",
+    ["scatter", "hexbin"],
+)
+def test_pass_colormap_instance(df, plot_method):
+    # https://github.com/pandas-dev/pandas/issues/49374
+    cmap = mpl.colors.ListedColormap([[1, 1, 1], [0, 0, 0]])
+    df["c"] = df.A + df.B
+    kwargs = dict(x="A", y="B", c="c", colormap=cmap)
+    if plot_method == "hexbin":
+        kwargs["C"] = kwargs.pop("c")
+    getattr(df.plot, plot_method)(**kwargs)
diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py
index 77a996b1f92d6..c97505eacd4c4 100644
--- a/pandas/tests/io/formats/style/test_style.py
+++ b/pandas/tests/io/formats/style/test_style.py
@@ -272,8 +272,6 @@ def test_copy(comprehensive, render, deepcopy, mi_styler, mi_styler_comp):
         styler.to_html()
 
     excl = [
-        "na_rep",  # deprecated
-        "precision",  # deprecated
         "cellstyle_map",  # render time vars..
"cellstyle_map_columns", "cellstyle_map_index", @@ -333,8 +331,6 @@ def test_clear(mi_styler_comp): "cellstyle_map", # execution time only "cellstyle_map_columns", # execution time only "cellstyle_map_index", # execution time only - "precision", # deprecated - "na_rep", # deprecated "template_latex", # render templates are class level "template_html", "template_html_style", @@ -657,10 +653,10 @@ def test_apply_dataframe_return(self, index, columns): ) @pytest.mark.parametrize("axis", [0, 1]) def test_apply_subset(self, slice_, axis, df): - def h(x, foo="bar"): - return Series(f"color: {foo}", index=x.index, name=x.name) + def h(x, color="bar"): + return Series(f"color: {color}", index=x.index, name=x.name) - result = df.style.apply(h, axis=axis, subset=slice_, foo="baz")._compute().ctx + result = df.style.apply(h, axis=axis, subset=slice_, color="baz")._compute().ctx expected = { (r, c): [("color", "baz")] for r, row in enumerate(df.index) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index b3e2e81e95613..f870ef25991df 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -45,8 +45,8 @@ ) import pandas._testing as tm +from pandas.io.formats import printing import pandas.io.formats.format as fmt -import pandas.io.formats.printing as printing use_32bit_repr = is_platform_windows() or not IS64 @@ -209,20 +209,6 @@ def test_show_counts(self, row, columns, show_counts, result): df.info(buf=buf, show_counts=show_counts) assert ("non-null" in buf.getvalue()) is result - def test_show_null_counts_deprecation(self): - # GH37999 - df = DataFrame(1, columns=range(10), index=range(10)) - with tm.assert_produces_warning( - FutureWarning, match="null_counts is deprecated.+" - ): - buf = StringIO() - df.info(buf=buf, null_counts=True) - assert "non-null" in buf.getvalue() - - # GH37999 - with pytest.raises(ValueError, match=r"null_counts used with show_counts.+"): - df.info(null_counts=True, show_counts=True) - def test_repr_truncation(self): max_len = 20 with option_context("display.max_colwidth", max_len): diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index 4fc8a46bad777..5ab7ff085f539 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -5,8 +5,8 @@ import pandas as pd +from pandas.io.formats import printing import pandas.io.formats.format as fmt -import pandas.io.formats.printing as printing def test_adjoin(): diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 2b86e9c7b3de2..51f607f425fa2 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -388,9 +388,7 @@ def test_to_csv_single_level_multi_index(self, ind, expected, frame_or_series): # see gh-19589 obj = frame_or_series(pd.Series([1], ind, name="data")) - with tm.assert_produces_warning(FutureWarning, match="lineterminator"): - # GH#9568 standardize on lineterminator matching stdlib - result = obj.to_csv(line_terminator="\n", header=True) + result = obj.to_csv(lineterminator="\n", header=True) assert result == expected def test_to_csv_string_array_ascii(self): diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index f8015851c9a83..11ee41ed40ce8 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -1335,6 +1335,7 @@ def test_to_latex_multiindex_names(self, name0, name1, 
axes): placeholder = "{}" if any(names) and 1 in axes else " " col_names = [n if (bool(n) and 1 in axes) else placeholder for n in names] observed = df.to_latex() + # pylint: disable-next=consider-using-f-string expected = r"""\begin{tabular}{llrrrr} \toprule & %s & \multicolumn{2}{l}{1} & \multicolumn{2}{l}{2} \\ diff --git a/pandas/tests/io/generate_legacy_storage_files.py b/pandas/tests/io/generate_legacy_storage_files.py index b66631a7d943e..a6a0b2781dc3b 100644 --- a/pandas/tests/io/generate_legacy_storage_files.py +++ b/pandas/tests/io/generate_legacy_storage_files.py @@ -243,9 +243,6 @@ def create_data(): "tz": Timestamp("2011-01-01", tz="US/Eastern"), } - timestamp["freq"] = Timestamp("2011-01-01", freq="D") - timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M") - off = { "DateOffset": DateOffset(years=1), "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824), @@ -327,7 +324,7 @@ def write_legacy_file(): sys.path.insert(0, ".") if not 3 <= len(sys.argv) <= 4: - exit( + sys.exit( "Specify output directory and storage type: generate_legacy_" "storage_files.py " ) @@ -338,7 +335,7 @@ def write_legacy_file(): if storage_type == "pickle": write_legacy_pickles(output_dir=output_dir) else: - exit("storage_type must be one of {'pickle'}") + sys.exit("storage_type must be one of {'pickle'}") if __name__ == "__main__": diff --git a/pandas/tests/io/json/test_deprecated_kwargs.py b/pandas/tests/io/json/test_deprecated_kwargs.py index 79245bc9d34a8..7e3296db75323 100644 --- a/pandas/tests/io/json/test_deprecated_kwargs.py +++ b/pandas/tests/io/json/test_deprecated_kwargs.py @@ -8,19 +8,6 @@ from pandas.io.json import read_json -def test_deprecated_kwargs(): - df = pd.DataFrame({"A": [2, 4, 6], "B": [3, 6, 9]}, index=[0, 1, 2]) - buf = df.to_json(orient="split") - with tm.assert_produces_warning(FutureWarning): - tm.assert_frame_equal(df, read_json(buf, "split")) - buf = df.to_json(orient="columns") - with tm.assert_produces_warning(FutureWarning): - tm.assert_frame_equal(df, read_json(buf, "columns")) - buf = df.to_json(orient="index") - with tm.assert_produces_warning(FutureWarning): - tm.assert_frame_equal(df, read_json(buf, "index")) - - def test_good_kwargs(): df = pd.DataFrame({"A": [2, 4, 6], "B": [3, 6, 9]}, index=[0, 1, 2]) with tm.assert_produces_warning(None): diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index 28545b7ab2cc6..109c6dbb469c9 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -423,11 +423,9 @@ def test_encode_recursion_max(self): class O2: member = 0 - pass class O1: member = 0 - pass decoded_input = O1() decoded_input.member = O2() diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index 52d8abe76ecbc..24b18c8657546 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -21,7 +21,6 @@ from pandas import ( DataFrame, Index, - Series, Timestamp, compat, ) @@ -128,39 +127,6 @@ def test_1000_sep(all_parsers): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("squeeze", [True, False]) -def test_squeeze(all_parsers, squeeze): - data = """\ -a,1 -b,2 -c,3 -""" - parser = all_parsers - index = Index(["a", "b", "c"], name=0) - expected = Series([1, 2, 3], name=1, index=index) - - result = parser.read_csv_check_warnings( - FutureWarning, - "The squeeze argument has been deprecated " - "and will be removed in a future version. 
" - 'Append .squeeze\\("columns"\\) to the call to squeeze.\n\n', - StringIO(data), - index_col=0, - header=None, - squeeze=squeeze, - ) - if not squeeze: - expected = DataFrame(expected) - tm.assert_frame_equal(result, expected) - else: - tm.assert_series_equal(result, expected) - - # see gh-8217 - # - # Series should not be a view. - assert not result._is_view - - @xfail_pyarrow def test_unnamed_columns(all_parsers): data = """A,B,C,, @@ -661,8 +627,8 @@ def test_read_table_same_signature_as_read_csv(all_parsers): assert table_param.annotation == csv_param.annotation assert table_param.kind == csv_param.kind continue - else: - assert table_param == csv_param + + assert table_param == csv_param def test_read_table_equivalency_to_read_csv(all_parsers): @@ -805,17 +771,6 @@ def test_read_csv_line_break_as_separator(kwargs, all_parsers): parser.read_csv(StringIO(data), **kwargs) -def test_read_csv_posargs_deprecation(all_parsers): - # GH 41485 - f = StringIO("a,b\n1,2") - parser = all_parsers - msg = ( - "In a future version of pandas all arguments of read_csv " - "except for the argument 'filepath_or_buffer' will be keyword-only" - ) - parser.read_csv_check_warnings(FutureWarning, msg, f, " ") - - @pytest.mark.parametrize("delimiter", [",", "\t"]) def test_read_table_delim_whitespace_non_default_sep(all_parsers, delimiter): # GH: 35958 @@ -832,36 +787,6 @@ def test_read_table_delim_whitespace_non_default_sep(all_parsers, delimiter): parser.read_table(f, delim_whitespace=True, delimiter=delimiter) -@pytest.mark.parametrize("func", ["read_csv", "read_table"]) -def test_names_and_prefix_not_None_raises(all_parsers, func): - # GH#39123 - f = StringIO("a,b\n1,2") - parser = all_parsers - msg = "Specified named and prefix; you can only specify one." - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning): - getattr(parser, func)(f, names=["a", "b"], prefix="x") - - -@pytest.mark.parametrize("func", ["read_csv", "read_table"]) -@pytest.mark.parametrize("prefix, names", [(None, ["x0", "x1"]), ("x", None)]) -def test_names_and_prefix_explicit_None(all_parsers, names, prefix, func): - # GH42387 - f = StringIO("a,b\n1,2") - expected = DataFrame({"x0": ["a", "1"], "x1": ["b", "2"]}) - parser = all_parsers - if prefix is not None: - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = getattr(parser, func)( - f, names=names, sep=",", prefix=prefix, header=None - ) - else: - result = getattr(parser, func)( - f, names=names, sep=",", prefix=prefix, header=None - ) - tm.assert_frame_equal(result, expected) - - @xfail_pyarrow def test_dict_keys_as_names(all_parsers): # GH: 36928 @@ -894,22 +819,6 @@ def test_encoding_surrogatepass(all_parsers): parser.read_csv(path) -@xfail_pyarrow -@pytest.mark.parametrize("on_bad_lines", ["error", "warn"]) -def test_deprecated_bad_lines_warns(all_parsers, csv1, on_bad_lines): - # GH 15122 - parser = all_parsers - kwds = {f"{on_bad_lines}_bad_lines": False} - parser.read_csv_check_warnings( - FutureWarning, - f"The {on_bad_lines}_bad_lines argument has been deprecated " - "and will be removed in a future version. 
" - "Use on_bad_lines in the future.\n\n", - csv1, - **kwds, - ) - - def test_malformed_second_line(all_parsers): # see GH14782 parser = all_parsers @@ -941,17 +850,6 @@ def test_short_multi_line(all_parsers): tm.assert_frame_equal(result, expected) -def test_read_table_posargs_deprecation(all_parsers): - # https://github.com/pandas-dev/pandas/issues/41485 - data = StringIO("a\tb\n1\t2") - parser = all_parsers - msg = ( - "In a future version of pandas all arguments of read_table " - "except for the argument 'filepath_or_buffer' will be keyword-only" - ) - parser.read_table_check_warnings(FutureWarning, msg, data, " ") - - def test_read_seek(all_parsers): # GH48646 parser = all_parsers diff --git a/pandas/tests/io/parser/common/test_read_errors.py b/pandas/tests/io/parser/common/test_read_errors.py index fc30ebff0d93a..aec0d57bc0fc4 100644 --- a/pandas/tests/io/parser/common/test_read_errors.py +++ b/pandas/tests/io/parser/common/test_read_errors.py @@ -121,18 +121,6 @@ def test_raise_on_no_columns(all_parsers, nrows): parser.read_csv(StringIO(data)) -def test_read_csv_raises_on_header_prefix(all_parsers): - # gh-27394 - parser = all_parsers - msg = "Argument prefix must be None if argument header is not None" - - s = StringIO("0,1\n2,3") - - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning): - parser.read_csv(s, header=0, prefix="_X") - - def test_unexpected_keyword_parameter_exception(all_parsers): # GH-34976 parser = all_parsers @@ -144,66 +132,36 @@ def test_unexpected_keyword_parameter_exception(all_parsers): parser.read_table("foo.tsv", foo=1) -@pytest.mark.parametrize( - "kwargs", - [ - pytest.param( - {"error_bad_lines": False, "warn_bad_lines": False}, - marks=pytest.mark.filterwarnings("ignore"), - ), - {"on_bad_lines": "skip"}, - ], -) -def test_suppress_error_output(all_parsers, capsys, kwargs): +def test_suppress_error_output(all_parsers, capsys): # see gh-15925 parser = all_parsers data = "a\n1\n1,2,3\n4\n5,6,7" expected = DataFrame({"a": [1, 4]}) - result = parser.read_csv(StringIO(data), **kwargs) + result = parser.read_csv(StringIO(data), on_bad_lines="skip") tm.assert_frame_equal(result, expected) captured = capsys.readouterr() assert captured.err == "" -@pytest.mark.filterwarnings("ignore") -@pytest.mark.parametrize( - "kwargs", - [{}, {"error_bad_lines": True}], # Default is True. # Explicitly pass in. 
-) -@pytest.mark.parametrize( - "warn_kwargs", - [{}, {"warn_bad_lines": True}, {"warn_bad_lines": False}], -) -def test_error_bad_lines(all_parsers, kwargs, warn_kwargs): +def test_error_bad_lines(all_parsers): # see gh-15925 parser = all_parsers - kwargs.update(**warn_kwargs) data = "a\n1\n1,2,3\n4\n5,6,7" msg = "Expected 1 fields in line 3, saw 3" with pytest.raises(ParserError, match=msg): - parser.read_csv(StringIO(data), **kwargs) - - -@pytest.mark.parametrize( - "kwargs", - [ - pytest.param( - {"error_bad_lines": False, "warn_bad_lines": True}, - marks=pytest.mark.filterwarnings("ignore"), - ), - {"on_bad_lines": "warn"}, - ], -) -def test_warn_bad_lines(all_parsers, capsys, kwargs): + parser.read_csv(StringIO(data), on_bad_lines="error") + + +def test_warn_bad_lines(all_parsers, capsys): # see gh-15925 parser = all_parsers data = "a\n1\n1,2,3\n4\n5,6,7" expected = DataFrame({"a": [1, 4]}) - result = parser.read_csv(StringIO(data), **kwargs) + result = parser.read_csv(StringIO(data), on_bad_lines="warn") tm.assert_frame_equal(result, expected) captured = capsys.readouterr() @@ -278,20 +236,6 @@ def test_invalid_on_bad_line(all_parsers): parser.read_csv(StringIO(data), on_bad_lines="abc") -@pytest.mark.parametrize("error_bad_lines", [True, False]) -@pytest.mark.parametrize("warn_bad_lines", [True, False]) -def test_conflict_on_bad_line(all_parsers, error_bad_lines, warn_bad_lines): - parser = all_parsers - data = "a\n1\n1,2,3\n4\n5,6,7" - kwds = {"error_bad_lines": error_bad_lines, "warn_bad_lines": warn_bad_lines} - with pytest.raises( - ValueError, - match="Both on_bad_lines and error_bad_lines/warn_bad_lines are set. " - "Please only set on_bad_lines.", - ): - parser.read_csv(StringIO(data), on_bad_lines="error", **kwds) - - def test_bad_header_uniform_error(all_parsers): parser = all_parsers data = "+++123456789...\ncol1,col2,col3,col4\n1,2,3,4\n" diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index e2c7f77aae815..030b38cceeb39 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -9,7 +9,6 @@ import pytest from pandas.errors import ParserWarning -import pandas.util._test_decorators as td import pandas as pd from pandas import ( @@ -22,13 +21,10 @@ StringArray, ) -# TODO(1.4): Change me into xfail at release time -# and xfail individual tests -pytestmark = pytest.mark.usefixtures("pyarrow_skip") - @pytest.mark.parametrize("dtype", [str, object]) @pytest.mark.parametrize("check_orig", [True, False]) +@pytest.mark.usefixtures("pyarrow_xfail") def test_dtype_all_columns(all_parsers, dtype, check_orig): # see gh-3795, gh-6607 parser = all_parsers @@ -53,6 +49,7 @@ def test_dtype_all_columns(all_parsers, dtype, check_orig): tm.assert_frame_equal(result, expected) +@pytest.mark.usefixtures("pyarrow_xfail") def test_dtype_per_column(all_parsers): parser = all_parsers data = """\ @@ -71,6 +68,7 @@ def test_dtype_per_column(all_parsers): tm.assert_frame_equal(result, expected) +@pytest.mark.usefixtures("pyarrow_xfail") def test_invalid_dtype_per_column(all_parsers): parser = all_parsers data = """\ @@ -84,6 +82,7 @@ def test_invalid_dtype_per_column(all_parsers): parser.read_csv(StringIO(data), dtype={"one": "foo", 1: "int"}) +@pytest.mark.usefixtures("pyarrow_xfail") def test_raise_on_passed_int_dtype_with_nas(all_parsers): # see gh-2631 parser = all_parsers @@ -101,6 +100,7 @@ def test_raise_on_passed_int_dtype_with_nas(all_parsers): 
parser.read_csv(StringIO(data), dtype={"DOY": np.int64}, skipinitialspace=True) +@pytest.mark.usefixtures("pyarrow_xfail") def test_dtype_with_converters(all_parsers): parser = all_parsers data = """a,b @@ -132,6 +132,7 @@ def test_numeric_dtype(all_parsers, dtype): tm.assert_frame_equal(expected, result) +@pytest.mark.usefixtures("pyarrow_xfail") def test_boolean_dtype(all_parsers): parser = all_parsers data = "\n".join( @@ -184,6 +185,7 @@ def test_boolean_dtype(all_parsers): tm.assert_frame_equal(result, expected) +@pytest.mark.usefixtures("pyarrow_xfail") def test_delimiter_with_usecols_and_parse_dates(all_parsers): # GH#35873 result = all_parsers.read_csv( @@ -264,6 +266,7 @@ def test_skip_whitespace(c_parser_only, float_precision): tm.assert_series_equal(df.iloc[:, 1], pd.Series([1.2, 2.1, 1.0, 1.2], name="num")) +@pytest.mark.usefixtures("pyarrow_xfail") def test_true_values_cast_to_bool(all_parsers): # GH#34655 text = """a,b @@ -286,6 +289,7 @@ def test_true_values_cast_to_bool(all_parsers): tm.assert_frame_equal(result, expected) +@pytest.mark.usefixtures("pyarrow_xfail") @pytest.mark.parametrize("dtypes, exp_value", [({}, "1"), ({"a.1": "int64"}, 1)]) def test_dtype_mangle_dup_cols(all_parsers, dtypes, exp_value): # GH#35211 @@ -300,6 +304,7 @@ def test_dtype_mangle_dup_cols(all_parsers, dtypes, exp_value): tm.assert_frame_equal(result, expected) +@pytest.mark.usefixtures("pyarrow_xfail") def test_dtype_mangle_dup_cols_single_dtype(all_parsers): # GH#42022 parser = all_parsers @@ -309,6 +314,7 @@ def test_dtype_mangle_dup_cols_single_dtype(all_parsers): tm.assert_frame_equal(result, expected) +@pytest.mark.usefixtures("pyarrow_xfail") def test_dtype_multi_index(all_parsers): # GH 42446 parser = all_parsers @@ -355,6 +361,7 @@ def test_nullable_int_dtype(all_parsers, any_int_ea_dtype): tm.assert_frame_equal(actual, expected) +@pytest.mark.usefixtures("pyarrow_xfail") @pytest.mark.parametrize("default", ["float", "float64"]) def test_dtypes_defaultdict(all_parsers, default): # GH#41574 @@ -368,6 +375,7 @@ def test_dtypes_defaultdict(all_parsers, default): tm.assert_frame_equal(result, expected) +@pytest.mark.usefixtures("pyarrow_xfail") def test_dtypes_defaultdict_mangle_dup_cols(all_parsers): # GH#41574 data = """a,b,a,b,b.1 @@ -381,6 +389,7 @@ def test_dtypes_defaultdict_mangle_dup_cols(all_parsers): tm.assert_frame_equal(result, expected) +@pytest.mark.usefixtures("pyarrow_xfail") def test_dtypes_defaultdict_invalid(all_parsers): # GH#41574 data = """a,b @@ -392,6 +401,7 @@ def test_dtypes_defaultdict_invalid(all_parsers): parser.read_csv(StringIO(data), dtype=dtype) +@pytest.mark.usefixtures("pyarrow_xfail") def test_use_nullable_dtypes(all_parsers): # GH#36712 @@ -435,11 +445,11 @@ def test_use_nullabla_dtypes_and_dtype(all_parsers): tm.assert_frame_equal(result, expected) -@td.skip_if_no("pyarrow") +@pytest.mark.usefixtures("pyarrow_xfail") @pytest.mark.parametrize("storage", ["pyarrow", "python"]) -def test_use_nullabla_dtypes_string(all_parsers, storage): +def test_use_nullable_dtypes_string(all_parsers, storage): # GH#36712 - import pyarrow as pa + pa = pytest.importorskip("pyarrow") with pd.option_context("mode.string_storage", storage): @@ -477,3 +487,40 @@ def test_use_nullable_dtypes_ea_dtype_specified(all_parsers): result = parser.read_csv(StringIO(data), dtype="Int64", use_nullable_dtypes=True) expected = DataFrame({"a": [1], "b": 2}, dtype="Int64") tm.assert_frame_equal(result, expected) + + +def test_use_nullable_dtypes_pyarrow_backend(all_parsers, request): + # 
GH#36712 + pa = pytest.importorskip("pyarrow") + parser = all_parsers + + data = """a,b,c,d,e,f,g,h,i,j +1,2.5,True,a,,,,,12-31-2019, +3,4.5,False,b,6,7.5,True,a,12-31-2019, +""" + with pd.option_context("io.nullable_backend", "pyarrow"): + if parser.engine != "pyarrow": + request.node.add_marker( + pytest.mark.xfail( + raises=NotImplementedError, + reason=f"Not implemented with engine={parser.engine}", + ) + ) + result = parser.read_csv( + StringIO(data), use_nullable_dtypes=True, parse_dates=["i"] + ) + expected = DataFrame( + { + "a": pd.Series([1, 3], dtype="int64[pyarrow]"), + "b": pd.Series([2.5, 4.5], dtype="float64[pyarrow]"), + "c": pd.Series([True, False], dtype="bool[pyarrow]"), + "d": pd.Series(["a", "b"], dtype=pd.ArrowDtype(pa.string())), + "e": pd.Series([pd.NA, 6], dtype="int64[pyarrow]"), + "f": pd.Series([pd.NA, 7.5], dtype="float64[pyarrow]"), + "g": pd.Series([pd.NA, True], dtype="bool[pyarrow]"), + "h": pd.Series(["", "a"], dtype=pd.ArrowDtype(pa.string())), + "i": pd.Series([Timestamp("2019-12-31")] * 2), + "j": pd.Series([pd.NA, pd.NA], dtype="null[pyarrow]"), + } + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index ecc49ea8adb9f..ec08fb0d60648 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -176,6 +176,9 @@ def test_precise_conversion(c_parser_only): normal_errors = [] precise_errors = [] + def error(val: float, actual_val: Decimal) -> Decimal: + return abs(Decimal(f"{val:.100}") - actual_val) + # test numbers between 1 and 2 for num in np.linspace(1.0, 2.0, num=500): # 25 decimal digits of precision @@ -192,11 +195,8 @@ def test_precise_conversion(c_parser_only): ) actual_val = Decimal(text[2:]) - def error(val): - return abs(Decimal(f"{val:.100}") - actual_val) - - normal_errors.append(error(normal_val)) - precise_errors.append(error(precise_val)) + normal_errors.append(error(normal_val, actual_val)) + precise_errors.append(error(precise_val, actual_val)) # round-trip should match float() assert roundtrip_val == float(text[2:]) diff --git a/pandas/tests/io/parser/test_dialect.py b/pandas/tests/io/parser/test_dialect.py index 458d4116558e4..7d2bb6c083cda 100644 --- a/pandas/tests/io/parser/test_dialect.py +++ b/pandas/tests/io/parser/test_dialect.py @@ -97,9 +97,9 @@ def test_dialect_conflict_except_delimiter(all_parsers, custom_dialect, arg, val # arg=None tests when we pass in the dialect without any other arguments. if arg is not None: - if "value" == "dialect": # No conflict --> no warning. + if value == "dialect": # No conflict --> no warning. kwds[arg] = dialect_kwargs[arg] - elif "value" == "default": # Default --> no warning. + elif value == "default": # Default --> no warning. 
from pandas.io.parsers.base_parser import parser_defaults kwds[arg] = parser_defaults[arg] diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py index 4ded70db8bae7..5cb54bb4e2916 100644 --- a/pandas/tests/io/parser/test_header.py +++ b/pandas/tests/io/parser/test_header.py @@ -76,21 +76,6 @@ def test_bool_header_arg(all_parsers, header): parser.read_csv(StringIO(data), header=header) -def test_no_header_prefix(all_parsers): - parser = all_parsers - data = """1,2,3,4,5 -6,7,8,9,10 -11,12,13,14,15 -""" - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = parser.read_csv(StringIO(data), prefix="Field", header=None) - expected = DataFrame( - [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]], - columns=["Field0", "Field1", "Field2", "Field3", "Field4"], - ) - tm.assert_frame_equal(result, expected) - - @skip_pyarrow def test_header_with_index_col(all_parsers): parser = all_parsers @@ -442,7 +427,6 @@ def test_read_only_header_no_rows(all_parsers, kwargs): "kwargs,names", [ ({}, [0, 1, 2, 3, 4]), - ({"prefix": "X"}, ["X0", "X1", "X2", "X3", "X4"]), ( {"names": ["foo", "bar", "baz", "quux", "panda"]}, ["foo", "bar", "baz", "quux", "panda"], @@ -458,11 +442,7 @@ def test_no_header(all_parsers, kwargs, names): expected = DataFrame( [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]], columns=names ) - if "prefix" in kwargs.keys(): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = parser.read_csv(StringIO(data), header=None, **kwargs) - else: - result = parser.read_csv(StringIO(data), header=None, **kwargs) + result = parser.read_csv(StringIO(data), header=None, **kwargs) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 40b08e6d68016..1a8149ae41fcb 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -169,17 +169,11 @@ def date_parser(*date_cols): kwds = { "header": None, "date_parser": date_parser, - "prefix": "X", "parse_dates": {"actual": [1, 2], "nominal": [1, 3]}, "keep_date_col": keep_date_col, + "names": ["X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7", "X8"], } - result = parser.read_csv_check_warnings( - FutureWarning, - "The prefix argument has been deprecated " - "and will be removed in a future version. .*\n\n", - StringIO(data), - **kwds, - ) + result = parser.read_csv(StringIO(data), **kwds) expected = DataFrame( [ @@ -313,17 +307,11 @@ def test_multiple_date_col(all_parsers, keep_date_col): parser = all_parsers kwds = { "header": None, - "prefix": "X", "parse_dates": [[1, 2], [1, 3]], "keep_date_col": keep_date_col, + "names": ["X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7", "X8"], } - result = parser.read_csv_check_warnings( - FutureWarning, - "The prefix argument has been deprecated " - "and will be removed in a future version. .*\n\n", - StringIO(data), - **kwds, - ) + result = parser.read_csv(StringIO(data), **kwds) expected = DataFrame( [ @@ -436,14 +424,13 @@ def test_date_col_as_index_col(all_parsers): KORD,19990127 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 """ parser = all_parsers - kwds = {"header": None, "prefix": "X", "parse_dates": [1], "index_col": 1} - result = parser.read_csv_check_warnings( - FutureWarning, - "The prefix argument has been deprecated " - "and will be removed in a future version. 
.*\n\n", - StringIO(data), - **kwds, - ) + kwds = { + "header": None, + "parse_dates": [1], + "index_col": 1, + "names": ["X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7"], + } + result = parser.read_csv(StringIO(data), **kwds) index = Index( [ @@ -489,17 +476,10 @@ def test_multiple_date_cols_int_cast(all_parsers): kwds = { "header": None, - "prefix": "X", "parse_dates": parse_dates, "date_parser": pd.to_datetime, } - result = parser.read_csv_check_warnings( - FutureWarning, - "The prefix argument has been deprecated " - "and will be removed in a future version. .*\n\n", - StringIO(data), - **kwds, - ) + result = parser.read_csv(StringIO(data), **kwds) expected = DataFrame( [ @@ -530,7 +510,7 @@ def test_multiple_date_cols_int_cast(all_parsers): -0.59, ], ], - columns=["actual", "nominal", "X0", "X4"], + columns=["actual", "nominal", 0, 4], ) # Python can sometimes be flaky about how @@ -1212,7 +1192,7 @@ def test_read_with_parse_dates_scalar_non_bool(all_parsers, kwargs): parser.read_csv(StringIO(data), parse_dates="C", **kwargs) -@pytest.mark.parametrize("parse_dates", [(1,), np.array([4, 5]), {1, 3, 3}]) +@pytest.mark.parametrize("parse_dates", [(1,), np.array([4, 5]), {1, 3}]) def test_read_with_parse_dates_invalid_type(all_parsers, parse_dates): parser = all_parsers msg = ( @@ -1673,7 +1653,6 @@ def _helper_hypothesis_delimited_date(call, date_string, **kwargs): result = call(date_string, **kwargs) except ValueError as er: msg = str(er) - pass return msg, result diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index d6d787df39dfa..3e451239dcd40 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -910,18 +910,6 @@ def test_skiprows_with_iterator(): tm.assert_frame_equal(result, expected_frames[i]) -def test_skiprows_passing_as_positional_deprecated(): - # GH#41485 - data = """0 -1 -2 -""" - with tm.assert_produces_warning(FutureWarning, match="keyword-only"): - result = read_fwf(StringIO(data), [(0, 2)]) - expected = DataFrame({"0": [1, 2]}) - tm.assert_frame_equal(result, expected) - - def test_names_and_infer_colspecs(): # GH#45337 data = """X Y Z diff --git a/pandas/tests/io/parser/usecols/test_usecols_basic.py b/pandas/tests/io/parser/usecols/test_usecols_basic.py index aef1937dcf287..bbf159845b1d6 100644 --- a/pandas/tests/io/parser/usecols/test_usecols_basic.py +++ b/pandas/tests/io/parser/usecols/test_usecols_basic.py @@ -7,6 +7,8 @@ import numpy as np import pytest +from pandas.errors import ParserError + from pandas import ( DataFrame, Index, @@ -402,20 +404,14 @@ def test_usecols_subset_names_mismatch_orig_columns(all_parsers, usecols): @pytest.mark.parametrize("names", [None, ["a", "b"]]) def test_usecols_indices_out_of_bounds(all_parsers, names): - # GH#25623 + # GH#25623 & GH 41130; enforced in 2.0 parser = all_parsers data = """ a,b 1,2 """ - with tm.assert_produces_warning( - FutureWarning, check_stacklevel=False, raise_on_extra_warnings=False - ): - result = parser.read_csv(StringIO(data), usecols=[0, 2], names=names, header=0) - expected = DataFrame({"a": [1], "b": [None]}) - if names is None and parser.engine == "python": - expected = DataFrame({"a": [1]}) - tm.assert_frame_equal(result, expected) + with pytest.raises(ParserError, match="Defining usecols without of bounds"): + parser.read_csv(StringIO(data), usecols=[0, 2], names=names, header=0) def test_usecols_additional_columns(all_parsers): diff --git a/pandas/tests/io/pytables/test_store.py 
b/pandas/tests/io/pytables/test_store.py index 08b1ee3f0ddbe..3add6e1482687 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -1009,15 +1009,6 @@ def test_to_hdf_with_object_column_names(tmp_path, setup_path): assert len(result) -def test_hdfstore_iteritems_deprecated(tmp_path, setup_path): - path = tmp_path / setup_path - df = DataFrame({"a": [1]}) - with HDFStore(path, mode="w") as hdf: - hdf.put("table", df) - with tm.assert_produces_warning(FutureWarning): - next(hdf.iteritems()) - - def test_hdfstore_strides(setup_path): # GH22073 df = DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}) diff --git a/pandas/tests/io/pytables/test_subclass.py b/pandas/tests/io/pytables/test_subclass.py index 823d2875c5417..27843415f367e 100644 --- a/pandas/tests/io/pytables/test_subclass.py +++ b/pandas/tests/io/pytables/test_subclass.py @@ -13,6 +13,9 @@ ) pytest.importorskip("tables") +pytestmark = pytest.mark.filterwarnings( + "ignore:`np.object` is a deprecated alias:DeprecationWarning" +) class TestHDFStoreSubclass: diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 4a6ec7cfd2ae3..145682b484100 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -317,9 +317,6 @@ def test_read_expands_user_home_dir( ), ], ) - @pytest.mark.filterwarnings( - "ignore:CategoricalBlock is deprecated:DeprecationWarning" - ) @pytest.mark.filterwarnings( # pytables np.object usage "ignore:`np.object` is a deprecated alias:DeprecationWarning" ) @@ -342,7 +339,7 @@ def test_read_fspath_all(self, reader, module, path, datapath): "writer_name, writer_kwargs, module", [ ("to_csv", {}, "os"), - ("to_excel", {"engine": "xlwt"}, "xlwt"), + ("to_excel", {"engine": "openpyxl"}, "openpyxl"), ("to_feather", {}, "pyarrow"), ("to_html", {}, "os"), ("to_json", {}, "os"), diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 236a7f9e1a9c1..eaeb769a94c38 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -15,7 +15,6 @@ @filter_sparse @pytest.mark.single_cpu -@pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:DeprecationWarning") class TestFeather: def check_error_on_write(self, df, exc, err_msg): # check that we are raising the exception diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py index 82f5bdda2a4c5..a1c597087788c 100644 --- a/pandas/tests/io/test_fsspec.py +++ b/pandas/tests/io/test_fsspec.py @@ -74,13 +74,9 @@ def test_to_csv(cleared_fs, df1): tm.assert_frame_equal(df1, df2) -@pytest.mark.parametrize("ext", ["xls", "xlsx"]) -def test_to_excel(cleared_fs, ext, df1): - if ext == "xls": - pytest.importorskip("xlwt") - else: - pytest.importorskip("openpyxl") - +def test_to_excel(cleared_fs, df1): + pytest.importorskip("openpyxl") + ext = "xlsx" path = f"memory://test/test.{ext}" df1.to_excel(path, index=True) @@ -132,12 +128,9 @@ def test_read_table_options(fsspectest): assert fsspectest.test[0] == "csv_read" -@pytest.mark.parametrize("extension", ["xlsx", "xls"]) -def test_excel_options(fsspectest, extension): - if extension == "xls": - pytest.importorskip("xlwt") - else: - pytest.importorskip("openpyxl") +def test_excel_options(fsspectest): + pytest.importorskip("openpyxl") + extension = "xlsx" df = DataFrame({"a": [0]}) @@ -218,7 +211,7 @@ def test_from_s3_csv(s3_resource, tips_file, s3so): @td.skip_if_no("s3fs") def test_s3_protocols(s3_resource, tips_file, protocol, s3so): tm.assert_equal( - 
read_csv("%s://pandas-test/tips.csv" % protocol, storage_options=s3so), + read_csv(f"{protocol}://pandas-test/tips.csv", storage_options=s3so), read_csv(tips_file), ) diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py index 3e94a31b3b25c..e3333025da547 100644 --- a/pandas/tests/io/test_gcs.py +++ b/pandas/tests/io/test_gcs.py @@ -74,7 +74,7 @@ def test_to_read_gcs(gcs_buffer, format): df1.to_csv(path, index=True) df2 = read_csv(path, parse_dates=["dt"], index_col=0) elif format == "excel": - path = "gs://test/test.xls" + path = "gs://test/test.xlsx" df1.to_excel(path) df2 = read_excel(path, parse_dates=["dt"], index_col=0) elif format == "json": diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 0ea1203359153..daa2dffeaa143 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -101,7 +101,6 @@ def test_same_ordering(datapath): pytest.param("bs4", marks=[td.skip_if_no("bs4"), td.skip_if_no("html5lib")]), pytest.param("lxml", marks=td.skip_if_no("lxml")), ], - scope="class", ) class TestReadHtml: @pytest.fixture @@ -112,7 +111,7 @@ def spam_data(self, datapath): def banklist_data(self, datapath): return datapath("io", "data", "html", "banklist.html") - @pytest.fixture(autouse=True, scope="function") + @pytest.fixture(autouse=True) def set_defaults(self, flavor): self.read_html = partial(read_html, flavor=flavor) yield @@ -126,39 +125,13 @@ def test_to_html_compat(self): c_idx_names=False, r_idx_names=False, ) - .applymap("{:.3f}".format) - .astype(float) + # pylint: disable-next=consider-using-f-string + .applymap("{:.3f}".format).astype(float) ) out = df.to_html() res = self.read_html(out, attrs={"class": "dataframe"}, index_col=0)[0] tm.assert_frame_equal(res, df) - @pytest.mark.network - @tm.network( - url=( - "https://www.fdic.gov/resources/resolutions/" - "bank-failures/failed-bank-list/index.html" - ), - check_before_test=True, - ) - def test_banklist_url_positional_match(self): - url = "https://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/index.html" # noqa E501 - # Passing match argument as positional should cause a FutureWarning. 
- with tm.assert_produces_warning(FutureWarning): - df1 = self.read_html( - # lxml cannot find attrs leave out for now - url, - "First Federal Bank of Florida", # attrs={"class": "dataTable"} - ) - with tm.assert_produces_warning(FutureWarning): - # lxml cannot find attrs leave out for now - df2 = self.read_html( - url, - "Metcalf Bank", - ) # attrs={"class": "dataTable"}) - - assert_framelist_equal(df1, df2) - @pytest.mark.network @tm.network( url=( diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index 0bb320907b813..a0acf160854ac 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -14,10 +14,6 @@ pytest.importorskip("pyarrow.orc") -pytestmark = pytest.mark.filterwarnings( - "ignore:RangeIndex.* is deprecated:DeprecationWarning" -) - @pytest.fixture def dirpath(datapath): diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 9c85ab4ba4a57..75683a1d96bfb 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -53,11 +53,6 @@ _HAVE_FASTPARQUET = False -pytestmark = pytest.mark.filterwarnings( - "ignore:RangeIndex.* is deprecated:DeprecationWarning" -) - - # TODO(ArrayManager) fastparquet relies on BlockManager internals # setup engines & skips @@ -688,7 +683,6 @@ def test_read_empty_array(self, pa, dtype): ) -@pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:DeprecationWarning") class TestParquetPyArrow(Base): def test_basic(self, pa, df_full): diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index d78cb9e46cd1a..930e547d5cba8 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -26,7 +26,6 @@ import uuid from warnings import ( catch_warnings, - filterwarnings, simplefilter, ) import zipfile @@ -56,10 +55,6 @@ MonthEnd, ) -pytestmark = pytest.mark.filterwarnings( - "ignore:Timestamp.freq is deprecated:FutureWarning" -) - @pytest.fixture(scope="module") def current_pickle_data(): @@ -67,10 +62,6 @@ def current_pickle_data(): from pandas.tests.io.generate_legacy_storage_files import create_pickle_data with catch_warnings(): - filterwarnings( - "ignore", "The 'freq' argument in Timestamp", category=FutureWarning - ) - return create_pickle_data() @@ -89,7 +80,6 @@ def compare_element(result, expected, typ): assert result is pd.NaT else: assert result == expected - assert result.freq == expected.freq else: comparator = getattr(tm, f"assert_{typ}_equal", tm.assert_almost_equal) comparator(result, expected) @@ -215,7 +205,6 @@ def python_unpickler(path): ], ) @pytest.mark.parametrize("writer", [pd.to_pickle, python_pickler]) -@pytest.mark.filterwarnings("ignore:The 'freq' argument in Timestamp:FutureWarning") def test_round_trip_current(current_pickle_data, pickle_writer, writer): data = current_pickle_data for typ, dv in data.items(): @@ -254,28 +243,6 @@ def test_pickle_path_localpath(): tm.assert_frame_equal(df, result) -@pytest.mark.parametrize("typ", ["sparseseries", "sparseframe"]) -def test_legacy_sparse_warning(datapath, typ): - """ - - Generated with - - >>> df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [0, 0, 1, 1]}).to_sparse() - >>> df.to_pickle("pandas/tests/io/data/pickle/sparseframe-0.20.3.pickle.gz", - ... compression="gzip") - - >>> s = df['B'] - >>> s.to_pickle("pandas/tests/io/data/pickle/sparseseries-0.20.3.pickle.gz", - ... 
compression="gzip") - """ - with tm.assert_produces_warning(FutureWarning): - simplefilter("ignore", DeprecationWarning) # from boto - pd.read_pickle( - datapath("io", "data", "pickle", f"{typ}-0.20.3.pickle.gz"), - compression="gzip", - ) - - # --------------------- # test pickle compression # --------------------- diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 9adada8afb2c2..f89a06636a0c0 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -55,7 +55,7 @@ ) import pandas._testing as tm -import pandas.io.sql as sql +from pandas.io import sql from pandas.io.sql import ( SQLAlchemyEngine, SQLDatabase, @@ -2181,26 +2181,26 @@ def test_connectable_issue_example(self): # https://github.com/pandas-dev/pandas/issues/10104 from sqlalchemy.engine import Engine - def foo(connection): + def test_select(connection): query = "SELECT test_foo_data FROM test_foo_data" return sql.read_sql_query(query, con=connection) - def bar(connection, data): + def test_append(connection, data): data.to_sql(name="test_foo_data", con=connection, if_exists="append") - def baz(conn): + def test_connectable(conn): # https://github.com/sqlalchemy/sqlalchemy/commit/ # 00b5c10846e800304caa86549ab9da373b42fa5d#r48323973 - foo_data = foo(conn) - bar(conn, foo_data) + foo_data = test_select(conn) + test_append(conn, foo_data) def main(connectable): if isinstance(connectable, Engine): with connectable.connect() as conn: with conn.begin(): - baz(conn) + test_connectable(conn) else: - baz(connectable) + test_connectable(connectable) assert ( DataFrame({"test_foo_data": [0, 1, 2]}).to_sql("test_foo_data", self.conn) @@ -2373,21 +2373,21 @@ def test_row_object_is_named_tuple(self): class Test(BaseModel): __tablename__ = "test_frame" id = Column(Integer, primary_key=True) - foo = Column(String(50)) + string_column = Column(String(50)) BaseModel.metadata.create_all(self.conn) Session = sessionmaker(bind=self.conn) with Session() as session: - df = DataFrame({"id": [0, 1], "foo": ["hello", "world"]}) + df = DataFrame({"id": [0, 1], "string_column": ["hello", "world"]}) assert ( df.to_sql("test_frame", con=self.conn, index=False, if_exists="replace") == 2 ) session.commit() - foo = session.query(Test.id, Test.foo) - df = DataFrame(foo) + test_query = session.query(Test.id, Test.string_column) + df = DataFrame(test_query) - assert list(df.columns) == ["id", "foo"] + assert list(df.columns) == ["id", "string_column"] class _TestMySQLAlchemy: diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index baebc562fc5ff..0829ece64c451 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -1096,19 +1096,6 @@ def test_stylesheet_file(datapath): tm.assert_frame_equal(df_kml, df_iter) -def test_read_xml_passing_as_positional_deprecated(datapath, parser): - # GH#45133 - kml = datapath("io", "data", "xml", "cta_rail_lines.kml") - - with tm.assert_produces_warning(FutureWarning, match="keyword-only"): - read_xml( - kml, - ".//k:Placemark", - namespaces={"k": "http://www.opengis.net/kml/2.2"}, - parser=parser, - ) - - @td.skip_if_no("lxml") def test_stylesheet_file_like(datapath, mode): kml = datapath("io", "data", "xml", "cta_rail_lines.kml") diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 55a5473ce7d0f..20de38ebf6665 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -47,7 +47,7 @@ def plt(self): @cache_readonly def colorconverter(self): - import matplotlib.colors as 
colors + from matplotlib import colors return colors.colorConverter @@ -510,18 +510,10 @@ def _unpack_cycler(self, rcParams, field="color"): return [v[field] for v in rcParams["axes.prop_cycle"]] def get_x_axis(self, ax): - from pandas.plotting._matplotlib.compat import mpl_ge_3_5_0 - - if mpl_ge_3_5_0(): - return ax._shared_axes["x"] - return ax._shared_x_axes + return ax._shared_axes["x"] def get_y_axis(self, ax): - from pandas.plotting._matplotlib.compat import mpl_ge_3_5_0 - - if mpl_ge_3_5_0(): - return ax._shared_axes["y"] - return ax._shared_y_axes + return ax._shared_axes["y"] def _check_plot_works(f, filterwarnings="always", default_axes=False, **kwargs): diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 44f57b02d0f0a..73b723ba7f597 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -22,6 +22,7 @@ Series, bdate_range, date_range, + plotting, ) import pandas._testing as tm from pandas.tests.plotting.common import ( @@ -30,17 +31,11 @@ ) from pandas.io.formats.printing import pprint_thing -import pandas.plotting as plotting - -try: - from pandas.plotting._matplotlib.compat import mpl_ge_3_6_0 -except ImportError: - mpl_ge_3_6_0 = lambda: True @td.skip_if_no_mpl class TestDataFramePlots(TestPlotBase): - @pytest.mark.xfail(mpl_ge_3_6_0(), reason="Api changed") + @pytest.mark.xfail(reason="Api changed in 3.6.0") @pytest.mark.slow def test_plot(self): df = tm.makeTimeDataFrame() @@ -76,8 +71,6 @@ def test_plot(self): ax = _check_plot_works(df.plot, use_index=True) self._check_ticks_props(ax, xrot=0) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - _check_plot_works(df.plot, sort_columns=False) _check_plot_works(df.plot, yticks=[1, 5, 10]) _check_plot_works(df.plot, xticks=[1, 5, 10]) _check_plot_works(df.plot, ylim=(-100, 100), xlim=(-100, 100)) @@ -737,7 +730,6 @@ def test_plot_scatter_with_categorical_data(self, x, y): _check_plot_works(df.plot.scatter, x=x, y=y) def test_plot_scatter_with_c(self): - from pandas.plotting._matplotlib.compat import mpl_ge_3_4_0 df = DataFrame( np.random.randint(low=0, high=100, size=(6, 4)), @@ -750,10 +742,7 @@ def test_plot_scatter_with_c(self): # default to Greys assert ax.collections[0].cmap.name == "Greys" - if mpl_ge_3_4_0(): - assert ax.collections[0].colorbar.ax.get_ylabel() == "z" - else: - assert ax.collections[0].colorbar._label == "z" + assert ax.collections[0].colorbar.ax.get_ylabel() == "z" cm = "cubehelix" ax = df.plot.scatter(x="x", y="y", c="z", colormap=cm) @@ -1835,7 +1824,7 @@ def test_memory_leak(self): def test_df_gridspec_patterns(self): # GH 10819 - import matplotlib.gridspec as gridspec + from matplotlib import gridspec import matplotlib.pyplot as plt ts = Series(np.random.randn(10), index=date_range("1/1/2000", periods=10)) @@ -2232,19 +2221,6 @@ def test_secondary_y(self, secondary_y): assert ax.get_ylim() == (0, 100) assert ax.get_yticks()[0] == 99 - def test_sort_columns_deprecated(self): - # GH 47563 - df = DataFrame({"a": [1, 2], "b": [3, 4]}) - - with tm.assert_produces_warning(FutureWarning): - df.plot.box("a", sort_columns=True) - - with tm.assert_produces_warning(FutureWarning): - df.plot.box(sort_columns=False) - - with tm.assert_produces_warning(False): - df.plot.box("a") - def _generate_4_axes_via_gridspec(): import matplotlib as mpl diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 2e860c2615322..ed129d315a0c6 
100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -199,14 +199,13 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): @pytest.mark.parametrize("kw", ["c", "color"]) def test_scatter_with_c_column_name_with_colors(self, cmap, kw): # https://github.com/pandas-dev/pandas/issues/34316 - from pandas.plotting._matplotlib.compat import mpl_ge_3_6_0 df = DataFrame( [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], columns=["length", "width"], ) df["species"] = ["r", "r", "g", "g", "b"] - if mpl_ge_3_6_0() and cmap is not None: + if cmap is not None: with tm.assert_produces_warning(UserWarning, check_stacklevel=False): ax = df.plot.scatter(x=0, y=1, cmap=cmap, **{kw: "species"}) else: diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 9112d5cb3368f..ab7b2855768db 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -13,6 +13,7 @@ MultiIndex, Series, date_range, + plotting, timedelta_range, ) import pandas._testing as tm @@ -22,7 +23,6 @@ ) from pandas.io.formats.printing import pprint_thing -import pandas.plotting as plotting @td.skip_if_no_mpl diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index f75e5cd3491a4..3149fa9cb2095 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -39,11 +39,6 @@ from pandas.core.indexes.timedeltas import timedelta_range from pandas.tests.plotting.common import TestPlotBase -try: - from pandas.plotting._matplotlib.compat import mpl_ge_3_6_0 -except ImportError: - mpl_ge_3_6_0 = lambda: True - from pandas.tseries.offsets import WeekOfMonth @@ -265,7 +260,7 @@ def test_plot_multiple_inferred_freq(self): ser = Series(np.random.randn(len(dr)), index=dr) _check_plot_works(ser.plot) - @pytest.mark.xfail(mpl_ge_3_6_0(), reason="Api changed") + @pytest.mark.xfail(reason="Api changed in 3.6.0") def test_uhf(self): import pandas.plotting._matplotlib.converter as conv @@ -1215,7 +1210,7 @@ def test_secondary_legend(self): # TODO: color cycle problems assert len(colors) == 4 - @pytest.mark.xfail(mpl_ge_3_6_0(), reason="Api changed") + @pytest.mark.xfail(reason="Api changed in 3.6.0") def test_format_date_axis(self): rng = date_range("1/1/2012", periods=12, freq="M") df = DataFrame(np.random.randn(len(rng), 3), rng) diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index dc586d15ba115..9d90f2e405803 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -18,11 +18,6 @@ _check_plot_works, ) -try: - from pandas.plotting._matplotlib.compat import mpl_ge_3_6_0 -except ImportError: - mpl_ge_3_6_0 = lambda: True - @pytest.fixture def ts(): @@ -196,7 +191,7 @@ def test_hist_kwargs(self, ts): ax = ts.plot.hist(align="left", stacked=True, ax=ax) tm.close() - @pytest.mark.xfail(mpl_ge_3_6_0(), reason="Api changed") + @pytest.mark.xfail(reason="Api changed in 3.6.0") @td.skip_if_no_scipy def test_hist_kde(self, ts): diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 93da7f71f51f9..67486ec2a17b6 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -11,6 +11,7 @@ Series, Timestamp, interval_range, + plotting, ) import pandas._testing as tm from pandas.tests.plotting.common import ( @@ -18,8 +19,6 @@ 
_check_plot_works, ) -import pandas.plotting as plotting - @td.skip_if_mpl def test_import_error_message(): diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 816ce95dbb83a..d9505b4d593e6 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -12,6 +12,7 @@ DataFrame, Series, date_range, + plotting, ) import pandas._testing as tm from pandas.tests.plotting.common import ( @@ -19,13 +20,6 @@ _check_plot_works, ) -import pandas.plotting as plotting - -try: - from pandas.plotting._matplotlib.compat import mpl_ge_3_6_0 -except ImportError: - mpl_ge_3_6_0 = lambda: True - @pytest.fixture def ts(): @@ -498,7 +492,7 @@ def test_kde_missing_vals(self): # gh-14821: check if the values have any missing values assert any(~np.isnan(axes.lines[0].get_xdata())) - @pytest.mark.xfail(mpl_ge_3_6_0(), reason="Api changed") + @pytest.mark.xfail(reason="Api changed in 3.6.0") def test_boxplot_series(self, ts): _, ax = self.plt.subplots() ax = ts.plot.box(logy=True, ax=ax) @@ -652,7 +646,7 @@ def test_standard_colors(self, c): assert result == [c] * 3 def test_standard_colors_all(self): - import matplotlib.colors as colors + from matplotlib import colors from pandas.plotting._matplotlib.style import get_standard_colors diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index e82ffdfef7b15..0c264c107d3d6 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -32,7 +32,7 @@ _get_timestamp_range_edges, ) -import pandas.tseries.offsets as offsets +from pandas.tseries import offsets from pandas.tseries.offsets import Minute @@ -700,8 +700,7 @@ def test_asfreq_non_unique(): msg = "cannot reindex on an axis with duplicate labels" with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning, match="non-unique"): - ts.asfreq("B") + ts.asfreq("B") def test_resample_axis1(): diff --git a/pandas/tests/resample/test_deprecated.py b/pandas/tests/resample/test_deprecated.py deleted file mode 100644 index 3aac7a961fa19..0000000000000 --- a/pandas/tests/resample/test_deprecated.py +++ /dev/null @@ -1,307 +0,0 @@ -from datetime import ( - datetime, - timedelta, -) - -import numpy as np -import pytest - -import pandas as pd -from pandas import ( - DataFrame, - Series, -) -import pandas._testing as tm -from pandas.core.indexes.datetimes import date_range -from pandas.core.indexes.period import ( - PeriodIndex, - period_range, -) -from pandas.core.indexes.timedeltas import timedelta_range - -from pandas.tseries.offsets import ( - BDay, - Minute, -) - -DATE_RANGE = (date_range, "dti", datetime(2005, 1, 1), datetime(2005, 1, 10)) -PERIOD_RANGE = (period_range, "pi", datetime(2005, 1, 1), datetime(2005, 1, 10)) -TIMEDELTA_RANGE = (timedelta_range, "tdi", "1 day", "10 day") - -all_ts = pytest.mark.parametrize( - "_index_factory,_series_name,_index_start,_index_end", - [DATE_RANGE, PERIOD_RANGE, TIMEDELTA_RANGE], -) - - -@pytest.fixture() -def _index_factory(): - return period_range - - -@pytest.fixture -def create_index(_index_factory): - def _create_index(*args, **kwargs): - """return the _index_factory created using the args, kwargs""" - return _index_factory(*args, **kwargs) - - return _create_index - - -# new test to check that all FutureWarning are triggered -def test_deprecating_on_loffset_and_base(): - # GH 31809 - - idx = date_range("2001-01-01", periods=4, freq="T") - df = DataFrame(data=4 * [range(2)], 
index=idx, columns=["a", "b"]) - - with tm.assert_produces_warning(FutureWarning): - pd.Grouper(freq="10s", base=0) - with tm.assert_produces_warning(FutureWarning): - pd.Grouper(freq="10s", loffset="0s") - - # not checking the stacklevel for .groupby().resample() because it's complicated to - # reconcile it with the stacklevel for Series.resample() and DataFrame.resample(); - # see GH #37603 - with tm.assert_produces_warning(FutureWarning): - df.groupby("a").resample("3T", base=0).sum() - with tm.assert_produces_warning(FutureWarning): - df.groupby("a").resample("3T", loffset="0s").sum() - msg = "'offset' and 'base' cannot be present at the same time" - with tm.assert_produces_warning(FutureWarning): - with pytest.raises(ValueError, match=msg): - df.groupby("a").resample("3T", base=0, offset=0).sum() - - with tm.assert_produces_warning(FutureWarning): - df.resample("3T", base=0).sum() - with tm.assert_produces_warning(FutureWarning): - df.resample("3T", loffset="0s").sum() - - -@all_ts -@pytest.mark.parametrize("arg", ["mean", {"value": "mean"}, ["mean"]]) -def test_resample_loffset_arg_type(frame, create_index, arg): - # GH 13218, 15002 - df = frame - expected_means = [df.values[i : i + 2].mean() for i in range(0, len(df.values), 2)] - expected_index = create_index(df.index[0], periods=len(df.index) / 2, freq="2D") - - # loffset coerces PeriodIndex to DateTimeIndex - if isinstance(expected_index, PeriodIndex): - expected_index = expected_index.to_timestamp() - - expected_index += timedelta(hours=2) - expected = DataFrame({"value": expected_means}, index=expected_index) - - with tm.assert_produces_warning(FutureWarning): - result_agg = df.resample("2D", loffset="2H").agg(arg) - - if isinstance(arg, list): - expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) - - tm.assert_frame_equal(result_agg, expected) - - -@pytest.mark.parametrize( - "loffset", [timedelta(minutes=1), "1min", Minute(1), np.timedelta64(1, "m")] -) -def test_resample_loffset(loffset): - # GH 7687 - rng = date_range("1/1/2000 00:00:00", "1/1/2000 00:13:00", freq="min") - s = Series(np.random.randn(14), index=rng) - - with tm.assert_produces_warning(FutureWarning): - result = s.resample( - "5min", closed="right", label="right", loffset=loffset - ).mean() - idx = date_range("1/1/2000", periods=4, freq="5min") - expected = Series( - [s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()], - index=idx + timedelta(minutes=1), - ) - tm.assert_series_equal(result, expected) - assert result.index.freq == Minute(5) - - # from daily - dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D") - ser = Series(np.random.rand(len(dti)), dti) - - # to weekly - result = ser.resample("w-sun").last() - business_day_offset = BDay() - with tm.assert_produces_warning(FutureWarning): - expected = ser.resample("w-sun", loffset=-business_day_offset).last() - assert result.index[0] - business_day_offset == expected.index[0] - - -def test_resample_loffset_upsample(): - # GH 20744 - rng = date_range("1/1/2000 00:00:00", "1/1/2000 00:13:00", freq="min") - s = Series(np.random.randn(14), index=rng) - - with tm.assert_produces_warning(FutureWarning): - result = s.resample( - "5min", closed="right", label="right", loffset=timedelta(minutes=1) - ).ffill() - idx = date_range("1/1/2000", periods=4, freq="5min") - expected = Series([s[0], s[5], s[10], s[-1]], index=idx + timedelta(minutes=1)) - - tm.assert_series_equal(result, expected) - - -def test_resample_loffset_count(): - # GH 12725 - start_time = "1/1/2000 00:00:00" 
- rng = date_range(start_time, periods=100, freq="S") - ts = Series(np.random.randn(len(rng)), index=rng) - - with tm.assert_produces_warning(FutureWarning): - result = ts.resample("10S", loffset="1s").count() - - expected_index = date_range(start_time, periods=10, freq="10S") + timedelta( - seconds=1 - ) - expected = Series(10, index=expected_index) - - tm.assert_series_equal(result, expected) - - # Same issue should apply to .size() since it goes through - # same code path - with tm.assert_produces_warning(FutureWarning): - result = ts.resample("10S", loffset="1s").size() - - tm.assert_series_equal(result, expected) - - -def test_resample_base(): - rng = date_range("1/1/2000 00:00:00", "1/1/2000 02:00", freq="s") - ts = Series(np.random.randn(len(rng)), index=rng) - - with tm.assert_produces_warning(FutureWarning): - resampled = ts.resample("5min", base=2).mean() - exp_rng = date_range("12/31/1999 23:57:00", "1/1/2000 01:57", freq="5min") - tm.assert_index_equal(resampled.index, exp_rng) - - -def test_resample_float_base(): - # GH25161 - dt = pd.to_datetime( - ["2018-11-26 16:17:43.51", "2018-11-26 16:17:44.51", "2018-11-26 16:17:45.51"] - ) - s = Series(np.arange(3), index=dt) - - base = 17 + 43.51 / 60 - with tm.assert_produces_warning(FutureWarning): - result = s.resample("3min", base=base).size() - expected = Series( - 3, index=pd.DatetimeIndex(["2018-11-26 16:17:43.51"], freq="3min") - ) - tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize("kind", ["period", None, "timestamp"]) -@pytest.mark.parametrize("agg_arg", ["mean", {"value": "mean"}, ["mean"]]) -def test_loffset_returns_datetimeindex(frame, kind, agg_arg): - # make sure passing loffset returns DatetimeIndex in all cases - # basic method taken from Base.test_resample_loffset_arg_type() - df = frame - expected_means = [df.values[i : i + 2].mean() for i in range(0, len(df.values), 2)] - expected_index = period_range(df.index[0], periods=len(df.index) / 2, freq="2D") - - # loffset coerces PeriodIndex to DateTimeIndex - expected_index = expected_index.to_timestamp() - expected_index += timedelta(hours=2) - expected = DataFrame({"value": expected_means}, index=expected_index) - - with tm.assert_produces_warning(FutureWarning): - result_agg = df.resample("2D", loffset="2H", kind=kind).agg(agg_arg) - if isinstance(agg_arg, list): - expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) - tm.assert_frame_equal(result_agg, expected) - - -@pytest.mark.parametrize( - "start,end,start_freq,end_freq,base,offset", - [ - ("19910905", "19910909 03:00", "H", "24H", 10, "10H"), - ("19910905", "19910909 12:00", "H", "24H", 10, "10H"), - ("19910905", "19910909 23:00", "H", "24H", 10, "10H"), - ("19910905 10:00", "19910909", "H", "24H", 10, "10H"), - ("19910905 10:00", "19910909 10:00", "H", "24H", 10, "10H"), - ("19910905", "19910909 10:00", "H", "24H", 10, "10H"), - ("19910905 12:00", "19910909", "H", "24H", 10, "10H"), - ("19910905 12:00", "19910909 03:00", "H", "24H", 10, "10H"), - ("19910905 12:00", "19910909 12:00", "H", "24H", 10, "10H"), - ("19910905 12:00", "19910909 12:00", "H", "24H", 34, "34H"), - ("19910905 12:00", "19910909 12:00", "H", "17H", 10, "10H"), - ("19910905 12:00", "19910909 12:00", "H", "17H", 3, "3H"), - ("19910905 12:00", "19910909 1:00", "H", "M", 3, "3H"), - ("19910905", "19910913 06:00", "2H", "24H", 10, "10H"), - ("19910905", "19910905 01:39", "Min", "5Min", 3, "3Min"), - ("19910905", "19910905 03:18", "2Min", "5Min", 3, "3Min"), - ], -) -def test_resample_with_non_zero_base(start, 
end, start_freq, end_freq, base, offset): - # GH 23882 - s = Series(0, index=period_range(start, end, freq=start_freq)) - s = s + np.arange(len(s)) - with tm.assert_produces_warning(FutureWarning): - result = s.resample(end_freq, base=base).mean() - result = result.to_timestamp(end_freq) - - # test that the replacement argument 'offset' works - result_offset = s.resample(end_freq, offset=offset).mean() - result_offset = result_offset.to_timestamp(end_freq) - tm.assert_series_equal(result, result_offset) - - # to_timestamp casts 24H -> D - result = result.asfreq(end_freq) if end_freq == "24H" else result - with tm.assert_produces_warning(FutureWarning): - expected = s.to_timestamp().resample(end_freq, base=base).mean() - if end_freq == "M": - # TODO: is non-tick the relevant characteristic? (GH 33815) - expected.index = expected.index._with_freq(None) - tm.assert_series_equal(result, expected) - - -def test_resample_base_with_timedeltaindex(): - # GH 10530 - rng = timedelta_range(start="0s", periods=25, freq="s") - ts = Series(np.random.randn(len(rng)), index=rng) - - with tm.assert_produces_warning(FutureWarning): - with_base = ts.resample("2s", base=5).mean() - without_base = ts.resample("2s").mean() - - exp_without_base = timedelta_range(start="0s", end="25s", freq="2s") - exp_with_base = timedelta_range(start="5s", end="29s", freq="2s") - - tm.assert_index_equal(without_base.index, exp_without_base) - tm.assert_index_equal(with_base.index, exp_with_base) - - -def test_interpolate_posargs_deprecation(): - # GH 41485 - idx = pd.to_datetime(["1992-08-27 07:46:48", "1992-08-27 07:46:59"]) - s = Series([1, 4], index=idx) - - msg = ( - r"In a future version of pandas all arguments of Resampler\.interpolate " - r"except for the argument 'method' will be keyword-only" - ) - - with tm.assert_produces_warning(FutureWarning, match=msg): - result = s.resample("3s").interpolate("linear", 0) - - idx = pd.to_datetime( - [ - "1992-08-27 07:46:48", - "1992-08-27 07:46:51", - "1992-08-27 07:46:54", - "1992-08-27 07:46:57", - ] - ) - expected = Series([1.0, 1.0, 1.0, 1.0], index=idx) - - expected.index._data.freq = "3s" - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 4da1f4c589c56..e32708c4402e4 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -27,7 +27,7 @@ ) from pandas.core.resample import _get_period_range_edges -import pandas.tseries.offsets as offsets +from pandas.tseries import offsets @pytest.fixture() @@ -605,9 +605,11 @@ def test_resample_with_dst_time_change(self): def test_resample_bms_2752(self): # GH2753 - foo = Series(index=pd.bdate_range("20000101", "20000201"), dtype=np.float64) - res1 = foo.resample("BMS").mean() - res2 = foo.resample("BMS").mean().resample("B").mean() + timeseries = Series( + index=pd.bdate_range("20000101", "20000201"), dtype=np.float64 + ) + res1 = timeseries.resample("BMS").mean() + res2 = timeseries.resample("BMS").mean().resample("B").mean() assert res1.index[0] == Timestamp("20000103") assert res1.index[0] == res2.index[0] diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index c5cd777962df3..53d416a74cac2 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -407,14 +407,14 @@ def test_agg(): expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) for t in cases: # In case 2, 
"date" is an index and a column, so agg still tries to agg - warn = FutureWarning if t == cases[2] else None - with tm.assert_produces_warning( - warn, - match=r"\['date'\] did not aggregate successfully", - ): - # .var on dt64 column raises and is dropped + if t == cases[2]: + # .var on dt64 column raises + msg = "Cannot cast DatetimeArray to dtype float64" + with pytest.raises(TypeError, match=msg): + t.aggregate([np.mean, np.std]) + else: result = t.aggregate([np.mean, np.std]) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) expected = pd.concat([a_mean, b_std], axis=1) for t in cases: diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 4498f11d77313..7ec3c81de235c 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -321,29 +321,27 @@ def test_groupby_resample_interpolate(): .interpolate(method="linear") ) - msg = "containing strings is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - expected_ind = pd.MultiIndex.from_tuples( - [ - (50, "2018-01-07"), - (50, Timestamp("2018-01-08")), - (50, Timestamp("2018-01-09")), - (50, Timestamp("2018-01-10")), - (50, Timestamp("2018-01-11")), - (50, Timestamp("2018-01-12")), - (50, Timestamp("2018-01-13")), - (50, Timestamp("2018-01-14")), - (50, Timestamp("2018-01-15")), - (50, Timestamp("2018-01-16")), - (50, Timestamp("2018-01-17")), - (50, Timestamp("2018-01-18")), - (50, Timestamp("2018-01-19")), - (50, Timestamp("2018-01-20")), - (50, Timestamp("2018-01-21")), - (60, Timestamp("2018-01-14")), - ], - names=["volume", "week_starting"], - ) + expected_ind = pd.MultiIndex.from_tuples( + [ + (50, Timestamp("2018-01-07")), + (50, Timestamp("2018-01-08")), + (50, Timestamp("2018-01-09")), + (50, Timestamp("2018-01-10")), + (50, Timestamp("2018-01-11")), + (50, Timestamp("2018-01-12")), + (50, Timestamp("2018-01-13")), + (50, Timestamp("2018-01-14")), + (50, Timestamp("2018-01-15")), + (50, Timestamp("2018-01-16")), + (50, Timestamp("2018-01-17")), + (50, Timestamp("2018-01-18")), + (50, Timestamp("2018-01-19")), + (50, Timestamp("2018-01-20")), + (50, Timestamp("2018-01-21")), + (60, Timestamp("2018-01-14")), + ], + names=["volume", "week_starting"], + ) expected = DataFrame( data={ diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 277496d776cb2..ea526c95f20e0 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -30,7 +30,6 @@ ) import pandas._testing as tm from pandas.core.arrays import SparseArray -from pandas.core.construction import create_series_with_explicit_dtype from pandas.tests.extension.decimal import to_decimal @@ -519,7 +518,7 @@ def test_concat_no_unnecessary_upcast(dt, frame_or_series): assert x.values.dtype == dt -@pytest.mark.parametrize("pdt", [create_series_with_explicit_dtype, DataFrame]) +@pytest.mark.parametrize("pdt", [Series, DataFrame]) @pytest.mark.parametrize("dt", np.sctypes["int"]) def test_concat_will_upcast(dt, pdt): with catch_warnings(record=True): @@ -697,21 +696,6 @@ def test_concat_multiindex_with_empty_rangeindex(): tm.assert_frame_equal(result, expected) -def test_concat_posargs_deprecation(): - # https://github.com/pandas-dev/pandas/issues/41485 - df = DataFrame([[1, 2, 3]], index=["a"]) - df2 = DataFrame([[4, 5, 6]], index=["b"]) - - msg = ( - "In a future version of pandas all arguments of concat " - "except for the argument 
'objs' will be keyword-only" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = concat([df, df2], 0) - expected = DataFrame([[1, 2, 3], [4, 5, 6]], index=["a", "b"]) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( "data", [ diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index 1af54a1d5cf4a..93d212d0a581d 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -403,7 +403,6 @@ def test_concat_multiple_tzs(self): expected = DataFrame({"time": [ts2, ts3]}) tm.assert_frame_equal(results, expected) - @pytest.mark.filterwarnings("ignore:Timestamp.freq is deprecated:FutureWarning") def test_concat_multiindex_with_tz(self): # GH 6606 df = DataFrame( diff --git a/pandas/tests/reshape/concat/test_empty.py b/pandas/tests/reshape/concat/test_empty.py index 541a34bde8143..18c0645df1ceb 100644 --- a/pandas/tests/reshape/concat/test_empty.py +++ b/pandas/tests/reshape/concat/test_empty.py @@ -16,11 +16,11 @@ class TestEmptyConcat: def test_handle_empty_objects(self, sort): df = DataFrame(np.random.randn(10, 4), columns=list("abcd")) - baz = df[:5].copy() - baz["foo"] = "bar" + dfcopy = df[:5].copy() + dfcopy["foo"] = "bar" empty = df[5:5] - frames = [baz, empty, empty, df[5:]] + frames = [dfcopy, empty, empty, df[5:]] concatted = concat(frames, axis=0, sort=sort) expected = df.reindex(columns=["a", "b", "c", "d", "foo"]) diff --git a/pandas/tests/reshape/concat/test_series.py b/pandas/tests/reshape/concat/test_series.py index 8fa5988720c6b..886ada409a91a 100644 --- a/pandas/tests/reshape/concat/test_series.py +++ b/pandas/tests/reshape/concat/test_series.py @@ -120,24 +120,30 @@ def test_concat_series_name_npscalar_tuple(self, s1name, s2name): def test_concat_series_partial_columns_names(self): # GH10698 - foo = Series([1, 2], name="foo") - bar = Series([1, 2]) - baz = Series([4, 5]) + named_series = Series([1, 2], name="foo") + unnamed_series1 = Series([1, 2]) + unnamed_series2 = Series([4, 5]) - result = concat([foo, bar, baz], axis=1) + result = concat([named_series, unnamed_series1, unnamed_series2], axis=1) expected = DataFrame( {"foo": [1, 2], 0: [1, 2], 1: [4, 5]}, columns=["foo", 0, 1] ) tm.assert_frame_equal(result, expected) - result = concat([foo, bar, baz], axis=1, keys=["red", "blue", "yellow"]) + result = concat( + [named_series, unnamed_series1, unnamed_series2], + axis=1, + keys=["red", "blue", "yellow"], + ) expected = DataFrame( {"red": [1, 2], "blue": [1, 2], "yellow": [4, 5]}, columns=["red", "blue", "yellow"], ) tm.assert_frame_equal(result, expected) - result = concat([foo, bar, baz], axis=1, ignore_index=True) + result = concat( + [named_series, unnamed_series1, unnamed_series2], axis=1, ignore_index=True + ) expected = DataFrame({0: [1, 2], 1: [1, 2], 2: [4, 5]}) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/concat/test_sort.py b/pandas/tests/reshape/concat/test_sort.py index e83880625f3d6..2724f81958893 100644 --- a/pandas/tests/reshape/concat/test_sort.py +++ b/pandas/tests/reshape/concat/test_sort.py @@ -1,4 +1,5 @@ import numpy as np +import pytest import pandas as pd from pandas import DataFrame @@ -109,8 +110,9 @@ def test_concat_frame_with_sort_false(self): ) tm.assert_frame_equal(result, expected) - def test_concat_sort_none_warning(self): + def test_concat_sort_none_raises(self): # GH#41518 df = DataFrame({1: [1, 2], "a": [3, 4]}) - with tm.assert_produces_warning(FutureWarning, 
match="sort"):
+        msg = "The 'sort' keyword only accepts boolean values; None was passed."
+        with pytest.raises(ValueError, match=msg):
             pd.concat([df, df], sort=None)
diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py
index 23d7c91ceefae..dd2c59ec161e7 100644
--- a/pandas/tests/reshape/merge/test_join.py
+++ b/pandas/tests/reshape/merge/test_join.py
@@ -420,18 +420,18 @@ def test_join_inner_multiindex(self, lexsorted_two_level_string_multiindex):
 
         # _assert_same_contents(expected, expected2.loc[:, expected.columns])
 
-    def test_join_hierarchical_mixed(self):
+    def test_join_hierarchical_mixed_raises(self):
         # GH 2024
+        # GH 40993: raising instead of warning, enforced in 2.0
         df = DataFrame([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "c"])
         new_df = df.groupby(["a"]).agg({"b": [np.mean, np.sum]})
         other_df = DataFrame([(1, 2, 3), (7, 10, 6)], columns=["a", "b", "d"])
         other_df.set_index("a", inplace=True)
         # GH 9455, 12219
-        msg = "merging between different levels is deprecated"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            result = merge(new_df, other_df, left_index=True, right_index=True)
-        assert ("b", "mean") in result
-        assert "b" in result
+        with pytest.raises(
+            pd.errors.MergeError, match="Not allowed to merge between different levels"
+        ):
+            merge(new_df, other_df, left_index=True, right_index=True)
 
     def test_join_float64_float32(self):
 
@@ -642,11 +642,12 @@ def test_join_dups(self):
         dta = x.merge(y, left_index=True, right_index=True).merge(
             z, left_index=True, right_index=True, how="outer"
         )
-        with tm.assert_produces_warning(FutureWarning):
-            dta = dta.merge(w, left_index=True, right_index=True)
-        expected = concat([x, y, z, w], axis=1)
-        expected.columns = ["x_x", "y_x", "x_y", "y_y", "x_x", "y_x", "x_y", "y_y"]
-        tm.assert_frame_equal(dta, expected)
+        # GH 40991: as of 2.0 this raises because the suffixes would duplicate columns
+        with pytest.raises(
+            pd.errors.MergeError,
+            match="Passing 'suffixes' which cause duplicate columns",
+        ):
+            dta.merge(w, left_index=True, right_index=True)
 
     def test_join_multi_to_multi(self, join_type):
         # GH 20475
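For reference, the MergeError enforced in test_join_dups above is easy to reproduce standalone. A minimal sketch (illustrative only, not part of the patch; it mirrors the frames used in the merge suffix tests below):

    import pandas as pd

    left = pd.DataFrame({"a": [1, 2, 3], "b": 1, "b_x": 2})
    right = pd.DataFrame({"a": [1, 2, 3], "b": 2})

    # The default suffixes ("_x", "_y") rename the overlapping "b" to "b_x",
    # colliding with the pre-existing "b_x" column; pandas 2.0 raises where
    # 1.x only warned and produced duplicate columns.
    try:
        left.merge(right, on="a")
    except pd.errors.MergeError as err:
        print(err)  # Passing 'suffixes' which cause duplicate columns ...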
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index edfae3ad9dac6..e4638c43e5a66 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -734,8 +734,6 @@ def test_other_datetime_unit(self, unit):
 
         if unit in ["D", "h", "m"]:
             # not supported so we cast to the nearest supported unit, seconds
-            # TODO(2.0): cast to nearest (second) instead of ns
-            # coerces to datetime64[ns], thus should not be affected
             exp_dtype = "datetime64[s]"
         else:
             exp_dtype = dtype
@@ -1255,7 +1253,18 @@ def test_validation(self):
         merge(left_w_dups, right_w_dups, on="a", validate="one_to_many")
 
         # Check invalid arguments
-        msg = "Not a valid argument for validate"
+        msg = (
+            '"jibberish" is not a valid argument. '
+            "Valid arguments are:\n"
+            '- "1:1"\n'
+            '- "1:m"\n'
+            '- "m:1"\n'
+            '- "m:m"\n'
+            '- "one_to_one"\n'
+            '- "one_to_many"\n'
+            '- "many_to_one"\n'
+            '- "many_to_many"'
+        )
         with pytest.raises(ValueError, match=msg):
             merge(left, right, on="a", validate="jibberish")
 
@@ -2207,6 +2216,7 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nm):
 
 def test_merge_series_multilevel():
     # GH#47946
+    # GH 40993: raising instead of warning, enforced in 2.0
     a = DataFrame(
         {"A": [1, 2, 3, 4]},
         index=MultiIndex.from_product([["a", "b"], [0, 1]], names=["outer", "inner"]),
@@ -2216,13 +2226,10 @@
         index=MultiIndex.from_product([["a", "b"], [1, 2]], names=["outer", "inner"]),
         name=("B", "C"),
     )
-    expected = DataFrame(
-        {"A": [2, 4], ("B", "C"): [1, 3]},
-        index=MultiIndex.from_product([["a", "b"], [1]], names=["outer", "inner"]),
-    )
-    with tm.assert_produces_warning(FutureWarning):
-        result = merge(a, b, on=["outer", "inner"])
-    tm.assert_frame_equal(result, expected)
+    with pytest.raises(
+        MergeError, match="Not allowed to merge between different levels"
+    ):
+        merge(a, b, on=["outer", "inner"])
 
 
 @pytest.mark.parametrize(
@@ -2303,12 +2310,12 @@ def test_merge_suffix_error(col1, col2, suffixes):
 
 
 @pytest.mark.parametrize("suffixes", [{"left", "right"}, {"left": 0, "right": 0}])
-def test_merge_suffix_warns(suffixes):
+def test_merge_suffix_raises(suffixes):
     a = DataFrame({"a": [1, 2, 3]})
     b = DataFrame({"b": [3, 4, 5]})
 
-    with tm.assert_produces_warning(FutureWarning):
-        merge(a, b, left_index=True, right_index=True, suffixes={"left", "right"})
+    with pytest.raises(TypeError, match="Passing 'suffixes' as a"):
+        merge(a, b, left_index=True, right_index=True, suffixes=suffixes)
 
 
 @pytest.mark.parametrize(
@@ -2609,20 +2616,16 @@ def test_merge_result_empty_index_and_on():
     tm.assert_frame_equal(result, expected)
 
 
-def test_merge_suffixes_produce_dup_columns_warns():
-    # GH#22818
+def test_merge_suffixes_produce_dup_columns_raises():
+    # GH#22818; enforced in 2.0
     left = DataFrame({"a": [1, 2, 3], "b": 1, "b_x": 2})
    right = DataFrame({"a": [1, 2, 3], "b": 2})
-    expected = DataFrame(
-        [[1, 1, 2, 2], [2, 1, 2, 2], [3, 1, 2, 2]], columns=["a", "b_x", "b_x", "b_y"]
-    )
-    with tm.assert_produces_warning(FutureWarning):
-        result = merge(left, right, on="a")
-    tm.assert_frame_equal(result, expected)
 
-    with tm.assert_produces_warning(FutureWarning):
+    with pytest.raises(MergeError, match="Passing 'suffixes' which cause duplicate"):
+        merge(left, right, on="a")
+
+    with pytest.raises(MergeError, match="Passing 'suffixes' which cause duplicate"):
         merge(right, left, on="a", suffixes=("_y", "_x"))
-    tm.assert_frame_equal(result, expected)
 
 
 def test_merge_duplicate_columns_with_suffix_no_warning():
@@ -2635,15 +2638,13 @@ def test_merge_duplicate_columns_with_suffix_no_warning():
     tm.assert_frame_equal(result, expected)
 
 
-def test_merge_duplicate_columns_with_suffix_causing_another_duplicate():
-    # GH#22818
+def test_merge_duplicate_columns_with_suffix_causing_another_duplicate_raises():
+    # GH#22818, enforced in 2.0
     # This should raise warning because suffixes cause another collision
     left = DataFrame([[1, 1, 1, 1], [2, 2, 2, 2]], columns=["a", "b", "b", "b_x"])
     right = DataFrame({"a": [1, 3], "b": 2})
-    with tm.assert_produces_warning(FutureWarning):
-        result = merge(left, right, on="a")
-    expected = DataFrame([[1, 1, 1, 1, 2]], columns=["a", "b_x", "b_x", "b_x", "b_y"])
-    tm.assert_frame_equal(result, expected)
+    with pytest.raises(MergeError, match="Passing 'suffixes' which cause
duplicate"): + merge(left, right, on="a") def test_merge_string_float_column_result(): diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index 2013b3484ebff..fe88b7f9caa02 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -1,3 +1,5 @@ +import re + import numpy as np import pytest @@ -1073,19 +1075,16 @@ def test_col_substring_of_stubname(self): result = wide_to_long(wide_df, stubnames="PA", i=["node_id", "A"], j="time") tm.assert_frame_equal(result, expected) - def test_warn_of_column_name_value(self): - # GH34731 - # raise a warning if the resultant value column name matches + def test_raise_of_column_name_value(self): + # GH34731, enforced in 2.0 + # raise a ValueError if the resultant value column name matches # a name in the dataframe already (default name is "value") df = DataFrame({"col": list("ABC"), "value": range(10, 16, 2)}) - expected = DataFrame( - [["A", "col", "A"], ["B", "col", "B"], ["C", "col", "C"]], - columns=["value", "variable", "value"], - ) - with tm.assert_produces_warning(FutureWarning): - result = df.melt(id_vars="value") - tm.assert_frame_equal(result, expected) + with pytest.raises( + ValueError, match=re.escape("value_name (value) cannot match") + ): + df.melt(id_vars="value", value_name="value") @pytest.mark.parametrize("dtype", ["O", "string"]) def test_missing_stubname(self, dtype): diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 8c2c1026d5c82..14ea670fa6cf9 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -481,11 +481,9 @@ def test_pivot_index_with_nan(self, method): } ) if method: - with tm.assert_produces_warning(FutureWarning): - result = df.pivot("a", columns="b", values="c") + result = df.pivot(index="a", columns="b", values="c") else: - with tm.assert_produces_warning(FutureWarning): - result = pd.pivot(df, "a", columns="b", values="c") + result = pd.pivot(df, index="a", columns="b", values="c") expected = DataFrame( [ [nan, nan, 17, nan], @@ -527,7 +525,6 @@ def test_pivot_index_with_nan_dates(self, method): result = pd.pivot(df, index="b", columns="a", values="c") tm.assert_frame_equal(result, pv.T) - @pytest.mark.filterwarnings("ignore:Timestamp.freq is deprecated:FutureWarning") @pytest.mark.parametrize("method", [True, False]) def test_pivot_with_tz(self, method): # GH 5878 @@ -1699,7 +1696,7 @@ def test_pivot_table_with_iterator_values(self, data): ) tm.assert_frame_equal(pivot_values_keys, pivot_values_list) - agg_values_gen = (value for value in aggs.keys()) + agg_values_gen = (value for value in aggs) pivot_values_gen = pivot_table( data, index=["A"], values=agg_values_gen, aggfunc=aggs ) @@ -2088,8 +2085,9 @@ def test_pivot_table_aggfunc_scalar_dropna(self, dropna): tm.assert_frame_equal(result, expected) - def test_pivot_table_empty_aggfunc(self): - # GH 9186 & GH 13483 + @pytest.mark.parametrize("margins", [True, False]) + def test_pivot_table_empty_aggfunc(self, margins): + # GH 9186 & GH 13483 & GH 49240 df = DataFrame( { "A": [2, 2, 3, 3, 2], @@ -2098,7 +2096,9 @@ def test_pivot_table_empty_aggfunc(self): "D": [None, None, None, None, None], } ) - result = df.pivot_table(index="A", columns="D", values="id", aggfunc=np.size) + result = df.pivot_table( + index="A", columns="D", values="id", aggfunc=np.size, margins=margins + ) expected = DataFrame(index=Index([], dtype="int64", name="A")) expected.columns.name = "D" tm.assert_frame_equal(result, expected) @@ -2108,9 +2108,9 @@ def 
test_pivot_table_no_column_raises(self): def agg(arr): return np.mean(arr) - foo = DataFrame({"X": [0, 0, 1, 1], "Y": [0, 1, 0, 1], "Z": [10, 20, 30, 40]}) + df = DataFrame({"X": [0, 0, 1, 1], "Y": [0, 1, 0, 1], "Z": [10, 20, 30, 40]}) with pytest.raises(KeyError, match="notpresent"): - foo.pivot_table("notpresent", "X", "Y", aggfunc=agg) + df.pivot_table("notpresent", "X", "Y", aggfunc=agg) def test_pivot_table_multiindex_columns_doctest_case(self): # The relevant characteristic is that the call diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index 1fd5f5ab7c2a6..3b39a2f877c8d 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -73,9 +73,6 @@ def test_nat_vector_field_access(): # on NaT/Timestamp for compat with datetime if field == "weekday": continue - if field in ["week", "weekofyear"]: - # GH#33595 Deprecate week and weekofyear - continue result = getattr(idx, field) expected = Index([getattr(x, field) for x in idx]) @@ -88,9 +85,6 @@ def test_nat_vector_field_access(): # on NaT/Timestamp for compat with datetime if field == "weekday": continue - if field in ["week", "weekofyear"]: - # GH#33595 Deprecate week and weekofyear - continue result = getattr(ser.dt, field) expected = [getattr(x, field) for x in idx] @@ -190,7 +184,7 @@ def test_nat_iso_format(get_nat): @pytest.mark.parametrize( "klass,expected", [ - (Timestamp, ["freqstr", "normalize", "to_julian_date", "to_period"]), + (Timestamp, ["normalize", "to_julian_date", "to_period"]), ( Timedelta, [ diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index 4283575a67f4c..17fee1ff3f949 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -158,11 +158,7 @@ def test_addition_subtraction_types(self): # objects dt = datetime(2014, 3, 4) td = timedelta(seconds=1) - # build a timestamp with a frequency, since then it supports - # addition/subtraction of integers - with tm.assert_produces_warning(FutureWarning, match="The 'freq' argument"): - # freq deprecated - ts = Timestamp(dt, freq="D") + ts = Timestamp(dt) msg = "Addition/subtraction of integers" with pytest.raises(TypeError, match=msg): @@ -183,34 +179,6 @@ def test_addition_subtraction_types(self): assert type(ts + td64) == Timestamp assert type(ts - td64) == Timestamp - @pytest.mark.parametrize( - "freq, td, td64", - [ - ("S", timedelta(seconds=1), np.timedelta64(1, "s")), - ("min", timedelta(minutes=1), np.timedelta64(1, "m")), - ("H", timedelta(hours=1), np.timedelta64(1, "h")), - ("D", timedelta(days=1), np.timedelta64(1, "D")), - ("W", timedelta(weeks=1), np.timedelta64(1, "W")), - ("M", None, np.timedelta64(1, "M")), - ], - ) - @pytest.mark.filterwarnings("ignore:Timestamp.freq is deprecated:FutureWarning") - @pytest.mark.filterwarnings("ignore:The 'freq' argument:FutureWarning") - def test_addition_subtraction_preserve_frequency(self, freq, td, td64): - ts = Timestamp("2014-03-05 00:00:00", freq=freq) - original_freq = ts.freq - - assert (ts + 1 * original_freq).freq == original_freq - assert (ts - 1 * original_freq).freq == original_freq - - if td is not None: - # timedelta does not support months as unit - assert (ts + td).freq == original_freq - assert (ts - td).freq == original_freq - - assert (ts + td64).freq == original_freq - assert (ts - td64).freq == original_freq - @pytest.mark.parametrize( "td", [Timedelta(hours=3), np.timedelta64(3, "h"), timedelta(hours=3)] ) diff --git 
a/pandas/tests/scalar/timestamp/test_comparisons.py b/pandas/tests/scalar/timestamp/test_comparisons.py
index 2c9b029bf109e..ad629604d1bc9 100644
--- a/pandas/tests/scalar/timestamp/test_comparisons.py
+++ b/pandas/tests/scalar/timestamp/test_comparisons.py
@@ -156,36 +156,22 @@ def test_compare_date(self, tz):
         # GH#36131 comparing Timestamp with date object is deprecated
         ts = Timestamp("2021-01-01 00:00:00.00000", tz=tz)
         dt = ts.to_pydatetime().date()
-        # These are incorrectly considered as equal because they
-        # dispatch to the date comparisons which truncates ts
+        # in 2.0 we disallow comparing pydate objects with Timestamps,
+        # following the stdlib datetime behavior.
+        msg = "Cannot compare Timestamp with datetime.date"
         for left, right in [(ts, dt), (dt, ts)]:
-            with tm.assert_produces_warning(FutureWarning):
-                assert left == right
-            with tm.assert_produces_warning(FutureWarning):
-                assert not left != right
-            with tm.assert_produces_warning(FutureWarning):
-                assert not left < right
-            with tm.assert_produces_warning(FutureWarning):
-                assert left <= right
-            with tm.assert_produces_warning(FutureWarning):
-                assert not left > right
-            with tm.assert_produces_warning(FutureWarning):
-                assert left >= right
-
-            # Once the deprecation is enforced, the following assertions
-            # can be enabled:
-            #    assert not left == right
-            #    assert left != right
-            #
-            #    with pytest.raises(TypeError):
-            #        left < right
-            #    with pytest.raises(TypeError):
-            #        left <= right
-            #    with pytest.raises(TypeError):
-            #        left > right
-            #    with pytest.raises(TypeError):
-            #        left >= right
+            assert not left == right
+            assert left != right
+
+            with pytest.raises(TypeError, match=msg):
+                left < right
+            with pytest.raises(TypeError, match=msg):
+                left <= right
+            with pytest.raises(TypeError, match=msg):
+                left > right
+            with pytest.raises(TypeError, match=msg):
+                left >= right
 
     def test_cant_compare_tz_naive_w_aware(self, utc_fixture):
         # see GH#1404
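The comparison semantics enforced above can be seen in a short standalone snippet. A minimal sketch (illustrative only, not part of the patch), matching the "Cannot compare Timestamp with datetime.date" message asserted in the updated test:

    from datetime import date

    import pandas as pd

    ts = pd.Timestamp("2021-01-01")
    dt = date(2021, 1, 1)

    # equality no longer dispatches to date comparisons that truncate ts
    assert not ts == dt
    assert ts != dt

    # ordering comparisons now raise instead
    try:
        ts < dt
    except TypeError as err:
        print(err)  # Cannot compare Timestamp with datetime.date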
diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py
index 341e850a7464e..9c3fa0f64153a 100644
--- a/pandas/tests/scalar/timestamp/test_constructors.py
+++ b/pandas/tests/scalar/timestamp/test_constructors.py
@@ -20,9 +20,6 @@
     Timedelta,
     Timestamp,
 )
-import pandas._testing as tm
-
-from pandas.tseries import offsets
 
 
 class TestTimestampConstructors:
@@ -54,18 +51,13 @@ def test_constructor_datetime64_with_tz(self):
         dt = np.datetime64("1970-01-01 05:00:00")
         tzstr = "UTC+05:00"
 
-        msg = "interpreted as a wall time"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            ts = Timestamp(dt, tz=tzstr)
+        # pre-2.0 this interpreted dt as a UTC time. In 2.0 this is treated
+        # as a wall time, consistent with DatetimeIndex behavior
+        ts = Timestamp(dt, tz=tzstr)
 
-        # Check that we match the old behavior
-        alt = Timestamp(dt).tz_localize("UTC").tz_convert(tzstr)
+        alt = Timestamp(dt).tz_localize(tzstr)
         assert ts == alt
-
-        # Check that we *don't* match the future behavior
-        assert ts.hour != 5
-        expected_future = Timestamp(dt).tz_localize(tzstr)
-        assert ts != expected_future
+        assert ts.hour == 5
 
     def test_constructor(self):
         base_str = "2014-07-01 09:00"
@@ -237,14 +229,12 @@ def test_constructor_invalid_tz(self):
         with pytest.raises(ValueError, match=msg):
             Timestamp("2017-10-22", tzinfo=pytz.utc, tz="UTC")
 
-        msg = "Invalid frequency:"
-        msg2 = "The 'freq' argument"
+        msg = "Cannot pass a date attribute keyword argument when passing a date string"
        with pytest.raises(ValueError, match=msg):
             # GH#5168
             # case where user tries to pass tz as an arg, not kwarg, gets
-            # interpreted as a `freq`
-            with tm.assert_produces_warning(FutureWarning, match=msg2):
-                Timestamp("2012-01-01", "US/Pacific")
+            # interpreted as `year`
+            Timestamp("2012-01-01", "US/Pacific")
 
     def test_constructor_strptime(self):
         # GH25016
@@ -273,11 +263,15 @@ def test_constructor_positional_with_tzinfo(self):
         expected = Timestamp("2020-12-31", tzinfo=timezone.utc)
         assert ts == expected
 
-    @pytest.mark.xfail(reason="GH#45307")
     @pytest.mark.parametrize("kwd", ["nanosecond", "microsecond", "second", "minute"])
-    def test_constructor_positional_keyword_mixed_with_tzinfo(self, kwd):
+    def test_constructor_positional_keyword_mixed_with_tzinfo(self, kwd, request):
         # TODO: if we passed microsecond with a keyword we would mess up
         # xref GH#45307
+        if kwd != "nanosecond":
+            # nanosecond is keyword-only as of 2.0, others are not
+            mark = pytest.mark.xfail(reason="GH#45307")
+            request.node.add_marker(mark)
+
         kwargs = {kwd: 4}
         ts = Timestamp(2020, 12, 31, tzinfo=timezone.utc, **kwargs)
@@ -348,14 +342,11 @@ def test_constructor_keyword(self):
             )
         ) == repr(Timestamp("2015-11-12 01:02:03.999999"))
 
-    @pytest.mark.filterwarnings("ignore:Timestamp.freq is:FutureWarning")
-    @pytest.mark.filterwarnings("ignore:The 'freq' argument:FutureWarning")
     def test_constructor_fromordinal(self):
         base = datetime(2000, 1, 1)
 
-        ts = Timestamp.fromordinal(base.toordinal(), freq="D")
+        ts = Timestamp.fromordinal(base.toordinal())
         assert base == ts
-        assert ts.freq == "D"
         assert base.toordinal() == ts.toordinal()
 
         ts = Timestamp.fromordinal(base.toordinal(), tz="US/Eastern")
@@ -399,9 +390,7 @@
                 tz="UTC",
             ),
             Timestamp(2000, 1, 2, 3, 4, 5, 6, 1, None),
-            # error: Argument 9 to "Timestamp" has incompatible type "_UTCclass";
-            # expected "Optional[int]"
-            Timestamp(2000, 1, 2, 3, 4, 5, 6, 1, pytz.UTC),  # type: ignore[arg-type]
+            Timestamp(2000, 1, 2, 3, 4, 5, 6, 1, pytz.UTC),
         ],
     )
     def test_constructor_nanosecond(self, result):
@@ -609,21 +598,6 @@ def test_construct_with_different_string_format(self, arg):
         expected = Timestamp(datetime(2013, 1, 1), tz=pytz.FixedOffset(540))
         assert result == expected
 
-    def test_construct_timestamp_preserve_original_frequency(self):
-        # GH 22311
-        with tm.assert_produces_warning(FutureWarning, match="The 'freq' argument"):
-            result = Timestamp(Timestamp("2010-08-08", freq="D")).freq
-        expected = offsets.Day()
-        assert result == expected
-
-    def test_constructor_invalid_frequency(self):
-        # GH 22311
-        msg = "Invalid frequency:"
-        msg2 = "The 'freq' argument"
-        with pytest.raises(ValueError, match=msg):
-            with tm.assert_produces_warning(FutureWarning, match=msg2):
-                Timestamp("2012-01-01", freq=[])
-
     @pytest.mark.parametrize("box", [datetime, Timestamp])
     def test_raise_tz_and_tzinfo_in_datetime_input(self, box):
         # GH 23579
diff --git a/pandas/tests/scalar/timestamp/test_rendering.py b/pandas/tests/scalar/timestamp/test_rendering.py
index 2f88f96b6bbea..3998142e568fe 100644
--- a/pandas/tests/scalar/timestamp/test_rendering.py
+++ b/pandas/tests/scalar/timestamp/test_rendering.py
@@ -4,7 +4,6 @@
 import pytz  # noqa  # a test below uses pytz but only inside a `eval` call
 
 from pandas import Timestamp
-import pandas._testing as tm
 
 
 class TestTimestampRendering:
@@ -36,27 +35,6 @@ def test_repr(self, date, freq, tz):
             assert freq_repr not in repr(date_tz)
             assert date_tz == eval(repr(date_tz))
 
-        msg = "The 'freq' argument in Timestamp"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            date_freq = Timestamp(date, freq=freq)
-        assert date in repr(date_freq)
-        assert tz_repr not in repr(date_freq)
-        assert freq_repr in repr(date_freq)
-        with tm.assert_produces_warning(
-            FutureWarning, match=msg, check_stacklevel=False
-        ):
-            assert date_freq == eval(repr(date_freq))
-
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            date_tz_freq = Timestamp(date, tz=tz, freq=freq)
-        assert date in repr(date_tz_freq)
-        assert tz_repr in repr(date_tz_freq)
-        assert freq_repr in repr(date_tz_freq)
-        with tm.assert_produces_warning(
-            FutureWarning, match=msg, check_stacklevel=False
-        ):
-            assert date_tz_freq == eval(repr(date_tz_freq))
-
     def test_repr_utcoffset(self):
         # This can cause the tz field to be populated, but it's redundant to
         # include this information in the date-string.
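The freq-related removals above, and the rewritten properties test below, reflect that Timestamp no longer carries a freq in 2.0: frequency-dependent checks now go through an explicit offset. A minimal sketch of that replacement pattern (illustrative only, not part of the patch), using the same dates as the test below:

    import pandas as pd
    from pandas.tseries.frequencies import to_offset

    ts = pd.Timestamp("2017-10-01")  # a Sunday; no freq attached anymore

    # without a freq, is_month_start is purely calendar-based
    assert ts.is_month_start

    # business-day semantics come from an explicit offset instead
    freq = to_offset("B")
    assert not freq.is_month_start(ts)                     # Sunday is not a business day
    assert freq.is_month_start(ts + pd.Timedelta(days=1))  # Monday 2017-10-02 is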
diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py
index 4f8c6fcc57186..a9a7a44f54dee 100644
--- a/pandas/tests/scalar/timestamp/test_timestamp.py
+++ b/pandas/tests/scalar/timestamp/test_timestamp.py
@@ -6,7 +6,6 @@
     timedelta,
 )
 import locale
-import pickle
 import unicodedata
 
 from dateutil.tz import tzutc
@@ -36,64 +35,31 @@
 import pandas._testing as tm
 
 from pandas.tseries import offsets
+from pandas.tseries.frequencies import to_offset
 
 
 class TestTimestampProperties:
-    def test_freq_deprecation(self):
-        # GH#41586
-        msg = "The 'freq' argument in Timestamp is deprecated"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            # warning issued at construction
-            ts = Timestamp("2021-06-01", freq="D")
-            ts2 = Timestamp("2021-06-01", freq="B")
-
-        msg = "Timestamp.freq is deprecated"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            # warning issued at attribute lookup
-            ts.freq
-
-        for per in ["month", "quarter", "year"]:
-            for side in ["start", "end"]:
-                attr = f"is_{per}_{side}"
-
-                with tm.assert_produces_warning(FutureWarning, match=msg):
-                    getattr(ts2, attr)
-
-                # is_(month|quarter|year)_(start|end) does _not_ issue a warning
-                # with freq="D" bc the result will be unaffected by the deprecation
-                with tm.assert_produces_warning(None):
-                    getattr(ts, attr)
-
-    @pytest.mark.filterwarnings("ignore:The 'freq' argument:FutureWarning")
-    @pytest.mark.filterwarnings("ignore:Timestamp.freq is deprecated:FutureWarning")
     def test_properties_business(self):
-        ts = Timestamp("2017-10-01", freq="B")
-        control = Timestamp("2017-10-01")
+        freq = to_offset("B")
+
+        ts = Timestamp("2017-10-01")
         assert ts.dayofweek == 6
         assert ts.day_of_week == 6
-        assert not ts.is_month_start    # not a weekday
-        assert not ts.freq.is_month_start(ts)
-        assert ts.freq.is_month_start(ts + Timedelta(days=1))
-        assert not ts.is_quarter_start  # not a weekday
-        assert not ts.freq.is_quarter_start(ts)
-        assert ts.freq.is_quarter_start(ts + Timedelta(days=1))
-        # Control case: non-business is month/qtr start
-        assert control.is_month_start
-        assert control.is_quarter_start
-
-        ts = Timestamp("2017-09-30", freq="B")
-        control = Timestamp("2017-09-30")
+        assert ts.is_month_start  # calendar-based, no longer consults a freq
+        assert not freq.is_month_start(ts)
+        assert freq.is_month_start(ts + Timedelta(days=1))
+        assert not freq.is_quarter_start(ts)
+        assert freq.is_quarter_start(ts + Timedelta(days=1))
+
+        ts = Timestamp("2017-09-30")
         assert ts.dayofweek == 5
         assert ts.day_of_week == 5
-        assert not ts.is_month_end    # not a weekday
-        assert not ts.freq.is_month_end(ts)
-        assert ts.freq.is_month_end(ts - Timedelta(days=1))
-        assert not ts.is_quarter_end  # not a weekday
-        assert not ts.freq.is_quarter_end(ts)
-        assert ts.freq.is_quarter_end(ts - Timedelta(days=1))
-        # Control case: non-business is month/qtr start
-        assert control.is_month_end
-        assert control.is_quarter_end
+        assert ts.is_month_end
+        assert not freq.is_month_end(ts)
+        assert freq.is_month_end(ts - Timedelta(days=1))
+        assert ts.is_quarter_end
+        assert not freq.is_quarter_end(ts)
+        assert freq.is_quarter_end(ts - Timedelta(days=1))
 
     @pytest.mark.parametrize(
         "attr, expected",
@@ -476,26 +442,6 @@ def test_hash_timestamp_with_fold(self, timezone, year, month, day, hour):
         )
         assert hash(transition_1) == hash(transition_2)
 
-    def test_tz_conversion_freq(self, tz_naive_fixture):
-        # GH25241
-        with tm.assert_produces_warning(FutureWarning, match="freq"):
-            t1 = Timestamp("2019-01-01 10:00", freq="H")
-            assert t1.tz_localize(tz=tz_naive_fixture).freq == t1.freq
-        with tm.assert_produces_warning(FutureWarning, match="freq"):
-            t2 = Timestamp("2019-01-02 12:00", tz="UTC", freq="T")
-            assert t2.tz_convert(tz="UTC").freq == t2.freq
-
-    def test_pickle_freq_no_warning(self):
-        # GH#41949 we don't want a warning on unpickling
-        with tm.assert_produces_warning(FutureWarning, match="freq"):
-            ts = Timestamp("2019-01-01 10:00", freq="H")
-
-        out = pickle.dumps(ts)
-        with tm.assert_produces_warning(None):
-            res = pickle.loads(out)
-
-        assert res._freq == ts._freq
-
 
 class TestTimestampNsOperations:
     def test_nanosecond_string_parsing(self):
@@ -749,18 +695,13 @@ def test_start_end_fields(self, ts):
         assert not ts.is_month_end
         assert not ts.is_month_end
 
-        freq = offsets.BDay()
-        ts._set_freq(freq)
-        # 2016-01-01 is a Friday, so is year/quarter/month start with this freq
-        msg = "Timestamp.freq is deprecated"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            assert ts.is_year_start
-            assert ts.is_quarter_start
-            assert ts.is_month_start
-            assert not ts.is_year_end
-            assert not ts.is_month_end
-            assert not ts.is_month_end
+        assert ts.is_year_start
+        assert ts.is_quarter_start
+        assert ts.is_month_start
+        assert not ts.is_year_end
+        assert not ts.is_month_end
+        assert not ts.is_month_end
 
     def test_day_name(self, dt64, ts):
         alt = Timestamp(dt64)
diff --git a/pandas/tests/series/accessors/test_cat_accessor.py b/pandas/tests/series/accessors/test_cat_accessor.py
index 750e84b8cde08..9266afc89fa19 100644
--- a/pandas/tests/series/accessors/test_cat_accessor.py
+++ b/pandas/tests/series/accessors/test_cat_accessor.py
@@ -46,13 +46,6 @@ def test_cat_accessor(self):
 
         exp = Categorical(["a", "b", np.nan, "a"], categories=["b", "a"])
 
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            # issue #37643 inplace kwarg deprecated
-            return_value = ser.cat.set_categories(["b", "a"],
inplace=True) - - assert return_value is None - tm.assert_categorical_equal(ser.values, exp) - res = ser.cat.set_categories(["b", "a"]) tm.assert_categorical_equal(res.values, exp) @@ -78,17 +71,6 @@ def test_cat_accessor_no_new_attributes(self): with pytest.raises(AttributeError, match="You cannot add any new attribute"): cat.cat.xlabel = "a" - def test_cat_accessor_updates_on_inplace(self): - ser = Series(list("abc")).astype("category") - return_value = ser.drop(0, inplace=True) - assert return_value is None - - with tm.assert_produces_warning(FutureWarning): - return_value = ser.cat.remove_unused_categories(inplace=True) - - assert return_value is None - assert len(ser.cat.categories) == 2 - def test_categorical_delegations(self): # invalid accessor @@ -110,8 +92,7 @@ def test_categorical_delegations(self): ser = Series(Categorical(["a", "b", "c", "a"], ordered=True)) exp_categories = Index(["a", "b", "c"]) tm.assert_index_equal(ser.cat.categories, exp_categories) - with tm.assert_produces_warning(FutureWarning, match="Use rename_categories"): - ser.cat.categories = [1, 2, 3] + ser = ser.cat.rename_categories([1, 2, 3]) exp_categories = Index([1, 2, 3]) tm.assert_index_equal(ser.cat.categories, exp_categories) @@ -121,9 +102,8 @@ def test_categorical_delegations(self): assert ser.cat.ordered ser = ser.cat.as_unordered() assert not ser.cat.ordered - with tm.assert_produces_warning(FutureWarning, match="The `inplace`"): - return_value = ser.cat.as_ordered(inplace=True) - assert return_value is None + + ser = ser.cat.as_ordered() assert ser.cat.ordered # reorder @@ -229,9 +209,6 @@ def test_dt_accessor_api_for_categorical(self, idx): tm.assert_equal(res, exp) for attr in attr_names: - if attr in ["week", "weekofyear"]: - # GH#33595 Deprecate week and weekofyear - continue res = getattr(cat.dt, attr) exp = getattr(ser.dt, attr) @@ -245,34 +222,13 @@ def test_dt_accessor_api_for_categorical_invalid(self): invalid.dt assert not hasattr(invalid, "str") - def test_reorder_categories_updates_dtype(self): - # GH#43232 - ser = Series(["a", "b", "c"], dtype="category") - orig_dtype = ser.dtype - - # Need to construct this before calling reorder_categories inplace - expected = ser.cat.reorder_categories(["c", "b", "a"]) - - with tm.assert_produces_warning(FutureWarning, match="`inplace` parameter"): - ser.cat.reorder_categories(["c", "b", "a"], inplace=True) - - assert not orig_dtype.categories.equals(ser.dtype.categories) - assert not orig_dtype.categories.equals(expected.dtype.categories) - assert ser.dtype == expected.dtype - assert ser.dtype.categories.equals(expected.dtype.categories) - - tm.assert_series_equal(ser, expected) - def test_set_categories_setitem(self): # GH#43334 df = DataFrame({"Survived": [1, 0, 1], "Sex": [0, 1, 1]}, dtype="category") - # change the dtype in-place - with tm.assert_produces_warning(FutureWarning, match="Use rename_categories"): - df["Survived"].cat.categories = ["No", "Yes"] - with tm.assert_produces_warning(FutureWarning, match="Use rename_categories"): - df["Sex"].cat.categories = ["female", "male"] + df["Survived"] = df["Survived"].cat.rename_categories(["No", "Yes"]) + df["Sex"] = df["Sex"].cat.rename_categories(["female", "male"]) # values should not be coerced to NaN assert list(df["Sex"]) == ["female", "male", "male"] diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 47e59be907929..ccd79d5cc58f4 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ 
b/pandas/tests/series/accessors/test_dt_accessor.py @@ -107,8 +107,7 @@ def test_dt_namespace_accessor_datetime64(self, freq): for prop in ok_for_dt: # we test freq below - # we ignore week and weekofyear because they are deprecated - if prop not in ["freq", "week", "weekofyear"]: + if prop != "freq": self._compare(ser, prop) for prop in ok_for_dt_methods: @@ -146,8 +145,7 @@ def test_dt_namespace_accessor_datetime64tz(self): for prop in ok_for_dt: # we test freq below - # we ignore week and weekofyear because they are deprecated - if prop not in ["freq", "week", "weekofyear"]: + if prop != "freq": self._compare(ser, prop) for prop in ok_for_dt_methods: @@ -794,15 +792,6 @@ def test_to_period(self, input_vals): tm.assert_series_equal(result, expected) -def test_week_and_weekofyear_are_deprecated(): - # GH#33595 Deprecate week and weekofyear - series = pd.to_datetime(Series(["2020-01-01"])) - with tm.assert_produces_warning(FutureWarning): - series.dt.week - with tm.assert_produces_warning(FutureWarning): - series.dt.weekofyear - - def test_normalize_pre_epoch_dates(): # GH: 36294 ser = pd.to_datetime(Series(["1969-01-01 09:00:00", "2016-01-01 09:00:00"])) diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index b8291471225d7..a60456f14242d 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -426,10 +426,11 @@ def test_indexing(): expected.name = "A" df = DataFrame({"A": ts}) - with tm.assert_produces_warning(FutureWarning): - # GH#36179 string indexing on rows for DataFrame deprecated - result = df["2001"]["A"] - tm.assert_series_equal(expected, result) + + # GH#36179 pre-2.0 df["2001"] operated as slicing on rows. in 2.0 it behaves + # like any other key, so raises + with pytest.raises(KeyError, match="2001"): + df["2001"] # setting ts["2001"] = 1 @@ -438,10 +439,8 @@ def test_indexing(): df.loc["2001", "A"] = 1 - with tm.assert_produces_warning(FutureWarning): - # GH#36179 string indexing on rows for DataFrame deprecated - result = df["2001"]["A"] - tm.assert_series_equal(expected, result) + with pytest.raises(KeyError, match="2001"): + df["2001"] def test_getitem_str_month_with_datetimeindex(): @@ -479,6 +478,6 @@ def test_getitem_str_second_with_datetimeindex(): with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"): df["2012-01-02 18:01:02"] - msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central', freq='S'\)" + msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central'\)" with pytest.raises(KeyError, match=msg): df[df.index[2]] diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 993c056045ae0..faaa61e84a351 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -269,28 +269,22 @@ def test_getitem_partial_str_slice_high_reso_with_timedeltaindex(self): def test_getitem_slice_2d(self, datetime_series): # GH#30588 multi-dimensional indexing deprecated - - with tm.assert_produces_warning( - FutureWarning, match="Support for multi-dimensional indexing" - ): - # GH#30867 Don't want to support this long-term, but - # for now ensure that the warning from Index - # doesn't comes through via Series.__getitem__. 
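# Illustrative sketch, not part of the patch: per GH#36179 as enforced above
# (assumes pandas >= 2.0), DataFrame.__getitem__ treats a partial date string
# as a column key, while .loc keeps the row-slicing behavior.
import pandas as pd

df = pd.DataFrame({"A": range(3)}, index=pd.date_range("2001-01-01", periods=3))
assert len(df.loc["2001"]) == 3  # partial string slicing still works via .loc
try:
    df["2001"]  # now raises: "2001" is not a column label
except KeyError:
    pass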
- result = datetime_series[:, np.newaxis] - expected = datetime_series.values[:, np.newaxis] - tm.assert_almost_equal(result, expected) + with pytest.raises(ValueError, match="Multi-dimensional indexing"): + datetime_series[:, np.newaxis] # FutureWarning from NumPy. - @pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning") def test_getitem_median_slice_bug(self): index = date_range("20090415", "20090519", freq="2B") - s = Series(np.random.randn(13), index=index) + ser = Series(np.random.randn(13), index=index) indexer = [slice(6, 7, None)] - with tm.assert_produces_warning(FutureWarning): + msg = "Indexing with a single-item list" + with pytest.raises(ValueError, match=msg): # GH#31299 - result = s[indexer] - expected = s[indexer[0]] + ser[indexer] + # but we're OK with a single-element tuple + result = ser[(indexer[0],)] + expected = ser[indexer[0]] tm.assert_series_equal(result, expected) @pytest.mark.parametrize( @@ -555,14 +549,8 @@ def test_getitem_generator(string_series): ], ) def test_getitem_ndim_deprecated(series): - with tm.assert_produces_warning( - FutureWarning, - match="Support for multi-dimensional indexing", - ): - result = series[:, None] - - expected = np.asarray(series)[:, None] - tm.assert_numpy_array_equal(result, expected) + with pytest.raises(ValueError, match="Multi-dimensional indexing"): + series[:, None] def test_getitem_multilevel_scalar_slice_not_implemented( @@ -647,7 +635,7 @@ def test_getitem_with_integer_labels(): def test_getitem_missing(datetime_series): # missing d = datetime_series.index[0] - BDay() - msg = r"Timestamp\('1999-12-31 00:00:00', freq='B'\)" + msg = r"Timestamp\('1999-12-31 00:00:00'\)" with pytest.raises(KeyError, match=msg): datetime_series[d] @@ -703,14 +691,14 @@ def test_duplicated_index_getitem_positional_indexer(index_vals): class TestGetitemDeprecatedIndexers: @pytest.mark.parametrize("key", [{1}, {1: 1}]) def test_getitem_dict_and_set_deprecated(self, key): - # GH#42825 + # GH#42825 enforced in 2.0 ser = Series([1, 2, 3]) - with tm.assert_produces_warning(FutureWarning): + with pytest.raises(TypeError, match="as an indexer is not supported"): ser[key] @pytest.mark.parametrize("key", [{1}, {1: 1}]) - def test_setitem_dict_and_set_deprecated(self, key): - # GH#42825 + def test_setitem_dict_and_set_disallowed(self, key): + # GH#42825 enforced in 2.0 ser = Series([1, 2, 3]) - with tm.assert_produces_warning(FutureWarning): + with pytest.raises(TypeError, match="as an indexer is not supported"): ser[key] = 1 diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index e3df9671e6c64..c0e4db9f2c295 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -84,7 +84,6 @@ def test_getitem_setitem_ellipsis(): assert (result == 5).all() -@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") @pytest.mark.parametrize( "result_1, duplicate_item, expected_1", [ @@ -102,8 +101,8 @@ def test_getitem_setitem_ellipsis(): ) def test_getitem_with_duplicates_indices(result_1, duplicate_item, expected_1): # GH 17610 - result = result_1.append(duplicate_item) - expected = expected_1.append(duplicate_item) + result = result_1._append(duplicate_item) + expected = expected_1._append(duplicate_item) tm.assert_series_equal(result[1], expected) assert result[2] == result_1[2] @@ -184,8 +183,6 @@ def test_setslice(datetime_series): assert sl.index.is_unique is True -# FutureWarning from NumPy about [slice(None, 
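# Illustrative sketch, not part of the patch: GH#31299 is enforced (assumes
# pandas >= 2.0); a single-item list containing a slice raises, while the
# one-element tuple spelling is still accepted.
import pandas as pd

ser = pd.Series([1, 2, 3])
try:
    ser[[slice(None, 2)]]  # list-of-slice indexer now raises
except ValueError:
    pass
assert list(ser[(slice(None, 2),)]) == [1, 2]  # tuple spelling is fine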
5). -@pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning") def test_basic_getitem_setitem_corner(datetime_series): # invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2] msg = "key of type tuple not found and not a MultiIndex" @@ -194,10 +191,14 @@ def test_basic_getitem_setitem_corner(datetime_series): with pytest.raises(KeyError, match=msg): datetime_series[:, 2] = 2 - # weird lists. [slice(0, 5)] will work but not two slices - with tm.assert_produces_warning(FutureWarning): + # weird lists. [slice(0, 5)] raises but not two slices + msg = "Indexing with a single-item list" + with pytest.raises(ValueError, match=msg): # GH#31299 - result = datetime_series[[slice(None, 5)]] + datetime_series[[slice(None, 5)]] + + # but we're OK with a single-element tuple + result = datetime_series[(slice(None, 5),)] expected = datetime_series[:5] tm.assert_series_equal(result, expected) @@ -377,28 +378,28 @@ def test_getitem_bool_int_key(): class TestDeprecatedIndexers: @pytest.mark.parametrize("key", [{1}, {1: 1}]) def test_getitem_dict_and_set_deprecated(self, key): - # GH#42825 + # GH#42825 enforced in 2.0 ser = Series([1, 2]) - with tm.assert_produces_warning(FutureWarning): + with pytest.raises(TypeError, match="as an indexer is not supported"): ser.loc[key] @pytest.mark.parametrize("key", [{1}, {1: 1}, ({1}, 2), ({1: 1}, 2)]) def test_getitem_dict_and_set_deprecated_multiindex(self, key): - # GH#42825 + # GH#42825 enforced in 2.0 ser = Series([1, 2], index=MultiIndex.from_tuples([(1, 2), (3, 4)])) - with tm.assert_produces_warning(FutureWarning): + with pytest.raises(TypeError, match="as an indexer is not supported"): ser.loc[key] @pytest.mark.parametrize("key", [{1}, {1: 1}]) - def test_setitem_dict_and_set_deprecated(self, key): - # GH#42825 + def test_setitem_dict_and_set_disallowed(self, key): + # GH#42825 enforced in 2.0 ser = Series([1, 2]) - with tm.assert_produces_warning(FutureWarning): + with pytest.raises(TypeError, match="as an indexer is not supported"): ser.loc[key] = 1 @pytest.mark.parametrize("key", [{1}, {1: 1}, ({1}, 2), ({1: 1}, 2)]) - def test_setitem_dict_and_set_deprecated_multiindex(self, key): - # GH#42825 + def test_setitem_dict_and_set_disallowed_multiindex(self, key): + # GH#42825 enforced in 2.0 ser = Series([1, 2], index=MultiIndex.from_tuples([(1, 2), (3, 4)])) - with tm.assert_produces_warning(FutureWarning): + with pytest.raises(TypeError, match="as an indexer is not supported"): ser.loc[key] = 1 diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index e07da3fcdb53c..74d05b7e43b2f 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -1035,28 +1035,18 @@ def key(self): return 0 @pytest.fixture - def expected(self): + def expected(self, obj, val): + # pre-2.0 this would cast to object, in 2.0 we cast the val to + # the target tz expected = Series( [ - Timestamp("2000-01-01 00:00:00-05:00", tz="US/Eastern"), + val.tz_convert("US/Central"), Timestamp("2000-01-02 00:00:00-06:00", tz="US/Central"), ], - dtype=object, + dtype=obj.dtype, ) return expected - @pytest.fixture(autouse=True) - def assert_warns(self, request): - # check that we issue a FutureWarning about timezone-matching - if request.function.__name__ == "test_slice_key": - key = request.getfixturevalue("key") - if not isinstance(key, slice): - # The test is a no-op, so no warning will be issued - yield - return - with tm.assert_produces_warning(FutureWarning, match="mismatched 
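# Illustrative sketch, not part of the patch: as the new `expected` fixture
# above shows (assumes pandas >= 2.0), setitem with a mismatched-timezone
# value converts the value instead of casting the Series to object.
import pandas as pd

ser = pd.Series(pd.date_range("2000-01-01", periods=2, tz="US/Central"))
val = pd.Timestamp("2000-01-01 00:00:00", tz="US/Eastern")
ser[0] = val
assert ser.dtype == "datetime64[ns, US/Central]"  # dtype preserved
assert ser[0] == val  # same instant, expressed in US/Central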
timezone"): - yield - @pytest.mark.parametrize( "obj,expected", @@ -1341,7 +1331,8 @@ def obj(self): "val,exp_dtype", [ (Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"), - (Timestamp("2012-01-01", tz="US/Pacific"), object), + # pre-2.0, a mis-matched tz would end up casting to object + (Timestamp("2012-01-01", tz="US/Pacific"), "datetime64[ns, US/Eastern]"), (Timestamp("2012-01-01"), object), (1, object), ], @@ -1353,24 +1344,6 @@ def obj(self): tz = "US/Eastern" return Series(date_range("2011-01-01", freq="D", periods=4, tz=tz)) - @pytest.fixture(autouse=True) - def assert_warns(self, request): - # check that we issue a FutureWarning about timezone-matching - if request.function.__name__ == "test_slice_key": - key = request.getfixturevalue("key") - if not isinstance(key, slice): - # The test is a no-op, so no warning will be issued - yield - return - - exp_dtype = request.getfixturevalue("exp_dtype") - val = request.getfixturevalue("val") - if exp_dtype == object and isinstance(val, Timestamp) and val.tz is not None: - with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): - yield - else: - yield - @pytest.mark.parametrize( "val,exp_dtype", diff --git a/pandas/tests/series/indexing/test_xs.py b/pandas/tests/series/indexing/test_xs.py index 8f3414f673627..aaccad0f2bd70 100644 --- a/pandas/tests/series/indexing/test_xs.py +++ b/pandas/tests/series/indexing/test_xs.py @@ -1,4 +1,5 @@ import numpy as np +import pytest from pandas import ( MultiIndex, @@ -74,8 +75,8 @@ def test_xs_key_as_list(self): # GH#41760 mi = MultiIndex.from_tuples([("a", "x")], names=["level1", "level2"]) ser = Series([1], index=mi) - with tm.assert_produces_warning(FutureWarning): + with pytest.raises(TypeError, match="list keys are not supported"): ser.xs(["a", "x"], axis=0, drop_level=False) - with tm.assert_produces_warning(FutureWarning): + with pytest.raises(TypeError, match="list keys are not supported"): ser.xs(["a"], axis=0, drop_level=False) diff --git a/pandas/tests/series/methods/test_append.py b/pandas/tests/series/methods/test_append.py deleted file mode 100644 index 6f8852ade6408..0000000000000 --- a/pandas/tests/series/methods/test_append.py +++ /dev/null @@ -1,271 +0,0 @@ -import numpy as np -import pytest - -import pandas as pd -from pandas import ( - DataFrame, - DatetimeIndex, - Index, - Series, - Timestamp, - date_range, -) -import pandas._testing as tm - - -class TestSeriesAppend: - def test_append_preserve_name(self, datetime_series): - result = datetime_series[:5]._append(datetime_series[5:]) - assert result.name == datetime_series.name - - def test_append(self, datetime_series, string_series, object_series): - appended_series = string_series._append(object_series) - for idx, value in appended_series.items(): - if idx in string_series.index: - assert value == string_series[idx] - elif idx in object_series.index: - assert value == object_series[idx] - else: - raise AssertionError("orphaned index!") - - msg = "Indexes have overlapping values:" - with pytest.raises(ValueError, match=msg): - datetime_series._append(datetime_series, verify_integrity=True) - - def test_append_many(self, datetime_series): - pieces = [datetime_series[:5], datetime_series[5:10], datetime_series[10:]] - - result = pieces[0]._append(pieces[1:]) - tm.assert_series_equal(result, datetime_series) - - def test_append_duplicates(self): - # GH 13677 - s1 = Series([1, 2, 3]) - s2 = Series([4, 5, 6]) - exp = Series([1, 2, 3, 4, 5, 6], index=[0, 1, 2, 0, 1, 2]) - 
tm.assert_series_equal(s1._append(s2), exp) - tm.assert_series_equal(pd.concat([s1, s2]), exp) - - # the result must have RangeIndex - exp = Series([1, 2, 3, 4, 5, 6]) - tm.assert_series_equal( - s1._append(s2, ignore_index=True), exp, check_index_type=True - ) - tm.assert_series_equal( - pd.concat([s1, s2], ignore_index=True), exp, check_index_type=True - ) - - msg = "Indexes have overlapping values:" - with pytest.raises(ValueError, match=msg): - s1._append(s2, verify_integrity=True) - with pytest.raises(ValueError, match=msg): - pd.concat([s1, s2], verify_integrity=True) - - def test_append_tuples(self): - # GH 28410 - s = Series([1, 2, 3]) - list_input = [s, s] - tuple_input = (s, s) - - expected = s._append(list_input) - result = s._append(tuple_input) - - tm.assert_series_equal(expected, result) - - def test_append_dataframe_raises(self): - # GH 31413 - df = DataFrame({"A": [1, 2], "B": [3, 4]}) - - msg = "to_append should be a Series or list/tuple of Series, got DataFrame" - with pytest.raises(TypeError, match=msg): - df.A._append(df) - with pytest.raises(TypeError, match=msg): - df.A._append([df]) - - def test_append_raises_future_warning(self): - # GH#35407 - with tm.assert_produces_warning(FutureWarning): - Series([1, 2]).append(Series([3, 4])) - - -class TestSeriesAppendWithDatetimeIndex: - def test_append(self): - rng = date_range("5/8/2012 1:45", periods=10, freq="5T") - ts = Series(np.random.randn(len(rng)), rng) - df = DataFrame(np.random.randn(len(rng), 4), index=rng) - - result = ts._append(ts) - result_df = df._append(df) - ex_index = DatetimeIndex(np.tile(rng.values, 2)) - tm.assert_index_equal(result.index, ex_index) - tm.assert_index_equal(result_df.index, ex_index) - - appended = rng.append(rng) - tm.assert_index_equal(appended, ex_index) - - appended = rng.append([rng, rng]) - ex_index = DatetimeIndex(np.tile(rng.values, 3)) - tm.assert_index_equal(appended, ex_index) - - # different index names - rng1 = rng.copy() - rng2 = rng.copy() - rng1.name = "foo" - rng2.name = "bar" - - assert rng1.append(rng1).name == "foo" - assert rng1.append(rng2).name is None - - def test_append_tz(self): - # see gh-2938 - rng = date_range("5/8/2012 1:45", periods=10, freq="5T", tz="US/Eastern") - rng2 = date_range("5/8/2012 2:35", periods=10, freq="5T", tz="US/Eastern") - rng3 = date_range("5/8/2012 1:45", periods=20, freq="5T", tz="US/Eastern") - ts = Series(np.random.randn(len(rng)), rng) - df = DataFrame(np.random.randn(len(rng), 4), index=rng) - ts2 = Series(np.random.randn(len(rng2)), rng2) - df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - - result = ts._append(ts2) - result_df = df._append(df2) - tm.assert_index_equal(result.index, rng3) - tm.assert_index_equal(result_df.index, rng3) - - appended = rng.append(rng2) - tm.assert_index_equal(appended, rng3) - - def test_append_tz_explicit_pytz(self): - # see gh-2938 - from pytz import timezone as timezone - - rng = date_range( - "5/8/2012 1:45", periods=10, freq="5T", tz=timezone("US/Eastern") - ) - rng2 = date_range( - "5/8/2012 2:35", periods=10, freq="5T", tz=timezone("US/Eastern") - ) - rng3 = date_range( - "5/8/2012 1:45", periods=20, freq="5T", tz=timezone("US/Eastern") - ) - ts = Series(np.random.randn(len(rng)), rng) - df = DataFrame(np.random.randn(len(rng), 4), index=rng) - ts2 = Series(np.random.randn(len(rng2)), rng2) - df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - - result = ts._append(ts2) - result_df = df._append(df2) - tm.assert_index_equal(result.index, rng3) - 
tm.assert_index_equal(result_df.index, rng3) - - appended = rng.append(rng2) - tm.assert_index_equal(appended, rng3) - - def test_append_tz_dateutil(self): - # see gh-2938 - rng = date_range( - "5/8/2012 1:45", periods=10, freq="5T", tz="dateutil/US/Eastern" - ) - rng2 = date_range( - "5/8/2012 2:35", periods=10, freq="5T", tz="dateutil/US/Eastern" - ) - rng3 = date_range( - "5/8/2012 1:45", periods=20, freq="5T", tz="dateutil/US/Eastern" - ) - ts = Series(np.random.randn(len(rng)), rng) - df = DataFrame(np.random.randn(len(rng), 4), index=rng) - ts2 = Series(np.random.randn(len(rng2)), rng2) - df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - - result = ts._append(ts2) - result_df = df._append(df2) - tm.assert_index_equal(result.index, rng3) - tm.assert_index_equal(result_df.index, rng3) - - appended = rng.append(rng2) - tm.assert_index_equal(appended, rng3) - - def test_series_append_aware(self): - rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern") - rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern") - ser1 = Series([1], index=rng1) - ser2 = Series([2], index=rng2) - ts_result = ser1._append(ser2) - - exp_index = DatetimeIndex( - ["2011-01-01 01:00", "2011-01-01 02:00"], tz="US/Eastern", freq="H" - ) - exp = Series([1, 2], index=exp_index) - tm.assert_series_equal(ts_result, exp) - assert ts_result.index.tz == rng1.tz - - rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="UTC") - rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="UTC") - ser1 = Series([1], index=rng1) - ser2 = Series([2], index=rng2) - ts_result = ser1._append(ser2) - - exp_index = DatetimeIndex( - ["2011-01-01 01:00", "2011-01-01 02:00"], tz="UTC", freq="H" - ) - exp = Series([1, 2], index=exp_index) - tm.assert_series_equal(ts_result, exp) - utc = rng1.tz - assert utc == ts_result.index.tz - - # GH#7795 - # different tz coerces to object dtype, not UTC - rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern") - rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Central") - ser1 = Series([1], index=rng1) - ser2 = Series([2], index=rng2) - ts_result = ser1._append(ser2) - exp_index = Index( - [ - Timestamp("1/1/2011 01:00", tz="US/Eastern"), - Timestamp("1/1/2011 02:00", tz="US/Central"), - ] - ) - exp = Series([1, 2], index=exp_index) - tm.assert_series_equal(ts_result, exp) - - def test_series_append_aware_naive(self): - rng1 = date_range("1/1/2011 01:00", periods=1, freq="H") - rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern") - ser1 = Series(np.random.randn(len(rng1)), index=rng1) - ser2 = Series(np.random.randn(len(rng2)), index=rng2) - ts_result = ser1._append(ser2) - - expected = ser1.index.astype(object).append(ser2.index.astype(object)) - assert ts_result.index.equals(expected) - - # mixed - rng1 = date_range("1/1/2011 01:00", periods=1, freq="H") - rng2 = range(100) - ser1 = Series(np.random.randn(len(rng1)), index=rng1) - ser2 = Series(np.random.randn(len(rng2)), index=rng2) - ts_result = ser1._append(ser2) - - expected = ser1.index.astype(object).append(ser2.index) - assert ts_result.index.equals(expected) - - def test_series_append_dst(self): - rng1 = date_range("1/1/2016 01:00", periods=3, freq="H", tz="US/Eastern") - rng2 = date_range("8/1/2016 01:00", periods=3, freq="H", tz="US/Eastern") - ser1 = Series([1, 2, 3], index=rng1) - ser2 = Series([10, 11, 12], index=rng2) - ts_result = ser1._append(ser2) - - exp_index = DatetimeIndex( - [ - "2016-01-01 01:00", - "2016-01-01 
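# Illustrative sketch, not part of the patch: with Series.append removed
# (assumes pandas >= 2.0; the deleted tests above now exercise the private
# _append), user code migrates to pd.concat.
import pandas as pd

s1 = pd.Series([1, 2])
s2 = pd.Series([3, 4])
out = pd.concat([s1, s2], ignore_index=True)
assert list(out) == [1, 2, 3, 4]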
02:00", - "2016-01-01 03:00", - "2016-08-01 01:00", - "2016-08-01 02:00", - "2016-08-01 03:00", - ], - tz="US/Eastern", - ) - exp = Series([1, 2, 3, 10, 11, 12], index=exp_index) - tm.assert_series_equal(ts_result, exp) - assert ts_result.index.tz == rng1.tz diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 9b57f0f634a6c..768cc50857e50 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -30,18 +30,20 @@ class TestAstypeAPI: - def test_astype_unitless_dt64_deprecated(self): + def test_astype_unitless_dt64_raises(self): # GH#47844 ser = Series(["1970-01-01", "1970-01-01", "1970-01-01"], dtype="datetime64[ns]") + df = ser.to_frame() - msg = "Passing unit-less datetime64 dtype to .astype is deprecated and " - with tm.assert_produces_warning(FutureWarning, match=msg): - res = ser.astype(np.datetime64) - tm.assert_series_equal(ser, res) - - with tm.assert_produces_warning(FutureWarning, match=msg): - res = ser.astype("datetime64") - tm.assert_series_equal(ser, res) + msg = "Casting to unit-less dtype 'datetime64' is not supported" + with pytest.raises(TypeError, match=msg): + ser.astype(np.datetime64) + with pytest.raises(TypeError, match=msg): + df.astype(np.datetime64) + with pytest.raises(TypeError, match=msg): + ser.astype("datetime64") + with pytest.raises(TypeError, match=msg): + df.astype("datetime64") def test_arg_for_errors_in_astype(self): # see GH#14878 @@ -95,6 +97,20 @@ def test_astype_dict_like(self, dtype_class): class TestAstype: + def test_astype_mixed_object_to_dt64tz(self): + # pre-2.0 this raised ValueError bc of tz mismatch + # xref GH#32581 + ts = Timestamp("2016-01-04 05:06:07", tz="US/Pacific") + ts2 = ts.tz_convert("Asia/Tokyo") + + ser = Series([ts, ts2], dtype=object) + res = ser.astype("datetime64[ns, Europe/Brussels]") + expected = Series( + [ts.tz_convert("Europe/Brussels"), ts2.tz_convert("Europe/Brussels")], + dtype="datetime64[ns, Europe/Brussels]", + ) + tm.assert_series_equal(res, expected) + @pytest.mark.parametrize("dtype", np.typecodes["All"]) def test_astype_empty_constructor_equality(self, dtype): # see GH#15524 @@ -106,8 +122,7 @@ def test_astype_empty_constructor_equality(self, dtype): "m", # Generic timestamps raise a ValueError. Already tested. 
): init_empty = Series([], dtype=dtype) - with tm.assert_produces_warning(FutureWarning): - as_type_empty = Series([]).astype(dtype) + as_type_empty = Series([]).astype(dtype) tm.assert_series_equal(init_empty, as_type_empty) @pytest.mark.parametrize("dtype", [str, np.str_]) @@ -394,10 +409,7 @@ def test_astype_nan_to_bool(self): def test_astype_ea_to_datetimetzdtype(self, dtype): # GH37553 ser = Series([4, 0, 9], dtype=dtype) - warn = FutureWarning if ser.dtype.kind == "f" else None - msg = "with a timezone-aware dtype and floating-dtype data" - with tm.assert_produces_warning(warn, match=msg): - result = ser.astype(DatetimeTZDtype(tz="US/Pacific")) + result = ser.astype(DatetimeTZDtype(tz="US/Pacific")) expected = Series( { @@ -407,21 +419,6 @@ def test_astype_ea_to_datetimetzdtype(self, dtype): } ) - if dtype in tm.FLOAT_EA_DTYPES: - expected = Series( - { - 0: Timestamp( - "1970-01-01 00:00:00.000000004-08:00", tz="US/Pacific" - ), - 1: Timestamp( - "1970-01-01 00:00:00.000000000-08:00", tz="US/Pacific" - ), - 2: Timestamp( - "1970-01-01 00:00:00.000000009-08:00", tz="US/Pacific" - ), - } - ) - tm.assert_series_equal(result, expected) def test_astype_retain_Attrs(self, any_numpy_dtype): diff --git a/pandas/tests/series/methods/test_combine_first.py b/pandas/tests/series/methods/test_combine_first.py index b838797b5f9b9..1d104b12ce7d2 100644 --- a/pandas/tests/series/methods/test_combine_first.py +++ b/pandas/tests/series/methods/test_combine_first.py @@ -79,9 +79,7 @@ def test_combine_first_dt64(self): s1 = Series([np.NaN, "2011"]) rs = s0.combine_first(s1) - msg = "containing strings is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - xp = Series([datetime(2010, 1, 1), "2011"]) + xp = Series([datetime(2010, 1, 1), "2011"], dtype="datetime64[ns]") tm.assert_series_equal(rs, xp) diff --git a/pandas/tests/series/methods/test_cov_corr.py b/pandas/tests/series/methods/test_cov_corr.py index 58a332ace244f..be3483c773143 100644 --- a/pandas/tests/series/methods/test_cov_corr.py +++ b/pandas/tests/series/methods/test_cov_corr.py @@ -58,7 +58,7 @@ def test_cov_ddof(self, test_ddof): class TestSeriesCorr: @td.skip_if_no_scipy def test_corr(self, datetime_series): - import scipy.stats as stats + from scipy import stats # full overlap tm.assert_almost_equal(datetime_series.corr(datetime_series), 1) @@ -88,7 +88,7 @@ def test_corr(self, datetime_series): @td.skip_if_no_scipy def test_corr_rank(self): - import scipy.stats as stats + from scipy import stats # kendall and spearman A = tm.makeTimeSeries() diff --git a/pandas/tests/series/methods/test_describe.py b/pandas/tests/series/methods/test_describe.py index a7cedd580b2d0..3d813268b57be 100644 --- a/pandas/tests/series/methods/test_describe.py +++ b/pandas/tests/series/methods/test_describe.py @@ -99,7 +99,7 @@ def test_describe_with_tz(self, tz_naive_fixture): start = Timestamp(2018, 1, 1) end = Timestamp(2018, 1, 5) s = Series(date_range(start, end, tz=tz), name=name) - result = s.describe(datetime_is_numeric=True) + result = s.describe() expected = Series( [ 5, @@ -115,32 +115,32 @@ def test_describe_with_tz(self, tz_naive_fixture): ) tm.assert_series_equal(result, expected) - def test_describe_with_tz_warns(self): + def test_describe_with_tz_numeric(self): name = tz = "CET" start = Timestamp(2018, 1, 1) end = Timestamp(2018, 1, 5) s = Series(date_range(start, end, tz=tz), name=name) - with tm.assert_produces_warning(FutureWarning): - result = s.describe() + result = s.describe() expected = Series( [ 5, - 5, - 
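# Illustrative sketch, not part of the patch: per GH#47844 as enforced above
# (assumes pandas >= 2.0), casting to a unit-less datetime64 raises; the unit
# has to be spelled out.
import numpy as np
import pandas as pd

ser = pd.Series(["1970-01-01"], dtype="datetime64[ns]")
try:
    ser.astype(np.datetime64)  # unit-less target raises TypeError
except TypeError:
    pass
assert ser.astype("datetime64[ns]").dtype == "datetime64[ns]"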
s.value_counts().index[0], - 1, - start.tz_localize(tz), - end.tz_localize(tz), + Timestamp("2018-01-03 00:00:00", tz=tz), + Timestamp("2018-01-01 00:00:00", tz=tz), + Timestamp("2018-01-02 00:00:00", tz=tz), + Timestamp("2018-01-03 00:00:00", tz=tz), + Timestamp("2018-01-04 00:00:00", tz=tz), + Timestamp("2018-01-05 00:00:00", tz=tz), ], name=name, - index=["count", "unique", "top", "freq", "first", "last"], + index=["count", "mean", "min", "25%", "50%", "75%", "max"], ) tm.assert_series_equal(result, expected) def test_datetime_is_numeric_includes_datetime(self): s = Series(date_range("2012", periods=3)) - result = s.describe(datetime_is_numeric=True) + result = s.describe() expected = Series( [ 3, diff --git a/pandas/tests/series/methods/test_drop.py b/pandas/tests/series/methods/test_drop.py index c960c281b2b95..dc4a11fd881fb 100644 --- a/pandas/tests/series/methods/test_drop.py +++ b/pandas/tests/series/methods/test_drop.py @@ -90,19 +90,6 @@ def test_drop_non_empty_list(data, index, drop_labels): ser.drop(drop_labels) -def test_drop_pos_args_deprecation(): - # https://github.com/pandas-dev/pandas/issues/41485 - ser = Series([1, 2, 3]) - msg = ( - r"In a future version of pandas all arguments of Series\.drop " - r"except for the argument 'labels' will be keyword-only" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = ser.drop(1, 0) - expected = Series([1, 3], index=[0, 2]) - tm.assert_series_equal(result, expected) - - def test_drop_index_ea_dtype(any_numeric_ea_dtype): # GH#45860 df = Series(100, index=Index([1, 2, 2], dtype=any_numeric_ea_dtype)) diff --git a/pandas/tests/series/methods/test_drop_duplicates.py b/pandas/tests/series/methods/test_drop_duplicates.py index c5cffa0c9fb0f..698430095b453 100644 --- a/pandas/tests/series/methods/test_drop_duplicates.py +++ b/pandas/tests/series/methods/test_drop_duplicates.py @@ -242,16 +242,3 @@ def test_drop_duplicates_categorical_bool_na(self, nulls_fixture): index=[0, 1, 4], ) tm.assert_series_equal(result, expected) - - -def test_drop_duplicates_pos_args_deprecation(): - # GH#41485 - s = Series(["a", "b", "c", "b"]) - msg = ( - "In a future version of pandas all arguments of " - "Series.drop_duplicates will be keyword-only" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = s.drop_duplicates("last") - expected = Series(["a", "c", "b"], index=[0, 2, 3]) - tm.assert_series_equal(expected, result) diff --git a/pandas/tests/series/methods/test_dropna.py b/pandas/tests/series/methods/test_dropna.py index 0dab9271bfee5..5bff7306fac33 100644 --- a/pandas/tests/series/methods/test_dropna.py +++ b/pandas/tests/series/methods/test_dropna.py @@ -101,15 +101,3 @@ def test_datetime64_tz_dropna(self): ) assert result.dtype == "datetime64[ns, Asia/Tokyo]" tm.assert_series_equal(result, expected) - - def test_dropna_pos_args_deprecation(self): - # https://github.com/pandas-dev/pandas/issues/41485 - ser = Series([1, 2, 3]) - msg = ( - r"In a future version of pandas all arguments of Series\.dropna " - r"will be keyword-only" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = ser.dropna(0) - expected = Series([1, 2, 3]) - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index 26416c7a2b483..47703b294c2b1 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -151,17 +151,10 @@ def test_fillna_consistency(self): ) 
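# Illustrative sketch, not part of the patch: describe() now treats datetime
# data numerically by default (assumes pandas >= 2.0), so the
# datetime_is_numeric keyword is gone and the result has numeric-style rows.
import pandas as pd

ser = pd.Series(pd.date_range("2018-01-01", periods=5))
desc = ser.describe()
assert list(desc.index) == ["count", "mean", "min", "25%", "50%", "75%", "max"]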
         tm.assert_series_equal(result, expected)
 
-        # where (we ignore the errors=)
-        with tm.assert_produces_warning(FutureWarning, match="the 'errors' keyword"):
-            result = ser.where(
-                [True, False], Timestamp("20130101", tz="US/Eastern"), errors="ignore"
-            )
+        result = ser.where([True, False], Timestamp("20130101", tz="US/Eastern"))
         tm.assert_series_equal(result, expected)
 
-        with tm.assert_produces_warning(FutureWarning, match="the 'errors' keyword"):
-            result = ser.where(
-                [True, False], Timestamp("20130101", tz="US/Eastern"), errors="ignore"
-            )
+        result = ser.where([True, False], Timestamp("20130101", tz="US/Eastern"))
         tm.assert_series_equal(result, expected)
 
         # with a non-datetime
@@ -250,13 +243,12 @@ def test_timedelta_fillna(self, frame_or_series):
         expected = frame_or_series(expected)
         tm.assert_equal(result, expected)
 
-        # interpreted as seconds, no longer supported
-        msg = "value should be a 'Timedelta', 'NaT', or array of those. Got 'int'"
-        wmsg = "In a future version, this will cast to a common dtype"
-        with pytest.raises(TypeError, match=msg):
-            with tm.assert_produces_warning(FutureWarning, match=wmsg):
-                # GH#45746
-                obj.fillna(1)
+        # GH#45746 pre-1.? ints were interpreted as seconds. Then that was
+        # deprecated and changed to raise. In 2.0 it casts to the common
+        # dtype, consistent with every other dtype's behavior.
+        res = obj.fillna(1)
+        expected = obj.astype(object).fillna(1)
+        tm.assert_equal(res, expected)
 
         result = obj.fillna(Timedelta(seconds=1))
         expected = Series(
@@ -365,10 +357,7 @@ def test_datetime64_fillna(self):
 
     def test_datetime64_fillna_backfill(self):
         # GH#6587
         # make sure that we are treating as integer when filling
-        msg = "containing strings is deprecated"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            # this also tests inference of a datetime-like with NaT's
-            ser = Series([NaT, NaT, "2013-08-05 15:30:00.000001"])
+        ser = Series([NaT, NaT, "2013-08-05 15:30:00.000001"], dtype="M8[ns]")
 
         expected = Series(
             [
@@ -569,14 +558,15 @@ def test_datetime64_tz_fillna(self, tz):
         tm.assert_series_equal(expected, result)
         tm.assert_series_equal(isna(ser), null_loc)
 
-        with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"):
-            result = ser.fillna(Timestamp("20130101", tz="US/Pacific"))
+        # pre-2.0 fillna with mixed tzs would cast to object, in 2.0
+        # it retains dtype.
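# Illustrative sketch, not part of the patch: the retained-dtype fillna
# behavior described in the comment above, in isolation (assumes
# pandas >= 2.0).
import pandas as pd

ser = pd.Series(pd.to_datetime(["2011-01-01 10:00", None]).tz_localize("Asia/Tokyo"))
result = ser.fillna(pd.Timestamp("2013-01-01", tz="US/Pacific"))
assert result.dtype == ser.dtype  # no object cast in 2.0
assert result[1] == pd.Timestamp("2013-01-01", tz="US/Pacific")  # same instant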
+ result = ser.fillna(Timestamp("20130101", tz="US/Pacific")) expected = Series( [ Timestamp("2011-01-01 10:00", tz=tz), - Timestamp("2013-01-01", tz="US/Pacific"), + Timestamp("2013-01-01", tz="US/Pacific").tz_convert(tz), Timestamp("2011-01-03 10:00", tz=tz), - Timestamp("2013-01-01", tz="US/Pacific"), + Timestamp("2013-01-01", tz="US/Pacific").tz_convert(tz), ] ) tm.assert_series_equal(expected, result) @@ -827,30 +817,15 @@ def test_fillna_datetime64_with_timezone_tzinfo(self): result = ser.fillna(datetime(2020, 1, 2, tzinfo=timezone.utc)) tm.assert_series_equal(result, expected) - # but we dont (yet) consider distinct tzinfos for non-UTC tz equivalent + # pre-2.0 we cast to object with mixed tzs, in 2.0 we retain dtype ts = Timestamp("2000-01-01", tz="US/Pacific") ser2 = Series(ser._values.tz_convert("dateutil/US/Pacific")) assert ser2.dtype.kind == "M" - with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): - result = ser2.fillna(ts) - expected = Series([ser[0], ts, ser[2]], dtype=object) - # TODO(2.0): once deprecation is enforced - # expected = Series( - # [ser2[0], ts.tz_convert(ser2.dtype.tz), ser2[2]], - # dtype=ser2.dtype, - # ) - tm.assert_series_equal(result, expected) - - def test_fillna_pos_args_deprecation(self): - # https://github.com/pandas-dev/pandas/issues/41485 - srs = Series([1, 2, 3, np.nan], dtype=float) - msg = ( - r"In a future version of pandas all arguments of Series.fillna " - r"except for the argument 'value' will be keyword-only" + result = ser2.fillna(ts) + expected = Series( + [ser2[0], ts.tz_convert(ser2.dtype.tz), ser2[2]], + dtype=ser2.dtype, ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = srs.fillna(0, None, None) - expected = Series([1, 2, 3, 0], dtype=float) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index c46f427fd6f09..fc2f636199493 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -811,15 +811,3 @@ def test_interpolate_unsorted_index(self, ascending, expected_values): result = ts.sort_index(ascending=ascending).interpolate(method="index") expected = Series(data=expected_values, index=expected_values, dtype=float) tm.assert_series_equal(result, expected) - - def test_interpolate_pos_args_deprecation(self): - # https://github.com/pandas-dev/pandas/issues/41485 - ser = Series([1, 2, 3]) - msg = ( - r"In a future version of pandas all arguments of Series.interpolate except " - r"for the argument 'method' will be keyword-only" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = ser.interpolate("pad", 0) - expected = Series([1, 2, 3]) - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_is_unique.py b/pandas/tests/series/methods/test_is_unique.py index 960057cb3d646..db77f77467b42 100644 --- a/pandas/tests/series/methods/test_is_unique.py +++ b/pandas/tests/series/methods/test_is_unique.py @@ -2,7 +2,6 @@ import pytest from pandas import Series -from pandas.core.construction import create_series_with_explicit_dtype @pytest.mark.parametrize( @@ -19,7 +18,7 @@ ) def test_is_unique(data, expected): # GH#11946 / GH#25180 - ser = create_series_with_explicit_dtype(data, dtype_if_empty=object) + ser = Series(data) assert ser.is_unique is expected diff --git a/pandas/tests/series/methods/test_rank.py b/pandas/tests/series/methods/test_rank.py index 
3af06502a3066..3a66bf1adf25b 100644 --- a/pandas/tests/series/methods/test_rank.py +++ b/pandas/tests/series/methods/test_rank.py @@ -44,7 +44,7 @@ def test_rank(self, datetime_series): from scipy.stats import rankdata datetime_series[::2] = np.nan - datetime_series[:10][::3] = 4.0 + datetime_series[:10:3] = 4.0 ranks = datetime_series.rank() oranks = datetime_series.astype("O").rank() diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 77c9cf4013bd7..1f00665efe579 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -309,8 +309,7 @@ def test_replace_with_empty_dictlike(self): s = pd.Series(list("abcd")) tm.assert_series_equal(s, s.replace({})) - with tm.assert_produces_warning(FutureWarning): - empty_series = pd.Series([]) + empty_series = pd.Series([]) tm.assert_series_equal(s, s.replace(empty_series)) def test_replace_string_with_number(self): @@ -667,3 +666,11 @@ def test_replace_different_int_types(self, any_int_numpy_dtype): result = labs.replace(map_dict) expected = labs.replace({0: 0, 2: 1, 1: 2}) tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("val", [2, np.nan, 2.0]) + def test_replace_value_none_dtype_numeric(self, val): + # GH#48231 + ser = pd.Series([1, val]) + result = ser.replace(val, None) + expected = pd.Series([1, None], dtype=object) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_reset_index.py b/pandas/tests/series/methods/test_reset_index.py index e7340aaf376e5..9362b0b52a698 100644 --- a/pandas/tests/series/methods/test_reset_index.py +++ b/pandas/tests/series/methods/test_reset_index.py @@ -148,18 +148,6 @@ def test_reset_index_with_drop(self, series_with_multilevel_index): assert isinstance(deleveled, Series) assert deleveled.index.name == ser.index.name - def test_drop_pos_args_deprecation(self): - # https://github.com/pandas-dev/pandas/issues/41485 - ser = Series([1, 2, 3], index=Index([1, 2, 3], name="a")) - msg = ( - r"In a future version of pandas all arguments of Series\.reset_index " - r"except for the argument 'level' will be keyword-only" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = ser.reset_index("a", False) - expected = DataFrame({"a": [1, 2, 3], 0: [1, 2, 3]}) - tm.assert_frame_equal(result, expected) - def test_reset_index_inplace_and_drop_ignore_name(self): # GH#44575 ser = Series(range(2), name="old") diff --git a/pandas/tests/series/methods/test_sort_index.py b/pandas/tests/series/methods/test_sort_index.py index d7bd92c673e69..4df6f52e0fff4 100644 --- a/pandas/tests/series/methods/test_sort_index.py +++ b/pandas/tests/series/methods/test_sort_index.py @@ -320,15 +320,3 @@ def test_sort_values_key_type(self): result = s.sort_index(key=lambda x: x.month_name()) expected = s.iloc[[2, 1, 0]] tm.assert_series_equal(result, expected) - - def test_sort_index_pos_args_deprecation(self): - # https://github.com/pandas-dev/pandas/issues/41485 - ser = Series([1, 2, 3]) - msg = ( - r"In a future version of pandas all arguments of Series.sort_index " - r"will be keyword-only" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = ser.sort_index(0) - expected = Series([1, 2, 3]) - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 5a66597bdb314..4be0aa15523fb 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -171,16 +171,14 @@ 
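# Illustrative sketch, not part of the patch: GH#48231 as tested above
# (assumes pandas >= 2.0); replacing a value with None casts the result to
# object rather than substituting NaN.
import pandas as pd

ser = pd.Series([1, 2])
out = ser.replace(2, None)
assert out.dtype == object
assert out[1] is None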
def test_attrs(self): def test_inspect_getmembers(self): # GH38782 ser = Series(dtype=object) - # TODO(2.0): Change to None once is_monotonic deprecation - # is enforced - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(None, check_stacklevel=False): inspect.getmembers(ser) def test_unknown_attribute(self): # GH#9680 tdi = pd.timedelta_range(start=0, periods=10, freq="1s") ser = Series(np.random.normal(size=10), index=tdi) - assert "foo" not in ser.__dict__.keys() + assert "foo" not in ser.__dict__ msg = "'Series' object has no attribute 'foo'" with pytest.raises(AttributeError, match=msg): ser.foo diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index c8044a44b48ee..37711054f2285 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -30,9 +30,7 @@ from pandas.core.computation import expressions as expr -@pytest.fixture( - autouse=True, scope="module", params=[0, 1000000], ids=["numexpr", "python"] -) +@pytest.fixture(autouse=True, params=[0, 1000000], ids=["numexpr", "python"]) def switch_numexpr_min_elements(request): _MIN_ELEMENTS = expr._MIN_ELEMENTS expr._MIN_ELEMENTS = request.param diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 3faddfeca38bd..abb0ca5407505 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -7,7 +7,7 @@ from dateutil.tz import tzoffset import numpy as np -import numpy.ma as ma +from numpy import ma import pytest from pandas._libs import ( @@ -52,6 +52,30 @@ class TestSeriesConstructors: + def test_infer_with_date_and_datetime(self): + # GH#49341 pre-2.0 we inferred datetime-and-date to datetime64, which + # was inconsistent with Index behavior + ts = Timestamp(2016, 1, 1) + vals = [ts.to_pydatetime(), ts.date()] + + ser = Series(vals) + expected = Series(vals, dtype=object) + tm.assert_series_equal(ser, expected) + + idx = Index(vals) + expected = Index(vals, dtype=object) + tm.assert_index_equal(idx, expected) + + def test_unparseable_strings_with_dt64_dtype(self): + # pre-2.0 these would be silently ignored and come back with object dtype + vals = ["aa"] + msg = "Unknown string format: aa present at position 0" + with pytest.raises(ValueError, match=msg): + Series(vals, dtype="datetime64[ns]") + + with pytest.raises(ValueError, match=msg): + Series(np.array(vals, dtype=object), dtype="datetime64[ns]") + @pytest.mark.parametrize( "constructor,check_index_type", [ @@ -74,9 +98,8 @@ class TestSeriesConstructors: ) def test_empty_constructor(self, constructor, check_index_type): # TODO: share with frame test of the same name - with tm.assert_produces_warning(FutureWarning): - expected = Series() - result = constructor() + expected = Series() + result = constructor() assert len(result.index) == 0 tm.assert_series_equal(result, expected, check_index_type=check_index_type) @@ -119,8 +142,7 @@ def test_scalar_extension_dtype(self, ea_scalar_and_dtype): tm.assert_series_equal(ser, expected) def test_constructor(self, datetime_series): - with tm.assert_produces_warning(FutureWarning): - empty_series = Series() + empty_series = Series() assert datetime_series.index._is_all_dates # Pass in Series @@ -134,11 +156,10 @@ def test_constructor(self, datetime_series): # Mixed type Series mixed = Series(["hello", np.NaN], index=[0, 1]) assert mixed.dtype == np.object_ - assert mixed[1] is np.NaN + assert np.isnan(mixed[1]) assert 
not empty_series.index._is_all_dates - with tm.assert_produces_warning(FutureWarning): - assert not Series().index._is_all_dates + assert not Series().index._is_all_dates # exception raised is of type ValueError GH35744 with pytest.raises(ValueError, match="Data must be 1-dimensional"): @@ -163,9 +184,8 @@ def test_constructor_index_ndim_gt_1_raises(self): @pytest.mark.parametrize("input_class", [list, dict, OrderedDict]) def test_constructor_empty(self, input_class): - with tm.assert_produces_warning(FutureWarning): - empty = Series() - empty2 = Series(input_class()) + empty = Series() + empty2 = Series(input_class()) # these are Index() and RangeIndex() which don't compare type equal # but are just .equals @@ -183,9 +203,8 @@ def test_constructor_empty(self, input_class): if input_class is not list: # With index: - with tm.assert_produces_warning(FutureWarning): - empty = Series(index=range(10)) - empty2 = Series(input_class(), index=range(10)) + empty = Series(index=range(10)) + empty2 = Series(input_class(), index=range(10)) tm.assert_series_equal(empty, empty2) # With index and dtype float64: @@ -217,8 +236,7 @@ def test_constructor_dtype_only(self, dtype, index): assert len(result) == 0 def test_constructor_no_data_index_order(self): - with tm.assert_produces_warning(FutureWarning): - result = Series(index=["b", "a", "c"]) + result = Series(index=["b", "a", "c"]) assert result.index.tolist() == ["b", "a", "c"] def test_constructor_no_data_string_type(self): @@ -478,8 +496,7 @@ def test_categorical_sideeffects_free(self): cat = Categorical(["a", "b", "c", "a"]) s = Series(cat, copy=True) assert s.cat is not cat - with tm.assert_produces_warning(FutureWarning, match="Use rename_categories"): - s.cat.categories = [1, 2, 3] + s = s.cat.rename_categories([1, 2, 3]) exp_s = np.array([1, 2, 3, 1], dtype=np.int64) exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_) tm.assert_numpy_array_equal(s.__array__(), exp_s) @@ -496,16 +513,14 @@ def test_categorical_sideeffects_free(self): cat = Categorical(["a", "b", "c", "a"]) s = Series(cat) assert s.values is cat - with tm.assert_produces_warning(FutureWarning, match="Use rename_categories"): - s.cat.categories = [1, 2, 3] + s = s.cat.rename_categories([1, 2, 3]) + assert s.values is not cat exp_s = np.array([1, 2, 3, 1], dtype=np.int64) tm.assert_numpy_array_equal(s.__array__(), exp_s) - tm.assert_numpy_array_equal(cat.__array__(), exp_s) s[0] = 2 exp_s2 = np.array([2, 2, 3, 1], dtype=np.int64) tm.assert_numpy_array_equal(s.__array__(), exp_s2) - tm.assert_numpy_array_equal(cat.__array__(), exp_s2) def test_unordered_compare_equal(self): left = Series(["a", "b", "c"], dtype=CategoricalDtype(["a", "b"])) @@ -696,8 +711,7 @@ def test_constructor_limit_copies(self, index): assert s._mgr.blocks[0].values is not index def test_constructor_pass_none(self): - with tm.assert_produces_warning(FutureWarning): - s = Series(None, index=range(5)) + s = Series(None, index=range(5)) assert s.dtype == np.float64 s = Series(None, index=range(5), dtype=object) @@ -705,9 +719,8 @@ def test_constructor_pass_none(self): # GH 7431 # inference on the index - with tm.assert_produces_warning(FutureWarning): - s = Series(index=np.array([None])) - expected = Series(index=Index([None])) + s = Series(index=np.array([None])) + expected = Series(index=Index([None])) tm.assert_series_equal(s, expected) def test_constructor_pass_nan_nat(self): @@ -782,25 +795,16 @@ def test_constructor_floating_data_int_dtype(self, frame_or_series): # GH#40110 arr = np.random.randn(2) - 
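# Illustrative sketch, not part of the patch: Series() without data no
# longer warns (assumes pandas >= 2.0); the default dtype stays object.
import pandas as pd

ser = pd.Series()
assert ser.dtype == object and len(ser) == 0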
if frame_or_series is Series: - # Long-standing behavior has been to ignore the dtype on these; - # not clear if this is what we want long-term - expected = frame_or_series(arr) - - res = frame_or_series(arr, dtype="i8") - tm.assert_equal(res, expected) + # Long-standing behavior (for Series, new in 2.0 for DataFrame) + # has been to ignore the dtype on these; + # not clear if this is what we want long-term + expected = frame_or_series(arr) - res = frame_or_series(list(arr), dtype="i8") - tm.assert_equal(res, expected) + res = frame_or_series(arr, dtype="i8") + tm.assert_equal(res, expected) - else: - msg = "passing float-dtype values and an integer dtype" - with tm.assert_produces_warning(FutureWarning, match=msg): - # DataFrame will behave like Series - frame_or_series(arr, dtype="i8") - with tm.assert_produces_warning(FutureWarning, match=msg): - # DataFrame will behave like Series - frame_or_series(list(arr), dtype="i8") + res = frame_or_series(list(arr), dtype="i8") + tm.assert_equal(res, expected) # When we have NaNs, we silently ignore the integer dtype arr[0] = np.nan @@ -1018,24 +1022,20 @@ def test_constructor_dtype_datetime64_7(self): assert series1.dtype == object def test_constructor_dtype_datetime64_6(self): - # these will correctly infer a datetime - msg = "containing strings is deprecated" + # as of 2.0, these no longer infer datetime64 based on the strings, + # matching the Index behavior - with tm.assert_produces_warning(FutureWarning, match=msg): - ser = Series([None, NaT, "2013-08-05 15:30:00.000001"]) - assert ser.dtype == "datetime64[ns]" + ser = Series([None, NaT, "2013-08-05 15:30:00.000001"]) + assert ser.dtype == object - with tm.assert_produces_warning(FutureWarning, match=msg): - ser = Series([np.nan, NaT, "2013-08-05 15:30:00.000001"]) - assert ser.dtype == "datetime64[ns]" + ser = Series([np.nan, NaT, "2013-08-05 15:30:00.000001"]) + assert ser.dtype == object - with tm.assert_produces_warning(FutureWarning, match=msg): - ser = Series([NaT, None, "2013-08-05 15:30:00.000001"]) - assert ser.dtype == "datetime64[ns]" + ser = Series([NaT, None, "2013-08-05 15:30:00.000001"]) + assert ser.dtype == object - with tm.assert_produces_warning(FutureWarning, match=msg): - ser = Series([NaT, np.nan, "2013-08-05 15:30:00.000001"]) - assert ser.dtype == "datetime64[ns]" + ser = Series([NaT, np.nan, "2013-08-05 15:30:00.000001"]) + assert ser.dtype == object def test_constructor_dtype_datetime64_5(self): # tz-aware (UTC and other tz's) @@ -1241,14 +1241,14 @@ def test_construction_consistency(self): result = Series(ser.dt.tz_convert("UTC"), dtype=ser.dtype) tm.assert_series_equal(result, ser) - msg = "will interpret the data as wall-times" - with tm.assert_produces_warning(FutureWarning, match=msg): - # deprecate behavior inconsistent with DatetimeIndex GH#33401 - result = Series(ser.values, dtype=ser.dtype) - tm.assert_series_equal(result, ser) + # Pre-2.0 dt64 values were treated as utc, which was inconsistent + # with DatetimeIndex, which treats them as wall times, see GH#33401 + result = Series(ser.values, dtype=ser.dtype) + expected = Series(ser.values).dt.tz_localize(ser.dtype.tz) + tm.assert_series_equal(result, expected) with tm.assert_produces_warning(None): - # one suggested alternative to the deprecated usage + # one suggested alternative to the deprecated (changed in 2.0) usage middle = Series(ser.values).dt.tz_localize("UTC") result = middle.dt.tz_convert(ser.dtype.tz) tm.assert_series_equal(result, ser) @@ -1517,23 +1517,19 @@ def 
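# Illustrative sketch, not part of the patch: GH#33401 as updated above
# (assumes pandas >= 2.0); naive datetime64 values combined with a tz-aware
# dtype are now read as wall times, matching DatetimeIndex.
import pandas as pd

ser = pd.Series(pd.date_range("2020-01-01", periods=2, tz="US/Eastern"))
result = pd.Series(ser.values, dtype=ser.dtype)  # ser.values is tz-naive (UTC)
expected = pd.Series(ser.values).dt.tz_localize("US/Eastern")
assert result.equals(expected)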
test_constructor_dtype_timedelta64(self): td = Series([timedelta(days=i) for i in range(3)] + ["foo"]) assert td.dtype == "object" - # these will correctly infer a timedelta - msg = "containing strings is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - ser = Series([None, NaT, "1 Day"]) - assert ser.dtype == "timedelta64[ns]" + # as of 2.0, these no longer infer timedelta64 based on the strings, + # matching Index behavior + ser = Series([None, NaT, "1 Day"]) + assert ser.dtype == object - with tm.assert_produces_warning(FutureWarning, match=msg): - ser = Series([np.nan, NaT, "1 Day"]) - assert ser.dtype == "timedelta64[ns]" + ser = Series([np.nan, NaT, "1 Day"]) + assert ser.dtype == object - with tm.assert_produces_warning(FutureWarning, match=msg): - ser = Series([NaT, None, "1 Day"]) - assert ser.dtype == "timedelta64[ns]" + ser = Series([NaT, None, "1 Day"]) + assert ser.dtype == object - with tm.assert_produces_warning(FutureWarning, match=msg): - ser = Series([NaT, np.nan, "1 Day"]) - assert ser.dtype == "timedelta64[ns]" + ser = Series([NaT, np.nan, "1 Day"]) + assert ser.dtype == object # GH 16406 def test_constructor_mixed_tz(self): @@ -1983,8 +1979,6 @@ class TestSeriesConstructorIndexCoercion: def test_series_constructor_datetimelike_index_coercion(self): idx = tm.makeDateIndex(10000) ser = Series(np.random.randn(len(idx)), idx.astype(object)) - with tm.assert_produces_warning(FutureWarning): - assert ser.index.is_all_dates # as of 2.0, we no longer silently cast the object-dtype index # to DatetimeIndex GH#39307, GH#23598 assert not isinstance(ser.index, DatetimeIndex) diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py index fd6f4e0083b08..a5620de7de65b 100644 --- a/pandas/tests/series/test_subclass.py +++ b/pandas/tests/series/test_subclass.py @@ -35,8 +35,7 @@ def test_subclass_unstack(self): tm.assert_frame_equal(res, exp) def test_subclass_empty_repr(self): - with tm.assert_produces_warning(FutureWarning): - sub_series = tm.SubclassedSeries() + sub_series = tm.SubclassedSeries() assert "SubclassedSeries" in repr(sub_series) def test_asof(self): diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 924980b62a51b..6483ad37a2886 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -5,8 +5,6 @@ import numpy as np import pytest -from pandas.core.dtypes.common import is_dtype_equal - import pandas as pd import pandas._testing as tm from pandas.arrays import SparseArray @@ -87,9 +85,7 @@ def test_binary_ufunc_with_index(flip, sparse, ufunc, arrays_for_binary_ufunc): name = "name" # op(pd.Series, array) preserves the name. 
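# Illustrative sketch, not part of the patch: as of 2.0 (assumed here) the
# constructor no longer infers timedelta64 (or datetime64) from strings
# mixed with NaT; the dtype has to be requested explicitly.
import pandas as pd

ser = pd.Series([pd.NaT, None, "1 Day"])
assert ser.dtype == object  # no more inference from the string
ser2 = pd.Series([pd.NaT, None, "1 Day"], dtype="timedelta64[ns]")
assert ser2.dtype == "timedelta64[ns]"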
series = pd.Series(a1, name=name) - warn = None if not sparse else FutureWarning - with tm.assert_produces_warning(warn): - other = pd.Index(a2, name=name).astype("int64") + other = pd.Index(a2, name=name).astype("int64") array_args = (a1, a2) series_args = (series, other) # ufunc(series, array) @@ -277,14 +273,10 @@ def test_multiply(self, values_for_np_reduce, box_with_array, request): box = box_with_array values = values_for_np_reduce - warn = None - if is_dtype_equal(values.dtype, "Sparse[int]") and box is pd.Index: - warn = FutureWarning - msg = "passing a SparseArray to pd.Index" - with tm.assert_produces_warning(warn, match=msg): + with tm.assert_produces_warning(None): obj = box(values) - if isinstance(values, pd.core.arrays.SparseArray) and box is not pd.Index: + if isinstance(values, pd.core.arrays.SparseArray): mark = pytest.mark.xfail(reason="SparseArray has no 'prod'") request.node.add_marker(mark) @@ -316,11 +308,7 @@ def test_add(self, values_for_np_reduce, box_with_array): box = box_with_array values = values_for_np_reduce - warn = None - if is_dtype_equal(values.dtype, "Sparse[int]") and box is pd.Index: - warn = FutureWarning - msg = "passing a SparseArray to pd.Index" - with tm.assert_produces_warning(warn, match=msg): + with tm.assert_produces_warning(None): obj = box(values) if values.dtype.kind in "miuf": @@ -355,11 +343,7 @@ def test_max(self, values_for_np_reduce, box_with_array): # ATM Index casts to object, so we get python ints/floats same_type = False - warn = None - if is_dtype_equal(values.dtype, "Sparse[int]") and box is pd.Index: - warn = FutureWarning - msg = "passing a SparseArray to pd.Index" - with tm.assert_produces_warning(warn, match=msg): + with tm.assert_produces_warning(None): obj = box(values) result = np.maximum.reduce(obj) @@ -383,11 +367,7 @@ def test_min(self, values_for_np_reduce, box_with_array): # ATM Index casts to object, so we get python ints/floats same_type = False - warn = None - if is_dtype_equal(values.dtype, "Sparse[int]") and box is pd.Index: - warn = FutureWarning - msg = "passing a SparseArray to pd.Index" - with tm.assert_produces_warning(warn, match=msg): + with tm.assert_produces_warning(None): obj = box(values) result = np.minimum.reduce(obj) diff --git a/pandas/tests/strings/conftest.py b/pandas/tests/strings/conftest.py index 15cc5af97a2d6..1f87608a79f98 100644 --- a/pandas/tests/strings/conftest.py +++ b/pandas/tests/strings/conftest.py @@ -160,7 +160,7 @@ def any_allowed_skipna_inferred_dtype(request): Examples -------- - >>> import pandas._libs.lib as lib + >>> from pandas._libs import lib >>> >>> def test_something(any_allowed_skipna_inferred_dtype): ... 
inferred_dtype, values = any_allowed_skipna_inferred_dtype diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index 59b41e0ec944a..6f6acb7a996b2 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -423,7 +423,7 @@ def test_replace_callable_raises(any_string_dtype, repl): with tm.maybe_produces_warning( PerformanceWarning, any_string_dtype == "string[pyarrow]" ): - values.str.replace("a", repl) + values.str.replace("a", repl, regex=True) def test_replace_callable_named_groups(any_string_dtype): @@ -477,7 +477,7 @@ def test_replace_compiled_regex_unicode(any_string_dtype): with tm.maybe_produces_warning( PerformanceWarning, any_string_dtype == "string[pyarrow]" ): - result = ser.str.replace(pat, ", ") + result = ser.str.replace(pat, ", ", regex=True) tm.assert_series_equal(result, expected) @@ -490,13 +490,13 @@ def test_replace_compiled_regex_raises(any_string_dtype): msg = "case and flags cannot be set when pat is a compiled regex" with pytest.raises(ValueError, match=msg): - ser.str.replace(pat, "", flags=re.IGNORECASE) + ser.str.replace(pat, "", flags=re.IGNORECASE, regex=True) with pytest.raises(ValueError, match=msg): - ser.str.replace(pat, "", case=False) + ser.str.replace(pat, "", case=False, regex=True) with pytest.raises(ValueError, match=msg): - ser.str.replace(pat, "", case=True) + ser.str.replace(pat, "", case=True, regex=True) def test_replace_compiled_regex_callable(any_string_dtype): @@ -507,7 +507,7 @@ def test_replace_compiled_regex_callable(any_string_dtype): with tm.maybe_produces_warning( PerformanceWarning, any_string_dtype == "string[pyarrow]" ): - result = ser.str.replace(pat, repl, n=2) + result = ser.str.replace(pat, repl, n=2, regex=True) expected = Series(["foObaD__baRbaD", np.nan], dtype=any_string_dtype) tm.assert_series_equal(result, expected) @@ -617,48 +617,25 @@ def test_replace_not_case_sensitive_not_regex(any_string_dtype): tm.assert_series_equal(result, expected) -def test_replace_regex_default_warning(any_string_dtype): +def test_replace_regex(any_string_dtype): # https://github.com/pandas-dev/pandas/pull/24809 s = Series(["a", "b", "ac", np.nan, ""], dtype=any_string_dtype) - msg = ( - "The default value of regex will change from True to False in a " - "future version\\.$" - ) - - with tm.assert_produces_warning( - FutureWarning, - match=msg, - raise_on_extra_warnings=any_string_dtype != "string[pyarrow]", - ): - result = s.str.replace("^.$", "a") + result = s.str.replace("^.$", "a", regex=True) expected = Series(["a", "a", "ac", np.nan, ""], dtype=any_string_dtype) tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("regex", [True, False, None]) +@pytest.mark.parametrize("regex", [True, False]) def test_replace_regex_single_character(regex, any_string_dtype): - # https://github.com/pandas-dev/pandas/pull/24809 - - # The current behavior is to treat single character patterns as literal strings, - # even when ``regex`` is set to ``True``. - + # https://github.com/pandas-dev/pandas/pull/24809, enforced in 2.0 + # GH 24804 s = Series(["a.b", ".", "b", np.nan, ""], dtype=any_string_dtype) - if regex is None: - msg = re.escape( - "The default value of regex will change from True to False in a future " - "version. In addition, single character regular expressions will *not* " - "be treated as literal strings when regex=True." 
diff --git a/pandas/tests/strings/test_split_partition.py b/pandas/tests/strings/test_split_partition.py
index 7d73414a672c8..74458c13e8df7 100644
--- a/pandas/tests/strings/test_split_partition.py
+++ b/pandas/tests/strings/test_split_partition.py
@@ -130,23 +130,6 @@ def test_rsplit_max_number(any_string_dtype):
     tm.assert_series_equal(result, exp)


-@pytest.mark.parametrize("method", ["split", "rsplit"])
-def test_posargs_deprecation(method):
-    # GH 47423; Deprecate passing n as positional.
-    s = Series(["foo,bar,lorep"])
-
-    msg = (
-        f"In a future version of pandas all arguments of StringMethods.{method} "
-        "except for the argument 'pat' will be keyword-only"
-    )
-
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        result = getattr(s.str, method)(",", 3)
-
-    expected = Series([["foo", "bar", "lorep"]])
-    tm.assert_series_equal(result, expected)
-
-
 def test_split_blank_string(any_string_dtype):
     # expand blank split GH 20067
     values = Series([""], name="test", dtype=any_string_dtype)
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index a6b765117f616..c6aefd5bb73b9 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -230,21 +230,11 @@ def test_factorize_nan(self):
         key = np.array([1, 2, 1, np.nan], dtype="O")
         rizer = ht.ObjectFactorizer(len(key))
         for na_sentinel in (-1, 20):
-            ids = rizer.factorize(key, sort=True, na_sentinel=na_sentinel)
-            expected = np.array([0, 1, 0, na_sentinel], dtype="int32")
+            ids = rizer.factorize(key, na_sentinel=na_sentinel)
+            expected = np.array([0, 1, 0, na_sentinel], dtype=np.intp)
             assert len(set(key)) == len(set(expected))
             tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel)
-
-        # nan still maps to na_sentinel when sort=False
-        key = np.array([0, np.nan, 1], dtype="O")
-        na_sentinel = -1
-
-        # TODO(wesm): unused?
-        ids = rizer.factorize(key, sort=False, na_sentinel=na_sentinel)  # noqa
-
-        expected = np.array([2, -1, 0], dtype="int32")
-        assert len(set(key)) == len(set(expected))
-        tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel)
+            tm.assert_numpy_array_equal(ids, expected)

     @pytest.mark.parametrize(
         "data, expected_codes, expected_uniques",
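The factorize hunk above drops the removed sort/na_sentinel variations at the hashtable level; the public API keeps the same NaN contract. A quick illustration with pd.factorize (not part of the patch), where missing values map to the default sentinel -1 and are excluded from the uniques:

    import numpy as np
    import pandas as pd

    codes, uniques = pd.factorize(np.array([1, 2, 1, np.nan], dtype="O"))
    # codes   -> array([ 0,  1,  0, -1])
    # uniques -> array([1, 2], dtype=object)
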
diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py
index 30f9b2aad5f9a..e00fa9a245782 100644
--- a/pandas/tests/test_downstream.py
+++ b/pandas/tests/test_downstream.py
@@ -242,7 +242,6 @@ def test_geopandas():

 # Cython import warning
 @pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning")
-@pytest.mark.filterwarnings("ignore:RangeIndex.* is deprecated:DeprecationWarning")
 def test_pyarrow(df):
     pyarrow = import_module("pyarrow")
diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py
index e03194227f576..3980ec023960c 100644
--- a/pandas/tests/test_expressions.py
+++ b/pandas/tests/test_expressions.py
@@ -14,44 +14,88 @@
 )
 from pandas.core.computation import expressions as expr

-_frame = DataFrame(np.random.randn(10001, 4), columns=list("ABCD"), dtype="float64")
-_frame2 = DataFrame(np.random.randn(100, 4), columns=list("ABCD"), dtype="float64")
-_mixed = DataFrame(
-    {
-        "A": _frame["A"].copy(),
-        "B": _frame["B"].astype("float32"),
-        "C": _frame["C"].astype("int64"),
-        "D": _frame["D"].astype("int32"),
-    }
-)
-_mixed2 = DataFrame(
-    {
-        "A": _frame2["A"].copy(),
-        "B": _frame2["B"].astype("float32"),
-        "C": _frame2["C"].astype("int64"),
-        "D": _frame2["D"].astype("int32"),
-    }
-)
-_integer = DataFrame(
-    np.random.randint(1, 100, size=(10001, 4)), columns=list("ABCD"), dtype="int64"
-)
-_integer2 = DataFrame(
-    np.random.randint(1, 100, size=(101, 4)), columns=list("ABCD"), dtype="int64"
-)
-_array = _frame["A"].values.copy()
-_array2 = _frame2["A"].values.copy()
-_array_mixed = _mixed["D"].values.copy()
-_array_mixed2 = _mixed2["D"].values.copy()
+@pytest.fixture
+def _frame():
+    return DataFrame(np.random.randn(10001, 4), columns=list("ABCD"), dtype="float64")
+
+
+@pytest.fixture
+def _frame2():
+    return DataFrame(np.random.randn(100, 4), columns=list("ABCD"), dtype="float64")
+
+
+@pytest.fixture
+def _mixed(_frame):
+    return DataFrame(
+        {
+            "A": _frame["A"].copy(),
+            "B": _frame["B"].astype("float32"),
+            "C": _frame["C"].astype("int64"),
+            "D": _frame["D"].astype("int32"),
+        }
+    )
+
+
+@pytest.fixture
+def _mixed2(_frame2):
+    return DataFrame(
+        {
+            "A": _frame2["A"].copy(),
+            "B": _frame2["B"].astype("float32"),
+            "C": _frame2["C"].astype("int64"),
+            "D": _frame2["D"].astype("int32"),
+        }
+    )
+
+
+@pytest.fixture
+def _integer():
+    return DataFrame(
+        np.random.randint(1, 100, size=(10001, 4)), columns=list("ABCD"), dtype="int64"
+    )
+
+
+@pytest.fixture
+def _integer_randint(_integer):
+    # randint to get a case with zeros
+    return _integer * np.random.randint(0, 2, size=np.shape(_integer))
+
+
+@pytest.fixture
+def _integer2():
+    return DataFrame(
+        np.random.randint(1, 100, size=(101, 4)), columns=list("ABCD"), dtype="int64"
+    )
+
+
+@pytest.fixture
+def _array(_frame):
+    return _frame["A"].values.copy()
+
+
+@pytest.fixture
+def _array2(_frame2):
+    return _frame2["A"].values.copy()
+
+
+@pytest.fixture
+def _array_mixed(_mixed):
+    return _mixed["D"].values.copy()
+
+
+@pytest.fixture
+def _array_mixed2(_mixed2):
+    return _mixed2["D"].values.copy()


 @pytest.mark.skipif(not expr.USE_NUMEXPR, reason="not using numexpr")
 class TestExpressions:
-    def setup_method(self):
-        self._MIN_ELEMENTS = expr._MIN_ELEMENTS
-
-    def teardown_method(self):
-        expr._MIN_ELEMENTS = self._MIN_ELEMENTS
+    @pytest.fixture(autouse=True)
+    def save_min_elements(self):
+        min_elements = expr._MIN_ELEMENTS
+        yield
+        expr._MIN_ELEMENTS = min_elements

     @staticmethod
     def call_op(df, other, flex: bool, opname: str):
@@ -70,23 +114,23 @@ def call_op(df, other, flex: bool, opname: str):
         return result, expected

     @pytest.mark.parametrize(
-        "df",
+        "fixture",
         [
-            _integer,
-            _integer2,
-            # randint to get a case with zeros
-            _integer * np.random.randint(0, 2, size=np.shape(_integer)),
-            _frame,
-            _frame2,
-            _mixed,
-            _mixed2,
+            "_integer",
+            "_integer2",
+            "_integer_randint",
+            "_frame",
+            "_frame2",
+            "_mixed",
+            "_mixed2",
         ],
     )
     @pytest.mark.parametrize("flex", [True, False])
     @pytest.mark.parametrize(
         "arith", ["add", "sub", "mul", "mod", "truediv", "floordiv"]
     )
-    def test_run_arithmetic(self, df, flex, arith):
+    def test_run_arithmetic(self, request, fixture, flex, arith):
+        df = request.getfixturevalue(fixture)
         expr._MIN_ELEMENTS = 0
         result, expected = self.call_op(df, df, flex, arith)

@@ -101,25 +145,25 @@ def test_run_arithmetic(self, df, flex, arith):
         tm.assert_equal(expected, result)

     @pytest.mark.parametrize(
-        "df",
+        "fixture",
         [
-            _integer,
-            _integer2,
-            # randint to get a case with zeros
-            _integer * np.random.randint(0, 2, size=np.shape(_integer)),
-            _frame,
-            _frame2,
-            _mixed,
-            _mixed2,
+            "_integer",
+            "_integer2",
+            "_integer_randint",
+            "_frame",
+            "_frame2",
+            "_mixed",
+            "_mixed2",
         ],
     )
     @pytest.mark.parametrize("flex", [True, False])
-    def test_run_binary(self, df, flex, comparison_op):
+    def test_run_binary(self, request, fixture, flex, comparison_op):
         """
         tests solely that the result is the same whether or not numexpr is
         enabled.  Need to test whether the function does the correct thing
         elsewhere.
         """
+        df = request.getfixturevalue(fixture)
         arith = comparison_op.__name__
         with option_context("compute.use_numexpr", False):
             other = df.copy() + 1
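The refactor above works around a pytest constraint: fixtures cannot be placed directly in a pytest.mark.parametrize list, so the tests parametrize over fixture *names* and resolve them at runtime with request.getfixturevalue. A self-contained sketch of the pattern (the fixture and test names here are hypothetical, not from the patch):

    import numpy as np
    import pandas as pd
    import pandas._testing as tm
    import pytest

    @pytest.fixture
    def small_frame():
        return pd.DataFrame(np.ones((3, 4)), columns=list("ABCD"))

    @pytest.mark.parametrize("fixture", ["small_frame"])
    def test_doubling(request, fixture):
        df = request.getfixturevalue(fixture)  # look the fixture up by name
        tm.assert_frame_equal(df + df, df * 2)
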
""" + df = request.getfixturevalue(fixture) arith = comparison_op.__name__ with option_context("compute.use_numexpr", False): other = df.copy() + 1 @@ -160,9 +204,12 @@ def test_invalid(self): [("add", "+"), ("sub", "-"), ("mul", "*"), ("truediv", "/"), ("pow", "**")], ) @pytest.mark.parametrize( - "left,right", [(_array, _array2), (_array_mixed, _array_mixed2)] + "left_fix,right_fix", [("_array", "_array2"), ("_array_mixed", "_array_mixed2")] ) - def test_binary_ops(self, opname, op_str, left, right): + def test_binary_ops(self, request, opname, op_str, left_fix, right_fix): + left = request.getfixturevalue(left_fix) + right = request.getfixturevalue(right_fix) + def testit(): if opname == "pow": @@ -202,9 +249,12 @@ def testit(): ], ) @pytest.mark.parametrize( - "left,right", [(_array, _array2), (_array_mixed, _array_mixed2)] + "left_fix,right_fix", [("_array", "_array2"), ("_array_mixed", "_array_mixed2")] ) - def test_comparison_ops(self, opname, op_str, left, right): + def test_comparison_ops(self, request, opname, op_str, left_fix, right_fix): + left = request.getfixturevalue(left_fix) + right = request.getfixturevalue(right_fix) + def testit(): f12 = left + 1 f22 = right + 1 @@ -227,8 +277,10 @@ def testit(): testit() @pytest.mark.parametrize("cond", [True, False]) - @pytest.mark.parametrize("df", [_frame, _frame2, _mixed, _mixed2]) - def test_where(self, cond, df): + @pytest.mark.parametrize("fixture", ["_frame", "_frame2", "_mixed", "_mixed2"]) + def test_where(self, request, cond, fixture): + df = request.getfixturevalue(fixture) + def testit(): c = np.empty(df.shape, dtype=np.bool_) c.fill(cond) @@ -350,7 +402,7 @@ def test_bool_ops_column_name_dtype(self, test_input, expected): "arith", ("add", "sub", "mul", "mod", "truediv", "floordiv") ) @pytest.mark.parametrize("axis", (0, 1)) - def test_frame_series_axis(self, axis, arith): + def test_frame_series_axis(self, axis, arith, _frame): # GH#26736 Dataframe.floordiv(Series, axis=1) fails df = _frame diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 005ef6747da95..0e64181bd46a7 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -15,8 +15,8 @@ isna, ) import pandas._testing as tm +from pandas.core import nanops from pandas.core.arrays import DatetimeArray -import pandas.core.nanops as nanops use_bn = nanops._USE_BOTTLENECK diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index f524bc18793d8..c3b4159c2cbfc 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -626,20 +626,15 @@ def test_to_datetime_unparsable_ignore(self): def test_to_datetime_now(self): # See GH#18666 with tm.set_timezone("US/Eastern"): - msg = "The parsing of 'now' in pd.to_datetime" - with tm.assert_produces_warning( - FutureWarning, match=msg, check_stacklevel=False - ): - # checking stacklevel is tricky because we go through cython code - # GH#18705 - npnow = np.datetime64("now").astype("datetime64[ns]") - pdnow = to_datetime("now") - pdnow2 = to_datetime(["now"])[0] + # GH#18705 + now = Timestamp("now") + pdnow = to_datetime("now") + pdnow2 = to_datetime(["now"])[0] # These should all be equal with infinite perf; this gives # a generous margin of 10 seconds - assert abs(pdnow.value - npnow.astype(np.int64)) < 1e10 - assert abs(pdnow2.value - npnow.astype(np.int64)) < 1e10 + assert abs(pdnow.value - now.value) < 1e10 + assert abs(pdnow2.value - now.value) < 1e10 assert pdnow.tzinfo is None assert pdnow2.tzinfo is None @@ 
-673,12 +668,7 @@ def test_to_datetime_today(self, tz): @pytest.mark.parametrize("arg", ["now", "today"]) def test_to_datetime_today_now_unicode_bytes(self, arg): - warn = FutureWarning if arg == "now" else None - msg = "The parsing of 'now' in pd.to_datetime" - with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): - # checking stacklevel is tricky because we go through cython code - # GH#18705 - to_datetime([arg]) + to_datetime([arg]) @pytest.mark.parametrize( "dt", [np.datetime64("2000-01-01"), np.datetime64("2000-01-02")] diff --git a/pandas/tests/tools/test_to_time.py b/pandas/tests/tools/test_to_time.py index c80b1e080a1d1..5046fd9d0edc1 100644 --- a/pandas/tests/tools/test_to_time.py +++ b/pandas/tests/tools/test_to_time.py @@ -8,7 +8,6 @@ from pandas import Series import pandas._testing as tm -from pandas.core.tools.datetimes import to_time as to_time_alias from pandas.core.tools.times import to_time # The tests marked with this are locale-dependent. @@ -69,12 +68,3 @@ def test_arraylike(self): res = to_time(np.array(arg)) assert isinstance(res, list) assert res == expected_arr - - -def test_to_time_alias(): - expected = time(14, 15) - - with tm.assert_produces_warning(FutureWarning): - result = to_time_alias(expected) - - assert result == expected diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index fd808328ef386..60d54a48965df 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -207,9 +207,7 @@ def test_to_timedelta_on_missing_values(self): ) tm.assert_series_equal(actual, expected) - with tm.assert_produces_warning(FutureWarning, match="Inferring timedelta64"): - ser = Series(["00:00:01", pd.NaT]) - assert ser.dtype == "m8[ns]" + ser = Series(["00:00:01", pd.NaT], dtype="m8[ns]") actual = to_timedelta(ser) tm.assert_series_equal(actual, expected) diff --git a/pandas/tests/tseries/frequencies/test_inference.py b/pandas/tests/tseries/frequencies/test_inference.py index ea9a09ff2d65c..60ede451ddd81 100644 --- a/pandas/tests/tseries/frequencies/test_inference.py +++ b/pandas/tests/tseries/frequencies/test_inference.py @@ -10,6 +10,7 @@ DAYS, MONTHS, ) +from pandas._libs.tslibs.offsets import _get_offset from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG from pandas.compat import is_platform_windows @@ -28,8 +29,10 @@ ) from pandas.core.tools.datetimes import to_datetime -import pandas.tseries.frequencies as frequencies -import pandas.tseries.offsets as offsets +from pandas.tseries import ( + frequencies, + offsets, +) @pytest.fixture( @@ -447,7 +450,7 @@ def test_series_datetime_index(freq): @pytest.mark.parametrize( "offset_func", [ - frequencies._get_offset, + _get_offset, lambda freq: date_range("2011-01-01", periods=5, freq=freq), ], ) @@ -507,18 +510,13 @@ def test_legacy_offset_warnings(offset_func, freq): def test_ms_vs_capital_ms(): - left = frequencies._get_offset("ms") - right = frequencies._get_offset("MS") + left = _get_offset("ms") + right = _get_offset("MS") assert left == offsets.Milli() assert right == offsets.MonthBegin() -def test_infer_freq_warn_deprecated(): - with tm.assert_produces_warning(FutureWarning): - frequencies.infer_freq(date_range(2022, periods=3), warn=False) - - def test_infer_freq_non_nano(): arr = np.arange(10).astype(np.int64).view("M8[s]") dta = DatetimeArray._simple_new(arr, dtype=arr.dtype) diff --git a/pandas/tests/tseries/holiday/test_federal.py b/pandas/tests/tseries/holiday/test_federal.py index 
64c60d4e365e6..2565877f8a2a4 100644 --- a/pandas/tests/tseries/holiday/test_federal.py +++ b/pandas/tests/tseries/holiday/test_federal.py @@ -1,7 +1,11 @@ from datetime import datetime +from pandas import DatetimeIndex +import pandas._testing as tm + from pandas.tseries.holiday import ( AbstractHolidayCalendar, + USFederalHolidayCalendar, USMartinLutherKingJr, USMemorialDay, ) @@ -36,3 +40,19 @@ class MemorialDay(AbstractHolidayCalendar): datetime(1978, 5, 29, 0, 0), datetime(1979, 5, 28, 0, 0), ] + + +def test_federal_holiday_inconsistent_returntype(): + # GH 49075 test case + # Instantiate two calendars to rule out _cache + cal1 = USFederalHolidayCalendar() + cal2 = USFederalHolidayCalendar() + + results_2018 = cal1.holidays(start=datetime(2018, 8, 1), end=datetime(2018, 8, 31)) + results_2019 = cal2.holidays(start=datetime(2019, 8, 1), end=datetime(2019, 8, 31)) + expected_results = DatetimeIndex([], dtype="datetime64[ns]", freq=None) + + # Check against expected results to ensure both date + # ranges generate expected results as per GH49075 submission + tm.assert_index_equal(results_2018, expected_results) + tm.assert_index_equal(results_2019, expected_results) diff --git a/pandas/tests/tseries/holiday/test_holiday.py b/pandas/tests/tseries/holiday/test_holiday.py index cefb2f86703b2..ee83ca144d38a 100644 --- a/pandas/tests/tseries/holiday/test_holiday.py +++ b/pandas/tests/tseries/holiday/test_holiday.py @@ -3,6 +3,7 @@ import pytest from pytz import utc +from pandas import DatetimeIndex import pandas._testing as tm from pandas.tseries.holiday import ( @@ -264,3 +265,49 @@ def test_both_offset_observance_raises(): offset=[DateOffset(weekday=SA(4))], observance=next_monday, ) + + +def test_half_open_interval_with_observance(): + # Prompted by GH 49075 + # Check for holidays that have a half-open date interval where + # they have either a start_date or end_date defined along + # with a defined observance pattern to make sure that the return type + # for Holiday.dates() remains consistent before & after the year that + # marks the 'edge' of the half-open date interval. 
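test_federal_holiday_inconsistent_returntype encodes the GH 49075 contract: a holiday query over a window containing no holidays should still come back as an empty DatetimeIndex with datetime64[ns] dtype, not an object-dtype Index (the holiday.py fix that makes this hold appears further down in this diff). Roughly, as a standalone sketch:

    from datetime import datetime
    from pandas.tseries.holiday import USFederalHolidayCalendar

    cal = USFederalHolidayCalendar()
    result = cal.holidays(start=datetime(2018, 8, 1), end=datetime(2018, 8, 31))
    assert len(result) == 0
    assert result.dtype == "datetime64[ns]"  # empty, but still datetime-typed
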
diff --git a/pandas/tests/tseries/holiday/test_holiday.py b/pandas/tests/tseries/holiday/test_holiday.py
index cefb2f86703b2..ee83ca144d38a 100644
--- a/pandas/tests/tseries/holiday/test_holiday.py
+++ b/pandas/tests/tseries/holiday/test_holiday.py
@@ -3,6 +3,7 @@
 import pytest
 from pytz import utc

+from pandas import DatetimeIndex
 import pandas._testing as tm

 from pandas.tseries.holiday import (
@@ -264,3 +265,49 @@ def test_both_offset_observance_raises():
         offset=[DateOffset(weekday=SA(4))],
         observance=next_monday,
     )
+
+
+def test_half_open_interval_with_observance():
+    # Prompted by GH 49075
+    # Check for holidays that have a half-open date interval where
+    # they have either a start_date or end_date defined along
+    # with a defined observance pattern to make sure that the return type
+    # for Holiday.dates() remains consistent before & after the year that
+    # marks the 'edge' of the half-open date interval.
+
+    holiday_1 = Holiday(
+        "Arbitrary Holiday - start 2022-03-14",
+        start_date=datetime(2022, 3, 14),
+        month=3,
+        day=14,
+        observance=next_monday,
+    )
+    holiday_2 = Holiday(
+        "Arbitrary Holiday 2 - end 2022-03-20",
+        end_date=datetime(2022, 3, 20),
+        month=3,
+        day=20,
+        observance=next_monday,
+    )
+
+    class TestHolidayCalendar(AbstractHolidayCalendar):
+        rules = [
+            USMartinLutherKingJr,
+            holiday_1,
+            holiday_2,
+            USLaborDay,
+        ]
+
+    start = Timestamp("2022-08-01")
+    end = Timestamp("2022-08-31")
+    year_offset = DateOffset(years=5)
+    expected_results = DatetimeIndex([], dtype="datetime64[ns]", freq=None)
+    test_cal = TestHolidayCalendar()
+
+    date_interval_low = test_cal.holidays(start - year_offset, end - year_offset)
+    date_window_edge = test_cal.holidays(start, end)
+    date_interval_high = test_cal.holidays(start + year_offset, end + year_offset)
+
+    tm.assert_index_equal(date_interval_low, expected_results)
+    tm.assert_index_equal(date_window_edge, expected_results)
+    tm.assert_index_equal(date_interval_high, expected_results)
diff --git a/pandas/tests/tseries/offsets/conftest.py b/pandas/tests/tseries/offsets/conftest.py
index b07771af4e1c1..c9c4d6c456c53 100644
--- a/pandas/tests/tseries/offsets/conftest.py
+++ b/pandas/tests/tseries/offsets/conftest.py
@@ -5,7 +5,7 @@
 from pandas._libs.tslibs import Timestamp
 from pandas._libs.tslibs.offsets import MonthOffset

-import pandas.tseries.offsets as offsets
+from pandas.tseries import offsets


 @pytest.fixture(
diff --git a/pandas/tests/tseries/offsets/test_fiscal.py b/pandas/tests/tseries/offsets/test_fiscal.py
index d0801b2cede29..26937c348d9c8 100644
--- a/pandas/tests/tseries/offsets/test_fiscal.py
+++ b/pandas/tests/tseries/offsets/test_fiscal.py
@@ -6,17 +6,13 @@
 from dateutil.relativedelta import relativedelta
 import pytest

-from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG
-
 from pandas import Timestamp
-import pandas._testing as tm

 from pandas.tests.tseries.offsets.common import (
     WeekDay,
     assert_is_on_offset,
     assert_offset_equal,
 )

-from pandas.tseries.frequencies import get_offset
 from pandas.tseries.offsets import (
     FY5253,
     FY5253Quarter,
@@ -54,46 +50,6 @@ def test_get_offset_name():
     )


-def test_get_offset():
-    with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG):
-        with tm.assert_produces_warning(FutureWarning):
-            get_offset("gibberish")
-    with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG):
-        with tm.assert_produces_warning(FutureWarning):
-            get_offset("QS-JAN-B")
-
-    pairs = [
-        ("RE-N-DEC-MON", makeFY5253NearestEndMonth(weekday=0, startingMonth=12)),
-        ("RE-L-DEC-TUE", makeFY5253LastOfMonth(weekday=1, startingMonth=12)),
-        (
-            "REQ-L-MAR-TUE-4",
-            makeFY5253LastOfMonthQuarter(
-                weekday=1, startingMonth=3, qtr_with_extra_week=4
-            ),
-        ),
-        (
-            "REQ-L-DEC-MON-3",
-            makeFY5253LastOfMonthQuarter(
-                weekday=0, startingMonth=12, qtr_with_extra_week=3
-            ),
-        ),
-        (
-            "REQ-N-DEC-MON-3",
-            makeFY5253NearestEndMonthQuarter(
-                weekday=0, startingMonth=12, qtr_with_extra_week=3
-            ),
-        ),
-    ]
-
-    for name, expected in pairs:
-        with tm.assert_produces_warning(FutureWarning):
-            offset = get_offset(name)
-        assert offset == expected, (
-            f"Expected {repr(name)} to yield {repr(expected)} "
-            f"(actual: {repr(offset)})"
-        )
-
-
 class TestFY5253LastOfMonth:
     offset_lom_sat_aug = makeFY5253LastOfMonth(1, startingMonth=8, weekday=WeekDay.SAT)
     offset_lom_sat_sep = makeFY5253LastOfMonth(1, startingMonth=9, weekday=WeekDay.SAT)
diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py
index f1e511713d720..29b82f27234a5 100644
--- a/pandas/tests/tseries/offsets/test_offsets.py
+++ b/pandas/tests/tseries/offsets/test_offsets.py
@@ -40,7 +40,7 @@
 import pandas._testing as tm
 from pandas.tests.tseries.offsets.common import WeekDay

-import pandas.tseries.offsets as offsets
+from pandas.tseries import offsets
 from pandas.tseries.offsets import (
     FY5253,
     BaseOffset,
diff --git a/pandas/tests/tseries/offsets/test_ticks.py b/pandas/tests/tseries/offsets/test_ticks.py
index 7e7f6dc86b8f9..13619c2c0c828 100644
--- a/pandas/tests/tseries/offsets/test_ticks.py
+++ b/pandas/tests/tseries/offsets/test_ticks.py
@@ -44,7 +44,7 @@


 def test_apply_ticks():
-    result = offsets.Hour(3)._apply(offsets.Hour(4))
+    result = offsets.Hour(3) + offsets.Hour(4)
     exp = offsets.Hour(7)

     assert result == exp
@@ -74,7 +74,6 @@ def test_tick_add_sub(cls, n, m):
     expected = cls(n + m)

     assert left + right == expected
-    assert left._apply(right) == expected

     expected = cls(n - m)
     assert left - right == expected
diff --git a/pandas/tests/tseries/offsets/test_week.py b/pandas/tests/tseries/offsets/test_week.py
index 51eb662967db6..f42ff091af277 100644
--- a/pandas/tests/tseries/offsets/test_week.py
+++ b/pandas/tests/tseries/offsets/test_week.py
@@ -114,11 +114,7 @@ def test_is_on_offset(self, weekday):

         for day in range(1, 8):
             date = datetime(2008, 1, day)
-
-            if day % 7 == weekday:
-                expected = True
-            else:
-                expected = False
+            expected = day % 7 == weekday
             assert_is_on_offset(offset, date, expected)

     @pytest.mark.parametrize(
diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py
index a4e12315d34e0..f47dd2e725aec 100644
--- a/pandas/tests/tslibs/test_parsing.py
+++ b/pandas/tests/tslibs/test_parsing.py
@@ -148,6 +148,8 @@ def test_parsers_month_freq(date_str, expected):
         ("20111230", "%Y%m%d"),
         ("2011-12-30", "%Y-%m-%d"),
         ("2011", "%Y"),
+        ("2011-01", "%Y-%m"),
+        ("2011/01", "%Y/%m"),
         ("30-12-2011", "%d-%m-%Y"),
         ("2011-12-30 00:00:00", "%Y-%m-%d %H:%M:%S"),
         ("2011-12-30T00:00:00", "%Y-%m-%dT%H:%M:%S"),
@@ -215,6 +217,7 @@ def test_guess_datetime_format_with_locale_specific_formats(string, fmt):
         "this_is_not_a_datetime",
         "51a",
         "13/2019",
+        "202001",  # YYYYMM isn't ISO8601
     ],
 )
 def test_guess_datetime_format_invalid_inputs(invalid_dt):
diff --git a/pandas/tests/util/test_assert_almost_equal.py b/pandas/tests/util/test_assert_almost_equal.py
index ab53707771be6..ba52536e246d0 100644
--- a/pandas/tests/util/test_assert_almost_equal.py
+++ b/pandas/tests/util/test_assert_almost_equal.py
@@ -69,16 +69,6 @@ def _assert_not_almost_equal_both(a, b, **kwargs):
     _assert_not_almost_equal(b, a, **kwargs)


-@pytest.mark.parametrize(
-    "a,b,check_less_precise",
-    [(1.1, 1.1, False), (1.1, 1.100001, True), (1.1, 1.1001, 2)],
-)
-def test_assert_almost_equal_deprecated(a, b, check_less_precise):
-    # GH#30562
-    with tm.assert_produces_warning(FutureWarning):
-        _assert_almost_equal_both(a, b, check_less_precise=check_less_precise)
-
-
 @pytest.mark.parametrize(
     "a,b",
     [
@@ -122,7 +112,7 @@ def test_assert_not_almost_equal_numbers(a, b):
     ],
 )
 def test_assert_almost_equal_numbers_atol(a, b):
-    # Equivalent to the deprecated check_less_precise=True
+    # Equivalent to the deprecated check_less_precise=True, enforced in 2.0
     _assert_almost_equal_both(a, b, rtol=0.5e-3, atol=0.5e-3)
diff --git a/pandas/tests/util/test_deprecate.py b/pandas/tests/util/test_deprecate.py
index ee4f7e3f34f2e..92f422b8269f5 100644
--- a/pandas/tests/util/test_deprecate.py
+++ b/pandas/tests/util/test_deprecate.py
@@ -34,7 +34,6 @@ def new_func_with_deprecation():

     This is the extended summary. The deprecate directive goes before this.
     """
-    pass


 def test_deprecate_ok():
diff --git a/pandas/tests/util/test_deprecate_nonkeyword_arguments.py b/pandas/tests/util/test_deprecate_nonkeyword_arguments.py
index f6501fa8315e4..2ea3dae19a3e4 100644
--- a/pandas/tests/util/test_deprecate_nonkeyword_arguments.py
+++ b/pandas/tests/util/test_deprecate_nonkeyword_arguments.py
@@ -140,7 +140,7 @@ def test_i_signature():

 class Foo:
     @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "bar"])
-    def baz(self, bar=None, foobar=None):
+    def baz(self, bar=None, foobar=None):  # pylint: disable=disallowed-name
         ...
diff --git a/pandas/tests/util/test_util.py b/pandas/tests/util/test_util.py
index b7ed9415ecb90..c68501e3ea260 100644
--- a/pandas/tests/util/test_util.py
+++ b/pandas/tests/util/test_util.py
@@ -2,8 +2,7 @@

 import pytest

-import pandas.compat as compat
-
+from pandas import compat
 import pandas._testing as tm
diff --git a/pandas/tests/window/test_apply.py b/pandas/tests/window/test_apply.py
index 12f9cb27e8cbe..56c2432ab1429 100644
--- a/pandas/tests/window/test_apply.py
+++ b/pandas/tests/window/test_apply.py
@@ -16,7 +16,7 @@
 )
 import pandas._testing as tm

-import pandas.tseries.offsets as offsets
+from pandas.tseries import offsets


 def f(x):
@@ -163,7 +163,7 @@ def test_invalid_raw_numba():
 @pytest.mark.parametrize("args_kwargs", [[None, {"par": 10}], [(10,), None]])
 def test_rolling_apply_args_kwargs(args_kwargs):
     # GH 33433
-    def foo(x, par):
+    def numpysum(x, par):
         return np.sum(x + par)

     df = DataFrame({"gr": [1, 1], "a": [1, 2]})
@@ -171,7 +171,7 @@ def foo(x, par):
     idx = Index(["gr", "a"])
     expected = DataFrame([[11.0, 11.0], [11.0, 12.0]], columns=idx)

-    result = df.rolling(1).apply(foo, args=args_kwargs[0], kwargs=args_kwargs[1])
+    result = df.rolling(1).apply(numpysum, args=args_kwargs[0], kwargs=args_kwargs[1])
     tm.assert_frame_equal(result, expected)

     midx = MultiIndex.from_tuples([(1, 0), (1, 1)], names=["gr", None])
@@ -179,7 +179,7 @@ def foo(x, par):

     gb_rolling = df.groupby("gr")["a"].rolling(1)

-    result = gb_rolling.apply(foo, args=args_kwargs[0], kwargs=args_kwargs[1])
+    result = gb_rolling.apply(numpysum, args=args_kwargs[0], kwargs=args_kwargs[1])
     tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py
index 3f6574a4b54ea..887e6d317689a 100644
--- a/pandas/tests/window/test_ewm.py
+++ b/pandas/tests/window/test_ewm.py
@@ -170,14 +170,6 @@ def test_ewm_getitem_attributes_retained(arg, adjust, ignore_na):
     assert result == expected


-def test_ewm_vol_deprecated():
-    ser = Series(range(1))
-    with tm.assert_produces_warning(FutureWarning):
-        result = ser.ewm(com=0.1).vol()
-    expected = ser.ewm(com=0.1).std()
-    tm.assert_series_equal(result, expected)
-
-
 def test_ewma_times_adjust_false_raises():
     # GH 40098
     with pytest.raises(
diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py
index 688a93223b3f4..3da14bce6facd 100644
--- a/pandas/tests/window/test_groupby.py
+++ b/pandas/tests/window/test_groupby.py
@@ -277,11 +277,11 @@ def test_rolling_apply_mutability(self):
     def test_groupby_rolling(self, expected_value, raw_value):
         # GH 31754

-        def foo(x):
+        def isnumpyarray(x):
             return int(isinstance(x, np.ndarray))

         df = DataFrame({"id": [1, 1, 1], "value": [1, 2, 3]})
-        result = df.groupby("id").value.rolling(1).apply(foo, raw=raw_value)
+        result = df.groupby("id").value.rolling(1).apply(isnumpyarray, raw=raw_value)
         expected = Series(
             [expected_value] * 3,
             index=MultiIndex.from_tuples(((1, 0), (1, 1), (1, 2)), names=["id", None]),
diff --git a/pandas/tests/window/test_rolling_functions.py b/pandas/tests/window/test_rolling_functions.py
index fc64c8efc8fcf..bb6faf4f4eb22 100644
--- a/pandas/tests/window/test_rolling_functions.py
+++ b/pandas/tests/window/test_rolling_functions.py
@@ -15,7 +15,7 @@
 )
 import pandas._testing as tm

-import pandas.tseries.offsets as offsets
+from pandas.tseries import offsets


 @pytest.mark.parametrize(
diff --git a/pandas/tests/window/test_rolling_quantile.py b/pandas/tests/window/test_rolling_quantile.py
index 815ee419590f7..e78e997f220b5 100644
--- a/pandas/tests/window/test_rolling_quantile.py
+++ b/pandas/tests/window/test_rolling_quantile.py
@@ -12,7 +12,7 @@
 )
 import pandas._testing as tm

-import pandas.tseries.offsets as offsets
+from pandas.tseries import offsets


 def scoreatpercentile(a, per):
diff --git a/pandas/tests/window/test_rolling_skew_kurt.py b/pandas/tests/window/test_rolling_skew_kurt.py
index 4489fada9c11e..8f162f376c863 100644
--- a/pandas/tests/window/test_rolling_skew_kurt.py
+++ b/pandas/tests/window/test_rolling_skew_kurt.py
@@ -14,7 +14,7 @@
 )
 import pandas._testing as tm

-import pandas.tseries.offsets as offsets
+from pandas.tseries import offsets


 @td.skip_if_no_scipy
diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py
index eaa4181ac5df6..d04cdb3e46bc0 100644
--- a/pandas/tests/window/test_timeseries_window.py
+++ b/pandas/tests/window/test_timeseries_window.py
@@ -12,7 +12,7 @@
 )
 import pandas._testing as tm

-import pandas.tseries.offsets as offsets
+from pandas.tseries import offsets


 @pytest.fixture
diff --git a/pandas/tseries/api.py b/pandas/tseries/api.py
index e274838d45b27..9fdf95d09fe52 100644
--- a/pandas/tseries/api.py
+++ b/pandas/tseries/api.py
@@ -2,7 +2,7 @@
 Timeseries API
 """

+from pandas.tseries import offsets
 from pandas.tseries.frequencies import infer_freq
-import pandas.tseries.offsets as offsets

 __all__ = ["infer_freq", "offsets"]
diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py
index a7fe2da703908..1cf9fb9a85b37 100644
--- a/pandas/tseries/frequencies.py
+++ b/pandas/tseries/frequencies.py
@@ -1,7 +1,5 @@
 from __future__ import annotations

-import warnings
-
 import numpy as np

 from pandas._libs.algos import unique_deltas
@@ -23,16 +21,13 @@
     month_position_check,
 )
 from pandas._libs.tslibs.offsets import (
-    BaseOffset,
     DateOffset,
     Day,
-    _get_offset,
     to_offset,
 )
 from pandas._libs.tslibs.parsing import get_rule_month
 from pandas._typing import npt
 from pandas.util._decorators import cache_readonly
-from pandas.util._exceptions import find_stack_level

 from pandas.core.dtypes.common import (
     is_datetime64_dtype,
@@ -102,30 +97,11 @@ def get_period_alias(offset_str: str) -> str | None:
     return _offset_to_period_map.get(offset_str, None)


-def get_offset(name: str) -> BaseOffset:
-    """
-    Return DateOffset object associated with rule name.
-
-    .. deprecated:: 1.0.0
-
-    Examples
-    --------
-    get_offset('EOM') --> BMonthEnd(1)
-    """
-    warnings.warn(
-        "get_offset is deprecated and will be removed in a future version, "
-        "use to_offset instead.",
-        FutureWarning,
-        stacklevel=find_stack_level(),
-    )
-    return _get_offset(name)
-
-
 # ---------------------------------------------------------------------
 # Period codes


-def infer_freq(index, warn: bool = True) -> str | None:
+def infer_freq(index) -> str | None:
     """
     Infer the most likely frequency given the input index.

@@ -133,8 +109,6 @@ def infer_freq(index, warn: bool = True) -> str | None:
     ----------
     index : DatetimeIndex or TimedeltaIndex
       If passed a Series will use the values of the series (NOT THE INDEX).
-    warn : bool, default True
-        .. deprecated:: 1.5.0

     Returns
     -------
@@ -186,7 +160,7 @@ def infer_freq(index, warn: bool = True) -> str | None:
         )
     elif is_timedelta64_dtype(index.dtype):
         # Allow TimedeltaIndex and TimedeltaArray
-        inferer = _TimedeltaFrequencyInferer(index, warn=warn)
+        inferer = _TimedeltaFrequencyInferer(index)
         return inferer.get_freq()

     if isinstance(index, Index) and not isinstance(index, DatetimeIndex):
@@ -199,7 +173,7 @@ def infer_freq(index, warn: bool = True) -> str | None:
     if not isinstance(index, DatetimeIndex):
         index = DatetimeIndex(index)

-    inferer = _FrequencyInferer(index, warn=warn)
+    inferer = _FrequencyInferer(index)
     return inferer.get_freq()


@@ -208,7 +182,7 @@ class _FrequencyInferer:
     Not sure if I can avoid the state machine here
     """

-    def __init__(self, index, warn: bool = True) -> None:
+    def __init__(self, index) -> None:
         self.index = index
         self.i8values = index.asi8

@@ -230,15 +204,6 @@ def __init__(self, index, warn: bool = True) -> None:
         if index.tz is not None:
             self.i8values = tz_convert_from_utc(self.i8values, index.tz)

-        if warn is not True:
-            warnings.warn(
-                "warn is deprecated (and never implemented) and "
-                "will be removed in a future version.",
-                FutureWarning,
-                stacklevel=find_stack_level(),
-            )
-        self.warn = warn
-
         if len(index) < 3:
             raise ValueError("Need at least 3 dates to infer frequency")

@@ -652,7 +617,6 @@ def _is_weekly(rule: str) -> bool:

 __all__ = [
     "Day",
-    "get_offset",
     "get_period_alias",
     "infer_freq",
     "is_subperiod",
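With get_offset removed, to_offset is the supported way to turn a frequency alias into an offset object, and it preserves the case sensitivity the ms/MS test above relies on. A short sketch (not part of the patch):

    from pandas.tseries.frequencies import to_offset

    to_offset("ms")  # Milli: lowercase is the millisecond alias
    to_offset("MS")  # MonthBegin: uppercase is month start
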
diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py
index cb65fc958414f..0583b714ea101 100644
--- a/pandas/tseries/holiday.py
+++ b/pandas/tseries/holiday.py
@@ -335,6 +335,9 @@ def _apply_rule(self, dates):
         -------
         Dates with rules applied
         """
+        if dates.empty:
+            return DatetimeIndex([])
+
         if self.observance is not None:
             return dates.map(lambda d: self.observance(d))

@@ -553,8 +556,7 @@ def merge(self, other, inplace: bool = False):
 class USFederalHolidayCalendar(AbstractHolidayCalendar):
     """
     US Federal Government Holiday Calendar based on rules specified by:
-    https://www.opm.gov/policy-data-oversight/
-    snow-dismissal-procedures/federal-holidays/
+    https://www.opm.gov/policy-data-oversight/pay-leave/federal-holidays/
     """

     rules = [
diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py
index 5d62f57126ef3..f18066769f214 100644
--- a/pandas/util/_decorators.py
+++ b/pandas/util/_decorators.py
@@ -206,8 +206,7 @@ def wrapper(*args, **kwargs) -> Callable[..., Any]:
                     f"or {repr(new_arg_name)}, not both."
                 )
                 raise TypeError(msg)
-            else:
-                kwargs[new_arg_name] = new_arg_value
+            kwargs[new_arg_name] = new_arg_value
         return func(*args, **kwargs)

     return cast(F, wrapper)
diff --git a/pandas/util/_doctools.py b/pandas/util/_doctools.py
index 80ec9724931bc..dc8a22b1fe0ef 100644
--- a/pandas/util/_doctools.py
+++ b/pandas/util/_doctools.py
@@ -55,7 +55,7 @@ def plot(self, left, right, labels: Iterable[str] = (), vertical: bool = True):
         vertical : bool, default True
             If True, use vertical layout. If False, use horizontal layout.
         """
-        import matplotlib.gridspec as gridspec
+        from matplotlib import gridspec
         import matplotlib.pyplot as plt

         if not isinstance(left, list):
@@ -141,7 +141,7 @@ def _make_table(self, ax, df, title: str, height: float | None = None) -> None:
             ax.set_visible(False)
             return

-        import pandas.plotting as plotting
+        from pandas import plotting

         idx_nlevels = df.index.nlevels
         col_nlevels = df.columns.nlevels
diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py
index 4594c0cb057df..646e05b08a8a9 100644
--- a/pandas/util/_validators.py
+++ b/pandas/util/_validators.py
@@ -386,7 +386,7 @@ def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = Tru

     if value is None and method is None:
         raise ValueError("Must specify a fill 'value' or 'method'.")
-    elif value is None and method is not None:
+    if value is None and method is not None:
         method = clean_fill_method(method)

     elif value is not None and method is None:
diff --git a/pyproject.toml b/pyproject.toml
index b6b53f4bfd578..3ec0b7a9efe71 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,14 +2,12 @@
 # Minimum requirements for the build system to execute.
 # See https://github.com/scipy/scipy/pull/12940 for the AIX issue.
 requires = [
-    "setuptools>=51.0.0",
+    "meson-python",
     "wheel",
     "Cython>=0.29.32,<3",  # Note: sync with setup.py, environment.yml and asv.conf.json
-    "oldest-supported-numpy>=0.10"
+    "oldest-supported-numpy>=2022.8.16"
 ]
-# uncomment to enable pep517 after versioneer problem is fixed.
-# https://github.com/python-versioneer/python-versioneer/issues/193
-# build-backend = "setuptools.build_meta"
+build-backend = "mesonpy"

 [tool.cibuildwheel]
 skip = "cp36-* cp37-* pp37-* *-manylinux_i686 *_ppc64le *_s390x *-musllinux*"
@@ -59,7 +57,6 @@ exclude = '''
 max-line-length = 88
 disable = [
   "abstract-class-instantiated",
-  "access-member-before-definition",
   "import-error",
   "invalid-repr-returned",
   "invalid-unary-operand-type",
@@ -79,9 +76,6 @@ disable = [
   "used-before-assignment",

   # pylint type "C": convention, for programming standard violation
-  "consider-iterating-dictionary",
-  "consider-using-f-string",
-  "disallowed-name",
   "import-outside-toplevel",
   "invalid-name",
   "line-too-long",
@@ -98,7 +92,6 @@ disable = [
   "unneeded-not",
   "use-implicit-booleaness-not-comparison",
   "use-implicit-booleaness-not-len",
-  "use-maxsplit-arg",
   "use-sequence-for-iteration",
   "useless-import-alias",
   "wrong-import-order",
@@ -106,27 +99,16 @@ disable = [

   # pylint type "R": refactor, for bad code smell
   "chained-comparison",
-  "comparison-of-constants",
   "comparison-with-itself",
   "consider-merging-isinstance",
-  "consider-using-from-import",
-  "consider-using-get",
   "consider-using-min-builtin",
-  "consider-using-sys-exit",
   "consider-using-ternary",
   "consider-using-with",
   "cyclic-import",
   "duplicate-code",
   "inconsistent-return-statements",
-  "invalid-sequence-index",
-  "literal-comparison",
-  "no-else-continue",
-  "no-else-raise",
   "no-else-return",
-  "no-self-use",
   "redefined-argument-from-local",
-  "simplifiable-if-expression",
-  "simplifiable-if-statement",
   "too-few-public-methods",
   "too-many-ancestors",
   "too-many-arguments",
@@ -142,7 +124,6 @@ disable = [
   "unnecessary-list-index-lookup",
   "use-a-generator",
   "useless-option-value",
-  "useless-return",

   # pylint type "W": warning, for python specific problems
   "abstract-method",
@@ -151,12 +132,10 @@ disable = [
   "arguments-renamed",
   "attribute-defined-outside-init",
   "broad-except",
-  "cell-var-from-loop",
   "comparison-with-callable",
   "confusing-with-statement",
   "dangerous-default-value",
   "deprecated-module",
-  "duplicate-value",
   "eval-used",
   "expression-not-assigned",
   "fixme",
@@ -167,7 +146,6 @@ disable = [
   "invalid-overridden-method",
   "keyword-arg-before-vararg",
   "method-cache-max-size-none",
-  "nan-comparison",
   "non-parent-init-called",
   "overridden-final-method",
   "pointless-statement",
@@ -186,16 +164,13 @@ disable = [
   "try-except-raise",
   "undefined-loop-variable",
   "unnecessary-lambda",
-  "unnecessary-pass",
   "unspecified-encoding",
   "unused-argument",
   "unused-import",
   "unused-variable",
-  "unused-wildcard-import",
   "using-constant-test",
   "useless-else-on-loop",
-  "useless-parent-delegation",
-  "wildcard-import"
+  "useless-parent-delegation"
 ]

 [tool.pytest.ini_options]
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 90e4c6dca5ff1..fe464827da419 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -44,7 +44,6 @@ tzdata>=2022.1
 xarray
 xlrd
 xlsxwriter
-xlwt
 zstandard
 aiobotocore<2.0.0
 botocore
@@ -101,4 +100,5 @@ requests
 jupyterlab >=3.4,<4
 jupyterlite==0.1.0b12
 sphinx-toggleprompt
-setuptools>=51.0.0
+git+https://github.com/mesonbuild/meson.git@master
+git+https://github.com/mesonbuild/meson-python.git@main
diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py
index dcfef648e8f1c..b490c2ffdc2e8 100644
--- a/scripts/tests/test_validate_docstrings.py
+++ b/scripts/tests/test_validate_docstrings.py
@@ -23,9 +23,8 @@ def prefix_pandas(self):
         pandas.Series.rename : Alter Series index labels or name.
         DataFrame.head : The first `n` rows of the caller object.
         """
-        pass

-    def redundant_import(self, foo=None, bar=None):
+    def redundant_import(self, paramx=None, paramy=None):
         """
         A sample DataFrame method.

@@ -45,7 +44,6 @@ def redundant_import(self, foo=None, bar=None):
         >>> df.all(bool_only=True)
         Series([], dtype: bool)
         """
-        pass

     def unused_import(self):
         """
@@ -54,7 +52,6 @@ def unused_import(self):
         >>> import pandas as pdf
         >>> df = pd.DataFrame(np.ones((3, 3)), columns=('a', 'b', 'c'))
         """
-        pass

     def missing_whitespace_around_arithmetic_operator(self):
         """
@@ -63,7 +60,6 @@ def missing_whitespace_around_arithmetic_operator(self):
         >>> 2+5
         7
         """
-        pass

     def indentation_is_not_a_multiple_of_four(self):
         """
@@ -72,7 +68,6 @@ def indentation_is_not_a_multiple_of_four(self):
         >>> if 2 + 5:
         ...   pass
         """
-        pass

     def missing_whitespace_after_comma(self):
         """
@@ -80,13 +75,11 @@ def missing_whitespace_after_comma(self):
         --------
         >>> df = pd.DataFrame(np.ones((3,3)),columns=('a','b', 'c'))
         """
-        pass

     def write_array_like_with_hyphen_not_underscore(self):
         """
         In docstrings, use array-like over array_like
         """
-        pass

     def leftover_files(self):
         """
@@ -95,7 +88,6 @@ def leftover_files(self):
         >>> import pathlib
         >>> pathlib.Path("foo.txt").touch()
         """
-        pass


 class TestValidator:
diff --git a/scripts/validate_rst_title_capitalization.py b/scripts/validate_rst_title_capitalization.py
index d0490b53fa957..f647f066d0125 100755
--- a/scripts/validate_rst_title_capitalization.py
+++ b/scripts/validate_rst_title_capitalization.py
@@ -67,7 +67,6 @@
     "IntervalIndex",
     "Categorical",
     "CategoricalIndex",
-    "Categorical",
     "GroupBy",
     "DataFrameGroupBy",
     "SeriesGroupBy",
@@ -145,7 +144,6 @@
     "False",
     "Styler",
     "os",
-    "UTC",
     "str",
     "msgpack",
     "ExtensionArray",
diff --git a/setup.cfg b/setup.cfg
index eede4a66d598d..5680db30ec50d 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -22,6 +22,7 @@ classifiers =
     Programming Language :: Python :: 3.8
     Programming Language :: Python :: 3.9
     Programming Language :: Python :: 3.10
+    Programming Language :: Python :: 3.11
     Topic :: Scientific/Engineering
 project_urls =
     Bug Tracker = https://github.com/pandas-dev/pandas/issues
@@ -33,6 +34,7 @@ packages = find:
 install_requires =
     numpy>=1.20.3; python_version<'3.10'
     numpy>=1.21.0; python_version>='3.10'
+    numpy>=1.23.2; python_version>='3.11'
     python-dateutil>=2.8.2
     pytz>=2020.1
 python_requires = >=3.8
@@ -78,7 +80,6 @@ excel =
     openpyxl>=3.0.7
     pyxlsb>=1.0.8
     xlrd>=2.0.1
-    xlwt>=1.3.0
     xlsxwriter>=1.4.3
 parquet =
     pyarrow>=6.0.0
@@ -158,7 +159,6 @@ all =
     xarray>=0.19.0
     xlrd>=2.0.1
     xlsxwriter>=1.4.3
-    xlwt>=1.3.0
     zstandard>=0.15.2

 [build_ext]
diff --git a/versioneer.py b/versioneer.py
index 2ab269f9c3ea8..1ff2e936e15cc 100644
--- a/versioneer.py
+++ b/versioneer.py
@@ -1,4 +1,5 @@
 # Version: 0.19
+# pylint: disable=consider-using-f-string

 """The Versioneer - like a rocketeer, but for versions.

@@ -420,6 +421,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=
 LONG_VERSION_PY[
     "git"
 ] = r'''
+# pylint: disable=consider-using-f-string
 # This file helps to compute a version number in source trees obtained from
 # git-archive tarball (such as those provided by GitHub's download-from-tag
 # feature). Distribution tarballs (built by setup.py sdist) and build