Skip to content

Commit 53a47cd

Browse files
committed
Merge branch 'master' into doc/info
2 parents 5151e33 + 5468223 commit 53a47cd

File tree

491 files changed

+11716
-6890
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

491 files changed

+11716
-6890
lines changed

.github/workflows/ci.yml

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ on:
66
pull_request:
77
branches:
88
- master
9-
- 1.1.x
9+
- 1.2.x
1010

1111
env:
1212
ENV_FILE: environment.yml
@@ -64,7 +64,7 @@ jobs:
6464
- name: Testing docstring validation script
6565
run: |
6666
source activate pandas-dev
67-
pytest --capture=no --strict scripts
67+
pytest --capture=no --strict-markers scripts
6868
if: always()
6969

7070
- name: Running benchmarks
@@ -74,14 +74,10 @@ jobs:
7474
asv check -E existing
7575
git remote add upstream https://github.com/pandas-dev/pandas.git
7676
git fetch upstream
77-
if git diff upstream/master --name-only | grep -q "^asv_bench/"; then
78-
asv machine --yes
79-
asv dev | sed "/failed$/ s/^/##[error]/" | tee benchmarks.log
80-
if grep "failed" benchmarks.log > /dev/null ; then
81-
exit 1
82-
fi
83-
else
84-
echo "Benchmarks did not run, no changes detected"
77+
asv machine --yes
78+
asv dev | sed "/failed$/ s/^/##[error]/" | tee benchmarks.log
79+
if grep "failed" benchmarks.log > /dev/null ; then
80+
exit 1
8581
fi
8682
if: always()
8783

.pre-commit-config.yaml

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
minimum_pre_commit_version: '2.9.2'
12
repos:
23
- repo: https://github.com/python/black
34
rev: 20.8b1
@@ -21,10 +22,8 @@ repos:
2122
rev: 5.6.4
2223
hooks:
2324
- id: isort
24-
name: isort (python)
25-
- id: isort
26-
name: isort (cython)
27-
types: [cython]
25+
types: [text] # overwrite upstream `types: [python]`
26+
types_or: [python, cython]
2827
- repo: https://github.com/asottile/pyupgrade
2928
rev: v2.7.4
3029
hooks:
@@ -96,17 +95,17 @@ repos:
9695
name: Check for incorrect code block or IPython directives
9796
language: pygrep
9897
entry: (\.\. code-block ::|\.\. ipython ::)
99-
files: \.(py|pyx|rst)$
98+
types_or: [python, cython, rst]
10099
- id: unwanted-patterns-strings-to-concatenate
101100
name: Check for use of not concatenated strings
102101
language: python
103102
entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate"
104-
files: \.(py|pyx|pxd|pxi)$
103+
types_or: [python, cython]
105104
- id: unwanted-patterns-strings-with-wrong-placed-whitespace
106105
name: Check for strings with wrong placed spaces
107106
language: python
108107
entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace"
109-
files: \.(py|pyx|pxd|pxi)$
108+
types_or: [python, cython]
110109
- id: unwanted-patterns-private-import-across-module
111110
name: Check for import of private attributes across modules
112111
language: python

Dockerfile

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM continuumio/miniconda3
1+
FROM quay.io/condaforge/miniforge3
22

33
# if you forked pandas, you can pass in your own GitHub username to use your fork
44
# i.e. gh_username=myname
@@ -15,10 +15,6 @@ RUN apt-get update \
1515
# Verify git, process tools, lsb-release (common in install instructions for CLIs) installed
1616
&& apt-get -y install git iproute2 procps iproute2 lsb-release \
1717
#
18-
# Install C compilers (gcc not enough, so just went with build-essential which admittedly might be overkill),
19-
# needed to build pandas C extensions
20-
&& apt-get -y install build-essential \
21-
#
2218
# cleanup
2319
&& apt-get autoremove -y \
2420
&& apt-get clean -y \
@@ -39,9 +35,14 @@ RUN mkdir "$pandas_home" \
3935
# we just update the base/root one from the 'environment.yml' file instead of creating a new one.
4036
#
4137
# Set up environment
42-
RUN conda env update -n base -f "$pandas_home/environment.yml"
38+
RUN conda install -y mamba
39+
RUN mamba env update -n base -f "$pandas_home/environment.yml"
4340

4441
# Build C extensions and pandas
45-
RUN cd "$pandas_home" \
42+
SHELL ["/bin/bash", "-c"]
43+
RUN . /opt/conda/etc/profile.d/conda.sh \
44+
&& conda activate base \
45+
&& cd "$pandas_home" \
46+
&& export \
4647
&& python setup.py build_ext -j 4 \
4748
&& python -m pip install -e .

README.md

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -63,31 +63,31 @@ Here are just a few of the things that pandas does well:
6363
date shifting and lagging
6464

6565

66-
[missing-data]: https://pandas.pydata.org/pandas-docs/stable/missing_data.html#working-with-missing-data
67-
[insertion-deletion]: https://pandas.pydata.org/pandas-docs/stable/dsintro.html#column-selection-addition-deletion
68-
[alignment]: https://pandas.pydata.org/pandas-docs/stable/dsintro.html?highlight=alignment#intro-to-data-structures
69-
[groupby]: https://pandas.pydata.org/pandas-docs/stable/groupby.html#group-by-split-apply-combine
70-
[conversion]: https://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe
71-
[slicing]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#slicing-ranges
72-
[fancy-indexing]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#advanced-indexing-with-ix
73-
[subsetting]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#boolean-indexing
74-
[merging]: https://pandas.pydata.org/pandas-docs/stable/merging.html#database-style-dataframe-joining-merging
75-
[joining]: https://pandas.pydata.org/pandas-docs/stable/merging.html#joining-on-index
76-
[reshape]: https://pandas.pydata.org/pandas-docs/stable/reshaping.html#reshaping-and-pivot-tables
77-
[pivot-table]: https://pandas.pydata.org/pandas-docs/stable/reshaping.html#pivot-tables-and-cross-tabulations
78-
[mi]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#hierarchical-indexing-multiindex
79-
[flat-files]: https://pandas.pydata.org/pandas-docs/stable/io.html#csv-text-files
80-
[excel]: https://pandas.pydata.org/pandas-docs/stable/io.html#excel-files
81-
[db]: https://pandas.pydata.org/pandas-docs/stable/io.html#sql-queries
82-
[hdfstore]: https://pandas.pydata.org/pandas-docs/stable/io.html#hdf5-pytables
83-
[timeseries]: https://pandas.pydata.org/pandas-docs/stable/timeseries.html#time-series-date-functionality
66+
[missing-data]: https://pandas.pydata.org/pandas-docs/stable/user_guide/missing_data.html
67+
[insertion-deletion]: https://pandas.pydata.org/pandas-docs/stable/user_guide/dsintro.html#column-selection-addition-deletion
68+
[alignment]: https://pandas.pydata.org/pandas-docs/stable/user_guide/dsintro.html?highlight=alignment#intro-to-data-structures
69+
[groupby]: https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#group-by-split-apply-combine
70+
[conversion]: https://pandas.pydata.org/pandas-docs/stable/user_guide/dsintro.html#dataframe
71+
[slicing]: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#slicing-ranges
72+
[fancy-indexing]: https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#advanced
73+
[subsetting]: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#boolean-indexing
74+
[merging]: https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html#database-style-dataframe-or-named-series-joining-merging
75+
[joining]: https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html#joining-on-index
76+
[reshape]: https://pandas.pydata.org/pandas-docs/stable/user_guide/reshaping.html
77+
[pivot-table]: https://pandas.pydata.org/pandas-docs/stable/user_guide/reshaping.html
78+
[mi]: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#hierarchical-indexing-multiindex
79+
[flat-files]: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#csv-text-files
80+
[excel]: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#excel-files
81+
[db]: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#sql-queries
82+
[hdfstore]: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#hdf5-pytables
83+
[timeseries]: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#time-series-date-functionality
8484

8585
## Where to get it
8686
The source code is currently hosted on GitHub at:
8787
https://github.com/pandas-dev/pandas
8888

8989
Binary installers for the latest released version are available at the [Python
90-
package index](https://pypi.org/project/pandas) and on conda.
90+
Package Index (PyPI)](https://pypi.org/project/pandas) and on [Conda](https://docs.conda.io/en/latest/).
9191

9292
```sh
9393
# conda
@@ -100,15 +100,15 @@ pip install pandas
100100
```
101101

102102
## Dependencies
103-
- [NumPy](https://www.numpy.org)
104-
- [python-dateutil](https://labix.org/python-dateutil)
105-
- [pytz](https://pythonhosted.org/pytz)
103+
- [NumPy - Adds support for large, multi-dimensional arrays, matrices and high-level mathematical functions to operate on these arrays](https://www.numpy.org)
104+
- [python-dateutil - Provides powerful extensions to the standard datetime module](https://labix.org/python-dateutil)
105+
- [pytz - Brings the Olson tz database into Python which allows accurate and cross platform timezone calculations](https://pythonhosted.org/pytz)
106106

107107
See the [full installation instructions](https://pandas.pydata.org/pandas-docs/stable/install.html#dependencies) for minimum supported versions of required, recommended and optional dependencies.
108108

109109
## Installation from sources
110-
To install pandas from source you need Cython in addition to the normal
111-
dependencies above. Cython can be installed from pypi:
110+
To install pandas from source you need [Cython](https://cython.org/) in addition to the normal
111+
dependencies above. Cython can be installed from PyPI:
112112

113113
```sh
114114
pip install cython
@@ -145,7 +145,7 @@ See the full instructions for [installing from source](https://pandas.pydata.org
145145
The official documentation is hosted on PyData.org: https://pandas.pydata.org/pandas-docs/stable
146146

147147
## Background
148-
Work on ``pandas`` started at AQR (a quantitative hedge fund) in 2008 and
148+
Work on ``pandas`` started at [AQR](https://www.aqr.com/) (a quantitative hedge fund) in 2008 and
149149
has been under active development since then.
150150

151151
## Getting Help
@@ -154,7 +154,7 @@ For usage questions, the best place to go to is [StackOverflow](https://stackove
154154
Further, general questions and discussions can also take place on the [pydata mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata).
155155

156156
## Discussion and Development
157-
Most development discussions take place on github in this repo. Further, the [pandas-dev mailing list](https://mail.python.org/mailman/listinfo/pandas-dev) can also be used for specialized discussions or design issues, and a [Gitter channel](https://gitter.im/pydata/pandas) is available for quick development related questions.
157+
Most development discussions take place on GitHub in this repo. Further, the [pandas-dev mailing list](https://mail.python.org/mailman/listinfo/pandas-dev) can also be used for specialized discussions or design issues, and a [Gitter channel](https://gitter.im/pydata/pandas) is available for quick development related questions.
158158

159159
## Contributing to pandas [![Open Source Helpers](https://www.codetriage.com/pandas-dev/pandas/badges/users.svg)](https://www.codetriage.com/pandas-dev/pandas)
160160

asv_bench/benchmarks/groupby.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,7 @@ def setup(self):
486486
tmp2 = (np.random.random(10000) * 10.0).astype(np.float32)
487487
tmp = np.concatenate((tmp1, tmp2))
488488
arr = np.repeat(tmp, 10)
489-
self.df = DataFrame(dict(a=arr, b=arr))
489+
self.df = DataFrame({"a": arr, "b": arr})
490490

491491
def time_sum(self):
492492
self.df.groupby(["a"])["b"].sum()

asv_bench/benchmarks/indexing.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
lower-level methods directly on Index and subclasses, see index_object.py,
44
indexing_engine.py, and index_cached.py
55
"""
6+
import itertools
7+
import string
68
import warnings
79

810
import numpy as np
@@ -255,6 +257,9 @@ def setup(self, index):
255257
"non_monotonic": CategoricalIndex(list("abc" * N)),
256258
}
257259
self.data = indices[index]
260+
self.data_unique = CategoricalIndex(
261+
["".join(perm) for perm in itertools.permutations(string.printable, 3)]
262+
)
258263

259264
self.int_scalar = 10000
260265
self.int_list = list(range(10000))
@@ -281,7 +286,7 @@ def time_get_loc_scalar(self, index):
281286
self.data.get_loc(self.cat_scalar)
282287

283288
def time_get_indexer_list(self, index):
284-
self.data.get_indexer(self.cat_list)
289+
self.data_unique.get_indexer(self.cat_list)
285290

286291

287292
class MethodLookup:
@@ -358,6 +363,14 @@ def time_assign_with_setitem(self):
358363
for i in range(100):
359364
self.df[i] = np.random.randn(self.N)
360365

366+
def time_assign_list_like_with_setitem(self):
367+
np.random.seed(1234)
368+
self.df[list(range(100))] = np.random.randn(self.N, 100)
369+
370+
def time_assign_list_of_columns_concat(self):
371+
df = DataFrame(np.random.randn(self.N, 100))
372+
concat([self.df, df], axis=1)
373+
361374

362375
class ChainIndexing:
363376

0 commit comments

Comments
 (0)