Skip to content

Commit

Permalink
merging
Browse files Browse the repository at this point in the history
  • Loading branch information
makbigc committed Jun 6, 2019
2 parents cf96e22 + 891a419 commit 8418a07
Show file tree
Hide file tree
Showing 95 changed files with 2,443 additions and 1,609 deletions.
2 changes: 2 additions & 0 deletions asv_bench/benchmarks/frame_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ def time_dict_rename_both_axes(self):


class Iteration:
# mem_itertuples_* benchmarks are slow
timeout = 120

def setup(self):
N = 1000
Expand Down
7 changes: 1 addition & 6 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
from functools import partial
from itertools import product
from string import ascii_letters
import warnings

import numpy as np

from pandas import (
Categorical, DataFrame, MultiIndex, Series, TimeGrouper, Timestamp,
Categorical, DataFrame, MultiIndex, Series, Timestamp,
date_range, period_range)
import pandas.util.testing as tm

Expand Down Expand Up @@ -301,10 +300,6 @@ def setup(self):
def time_multi_size(self):
self.df.groupby(['key1', 'key2']).size()

def time_dt_timegrouper_size(self):
with warnings.catch_warnings(record=True):
self.df.groupby(TimeGrouper(key='dates', freq='M')).size()

def time_category_size(self):
self.draws.groupby(self.cats).size()

Expand Down
7 changes: 6 additions & 1 deletion asv_bench/benchmarks/index_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ def time_is_dates_only(self):

class Ops:

sample_time = 0.2
params = ['float', 'int']
param_names = ['dtype']

Expand Down Expand Up @@ -95,6 +94,12 @@ def time_min(self):
def time_min_trivial(self):
self.idx_inc.min()

def time_get_loc_inc(self):
self.idx_inc.get_loc(900000)

def time_get_loc_dec(self):
self.idx_dec.get_loc(100000)


class IndexAppend:

Expand Down
8 changes: 6 additions & 2 deletions asv_bench/benchmarks/io/parsers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import numpy as np

from pandas._libs.tslibs.parsing import (
_concat_date_cols, _does_string_look_like_datetime)
try:
from pandas._libs.tslibs.parsing import (
_concat_date_cols, _does_string_look_like_datetime)
except ImportError:
# Avoid whole benchmark suite import failure on asv (currently 0.4)
pass


class DoesStringLookLikeDatetime(object):
Expand Down
6 changes: 0 additions & 6 deletions asv_bench/benchmarks/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

class Methods:

sample_time = 0.2
params = (['DataFrame', 'Series'],
[10, 1000],
['int', 'float'],
Expand All @@ -23,7 +22,6 @@ def time_rolling(self, constructor, window, dtype, method):

class ExpandingMethods:

sample_time = 0.2
params = (['DataFrame', 'Series'],
['int', 'float'],
['median', 'mean', 'max', 'min', 'std', 'count', 'skew', 'kurt',
Expand All @@ -41,7 +39,6 @@ def time_expanding(self, constructor, dtype, method):

class EWMMethods:

sample_time = 0.2
params = (['DataFrame', 'Series'],
[10, 1000],
['int', 'float'],
Expand All @@ -58,7 +55,6 @@ def time_ewm(self, constructor, window, dtype, method):


class VariableWindowMethods(Methods):
sample_time = 0.2
params = (['DataFrame', 'Series'],
['50s', '1h', '1d'],
['int', 'float'],
Expand All @@ -75,7 +71,6 @@ def setup(self, constructor, window, dtype, method):

class Pairwise:

sample_time = 0.2
params = ([10, 1000, None],
['corr', 'cov'],
[True, False])
Expand All @@ -95,7 +90,6 @@ def time_pairwise(self, window, method, pairwise):


class Quantile:
sample_time = 0.2
params = (['DataFrame', 'Series'],
[10, 1000],
['int', 'float'],
Expand Down
5 changes: 3 additions & 2 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
name: Windows
vmImage: vs2017-win2016

- job: 'Checks_and_doc'
- job: 'Checks'
pool:
vmImage: ubuntu-16.04
timeoutInMinutes: 90
Expand Down Expand Up @@ -97,10 +97,11 @@ jobs:
- script: |
export PATH=$HOME/miniconda3/bin:$PATH
source activate pandas-dev
cd asv_bench
asv check -E existing
git remote add upstream https://github.com/pandas-dev/pandas.git
git fetch upstream
if git diff upstream/master --name-only | grep -q "^asv_bench/"; then
cd asv_bench
asv machine --yes
ASV_OUTPUT="$(asv dev)"
if [[ $(echo "$ASV_OUTPUT" | grep "failed") ]]; then
Expand Down
2 changes: 1 addition & 1 deletion ci/deps/azure-35-compat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,5 @@ dependencies:
- pip
- pip:
# for python 3.5, pytest>=4.0.2 is not available in conda
- pytest>=4.0.2
- pytest==4.5.0
- html5lib==1.0b2
2 changes: 1 addition & 1 deletion ci/deps/azure-macos-35.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ dependencies:
- pip:
- python-dateutil==2.5.3
# universal
- pytest>=4.0.2
- pytest==4.5.0
- pytest-xdist
- pytest-mock
- hypothesis>=3.58.0
4 changes: 2 additions & 2 deletions ci/setup_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,12 @@ echo "conda list"
conda list

# Install DB for Linux
if [ ${TRAVIS_OS_NAME} == "linux" ]; then
if [ "${TRAVIS_OS_NAME}" == "linux" ]; then
echo "installing dbs"
mysql -e 'create database pandas_nosetest;'
psql -c 'create database pandas_nosetest;' -U postgres
else
echo "not using dbs on non-linux"
echo "not using dbs on non-linux Travis builds or Azure Pipelines"
fi

echo "done"
3 changes: 2 additions & 1 deletion doc/source/ecosystem.rst
Original file line number Diff line number Diff line change
Expand Up @@ -363,4 +363,5 @@ Library Accessor Classes
============== ========== =========================

.. _cyberpandas: https://cyberpandas.readthedocs.io/en/latest
.. _pdvega: https://jakevdp.github.io/pdvega/
.. _pdvega: https://altair-viz.github.io/pdvega/

11 changes: 5 additions & 6 deletions doc/source/getting_started/basics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1455,9 +1455,8 @@ Iteration

The behavior of basic iteration over pandas objects depends on the type.
When iterating over a Series, it is regarded as array-like, and basic iteration
produces the values. Other data structures, like DataFrame,
follow the dict-like convention of iterating over the "keys" of the
objects.
produces the values. DataFrames follow the dict-like convention of iterating
over the "keys" of the objects.

In short, basic iteration (``for i in object``) produces:

Expand Down Expand Up @@ -1537,9 +1536,9 @@ For example:

.. ipython:: python
for item, frame in df.iteritems():
print(item)
print(frame)
for label, ser in df.iteritems():
print(label)
print(ser)
.. _basics.iterrows:

Expand Down
1 change: 0 additions & 1 deletion doc/source/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,6 @@ Optional Dependencies
`qtpy <https://github.com/spyder-ide/qtpy>`__ (requires PyQt or PySide),
`PyQt5 <https://www.riverbankcomputing.com/software/pyqt/download5>`__,
`PyQt4 <http://www.riverbankcomputing.com/software/pyqt/download>`__,
`pygtk <http://www.pygtk.org/>`__,
`xsel <http://www.vergenet.net/~conrad/software/xsel/>`__, or
`xclip <https://github.com/astrand/xclip/>`__: necessary to use
:func:`~pandas.read_clipboard`. Most package managers on Linux distributions will have ``xclip`` and/or ``xsel`` immediately available for installation.
Expand Down
2 changes: 0 additions & 2 deletions doc/source/reference/frame.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ Conversion
:toctree: api/

DataFrame.astype
DataFrame.convert_objects
DataFrame.infer_objects
DataFrame.copy
DataFrame.isna
Expand Down Expand Up @@ -205,7 +204,6 @@ Reindexing / Selection / Label manipulation
DataFrame.rename_axis
DataFrame.reset_index
DataFrame.sample
DataFrame.select
DataFrame.set_axis
DataFrame.set_index
DataFrame.tail
Expand Down
2 changes: 0 additions & 2 deletions doc/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ Conversion

Series.astype
Series.infer_objects
Series.convert_objects
Series.copy
Series.bool
Series.to_numpy
Expand Down Expand Up @@ -212,7 +211,6 @@ Reindexing / Selection / Label manipulation
Series.rename_axis
Series.reset_index
Series.sample
Series.select
Series.set_axis
Series.take
Series.tail
Expand Down
74 changes: 61 additions & 13 deletions doc/source/user_guide/groupby.rst
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,67 @@ For a grouped ``DataFrame``, you can rename in a similar manner:
'mean': 'bar',
'std': 'baz'}))
.. _groupby.aggregate.named:

Named Aggregation
~~~~~~~~~~~~~~~~~

.. versionadded:: 0.25.0

To support column-specific aggregation *with control over the output column names*, pandas
accepts the special syntax in :meth:`GroupBy.agg`, known as "named aggregation", where

- The keywords are the *output* column names
- The values are tuples whose first element is the column to select
and the second element is the aggregation to apply to that column. Pandas
provides the ``pandas.NamedAgg`` namedtuple with the fields ``['column', 'aggfunc']``
to make it clearer what the arguments are. As usual, the aggregation can
be a callable or a string alias.

.. ipython:: python
animals = pd.DataFrame({'kind': ['cat', 'dog', 'cat', 'dog'],
'height': [9.1, 6.0, 9.5, 34.0],
'weight': [7.9, 7.5, 9.9, 198.0]})
animals
animals.groupby("kind").agg(
min_height=pd.NamedAgg(column='height', aggfunc='min'),
max_height=pd.NamedAgg(column='height', aggfunc='max'),
average_weight=pd.NamedAgg(column='height', aggfunc=np.mean),
)
``pandas.NamedAgg`` is just a ``namedtuple``. Plain tuples are allowed as well.

.. ipython:: python
animals.groupby("kind").agg(
min_height=('height', 'min'),
max_height=('height', 'max'),
average_weight=('height', np.mean),
)
If your desired output column names are not valid python keywords, construct a dictionary
and unpack the keyword arguments

.. ipython:: python
animals.groupby("kind").agg(**{
'total weight': pd.NamedAgg(column='weight', aggfunc=sum),
})
Additional keyword arguments are not passed through to the aggregation functions. Only pairs
of ``(column, aggfunc)`` should be passed as ``**kwargs``. If your aggregation function
requires additional arguments, partially apply them with :meth:`functools.partial`.

.. note::

   For Python 3.5 and earlier, the order of ``**kwargs`` in a function was not
   preserved. This means that the output column ordering would not be
   consistent. To ensure consistent ordering, the keys (and so output columns)
   will always be sorted for Python 3.5.

Applying different functions to DataFrame columns
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand All @@ -588,19 +649,6 @@ must be either implemented on GroupBy or available via :ref:`dispatching
grouped.agg({'C': 'sum', 'D': 'std'})
.. note::

If you pass a dict to ``aggregate``, the ordering of the output columns is
non-deterministic. If you want to be sure the output columns will be in a specific
order, you can use an ``OrderedDict``. Compare the output of the following two commands:

.. ipython:: python
from collections import OrderedDict
grouped.agg({'D': 'std', 'C': 'mean'})
grouped.agg(OrderedDict([('D', 'std'), ('C', 'mean')]))
.. _groupby.aggregate.cython:

Cython-optimized aggregation functions
Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3272,7 +3272,7 @@ We can see that we got the same content back, which we had earlier written to th

.. note::

You may need to install xclip or xsel (with gtk, PyQt5, PyQt4 or qtpy) on Linux to use these methods.
You may need to install xclip or xsel (with PyQt5, PyQt4 or qtpy) on Linux to use these methods.

.. _io.pickle:

Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/sparse.rst
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ have no replacement.
Interaction with scipy.sparse
-----------------------------

Use :meth:`DataFrame.sparse.from_coo` to create a ``DataFrame`` with sparse values from a sparse matrix.
Use :meth:`DataFrame.sparse.from_spmatrix` to create a ``DataFrame`` with sparse values from a sparse matrix.

.. versionadded:: 0.25.0

Expand Down
10 changes: 10 additions & 0 deletions doc/source/user_guide/text.rst
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,16 @@ and replacing any remaining whitespaces with underscores:
``.str`` methods which operate on elements of type ``list`` are not available on such a
``Series``.

.. _text.warn_types:

.. warning::

Before v.0.25.0, the ``.str``-accessor did only the most rudimentary type checks. Starting with
v.0.25.0, the type of the Series is inferred and the allowed types (i.e. strings) are enforced more rigorously.

Generally speaking, the ``.str`` accessor is intended to work only on strings. With very few
exceptions, other uses are not supported, and may be disabled at a later point.


Splitting and Replacing Strings
-------------------------------
Expand Down
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.16.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ Interaction with scipy.sparse
Added :meth:`SparseSeries.to_coo` and :meth:`SparseSeries.from_coo` methods (:issue:`8048`) for converting to and from ``scipy.sparse.coo_matrix`` instances (see :ref:`here <sparse.scipysparse>`). For example, given a SparseSeries with MultiIndex we can convert to a `scipy.sparse.coo_matrix` by specifying the row and column labels as index levels:

.. ipython:: python
:okwarning:
s = pd.Series([3.0, np.nan, 1.0, 3.0, np.nan, np.nan])
s.index = pd.MultiIndex.from_tuples([(1, 2, 'a', 0),
Expand Down Expand Up @@ -121,6 +122,7 @@ The from_coo method is a convenience method for creating a ``SparseSeries``
from a ``scipy.sparse.coo_matrix``:

.. ipython:: python
:okwarning:
from scipy import sparse
A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])),
Expand Down
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.18.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,7 @@ used in the ``pandas`` implementation (:issue:`12644`, :issue:`12638`, :issue:`1
An example of this signature augmentation is illustrated below:

.. ipython:: python
:okwarning:
sp = pd.SparseDataFrame([1, 2, 3])
sp
Expand All @@ -409,6 +410,7 @@ Previous behaviour:
New behaviour:

.. ipython:: python
:okwarning:
np.cumsum(sp, axis=0)
Expand Down
Loading

0 comments on commit 8418a07

Please sign in to comment.