Skip to content

Commit 0af4ffe

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into perf-arith-series-col
2 parents 1dc2deb + f9b49c8 commit 0af4ffe

File tree

449 files changed

+39474
-8873
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

449 files changed

+39474
-8873
lines changed

.pre-commit-config.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,20 @@ repos:
1010
- id: flake8
1111
language: python_venv
1212
additional_dependencies: [flake8-comprehensions>=3.1.0]
13+
- id: flake8
14+
name: flake8-pyx
15+
language: python_venv
16+
files: \.(pyx|pxd)$
17+
types:
18+
- file
19+
args: [--append-config=flake8/cython.cfg]
20+
- id: flake8
21+
name: flake8-pxd
22+
language: python_venv
23+
files: \.pxi\.in$
24+
types:
25+
- file
26+
args: [--append-config=flake8/cython-template.cfg]
1327
- repo: https://github.com/pre-commit/mirrors-isort
1428
rev: v4.3.21
1529
hooks:

asv_bench/benchmarks/binary_ops.py renamed to asv_bench/benchmarks/arithmetic.py

Lines changed: 178 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,23 @@
11
import operator
2+
import warnings
23

34
import numpy as np
45

5-
from pandas import DataFrame, Series, date_range
6+
import pandas as pd
7+
from pandas import DataFrame, Series, Timestamp, date_range, to_timedelta
8+
import pandas._testing as tm
69
from pandas.core.algorithms import checked_add_with_arr
710

11+
from .pandas_vb_common import numeric_dtypes
12+
813
try:
914
import pandas.core.computation.expressions as expr
1015
except ImportError:
1116
import pandas.computation.expressions as expr
17+
try:
18+
import pandas.tseries.holiday
19+
except ImportError:
20+
pass
1221

1322

1423
class IntFrameWithScalar:
@@ -181,6 +190,110 @@ def time_timestamp_ops_diff_with_shift(self, tz):
181190
self.s - self.s.shift()
182191

183192

193+
class IrregularOps:
194+
def setup(self):
195+
N = 10 ** 5
196+
idx = date_range(start="1/1/2000", periods=N, freq="s")
197+
s = Series(np.random.randn(N), index=idx)
198+
self.left = s.sample(frac=1)
199+
self.right = s.sample(frac=1)
200+
201+
def time_add(self):
202+
self.left + self.right
203+
204+
205+
class TimedeltaOps:
206+
def setup(self):
207+
self.td = to_timedelta(np.arange(1000000))
208+
self.ts = Timestamp("2000")
209+
210+
def time_add_td_ts(self):
211+
self.td + self.ts
212+
213+
214+
class CategoricalComparisons:
215+
params = ["__lt__", "__le__", "__eq__", "__ne__", "__ge__", "__gt__"]
216+
param_names = ["op"]
217+
218+
def setup(self, op):
219+
N = 10 ** 5
220+
self.cat = pd.Categorical(list("aabbcd") * N, ordered=True)
221+
222+
def time_categorical_op(self, op):
223+
getattr(self.cat, op)("b")
224+
225+
226+
class IndexArithmetic:
227+
228+
params = ["float", "int"]
229+
param_names = ["dtype"]
230+
231+
def setup(self, dtype):
232+
N = 10 ** 6
233+
indexes = {"int": "makeIntIndex", "float": "makeFloatIndex"}
234+
self.index = getattr(tm, indexes[dtype])(N)
235+
236+
def time_add(self, dtype):
237+
self.index + 2
238+
239+
def time_subtract(self, dtype):
240+
self.index - 2
241+
242+
def time_multiply(self, dtype):
243+
self.index * 2
244+
245+
def time_divide(self, dtype):
246+
self.index / 2
247+
248+
def time_modulo(self, dtype):
249+
self.index % 2
250+
251+
252+
class NumericInferOps:
253+
# from GH 7332
254+
params = numeric_dtypes
255+
param_names = ["dtype"]
256+
257+
def setup(self, dtype):
258+
N = 5 * 10 ** 5
259+
self.df = DataFrame(
260+
{"A": np.arange(N).astype(dtype), "B": np.arange(N).astype(dtype)}
261+
)
262+
263+
def time_add(self, dtype):
264+
self.df["A"] + self.df["B"]
265+
266+
def time_subtract(self, dtype):
267+
self.df["A"] - self.df["B"]
268+
269+
def time_multiply(self, dtype):
270+
self.df["A"] * self.df["B"]
271+
272+
def time_divide(self, dtype):
273+
self.df["A"] / self.df["B"]
274+
275+
def time_modulo(self, dtype):
276+
self.df["A"] % self.df["B"]
277+
278+
279+
class DateInferOps:
280+
# from GH 7332
281+
def setup_cache(self):
282+
N = 5 * 10 ** 5
283+
df = DataFrame({"datetime64": np.arange(N).astype("datetime64[ms]")})
284+
df["timedelta"] = df["datetime64"] - df["datetime64"]
285+
return df
286+
287+
def time_subtract_datetimes(self, df):
288+
df["datetime64"] - df["datetime64"]
289+
290+
def time_timedelta_plus_datetime(self, df):
291+
df["timedelta"] + df["datetime64"]
292+
293+
def time_add_timedeltas(self, df):
294+
df["timedelta"] + df["timedelta"]
295+
296+
184297
class AddOverflowScalar:
185298

186299
params = [1, -1, 0]
@@ -218,4 +331,68 @@ def time_add_overflow_both_arg_nan(self):
218331
)
219332

220333

334+
hcal = pd.tseries.holiday.USFederalHolidayCalendar()
335+
# These offsets currently raise a NotImplementedError with .apply_index()
336+
non_apply = [
337+
pd.offsets.Day(),
338+
pd.offsets.BYearEnd(),
339+
pd.offsets.BYearBegin(),
340+
pd.offsets.BQuarterEnd(),
341+
pd.offsets.BQuarterBegin(),
342+
pd.offsets.BMonthEnd(),
343+
pd.offsets.BMonthBegin(),
344+
pd.offsets.CustomBusinessDay(),
345+
pd.offsets.CustomBusinessDay(calendar=hcal),
346+
pd.offsets.CustomBusinessMonthBegin(calendar=hcal),
347+
pd.offsets.CustomBusinessMonthEnd(calendar=hcal),
348+
pd.offsets.CustomBusinessMonthEnd(calendar=hcal),
349+
]
350+
other_offsets = [
351+
pd.offsets.YearEnd(),
352+
pd.offsets.YearBegin(),
353+
pd.offsets.QuarterEnd(),
354+
pd.offsets.QuarterBegin(),
355+
pd.offsets.MonthEnd(),
356+
pd.offsets.MonthBegin(),
357+
pd.offsets.DateOffset(months=2, days=2),
358+
pd.offsets.BusinessDay(),
359+
pd.offsets.SemiMonthEnd(),
360+
pd.offsets.SemiMonthBegin(),
361+
]
362+
offsets = non_apply + other_offsets
363+
364+
365+
class OffsetArrayArithmetic:
366+
367+
params = offsets
368+
param_names = ["offset"]
369+
370+
def setup(self, offset):
371+
N = 10000
372+
rng = pd.date_range(start="1/1/2000", periods=N, freq="T")
373+
self.rng = rng
374+
self.ser = pd.Series(rng)
375+
376+
def time_add_series_offset(self, offset):
377+
with warnings.catch_warnings(record=True):
378+
self.ser + offset
379+
380+
def time_add_dti_offset(self, offset):
381+
with warnings.catch_warnings(record=True):
382+
self.rng + offset
383+
384+
385+
class ApplyIndex:
386+
params = other_offsets
387+
param_names = ["offset"]
388+
389+
def setup(self, offset):
390+
N = 10000
391+
rng = pd.date_range(start="1/1/2000", periods=N, freq="T")
392+
self.rng = rng
393+
394+
def time_apply_index(self, offset):
395+
offset.apply_index(self.rng)
396+
397+
221398
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/categoricals.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -63,18 +63,6 @@ def time_existing_series(self):
6363
pd.Categorical(self.series)
6464

6565

66-
class CategoricalOps:
67-
params = ["__lt__", "__le__", "__eq__", "__ne__", "__ge__", "__gt__"]
68-
param_names = ["op"]
69-
70-
def setup(self, op):
71-
N = 10 ** 5
72-
self.cat = pd.Categorical(list("aabbcd") * N, ordered=True)
73-
74-
def time_categorical_op(self, op):
75-
getattr(self.cat, op)("b")
76-
77-
7866
class Concat:
7967
def setup(self):
8068
N = 10 ** 5

asv_bench/benchmarks/index_object.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -63,32 +63,6 @@ def time_is_dates_only(self):
6363
self.dr._is_dates_only
6464

6565

66-
class Ops:
67-
68-
params = ["float", "int"]
69-
param_names = ["dtype"]
70-
71-
def setup(self, dtype):
72-
N = 10 ** 6
73-
indexes = {"int": "makeIntIndex", "float": "makeFloatIndex"}
74-
self.index = getattr(tm, indexes[dtype])(N)
75-
76-
def time_add(self, dtype):
77-
self.index + 2
78-
79-
def time_subtract(self, dtype):
80-
self.index - 2
81-
82-
def time_multiply(self, dtype):
83-
self.index * 2
84-
85-
def time_divide(self, dtype):
86-
self.index / 2
87-
88-
def time_modulo(self, dtype):
89-
self.index % 2
90-
91-
9266
class Range:
9367
def setup(self):
9468
self.idx_inc = RangeIndex(start=0, stop=10 ** 7, step=3)

asv_bench/benchmarks/inference.py

Lines changed: 2 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,8 @@
11
import numpy as np
22

3-
from pandas import DataFrame, Series, to_numeric
3+
from pandas import Series, to_numeric
44

5-
from .pandas_vb_common import lib, numeric_dtypes, tm
6-
7-
8-
class NumericInferOps:
9-
# from GH 7332
10-
params = numeric_dtypes
11-
param_names = ["dtype"]
12-
13-
def setup(self, dtype):
14-
N = 5 * 10 ** 5
15-
self.df = DataFrame(
16-
{"A": np.arange(N).astype(dtype), "B": np.arange(N).astype(dtype)}
17-
)
18-
19-
def time_add(self, dtype):
20-
self.df["A"] + self.df["B"]
21-
22-
def time_subtract(self, dtype):
23-
self.df["A"] - self.df["B"]
24-
25-
def time_multiply(self, dtype):
26-
self.df["A"] * self.df["B"]
27-
28-
def time_divide(self, dtype):
29-
self.df["A"] / self.df["B"]
30-
31-
def time_modulo(self, dtype):
32-
self.df["A"] % self.df["B"]
33-
34-
35-
class DateInferOps:
36-
# from GH 7332
37-
def setup_cache(self):
38-
N = 5 * 10 ** 5
39-
df = DataFrame({"datetime64": np.arange(N).astype("datetime64[ms]")})
40-
df["timedelta"] = df["datetime64"] - df["datetime64"]
41-
return df
42-
43-
def time_subtract_datetimes(self, df):
44-
df["datetime64"] - df["datetime64"]
45-
46-
def time_timedelta_plus_datetime(self, df):
47-
df["timedelta"] + df["datetime64"]
48-
49-
def time_add_timedeltas(self, df):
50-
df["timedelta"] + df["timedelta"]
5+
from .pandas_vb_common import lib, tm
516

527

538
class ToNumeric:

asv_bench/benchmarks/multiindex_object.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,4 +160,43 @@ def time_equals_non_object_index(self):
160160
self.mi_large_slow.equals(self.idx_non_object)
161161

162162

163+
class SetOperations:
164+
165+
params = [
166+
("monotonic", "non_monotonic"),
167+
("datetime", "int", "string"),
168+
("intersection", "union", "symmetric_difference"),
169+
]
170+
param_names = ["index_structure", "dtype", "method"]
171+
172+
def setup(self, index_structure, dtype, method):
173+
N = 10 ** 5
174+
level1 = range(1000)
175+
176+
level2 = date_range(start="1/1/2000", periods=N // 1000)
177+
dates_left = MultiIndex.from_product([level1, level2])
178+
179+
level2 = range(N // 1000)
180+
int_left = MultiIndex.from_product([level1, level2])
181+
182+
level2 = tm.makeStringIndex(N // 1000).values
183+
str_left = MultiIndex.from_product([level1, level2])
184+
185+
data = {
186+
"datetime": dates_left,
187+
"int": int_left,
188+
"string": str_left,
189+
}
190+
191+
if index_structure == "non_monotonic":
192+
data = {k: mi[::-1] for k, mi in data.items()}
193+
194+
data = {k: {"left": mi, "right": mi[:-1]} for k, mi in data.items()}
195+
self.left = data[dtype]["left"]
196+
self.right = data[dtype]["right"]
197+
198+
def time_operation(self, index_structure, dtype, method):
199+
getattr(self.left, method)(self.right)
200+
201+
163202
from .pandas_vb_common import setup # noqa: F401 isort:skip

0 commit comments

Comments
 (0)