Skip to content

BUG: Joining on non-unique PeriodIndex fails #16871

Closed
@Dr-Irv

Description

@Dr-Irv

Code Sample, a copy-pastable example if possible

import pandas as pd
perindex = pd.period_range('2016-01-01', periods=16, freq='M')
perdf = pd.DataFrame([i for i in range(len(perindex))],
                     index=perindex, columns=['pnum'])
df2 = pd.concat([perdf, perdf])
perdf.merge(df2, left_index=True, right_index=True, how='outer')

Problem description

I reported this in #16541, but I guess it fell through the cracks. Here is the stack trace:

TypeError                                 Traceback (most recent call last)
<ipython-input-2-c7c6bdf18c3f> in <module>()
      3                      index=perindex, columns=['pnum'])
      4 df2 = pd.concat([perdf, perdf])
----> 5 perdf.merge(df2, left_index=True, right_index=True, how='outer')

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\frame.py in merge(self, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator)
   4720                      right_on=right_on, left_index=left_index,
   4721                      right_index=right_index, sort=sort, suffixes=suffixes,
-> 4722                      copy=copy, indicator=indicator)
   4723 
   4724     def round(self, decimals=0, *args, **kwargs):

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\reshape\merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator)
     52                          right_index=right_index, sort=sort, suffixes=suffixes,
     53                          copy=copy, indicator=indicator)
---> 54     return op.get_result()
     55 
     56 

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\reshape\merge.py in get_result(self)
    567                 self.left, self.right)
    568 
--> 569         join_index, left_indexer, right_indexer = self._get_join_info()
    570 
    571         ldata, rdata = self.left._data, self.right._data

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\reshape\merge.py in _get_join_info(self)
    720             join_index, left_indexer, right_indexer = \
    721                 left_ax.join(right_ax, how=self.how, return_indexers=True,
--> 722                              sort=self.sort)
    723         elif self.right_index and self.how == 'left':
    724             join_index, left_indexer, right_indexer = \

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\indexes\period.py in join(self, other, how, level, return_indexers, sort)
    929         result = Int64Index.join(self, other, how=how, level=level,
    930                                  return_indexers=return_indexers,
--> 931                                  sort=sort)
    932 
    933         if return_indexers:

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\indexes\base.py in join(self, other, how, level, return_indexers, sort)
   3044             else:
   3045                 return self._join_non_unique(other, how=how,
-> 3046                                              return_indexers=return_indexers)
   3047         elif self.is_monotonic and other.is_monotonic:
   3048             try:

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\indexes\base.py in _join_non_unique(self, other, how, return_indexers)
   3125         left_idx, right_idx = _get_join_indexers([self.values],
   3126                                                  [other._values], how=how,
-> 3127                                                  sort=True)
   3128 
   3129         left_idx = _ensure_platform_int(left_idx)

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\reshape\merge.py in _get_join_indexers(left_keys, right_keys, sort, how, **kwargs)
    980 
    981     # get left & right join labels and num. of levels at each location
--> 982     llab, rlab, shape = map(list, zip(* map(fkeys, left_keys, right_keys)))
    983 
    984     # get flat i8 keys from label lists

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\reshape\merge.py in _factorize_keys(lk, rk, sort)
   1410     if sort:
   1411         uniques = rizer.uniques.to_array()
-> 1412         llab, rlab = _sort_labels(uniques, llab, rlab)
   1413 
   1414     # NA group

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\reshape\merge.py in _sort_labels(uniques, left, right)
   1436     labels = np.concatenate([left, right])
   1437 
-> 1438     _, new_labels = algos.safe_sort(uniques, labels, na_sentinel=-1)
   1439     new_labels = _ensure_int64(new_labels)
   1440     new_left, new_right = new_labels[:l], new_labels[l:]

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\algorithms.py in safe_sort(values, labels, na_sentinel, assume_unique)
    481     if compat.PY3 and lib.infer_dtype(values) == 'mixed-integer':
    482         # unorderable in py3 if mixed str/int
--> 483         ordered = sort_mixed(values)
    484     else:
    485         try:

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\algorithms.py in sort_mixed(values)
    474         str_pos = np.array([isinstance(x, string_types) for x in values],
    475                            dtype=bool)
--> 476         nums = np.sort(values[~str_pos])
    477         strs = np.sort(values[str_pos])
    478         return _ensure_object(np.concatenate([nums, strs]))

C:\Anaconda3\envs\py36\lib\site-packages\numpy\core\fromnumeric.py in sort(a, axis, kind, order)
    820     else:
    821         a = asanyarray(a).copy(order="K")
--> 822     a.sort(axis=axis, kind=kind, order=order)
    823     return a
    824 

pandas/_libs/period.pyx in pandas._libs.period._Period.__richcmp__ (pandas\_libs\period.c:12067)()

TypeError: Cannot compare type 'Period' with type 'int'

Expected Output

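Shouldn't get a stack trace! The outer merge on the non-unique PeriodIndex should complete and return the joined DataFrame instead of raising a TypeError.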

Output of pd.show_versions()

INSTALLED VERSIONS
------------------
commit: None
python: 3.6.1.final.0
python-bits: 64
OS: Windows
OS-release: 10
machine: AMD64
processor: Intel64 Family 6 Model 60 Stepping 3, GenuineIntel
byteorder: little
LC_ALL: None
LANG: None
LOCALE: None.None

pandas: 0.20.3
pytest: None
pip: 9.0.1
setuptools: 27.2.0
Cython: None
numpy: 1.12.1
scipy: 0.19.0
xarray: None
IPython: 6.0.0
sphinx: None
patsy: 0.4.1
dateutil: 2.6.0
pytz: 2017.2
blosc: None
bottleneck: None
tables: None
numexpr: None
feather: None
matplotlib: 2.0.0
openpyxl: None
xlrd: 1.0.0
xlwt: None
xlsxwriter: None
lxml: None
bs4: None
html5lib: 0.999
sqlalchemy: None
pymysql: None
psycopg2: None
jinja2: 2.9.6
s3fs: None
pandas_gbq: None
pandas_datareader: None

Metadata

Metadata

Assignees

No one assigned

    Labels

    Bug; Period (Period data type); Reshaping (Concat, Merge/Join, Stack/Unstack, Explode)

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions