Skip to content

Commit 54367c9

Browse files
jreback authored and wesm committed
BUG: fix ordering of returned data on an index axis when a selection filter is applied,
e.g. when more than 61 fields were specified in a Term (weird but true)
1 parent b33d1a9 commit 54367c9

File tree

2 files changed: 44 additions and 26 deletions (+44 -26 lines changed)

pandas/io/pytables.py

Lines changed: 31 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -961,8 +961,9 @@ def infer(self, table):
961961

962962
def convert(self, sel):
963963
""" set the values from this selection """
964-
self.values = _maybe_convert(sel.values[self.cname], self.kind)
965-
964+
self.values = Index(_maybe_convert(sel.values[self.cname], self.kind))
965+
self.factor = Categorical.from_array(self.values)
966+
966967
@property
967968
def attrs(self):
968969
return self.table._v_attrs
@@ -1481,6 +1482,22 @@ def create_axes(self, axes, obj, validate = True, min_itemsize = None):
14811482
if validate:
14821483
self.validate(existing_table)
14831484

1485+
def process_axes(self, obj):
1486+
""" process axes filters """
1487+
1488+
def reindex(obj, axis, filt, ordered):
1489+
axis_name = obj._get_axis_name(axis)
1490+
ordd = ordered & filt
1491+
ordd = sorted(ordered.get_indexer(ordd))
1492+
return obj.reindex_axis(ordered.take(ordd), axis = obj._get_axis_number(axis_name), copy = False)
1493+
1494+
# apply the selection filters (but keep in the same order)
1495+
if self.selection.filter:
1496+
for axis, filt in self.selection.filter:
1497+
obj = reindex(obj, axis, filt, getattr(obj,obj._get_axis_name(axis)))
1498+
1499+
return obj
1500+
14841501
def create_description(self, compression = None, complevel = None):
14851502
""" create the description of the table from the axes & values """
14861503

@@ -1556,8 +1573,7 @@ def read(self, where=None):
15561573

15571574
if not self.read_axes(where): return None
15581575

1559-
indicies = [ i.values for i in self.index_axes ]
1560-
factors = [ Categorical.from_array(i) for i in indicies ]
1576+
factors = [ a.factor for a in self.index_axes ]
15611577
levels = [ f.levels for f in factors ]
15621578
N = [ len(f.levels) for f in factors ]
15631579
labels = [ f.labels for f in factors ]
@@ -1597,7 +1613,8 @@ def read(self, where=None):
15971613
'appended')
15981614

15991615
# reconstruct
1600-
long_index = MultiIndex.from_arrays(indicies)
1616+
long_index = MultiIndex.from_arrays([ i.values for i in self.index_axes ])
1617+
16011618

16021619
for c in self.values_axes:
16031620
lp = DataFrame(c.data, index=long_index, columns=c.values)
@@ -1627,12 +1644,8 @@ def read(self, where=None):
16271644
for axis,labels in self.non_index_axes:
16281645
wp = wp.reindex_axis(labels,axis=axis,copy=False)
16291646

1630-
# apply the selection filters (but keep in the same order)
1631-
if self.selection.filter:
1632-
filter_axis_name = wp._get_axis_name(self.non_index_axes[0][0])
1633-
ordered = getattr(wp,filter_axis_name)
1634-
new_axis = sorted(ordered & self.selection.filter)
1635-
wp = wp.reindex(**{ filter_axis_name : new_axis, 'copy' : False })
1647+
# apply the selection filters & axis orderings
1648+
wp = self.process_axes(wp)
16361649

16371650
return wp
16381651

@@ -1792,7 +1805,7 @@ def read(self, where=None):
17921805

17931806
if not self.read_axes(where): return None
17941807

1795-
index = Index(self.index_axes[0].values)
1808+
index = self.index_axes[0].values
17961809
frames = []
17971810
for a in self.values_axes:
17981811
columns = Index(a.values)
@@ -1815,16 +1828,8 @@ def read(self, where=None):
18151828
for axis,labels in self.non_index_axes:
18161829
df = df.reindex_axis(labels,axis=axis,copy=False)
18171830

1818-
# apply the selection filters (but keep in the same order)
1819-
filter_axis_name = df._get_axis_name(self.non_index_axes[0][0])
1820-
1821-
ordered = getattr(df,filter_axis_name)
1822-
if self.selection.filter:
1823-
ordd = ordered & self.selection.filter
1824-
ordd = sorted(ordered.get_indexer(ordd))
1825-
df = df.reindex(**{ filter_axis_name : ordered.take(ordd), 'copy' : False })
1826-
else:
1827-
df = df.reindex(**{ filter_axis_name : ordered , 'copy' : False })
1831+
# apply the selection filters & axis orderings
1832+
df = self.process_axes(df)
18281833

18291834
return df
18301835

@@ -2185,11 +2190,11 @@ def eval(self):
21852190

21862191
# use a filter after reading
21872192
else:
2188-
self.filter = set([ v[1] for v in values ])
2193+
self.filter = (self.field,Index([ v[1] for v in values ]))
21892194

21902195
else:
21912196

2192-
self.filter = set([ v[1] for v in values ])
2197+
self.filter = (self.field,Index([ v[1] for v in values ]))
21932198

21942199
else:
21952200

@@ -2244,10 +2249,10 @@ def __init__(self, table, where=None):
22442249
conds = [ t.condition for t in self.terms if t.condition is not None ]
22452250
if len(conds):
22462251
self.condition = "(%s)" % ' & '.join(conds)
2247-
self.filter = set()
2252+
self.filter = []
22482253
for t in self.terms:
22492254
if t.filter is not None:
2250-
self.filter |= t.filter
2255+
self.filter.append(t.filter)
22512256

22522257
def generate(self, where):
22532258
""" where can be a : dict,list,tuple,string """

pandas/io/tests/test_pytables.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1080,13 +1080,26 @@ def test_select(self):
10801080
wp = tm.makePanel()
10811081

10821082
# put/select ok
1083+
self.store.remove('wp')
10831084
self.store.put('wp', wp, table=True)
10841085
self.store.select('wp')
10851086

10861087
# non-table ok (where = None)
1088+
self.store.remove('wp')
10871089
self.store.put('wp2', wp, table=False)
10881090
self.store.select('wp2')
10891091

1092+
# selection on the non-indexable with a large number of columns
1093+
wp = Panel(np.random.randn(100, 100, 100), items = [ 'Item%03d' % i for i in xrange(100) ],
1094+
major_axis=date_range('1/1/2000', periods=100), minor_axis = [ 'E%03d' % i for i in xrange(100) ])
1095+
1096+
self.store.remove('wp')
1097+
self.store.append('wp', wp)
1098+
items = [ 'Item%03d' % i for i in xrange(80) ]
1099+
result = self.store.select('wp', Term('items', items))
1100+
expected = wp.reindex(items = items)
1101+
tm.assert_panel_equal(expected, result)
1102+
10901103
# selectin non-table with a where
10911104
#self.assertRaises(Exception, self.store.select,
10921105
# 'wp2', ('column', ['A', 'D']))

0 commit comments

Comments
 (0)