Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: use eval expression parsing as replacement for Term in HDFStore #4155

Closed
wants to merge 48 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
89a03be
ENH: add new computation module and toplevel eval function
cpcloud Jun 16, 2013
bcd17b0
ENH/TST: add new instance testing functions and their tests
cpcloud Jun 16, 2013
81bacd1
BUG: prevent certain index types from joining with DatetimeIndex
cpcloud Jun 16, 2013
e380271
TST/ENH: add 2d bare numpy array and nan support
cpcloud Jun 16, 2013
99a3d28
ENH: add modulus support
cpcloud Jun 17, 2013
4db95fe
TST: add failing modulus tests
cpcloud Jun 17, 2013
6000c89
CLN: use format string for unicode
cpcloud Jun 18, 2013
c25a1d4
CLN: remove engine detection and manip for datetimes
cpcloud Jun 18, 2013
1132bc4
CLN/ENH: add new interface to encapsulate Terms and Constants
cpcloud Jun 20, 2013
54f1897
ENH: allow an already-parsed expression to be passed to eval
cpcloud Jun 20, 2013
e20900a
CLN: add automatic scope creating object
cpcloud Jun 26, 2013
51d80f6
CLN: make the environment an implementation detail
cpcloud Jun 28, 2013
038d79c
DOC: add docstring to eval
cpcloud Jun 28, 2013
599cf32
CLN: cleanup pytables.py a bit
cpcloud Jun 28, 2013
ea769e6
CLN: clean up engines
cpcloud Jun 29, 2013
ff78c08
CLN: clean up eval and have the Scope instance auto create the scope …
cpcloud Jul 4, 2013
f9f7fd7
CLN: add six.string_types checking instead of basestring
cpcloud Jul 4, 2013
48eff13
TST: clean up some tests, add minor assertions where none existed
cpcloud Jul 4, 2013
d87f027
CLN: clean up frame.py a bit
cpcloud Jul 4, 2013
5b58a08
CLN: clean up pytables arguments a bit
cpcloud Jul 4, 2013
7482a27
CLN: use shiny new string mixin to refactor repring
cpcloud Jul 4, 2013
0d40fe1
CLN: move align to its own file
cpcloud Jul 4, 2013
87957d2
CLN: clean up and use new stringmixin for Expr
cpcloud Jul 4, 2013
e35cb5c
ENH/CLN: be more careful about unicode
cpcloud Jul 4, 2013
1ceec39
CLN: run autopep8 on pandas/io/pytables.py
cpcloud Jul 4, 2013
c665a85
DOC: reference future enhancingperf.eval section
cpcloud Jul 4, 2013
cb27934
CLN/DOC: clean up docstrings in pytables
cpcloud Jul 4, 2013
63ba37d
CLN: actually pass fletcher32 in get_store
cpcloud Jul 4, 2013
dcde590
CLN: remove unused variables
cpcloud Jul 4, 2013
3c4e2b3
CLN: more pep8 and get rid of most raise Exception clauses
cpcloud Jul 4, 2013
226c786
CLN: change NameError to match python
cpcloud Jul 4, 2013
79871d8
API: expose the Expr object to top level pandas
cpcloud Jul 5, 2013
84fdb45
CLN/TST: fail with a NotImplementedError on and or not
cpcloud Jul 5, 2013
4d9f9a7
CLN: generalize operator/expression printing
cpcloud Jul 5, 2013
a0d2ce0
CLN: clean up testing and expr
cpcloud Jul 5, 2013
317a153
ENH: add modest type inference
cpcloud Jul 6, 2013
401bc28
ENH: rewrite assignment as equal comparison
cpcloud Jul 6, 2013
22dedcb
ENH: initial commit for adding Expr based terms for pytables support
jreback Jul 6, 2013
441285c
WIP: still some debugging statements in
jreback Jul 7, 2013
05a005f
WIP: conditions working now, filtering still only ok
jreback Jul 7, 2013
22b4a93
TST: more test changes
jreback Jul 7, 2013
ca292c2
BUG: added HDFStore to inherit from Stringmixin
jreback Jul 7, 2013
dfef617
BUG: process visit_Index
jreback Jul 7, 2013
b168fb3
ENH: use non_implemented function call in ExprVisitor
jreback Jul 7, 2013
5fac749
BUG: fixed scoping issues by _ensure_term at the top-level
jreback Jul 7, 2013
c5a3c9f
TST: fixed remaining tests
jreback Jul 7, 2013
71a23a8
BUG: py3 fixes; revise scoping rules to be more broad
jreback Jul 8, 2013
e712762
COMPAT: allow prior 0.12 query syntax for terms, e.g. Term('index','>…
jreback Jul 8, 2013
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
CLN: remove engine detection and manip for datetimes
  • Loading branch information
cpcloud committed Jul 6, 2013
commit c25a1d4b0853578183e75d341aaab051941bdce7
34 changes: 3 additions & 31 deletions pandas/computation/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,40 +2,13 @@

import sys
import numbers
import collections
import itertools

import numpy as np

Scope = collections.namedtuple('Scope', 'globals locals')

import pandas.core.common as com
from pandas.computation.expr import Expr
from pandas.computation.expr import Expr, Scope
from pandas.computation.engines import _engines


def _scope_has_series_and_frame_datetime_index(env):
    """Count the Series and frames in ``env`` indexed by a DatetimeIndex.

    Parameters
    ----------
    env : Scope
        Object exposing ``locals`` and ``globals`` mappings; the values of
        both mappings are inspected.

    Returns
    -------
    tuple
        ``(series_count, frame_count)``: how many values satisfy
        ``com.is_series`` / ``com.is_frame`` respectively and also carry a
        ``DatetimeIndex`` as their ``index``.
    """
    # Function-level import, kept as in the original
    # (presumably to avoid a circular import -- TODO confirm).
    from pandas import DatetimeIndex

    series_index = frame_index = 0

    # ``values()`` is available on both Python 2 and Python 3 mappings,
    # whereas ``itervalues()`` only exists on Python 2.
    for v in itertools.chain(env.locals.values(), env.globals.values()):
        # ``and`` short-circuits, so ``v.index`` is only touched for objects
        # that passed the Series/frame check.
        series_index += com.is_series(v) and isinstance(v.index, DatetimeIndex)
        frame_index += com.is_frame(v) and isinstance(v.index, DatetimeIndex)
    return series_index, frame_index


def _maybe_convert_engine(env, engine):
    """Pick the engine name to use for evaluating inside ``env``.

    Parameters
    ----------
    env : Scope
        Scope whose locals and globals are inspected.
    engine : str
        Name of the requested engine.

    Returns
    -------
    str
        ``'python'`` when the scope holds at least one Series and at least
        one frame indexed by a DatetimeIndex, otherwise ``engine`` unchanged.

    Raises
    ------
    AssertionError
        If ``env`` is not a ``Scope`` or ``engine`` is not a string.
    """
    try:
        string_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        string_types = str  # Python 3 has no ``basestring``

    assert isinstance(env, Scope), 'environment must be an instance of Scope'
    assert isinstance(engine, string_types), 'engine name must be a string'

    # Both counts must be non-zero for the override to apply.
    if all(_scope_has_series_and_frame_datetime_index(env)):
        return 'python'
    return engine


def eval(expr, engine='numexpr', truediv=True, local_dict=None,
global_dict=None):
# make sure we're passed a valid engine
Expand All @@ -44,7 +17,8 @@ def eval(expr, engine='numexpr', truediv=True, local_dict=None,
' {1}'.format(_engines.keys()))

# 1 up in the call stack for locals/globals; see the documentation for the
# inspect module for why you must decrease the refcount of frame
# inspect module for why you must decrease the refcount of frame at all
# costs
frame = sys._getframe(1)

try:
Expand All @@ -54,8 +28,6 @@ def eval(expr, engine='numexpr', truediv=True, local_dict=None,
# shallow copy the scope so we don't overwrite everything
env = Scope(gbl.copy(), lcl.copy())

engine = _maybe_convert_engine(env, engine)

# parse the expression
parsed_expr = Expr(expr, engine, env, truediv)

Expand Down
48 changes: 23 additions & 25 deletions pandas/computation/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"""

import numpy as np
import pandas.core.common as com

try:
import numexpr as ne
Expand Down Expand Up @@ -46,13 +47,10 @@ def set_use_numexpr(v=True):
def set_numexpr_threads(n=None):
# if we are using numexpr, set the threads to n
# otherwise reset
try:
if _NUMEXPR_INSTALLED and _USE_NUMEXPR:
if n is None:
n = ne.detect_number_of_cores()
ne.set_num_threads(n)
except:
pass
if _NUMEXPR_INSTALLED and _USE_NUMEXPR:
if n is None:
n = ne.detect_number_of_cores()
ne.set_num_threads(n)


def _evaluate_standard(op, op_str, a, b, raise_on_error=True, **eval_kwargs):
Expand Down Expand Up @@ -84,7 +82,8 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check):

return False

def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False, **eval_kwargs):

def _evaluate_numexpr(op, op_str, a, b, raise_on_error=False, **eval_kwargs):
result = None

if _can_use_numexpr(op, op_str, a, b, 'evaluate'):
Expand All @@ -94,15 +93,13 @@ def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False, **eval_kwargs):
a_value = a_value.values
if hasattr(b_value, 'values'):
b_value = b_value.values
result = ne.evaluate('a_value %s b_value' % op_str,
local_dict={ 'a_value' : a_value,
'b_value' : b_value },
result = ne.evaluate('a_value %s b_value' % op_str,
local_dict={'a_value': a_value,
'b_value': b_value},
casting='safe', **eval_kwargs)
except (ValueError), detail:
if 'unknown type object' in str(detail):
pass
except (Exception), detail:
if raise_on_error:
except Exception as detail:
if ('unknown type object' not in com.pprint_thing(detail) and
raise_on_error):
raise

if result is None:
Expand All @@ -128,17 +125,15 @@ def _where_numexpr(cond, a, b, raise_on_error=False):
a_value = a_value.values
if hasattr(b_value, 'values'):
b_value = b_value.values
result = ne.evaluate('where(cond_value,a_value,b_value)',
result = ne.evaluate('where(cond_value, a_value, b_value)',
local_dict={'cond_value': cond_value,
'a_value': a_value,
'b_value': b_value},
casting='safe')
except (ValueError), detail:
if 'unknown type object' in str(detail):
pass
except (Exception), detail:
if raise_on_error:
raise TypeError(str(detail))
except Exception as detail:
if ('unknown type object' not in com.pprint_thing(detail) and
raise_on_error):
raise

if result is None:
result = _where_standard(cond, a, b, raise_on_error)
Expand All @@ -149,7 +144,9 @@ def _where_numexpr(cond, a, b, raise_on_error=False):
# turn myself on
set_use_numexpr(True)

def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True, **eval_kwargs):

def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True,
**eval_kwargs):
""" evaluate and return the expression of the op on a and b

Parameters
Expand All @@ -166,7 +163,8 @@ def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True, **eval_kw
"""

if use_numexpr:
return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error, **eval_kwargs)
return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error,
**eval_kwargs)
return _evaluate_standard(op, op_str, a, b, raise_on_error=raise_on_error)


Expand Down
35 changes: 31 additions & 4 deletions pandas/computation/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,32 @@ def _update_names(env, mapping):
updater(key, value)


class Term(object):
    """A named value taking part in an expression.

    Parameters
    ----------
    value : object
        The object this term stands for.
    name : object
        Identifier shown when the term is printed and used when the value is
        written back into the scope.
    env : object
        Scope the term belongs to; it is stored as given and only passed on
        to ``_update_name``.

    Attributes
    ----------
    type : type
        ``type(value)`` as captured at construction time; ``update`` does not
        refresh it.
    """

    def __init__(self, value, name, env):
        self.value = value
        self.name = name
        self.env = env
        self.type = type(value)

    def __iter__(self):
        """Yield the wrapped value exactly once."""
        # Falling off the end terminates the generator.  An explicit
        # ``raise StopIteration`` inside a generator is converted into a
        # RuntimeError on Python 3.7+ (PEP 479), so it must not be used here.
        yield self.value

    def __str__(self):
        return '{0}({1!r})'.format(self.__class__.__name__, self.name)

    __repr__ = __str__

    def update(self, env, value):
        """Rebind this term's name to ``value`` and remember the new value.

        NOTE(review): the ``env`` argument is not used; the scope stored on
        the instance (``self.env``) is the one that gets updated -- confirm
        this is intended.
        """
        _update_name(self.env, self.name, value)
        self.value = value


class Constant(Term):
    """A term that uses its own value as its name."""

    def __init__(self, value, env):
        # The value is passed twice: once as the value, once as the name.
        super(Constant, self).__init__(value=value, name=value, env=env)


class Op(object):
"""Hold an operator of unknown arity
"""
Expand Down Expand Up @@ -89,13 +115,14 @@ def name(self):
def _cast(terms, env, dtype):
resolver = partial(_resolve_name, env)
updater = partial(_update_name, env)
dt = np.dtype(dtype)
for term in terms:
t = resolver(term)
try:
new_value = t.astype(dtype)
new_value = t.astype(dt)
except AttributeError:
new_value = dtype.type(t)
updater(term, t)
new_value = dt.type(t)
updater(term, new_value)

class BinOp(Op):
"""Hold a binary operator and its operands
Expand Down Expand Up @@ -160,7 +187,7 @@ def __call__(self, env):
class Mod(BinOp):
def __init__(self, lhs, rhs, env=None):
super(Mod, self).__init__('%', lhs, rhs)
_cast(env, (lhs, rhs), np.float_)
_cast((lhs, rhs), env, np.float_)


_unary_ops_syms = '+', '-', '~'
Expand Down
23 changes: 17 additions & 6 deletions pandas/computation/tests/test_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
from itertools import product

import nose
from nose.tools import assert_raises, assert_tuple_equal, assert_equal
from nose.tools import assert_true
from nose.tools import assert_raises, assert_tuple_equal
from nose.tools import assert_true, assert_false

from numpy.random import randn, rand
import numpy as np
Expand All @@ -23,8 +23,6 @@
import pandas.computation.expr as expr
from pandas.computation.expressions import _USE_NUMEXPR
from pandas.computation.eval import Scope
from pandas.computation.eval import _scope_has_series_and_frame_datetime_index
from pandas.computation.eval import _maybe_convert_engine
from pandas.util.testing import assert_frame_equal, randbool


Expand Down Expand Up @@ -551,8 +549,6 @@ def check_datetime_index_rows_punts_to_python(engine):
index = getattr(df, 'index')
s = Series(np.random.randn(5), index[:5])
env = Scope(globals(), locals())
assert_true(_scope_has_series_and_frame_datetime_index(env))
assert_equal(_maybe_convert_engine(env, engine), 'python')


def test_datetime_index_rows_punts_to_python():
Expand Down Expand Up @@ -582,6 +578,21 @@ def test_global_scope():
yield check_global_scope, engine


def check_is_expr(engine):
# Checks expr.isexpr against one complete expression ('s + 1') and one
# truncated expression ('s +'), each with check_names on and off.
# NOTE(review): `engine` is not used anywhere in this body.
# `s` is the name that appears inside both expression strings; presumably
# isexpr resolves it from the calling scope when check_names=True -- confirm.
s = 1
valid = 's + 1'
invalid = 's +'
assert_true(expr.isexpr(valid, check_names=True))
# NOTE(review): this expects the complete expression to be rejected when
# check_names=False -- confirm against the isexpr implementation.
assert_false(expr.isexpr(valid, check_names=False))
# The truncated expression is expected to be rejected in both modes.
assert_false(expr.isexpr(invalid, check_names=False))
assert_false(expr.isexpr(invalid, check_names=True))


def test_is_expr():
    """Run ``check_is_expr`` once for each item obtained from ``_engines``."""
    for current in _engines:
        check_is_expr(current)


# When this file is executed directly, hand the module to nose: verbose with
# output capture disabled (-vvs), stop at the first error or failure (-x), and
# drop into pdb on errors and failures (--pdb, --pdb-failure).  exit=False
# keeps nose from exiting the interpreter once the run finishes.
if __name__ == '__main__':
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
exit=False)