Skip to content

Commit

Permalink
apply @anmyachev's suggestion
Browse files Browse the repository at this point in the history
Signed-off-by: Dmitry Chigarev <dmitry.chigarev@intel.com>
  • Loading branch information
dchigarev committed Sep 18, 2023
1 parent 05e43ec commit 94c361f
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 11 deletions.
19 changes: 19 additions & 0 deletions modin/core/storage_formats/base/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2271,6 +2271,25 @@ def nsmallest(self, n=5, columns=None, keep="first"):
self, n=n, columns=columns, keep=keep
)

@doc_utils.add_refer_to("DataFrame.query")
def rowwise_query(self, expr, **kwargs):
    """
    Query columns of the QueryCompiler with a boolean expression row-wise.

    Parameters
    ----------
    expr : str
        Boolean expression used to select rows.
    **kwargs : dict
        Keyword arguments accompanying the expression; ``DataFrame.query``
        forwards its own ``**kwargs`` to this method.

    Returns
    -------
    BaseQueryCompiler
        New QueryCompiler containing the rows where the boolean expression is satisfied.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation; storage formats that
        support row-wise queries are expected to override the method.
    """
    # The base class provides no row-wise execution path. Raising
    # ``NotImplementedError`` lets the caller detect that and choose another
    # strategy (``DataFrame.query`` catches it and falls back to pandas).
    raise NotImplementedError(
        "Row-wise queries execution is not implemented for the selected backend."
    )

@doc_utils.add_refer_to("DataFrame.eval")
def eval(self, expr, **kwargs):
"""
Expand Down
2 changes: 1 addition & 1 deletion modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3187,7 +3187,7 @@ def _list_like_func(self, func, axis, *args, **kwargs):
)
return self.__constructor__(new_modin_frame)

def _rowwise_query(self, expr, **kwargs):
def rowwise_query(self, expr, **kwargs):
"""
Query the columns of a ``PandasQueryCompiler`` with a boolean row-wise expression.
Expand Down
15 changes: 5 additions & 10 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from pandas.io.formats.info import DataFrameInfo
from pandas.util._validators import validate_bool_kwarg

from modin.config import PersistentPickle, StorageFormat
from modin.config import PersistentPickle
from modin.error_message import ErrorMessage
from modin.logging import disable_logging
from modin.pandas import Categorical
Expand Down Expand Up @@ -1608,7 +1608,6 @@ def query(self, expr, inplace=False, **kwargs): # noqa: PR01, RT01, D200
"""
self._update_var_dicts_in_kwargs(expr, kwargs)
inplace = validate_bool_kwarg(inplace, "inplace")
new_query_compiler = None
# HACK: this condition kind of breaks the idea of backend agnostic API as all queries
# _should_ work fine for all of the engines using `pandas.DataFrame.query(...)` approach.
# However, at this point we know that we can execute simple queries way more efficiently
Expand All @@ -1617,14 +1616,10 @@ def query(self, expr, inplace=False, **kwargs): # noqa: PR01, RT01, D200
# and fixing the root cause of the perf difference appears to be much more complicated
# than putting this hack here. Hopefully, we'll get rid of it soon:
# https://github.com/modin-project/modin/issues/6499
if StorageFormat.get() == "Pandas":
self._validate_eval_query(expr, **kwargs)
try:
new_query_compiler = self._query_compiler._rowwise_query(expr, **kwargs)
except NotImplementedError:
# a non row-wise query was passed, falling back to pandas implementation
pass
if new_query_compiler is None:
try:
new_query_compiler = self._query_compiler.rowwise_query(expr, **kwargs)
except NotImplementedError:
# a non row-wise query was passed, falling back to pandas implementation
new_query_compiler = pandas.DataFrame.query(
self, expr, inplace=False, **kwargs
)._query_compiler
Expand Down

0 comments on commit 94c361f

Please sign in to comment.