From 94c361f04e89fc526b451c4c7444e2db35173265 Mon Sep 17 00:00:00 2001
From: Dmitry Chigarev
Date: Mon, 18 Sep 2023 13:44:35 +0000
Subject: [PATCH] apply @anmyachev's suggestion

Signed-off-by: Dmitry Chigarev
---
 .../storage_formats/base/query_compiler.py   | 19 +++++++++++++++++++
 .../storage_formats/pandas/query_compiler.py |  2 +-
 modin/pandas/dataframe.py                    | 15 +++++----------
 3 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py
index 8e802cdec2e..e1e589d4a04 100644
--- a/modin/core/storage_formats/base/query_compiler.py
+++ b/modin/core/storage_formats/base/query_compiler.py
@@ -2271,6 +2271,25 @@ def nsmallest(self, n=5, columns=None, keep="first"):
             self, n=n, columns=columns, keep=keep
         )
 
+    @doc_utils.add_refer_to("DataFrame.query")
+    def rowwise_query(self, expr, **kwargs):
+        """
+        Query columns of the QueryCompiler with a boolean expression row-wise.
+
+        Parameters
+        ----------
+        expr : str
+        **kwargs : dict
+
+        Returns
+        -------
+        BaseQueryCompiler
+            New QueryCompiler containing the rows where the boolean expression is satisfied.
+        """
+        raise NotImplementedError(
+            "Row-wise queries execution is not implemented for the selected backend."
+        )
+
     @doc_utils.add_refer_to("DataFrame.eval")
     def eval(self, expr, **kwargs):
         """
diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
index 6011b16b742..c025add7135 100644
--- a/modin/core/storage_formats/pandas/query_compiler.py
+++ b/modin/core/storage_formats/pandas/query_compiler.py
@@ -3187,7 +3187,7 @@ def _list_like_func(self, func, axis, *args, **kwargs):
         )
         return self.__constructor__(new_modin_frame)
 
-    def _rowwise_query(self, expr, **kwargs):
+    def rowwise_query(self, expr, **kwargs):
         """
         Query the columns of a ``PandasQueryCompiler`` with a boolean row-wise expression.
 
diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 710774a3b07..d7edd3ce37f 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -39,7 +39,7 @@
 from pandas.io.formats.info import DataFrameInfo
 from pandas.util._validators import validate_bool_kwarg
 
-from modin.config import PersistentPickle, StorageFormat
+from modin.config import PersistentPickle
 from modin.error_message import ErrorMessage
 from modin.logging import disable_logging
 from modin.pandas import Categorical
@@ -1608,7 +1608,6 @@ def query(self, expr, inplace=False, **kwargs):  # noqa: PR01, RT01, D200
         """
         self._update_var_dicts_in_kwargs(expr, kwargs)
         inplace = validate_bool_kwarg(inplace, "inplace")
-        new_query_compiler = None
         # HACK: this condition kind of breaks the idea of backend agnostic API as all queries
         # _should_ work fine for all of the engines using `pandas.DataFrame.query(...)` approach.
         # However, at this point we know that we can execute simple queries way more efficiently
@@ -1617,14 +1616,10 @@ def query(self, expr, inplace=False, **kwargs):  # noqa: PR01, RT01, D200
         # and fixing the root cause of the perf difference appears to be much more complicated
         # than putting this hack here. Hopefully, we'll get rid of it soon:
         # https://github.com/modin-project/modin/issues/6499
-        if StorageFormat.get() == "Pandas":
-            self._validate_eval_query(expr, **kwargs)
-            try:
-                new_query_compiler = self._query_compiler._rowwise_query(expr, **kwargs)
-            except NotImplementedError:
-                # a non row-wise query was passed, falling back to pandas implementation
-                pass
-        if new_query_compiler is None:
+        try:
+            new_query_compiler = self._query_compiler.rowwise_query(expr, **kwargs)
+        except NotImplementedError:
+            # a non row-wise query was passed, falling back to pandas implementation
             new_query_compiler = pandas.DataFrame.query(
                 self, expr, inplace=False, **kwargs
             )._query_compiler