Skip to content

Commit

Permalink
REFACTOR-modin-project#6071: Push first and last down to query compil…
Browse files Browse the repository at this point in the history
…er. (modin-project#64) (modin-project#6125)

Signed-off-by: mvashishtha <mahesh@ponder.io>
  • Loading branch information
mvashishtha authored May 15, 2023
1 parent 632d724 commit ac92fae
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 3 deletions.
38 changes: 38 additions & 0 deletions modin/core/storage_formats/base/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -4392,6 +4392,44 @@ def dt_year(self):

# End of DateTime methods

def first(self, offset: pandas.DateOffset):
    """
    Select initial periods of time series data based on a date offset.

    When the query compiler has dates as its index, this selects the
    first few rows that fall within the given date offset.

    Parameters
    ----------
    offset : pandas.DateOffset
        The offset length of the data to select.

    Returns
    -------
    BaseQueryCompiler
        New compiler containing the selected data.
    """
    # Wrap the pandas implementation first, then apply it to this compiler.
    pandas_first = DataFrameDefault.register(pandas.DataFrame.first)
    return pandas_first(self, offset)

def last(self, offset: pandas.DateOffset):
    """
    Select final periods of time series data based on a date offset.

    For a query compiler with a sorted DatetimeIndex, this selects the
    last few rows that fall within the given date offset.

    Parameters
    ----------
    offset : pandas.DateOffset
        The offset length of the data to select.

    Returns
    -------
    BaseQueryCompiler
        New compiler containing the selected data.
    """
    # Wrap the pandas implementation first, then apply it to this compiler.
    pandas_last = DataFrameDefault.register(pandas.DataFrame.last)
    return pandas_last(self, offset)

# Resample methods

# FIXME:
Expand Down
10 changes: 7 additions & 3 deletions modin/pandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

"""Implement DataFrame/Series public API as pandas does."""
from __future__ import annotations
import numpy as np
Expand Down Expand Up @@ -39,6 +38,7 @@
)
from pandas._libs.lib import NoDefault
from pandas._libs.lib import no_default
from pandas._libs.tslibs import to_offset
from pandas._typing import (
IndexKeyFunc,
StorageOptions,
Expand Down Expand Up @@ -1628,7 +1628,9 @@ def first(self, offset): # noqa: PR01, RT01, D200
"""
Select initial periods of time series data based on a date offset.
"""
return self.loc[pandas.Series(index=self.index).first(offset).index]
return self._create_or_update_from_compiler(
self._query_compiler.first(offset=to_offset(offset))
)

def first_valid_index(self): # noqa: RT01, D200
"""
Expand Down Expand Up @@ -1809,7 +1811,9 @@ def last(self, offset): # noqa: PR01, RT01, D200
"""
Select final periods of time series data based on a date offset.
"""
return self.loc[pandas.Series(index=self.index).last(offset).index]
return self._create_or_update_from_compiler(
self._query_compiler.last(offset=to_offset(offset))
)

def last_valid_index(self): # noqa: RT01, D200
"""
Expand Down

0 comments on commit ac92fae

Please sign in to comment.