Closed
Description
from pandas.api.indexers import BaseIndexer
def calculate_variable_window_bounds(left, right, index):
    """Compute per-row window ``start``/``end`` positions from explicit limits.

    Parameters
    ----------
    left, right : array-like
        Per-row lower and upper window limits. Both must have the same
        length as ``index`` and be comparable with its values.
    index : array-like
        The points the window is evaluated at.
        NOTE(review): the single forward pass presumably relies on
        ``index`` being sorted ascending -- confirm with callers.

    Returns
    -------
    tuple of numpy.ndarray
        ``(start, end)``, two int64 arrays with one entry per element of
        ``index``. An entry of -1 means the bound was never assigned.
        Empty input yields two empty arrays.
    """
    num_values = len(index)
    assert len(left) == len(right) == len(index)

    # -1 marks a bound that has not been assigned yet.
    start = np.full(num_values, -1, dtype='int64')
    end = np.full(num_values, -1, dtype='int64')

    # Nothing to bound; this also avoids reading element 0 of empty input.
    if num_values == 0:
        return start, end

    # Initial conditions: the first point opens a window only when it lies
    # inside its own (left, right] limits.
    if index[0] > left[0]:
        start[0] = 0
    if index[0] <= right[0]:
        end[0] = 1

    for i in range(1, num_values):
        value = index[i]

        # Advance the start bound until we are within the constraint,
        # beginning from the previous row's start.
        # NOTE(review): if start[i - 1] is still -1 the scan begins at
        # j == -1, which reads the *last* element of left/right.
        start[i] = start[i - 1]
        for j in range(start[i - 1], i):
            if value > right[j] or value < left[j]:
                # Row j's limits do not contain the current point.
                start[i] = j
            else:
                break

        # End bound is either the previous end or one past the current row.
        # NOTE(review): if end[i - 1] is still -1 this reads index[-1]
        # (the last point) rather than failing.
        if index[end[i - 1]] - right[i] <= 0:
            end[i] = i + 1
        else:
            end[i] = end[i - 1]

    return start, end
class DatetimeIndexer(BaseIndexer):
    """Window indexer whose bounds come from per-row start/end timestamps.

    Reads ``self.starts``, ``self.ends`` and ``self.points``; the usage
    example in this file supplies them as constructor keyword arguments.
    """

    def get_window_bounds(self, num_values, min_periods, center, closed):
        """Return ``(start, end)`` arrays for the configured limits.

        The ``num_values``, ``min_periods``, ``center`` and ``closed``
        arguments are accepted but not used.

        NOTE(review): newer pandas releases appear to expect an extra
        ``step`` parameter on this method -- confirm against the pandas
        version in use.
        """

        def as_int64(values):
            # Reinterpret the datetime-like values as 'i8' in a plain array.
            return np.asarray(values.view('i8'))

        # starts, ends, points are all expected to be datetime-like (DTI).
        return calculate_variable_window_bounds(
            as_int64(self.starts),
            as_int64(self.ends),
            as_int64(self.points),
        )
Input frame:
# Sample tweet-sentiment records in CSV form: one row per tweet with the
# ticker it mentions, its timestamp and a sentiment score.
tweets_str = """
ticker,datetime,sentiment
GOOG,2020-05-27 15:00,0.6
GOOG,2020-05-28 11:00,0.5
IBM,2020-05-28 12:00,-0.1
GOOG,2020-05-28 13:00,0.2
GOOG,2020-05-28 20:00,0.3
GOOG,2020-05-29 07:00,-0.1
IBM,2020-05-29 09:00,-0.3
IBM,2020-05-29 12:00,-0.4
GOOG,2020-05-30 07:00,-0.2
GOOG,2020-05-30 08:00,-0.5
GOOG,2020-05-30 10:00,0.1
GOOG,2020-05-30 14:00,0.3
GOOG,2020-05-31 07:00,-0.1
GOOG,2020-06-01 08:00,0.2
GOOG,2020-06-01 10:00,0.4
"""

# Load the CSV text into a frame, parsing the "datetime" column as dates.
tweets = pd.read_csv(
    StringIO(dedent(tweets_str)),
    parse_dates=["datetime"],
)
Call it like this:
# A one-business-day offset used to derive the per-row window limits.
bd = 1 * pd.tseries.offsets.BusinessDay()

# Each row's window limits: the tweet time shifted back by one and by zero
# business-day offsets respectively.
starts = tweets.datetime - (1 * bd)
ends = tweets.datetime - (0 * bd)

# Rolling mean of sentiment over the custom per-row windows.
tweets.rolling(
    window=DatetimeIndexer(
        starts=starts,
        ends=ends,
        points=tweets.datetime,
    )
).sentiment.mean()
Metadata
Metadata
Assignees
Labels
No labels