Skip to content

Commit

Permalink
[SlackReader] fix: add date filtering when paginating through channel (
Browse files Browse the repository at this point in the history
…run-llama#919)


Co-authored-by: Jerry Liu <jerryjliu98@gmail.com>
  • Loading branch information
jean-malo and jerryjliu authored Apr 5, 2023
1 parent cb27f61 commit 47dc5e5
Showing 1 changed file with 31 additions and 13 deletions.
44 changes: 31 additions & 13 deletions gpt_index/readers/slack.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,13 @@ def __init__(
"Must specify `earliest_date` if `latest_date` is specified."
)
if earliest_date is not None:
self.earliest_date_timestamp = earliest_date.timestamp()
if latest_date is not None:
self.latest_date_timestamp = latest_date.timestamp()
else:
self.latest_date_timestamp = datetime.now().timestamp()
self.earliest_date_timestamp: Optional[float] = earliest_date.timestamp()
else:
self.earliest_date_timestamp = None
if latest_date is not None:
self.latest_date_timestamp = latest_date.timestamp()
else:
self.latest_date_timestamp = datetime.now().timestamp()
res = self.client.api_test()
if not res["ok"]:
raise ValueError(f"Error initializing Slack API: {res['error']}")
Expand All @@ -82,12 +84,18 @@ def _read_message(self, channel_id: str, message_ts: str) -> str:
channel=channel_id, ts=message_ts, cursor=next_cursor
)
else:
conversations_replies_kwargs = {
"channel": channel_id,
"ts": message_ts,
"cursor": next_cursor,
"latest": str(self.latest_date_timestamp),
}
if self.earliest_date_timestamp is not None:
conversations_replies_kwargs["oldest"] = str(
self.earliest_date_timestamp
)
result = self.client.conversations_replies(
channel=channel_id,
ts=message_ts,
cursor=next_cursor,
oldest=str(self.earliest_date_timestamp),
latest=str(self.latest_date_timestamp),
**conversations_replies_kwargs # type: ignore
)
messages = result["messages"]
messages_text.extend(message["text"] for message in messages)
Expand Down Expand Up @@ -121,14 +129,24 @@ def _read_channel(self, channel_id: str, reverse_chronological: bool) -> str:
# conversations.history returns the first 100 messages by default
# These results are paginated,
# see: https://api.slack.com/methods/conversations.history$pagination
conversations_history_kwargs = {
"channel": channel_id,
"cursor": next_cursor,
"latest": str(self.latest_date_timestamp),
}
if self.earliest_date_timestamp is not None:
conversations_history_kwargs["oldest"] = str(
self.earliest_date_timestamp
)
result = self.client.conversations_history(
channel=channel_id,
cursor=next_cursor,
**conversations_history_kwargs # type: ignore
)
conversation_history = result["messages"]
# Print results
logger.info(
"{} messages found in {}".format(len(conversation_history), id)
"{} messages found in {}".format(
len(conversation_history), channel_id
)
)
result_messages.extend(
self._read_message(channel_id, message["ts"])
Expand Down

0 comments on commit 47dc5e5

Please sign in to comment.