This repository has been archived by the owner on Mar 1, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 735
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Co-authored-by: Barton Rhodes <barton@plurigrid.xyz>
- Loading branch information
Showing
5 changed files
with
105 additions
and
99 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
## Zulip Loader | ||
|
||
The Zulip Loader is a Python reader that loads data from Zulip streams using a Zulip bot's API token. It fetches messages from the streams you name (pass the result of `get_all_streams()` to cover every stream) and returns a list of documents, one per stream, each holding that stream's message content. | ||
|
||
### Prerequisites | ||
|
||
Create a Zulip bot and obtain its API token. Follow the instructions in the Zulip documentation to create a bot and get the API key (token). | ||
|
||
Set the ZULIP_TOKEN environment variable to your Zulip bot's API token: | ||
```bash | ||
export ZULIP_TOKEN="your-zulip-bot-api-token" | ||
``` | ||
|
||
Use the ZulipReader class to load data from Zulip streams: | ||
|
||
```python | ||
|
||
from zulip_loader import ZulipReader | ||
|
||
# Initialize the ZulipReader with the bot's email and Zulip domain | ||
reader = ZulipReader(zulip_email="your-bot-email@your-zulip-domain.zulipchat.com", zulip_domain="your-zulip-domain.zulipchat.com") | ||
|
||
# Load data from all streams | ||
data = reader.load_data(reader.get_all_streams()) | ||
|
||
# Load data from specific streams | ||
stream_names = ["stream1", "stream2"] | ||
data = reader.load_data(stream_names) | ||
# This will return a list of documents containing the content of the specified streams. | ||
``` | ||
|
||
To control message order, pass the `reverse_chronological` parameter to `load_data()`. It defaults to `True`, in which case the fetched messages are reversed before being joined into the document text. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
"""Init file.""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
import logging | ||
from typing import List, Optional | ||
from datetime import datetime | ||
import os | ||
from llama_index.readers.base import BaseReader | ||
from llama_index.readers.schema.base import Document | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
class ZulipReader(BaseReader):
    """Reader that turns the recent messages of Zulip streams into documents.

    Authentication uses a bot account: the bot's e-mail and the Zulip site
    are passed to the constructor, while the API key is read from the
    ``ZULIP_TOKEN`` environment variable.
    """

    # Upper bound on the number of messages requested per stream in a single
    # ``get_messages`` call (anchored at the newest message).
    _MESSAGES_PER_STREAM = 100

    def __init__(
        self,
        zulip_email: str,
        zulip_domain: str,
        earliest_date: Optional[datetime] = None,
        latest_date: Optional[datetime] = None,
    ) -> None:
        """Create the underlying Zulip client.

        Args:
            zulip_email: E-mail address of the Zulip bot.
            zulip_domain: Zulip site the bot belongs to, passed to the
                client as ``site``.
            earliest_date: Accepted for interface compatibility; not
                currently applied to the fetched messages.
            latest_date: Accepted for interface compatibility; not
                currently applied to the fetched messages.

        Raises:
            ValueError: If ``ZULIP_TOKEN`` is unset or empty.
        """
        # Imported lazily so the module can be imported without the optional
        # ``zulip`` dependency installed.
        import zulip

        zulip_token = os.environ.get("ZULIP_TOKEN")
        # An empty token is as unusable as a missing one, so reject both.
        if not zulip_token:
            raise ValueError("ZULIP_TOKEN environment variable not set.")

        self.client = zulip.Client(
            api_key=zulip_token, email=zulip_email, site=zulip_domain
        )

    def _read_stream(self, stream_name: str, reverse_chronological: bool) -> str:
        """Return the content of a stream's latest messages as one string.

        Args:
            stream_name: Name of the stream to narrow the query to.
            reverse_chronological: When true, the message order returned by
                the API is reversed before joining.

        Returns:
            The ``content`` of each fetched message, joined by single spaces.

        Raises:
            ValueError: If the API response carries no ``messages`` entry
                (for example because the request failed).
        """
        params = {
            "narrow": [{"operator": "stream", "operand": stream_name}],
            "anchor": "newest",
            "num_before": self._MESSAGES_PER_STREAM,
            "num_after": 0,
        }
        response = self.client.get_messages(params)
        if "messages" not in response:
            # Surface the server's explanation instead of a bare KeyError.
            raise ValueError(
                f"Failed to read Zulip stream {stream_name!r}: "
                f"{response.get('msg', 'unknown error')}"
            )

        messages = response["messages"]
        ordered = reversed(messages) if reverse_chronological else messages
        return " ".join(message["content"] for message in ordered)

    def load_data(
        self,
        streams: Optional[List[str]] = None,
        reverse_chronological: bool = True,
    ) -> List[Document]:
        """Load one document per stream.

        Args:
            streams: Names of the streams to read. When ``None``, every
                stream reported by :meth:`get_all_streams` is read.
            reverse_chronological: Forwarded to the per-stream reader; when
                true the fetched messages are reversed before joining.

        Returns:
            A list of documents whose text is the joined message content and
            whose ``extra_info`` records the originating stream name.
        """
        if streams is None:
            streams = self.get_all_streams()

        return [
            Document(
                self._read_stream(stream_name, reverse_chronological),
                extra_info={"stream": stream_name},
            )
            for stream_name in streams
        ]

    def get_all_streams(self) -> List[str]:
        """Return the names of all streams reported by the Zulip client.

        Raises:
            ValueError: If the API response carries no ``streams`` entry
                (for example because the request failed).
        """
        response = self.client.get_streams()
        if "streams" not in response:
            raise ValueError(
                "Failed to list Zulip streams: "
                f"{response.get('msg', 'unknown error')}"
            )
        # Collect the stream names (not IDs): messages are narrowed by name.
        return [stream["name"] for stream in response["streams"]]
|
||
if __name__ == "__main__":
    # Manual smoke test: load every stream visible to the bot and log the
    # resulting documents. Requires ZULIP_TOKEN to be set in the environment.
    # Without explicit configuration the root logger stays at WARNING, so the
    # INFO record below would be silently dropped.
    logging.basicConfig(level=logging.INFO)
    reader = ZulipReader(
        zulip_email="ianita-bot@plurigrid.zulipchat.com",
        zulip_domain="plurigrid.zulipchat.com",
    )
    logging.info(reader.load_data(reader.get_all_streams()))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
zulip |