Skip to content
This repository has been archived by the owner on Mar 1, 2024. It is now read-only.

Commit

Permalink
Add Zulip Reader (#181)
Browse files Browse the repository at this point in the history

Co-authored-by: Barton Rhodes <barton@plurigrid.xyz>
  • Loading branch information
blue-note and bmorphism authored Apr 13, 2023
1 parent 2ab8ee6 commit 9a98b46
Show file tree
Hide file tree
Showing 5 changed files with 105 additions and 99 deletions.
103 changes: 4 additions & 99 deletions loader_hub/library.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,6 @@
"id": "asana",
"author": "daveey"
},
"AzCognitiveSearchReader": {
"id": "azcognitive_search",
"author": "mrcabellom"
},
"GoogleDocsReader": {
"id": "google_docs",
"author": "jerryjliu"
Expand Down Expand Up @@ -60,10 +56,6 @@
"id": "file/json",
"author": "yisding"
},
"JSONDataReader": {
"id": "jsondata",
"author": "Josh-XT"
},
"MarkdownReader": {
"id": "file/markdown",
"author": "hursh-desai"
Expand All @@ -72,10 +64,6 @@
"id": "file/audio",
"author": "ravi03071991"
},
"GladiaAudioTranscriber": {
"id": "file/audio_gladia",
"author": "ravi03071991"
},
"SimpleCSVReader": {
"id": "file/simple_csv",
"author": "vguillet"
Expand Down Expand Up @@ -388,91 +376,8 @@
"id": "airtable",
"author": "smyja"
},
"HatenaBlogReader": {
"id": "hatena_blog",
"author": "Shoya SHIRAKI",
"keywords": [
"hatena",
"blog"
]
},
"OpendalReader": {
"id": "opendal_reader",
"author": "OpenDAL Contributors",
"keywords": [
"storage"
]
},
"OpendalS3Reader": {
"id": "opendal_reader/s3",
"author": "OpenDAL Contributors",
"keywords": [
"storage",
"s3"
]
},
"OpendalAzblobReader": {
"id": "opendal_reader/azblob",
"author": "OpenDAL Contributors",
"keywords": [
"storage",
"azblob"
]
},
"OpendalGcsReader": {
"id": "opendal_reader/gcs",
"author": "OpenDAL Contributors",
"keywords": [
"storage",
"gcs"
]
},
"ConfluenceReader": {
"id": "confluence",
"author": "zywilliamli"
},
"ChatGPTRetrievalPluginReader": {
"id": "chatgpt_plugin",
"author": "jerryjliu"
},
"JiraReader": {
"id": "jira",
"author": "bearguy",
"keywords": [
"jira"
]
},
"UnstructuredURLLoader": {
"id": "web/unstructured_web",
"author": "kravetsmic",
"keywords": [
"unstructured.io",
"url"
]
},
"GoogleSheetsReader": {
"id": "google_sheets",
"author": "piroz"
},
"FeedlyRssReader": {
"id": "feedly_rss",
"author": "kychanbp",
"keywords": [
"feedly",
"rss"
]
},
"FlatPdfReader": {
"id": "file/flat_pdf",
"author": "emmanuel-oliveira",
"keywords": [
"pdf",
"flat",
"flattened"
]
},
"MilvusReader": {
"id": "milvus",
"author": "filip-halt"
"ZulipReader": {
"id": "zulip",
"author": "plurigrid"
}
}
}
32 changes: 32 additions & 0 deletions loader_hub/zulip/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
## Zulip Loader

The Zulip Loader is a Python script that allows you to load data from Zulip streams using a Zulip bot's API token. It fetches the most recent messages (up to 100 per stream) from the streams you specify — call `get_all_streams()` to obtain the names of all streams — and returns a list of documents, one per stream, containing that stream's message content.

### Prerequisites

Create a Zulip bot and obtain its API token. Follow the instructions in the Zulip documentation to create a bot and get the API key (token).

Set the ZULIP_TOKEN environment variable to your Zulip bot's API token:
```bash
export ZULIP_TOKEN="your-zulip-bot-api-token"
```

Use the ZulipReader class to load data from Zulip streams:

```python

from zulip_loader import ZulipReader

# Initialize the ZulipReader with the bot's email and Zulip domain
reader = ZulipReader(zulip_email="your-bot-email@your-zulip-domain.zulipchat.com", zulip_domain="your-zulip-domain.zulipchat.com")

# Load data from all streams
data = reader.load_data(reader.get_all_streams())

# Load data from specific streams
stream_names = ["stream1", "stream2"]
data = reader.load_data(stream_names)
# This will return a list of documents containing the content of the specified streams.
```

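For more customization, you can pass the `reverse_chronological` parameter to the `load_data()` method to control the order of messages in the output. It defaults to `True`, which reverses the order returned by the Zulip API so that the newest messages come first; pass `False` to keep the API's original order.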
1 change: 1 addition & 0 deletions loader_hub/zulip/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Init file."""
67 changes: 67 additions & 0 deletions loader_hub/zulip/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import logging
from typing import List, Optional
from datetime import datetime
import os
from llama_index.readers.base import BaseReader
from llama_index.readers.schema.base import Document

logger = logging.getLogger(__name__)

class ZulipReader(BaseReader):
    """Reader that loads message content from Zulip streams.

    The API key is read from the ``ZULIP_TOKEN`` environment variable; the
    account email and the Zulip site are supplied to the constructor.

    Args:
        zulip_email: Email address of the Zulip bot (or user) account.
        zulip_domain: Zulip site, forwarded as ``site`` to ``zulip.Client``.
        earliest_date: Optional lower bound; fetched messages sent before
            this moment are discarded.
        latest_date: Optional upper bound; fetched messages sent after this
            moment are discarded.

    Raises:
        ValueError: If the ``ZULIP_TOKEN`` environment variable is not set.
    """

    def __init__(
        self,
        zulip_email: str,
        zulip_domain: str,
        earliest_date: Optional[datetime] = None,
        latest_date: Optional[datetime] = None,
    ) -> None:
        """Initialize with parameters."""
        # Imported here rather than at module level so that importing this
        # module does not itself require the ``zulip`` package.
        import zulip

        # Read the Zulip token from the environment variable
        zulip_token = os.environ.get("ZULIP_TOKEN")

        if zulip_token is None:
            raise ValueError("ZULIP_TOKEN environment variable not set.")

        # Keep the date bounds so _read_stream can apply them; previously
        # these arguments were accepted but silently ignored.
        self.earliest_date = earliest_date
        self.latest_date = latest_date

        # Initialize Zulip client with provided parameters
        self.client = zulip.Client(
            api_key=zulip_token, email=zulip_email, site=zulip_domain
        )

    def _filter_by_date(self, messages: List[dict]) -> List[dict]:
        """Return the messages whose ``timestamp`` lies within the date bounds.

        Bounds are converted with ``datetime.timestamp()`` and compared
        against each message's ``timestamp`` field (UNIX seconds according to
        the Zulip API documentation). Naive datetimes are therefore
        interpreted as local time. When neither bound is set, the input list
        is returned unchanged.
        """
        earliest = getattr(self, "earliest_date", None)
        latest = getattr(self, "latest_date", None)
        if earliest is None and latest is None:
            return messages

        lower = earliest.timestamp() if earliest is not None else float("-inf")
        upper = latest.timestamp() if latest is not None else float("inf")
        return [
            message
            for message in messages
            if lower <= message["timestamp"] <= upper
        ]

    def _read_stream(self, stream_name: str, reverse_chronological: bool) -> str:
        """Read a stream and return its messages joined into one string.

        Only a single request is made, anchored at the newest message with
        ``num_before=100``, so at most the latest 100 messages of the stream
        are considered. The date bounds given to the constructor are applied
        to that batch only.

        Args:
            stream_name: Name of the stream to read.
            reverse_chronological: If True, reverse the order in which the
                API returned the messages before joining them.

        Returns:
            The ``content`` of every retained message, separated by spaces.

        Raises:
            KeyError: If the API response carries no ``"messages"`` key
                (presumably the case for error responses — TODO confirm).
        """
        params = {
            "narrow": [{"operator": "stream", "operand": stream_name}],
            "anchor": "newest",
            "num_before": 100,
            "num_after": 0,
        }
        response = self.client.get_messages(params)
        messages = self._filter_by_date(response["messages"])
        if reverse_chronological:
            messages.reverse()
        return " ".join(message["content"] for message in messages)

    def load_data(
        self, streams: List[str], reverse_chronological: bool = True
    ) -> List[Document]:
        """Load data from the input streams.

        Args:
            streams: Names of the streams to read.
            reverse_chronological: Forwarded to ``_read_stream``; when True
                (the default) the fetched messages are reversed.

        Returns:
            One ``Document`` per stream, in input order, whose text is the
            joined message content and whose ``extra_info`` records the
            stream name under the ``"stream"`` key.
        """
        data = []
        for stream_name in streams:
            stream_content = self._read_stream(stream_name, reverse_chronological)
            data.append(Document(stream_content, extra_info={"stream": stream_name}))
        return data

    def get_all_streams(self) -> List[str]:
        """Return the names of all streams reported by ``client.get_streams()``."""
        response = self.client.get_streams()
        streams_data = response["streams"]
        # Collect the stream names (not IDs): load_data narrows by name.
        return [stream["name"] for stream in streams_data]

if __name__ == "__main__":
    # Manual smoke test: needs ZULIP_TOKEN in the environment and network
    # access to the Zulip site below.
    # Configure logging first: with an unconfigured root logger the effective
    # level is WARNING, so the INFO record below would never be shown.
    logging.basicConfig(level=logging.INFO)
    reader = ZulipReader(
        zulip_email="ianita-bot@plurigrid.zulipchat.com",
        zulip_domain="plurigrid.zulipchat.com",
    )
    logger.info("%s", reader.load_data(reader.get_all_streams()))
1 change: 1 addition & 0 deletions loader_hub/zulip/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
zulip

0 comments on commit 9a98b46

Please sign in to comment.