Skip to content
This repository has been archived by the owner on Mar 1, 2024. It is now read-only.

Add Zulip Reader #181

Merged
merged 10 commits into from
Apr 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 4 additions & 99 deletions loader_hub/library.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,6 @@
"id": "asana",
"author": "daveey"
},
"AzCognitiveSearchReader": {
"id": "azcognitive_search",
"author": "mrcabellom"
},
"GoogleDocsReader": {
"id": "google_docs",
"author": "jerryjliu"
Expand Down Expand Up @@ -60,10 +56,6 @@
"id": "file/json",
"author": "yisding"
},
"JSONDataReader": {
"id": "jsondata",
"author": "Josh-XT"
},
"MarkdownReader": {
"id": "file/markdown",
"author": "hursh-desai"
Expand All @@ -72,10 +64,6 @@
"id": "file/audio",
"author": "ravi03071991"
},
"GladiaAudioTranscriber": {
"id": "file/audio_gladia",
"author": "ravi03071991"
},
"SimpleCSVReader": {
"id": "file/simple_csv",
"author": "vguillet"
Expand Down Expand Up @@ -384,91 +372,8 @@
"id": "airtable",
"author": "smyja"
},
"HatenaBlogReader": {
"id": "hatena_blog",
"author": "Shoya SHIRAKI",
"keywords": [
"hatena",
"blog"
]
},
"OpendalReader": {
"id": "opendal_reader",
"author": "OpenDAL Contributors",
"keywords": [
"storage"
]
},
"OpendalS3Reader": {
"id": "opendal_reader/s3",
"author": "OpenDAL Contributors",
"keywords": [
"storage",
"s3"
]
},
"OpendalAzblobReader": {
"id": "opendal_reader/azblob",
"author": "OpenDAL Contributors",
"keywords": [
"storage",
"azblob"
]
},
"OpendalGcsReader": {
"id": "opendal_reader/gcs",
"author": "OpenDAL Contributors",
"keywords": [
"storage",
"gcs"
]
},
"ConfluenceReader": {
"id": "confluence",
"author": "zywilliamli"
},
"ChatGPTRetrievalPluginReader": {
"id": "chatgpt_plugin",
"author": "jerryjliu"
},
"JiraReader": {
"id": "jira",
"author": "bearguy",
"keywords": [
"jira"
]
},
"UnstructuredURLLoader": {
"id": "web/unstructured_web",
"author": "kravetsmic",
"keywords": [
"unstructured.io",
"url"
]
},
"GoogleSheetsReader": {
"id": "google_sheets",
"author": "piroz"
},
"FeedlyRssReader": {
"id": "feedly_rss",
"author": "kychanbp",
"keywords": [
"feedly",
"rss"
]
},
"FlatPdfReader": {
"id": "file/flat_pdf",
"author": "emmanuel-oliveira",
"keywords": [
"pdf",
"flat",
"flattened"
]
},
"MilvusReader": {
"id": "milvus",
"author": "filip-halt"
"ZulipReader": {
"id": "zulip",
"author": "plurigrid"
}
}
}
32 changes: 32 additions & 0 deletions loader_hub/zulip/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
## Zulip Loader

The Zulip Loader is a Python script that allows you to load data from Zulip streams using a Zulip bot's API token. It fetches messages from the streams you specify (call `get_all_streams()` to get the names of every stream the bot can list) and returns a list of documents, one per stream, containing that stream's content.

### Prerequisites

Create a Zulip bot and obtain its API token. Follow the instructions in the Zulip documentation to create a bot and get the API key (token).

Set the ZULIP_TOKEN environment variable to your Zulip bot's API token:
```bash
export ZULIP_TOKEN="your-zulip-bot-api-token"
```

Use the ZulipReader class to load data from Zulip streams:

```python

from zulip_loader import ZulipReader

# Initialize the ZulipReader with the bot's email and Zulip domain
reader = ZulipReader(zulip_email="your-bot-email@your-zulip-domain.zulipchat.com", zulip_domain="your-zulip-domain.zulipchat.com")

# Load data from all streams
data = reader.load_data(reader.get_all_streams())

# Load data from specific streams
stream_names = ["stream1", "stream2"]
data = reader.load_data(stream_names)
# This will return a list of documents containing the content of the specified streams.
```

For more customization, you can pass the `reverse_chronological` parameter to the load_data() method to indicate the order of messages in the output.
1 change: 1 addition & 0 deletions loader_hub/zulip/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Init file."""
67 changes: 67 additions & 0 deletions loader_hub/zulip/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import logging
from typing import List, Optional
from datetime import datetime
import os
from llama_index.readers.base import BaseReader
from llama_index.readers.schema.base import Document

logger = logging.getLogger(__name__)

class ZulipReader(BaseReader):
    """Zulip reader.

    Loads the text content of Zulip stream messages through a bot account.
    The bot's API key is read from the ``ZULIP_TOKEN`` environment variable.
    """

    def __init__(
        self,
        zulip_email: str,
        zulip_domain: str,
        earliest_date: Optional[datetime] = None,
        latest_date: Optional[datetime] = None,
    ) -> None:
        """Initialize with parameters.

        Args:
            zulip_email: Email address of the Zulip bot.
            zulip_domain: Zulip site to connect to; passed to the client
                as ``site``.
            earliest_date: If given, messages sent before this moment are
                left out of the loaded content.
            latest_date: If given, messages sent after this moment are left
                out of the loaded content.

        Raises:
            ValueError: If the ``ZULIP_TOKEN`` environment variable is not
                set.
        """
        # Imported here rather than at module level, so the ``zulip``
        # package is only needed once a reader is actually constructed.
        import zulip

        # Read the Zulip token from the environment variable
        zulip_token = os.environ.get("ZULIP_TOKEN")

        if zulip_token is None:
            raise ValueError("ZULIP_TOKEN environment variable not set.")

        # Kept so that _read_stream can apply the optional date bounds.
        self._earliest_date = earliest_date
        self._latest_date = latest_date

        # Initialize Zulip client with provided parameters
        self.client = zulip.Client(
            api_key=zulip_token, email=zulip_email, site=zulip_domain
        )

    @staticmethod
    def _raise_on_error(response: dict, action: str) -> None:
        """Raise ``ValueError`` if the API response reports an error."""
        if response.get("result") == "error":
            raise ValueError(f"Failed to {action}: {response.get('msg')}")

    def _in_date_range(self, message: dict) -> bool:
        """Return whether ``message`` lies within the configured date bounds.

        The message's ``timestamp`` field is compared, as a UNIX timestamp in
        seconds, against ``datetime.timestamp()`` of each bound (naive
        datetimes are therefore interpreted as local time). Both bounds are
        inclusive; a bound that is ``None`` is not checked.
        """
        earliest = self._earliest_date
        latest = self._latest_date
        if earliest is None and latest is None:
            return True
        sent_at = message["timestamp"]
        if earliest is not None and sent_at < earliest.timestamp():
            return False
        if latest is not None and sent_at > latest.timestamp():
            return False
        return True

    def _read_stream(self, stream_name: str, reverse_chronological: bool) -> str:
        """Read a stream.

        Requests up to 100 messages ending at the newest message of
        ``stream_name`` and joins their ``content`` fields with single
        spaces. The date bounds given to the constructor, if any, are applied
        to the fetched messages only.

        Args:
            stream_name: Name of the stream to read.
            reverse_chronological: If true, the order of the messages
                returned by the API is reversed before joining.

        Returns:
            The concatenated message contents.

        Raises:
            ValueError: If the API reports an error for the request.
        """
        params = {
            "narrow": [{"operator": "stream", "operand": stream_name}],
            "anchor": "newest",
            "num_before": 100,
            "num_after": 0,
        }
        response = self.client.get_messages(params)
        # Without this check an error response (which has no "messages" key)
        # would surface as an opaque KeyError on the next line.
        self._raise_on_error(response, f"read Zulip stream {stream_name!r}")
        messages = [
            message
            for message in response["messages"]
            if self._in_date_range(message)
        ]
        if reverse_chronological:
            messages.reverse()
        return " ".join(message["content"] for message in messages)

    def load_data(
        self, streams: List[str], reverse_chronological: bool = True
    ) -> List[Document]:
        """Load data from the input streams.

        Args:
            streams: Names of the streams to read.
            reverse_chronological: Forwarded to the per-stream reader to
                control message order.

        Returns:
            One document per stream, in the order given, whose text is the
            stream's joined message content and whose ``extra_info`` holds
            the stream name under the ``"stream"`` key.
        """
        return [
            Document(
                self._read_stream(stream_name, reverse_chronological),
                extra_info={"stream": stream_name},
            )
            for stream_name in streams
        ]

    def get_all_streams(self) -> List[str]:
        """Return the names of all streams reported by the Zulip client.

        Raises:
            ValueError: If the API reports an error for the request.
        """
        # Fetch all streams
        response = self.client.get_streams()
        self._raise_on_error(response, "list Zulip streams")
        # Collect the stream names
        return [stream["name"] for stream in response["streams"]]

if __name__ == "__main__":
    # Demo entry point: load every stream and log the resulting documents.
    # Requires the ZULIP_TOKEN environment variable to be set.
    # Without explicit configuration the root logger's effective level is
    # WARNING, so the INFO record below would be dropped and nothing printed.
    logging.basicConfig(level=logging.INFO)
    reader = ZulipReader(
        zulip_email="ianita-bot@plurigrid.zulipchat.com",
        zulip_domain="plurigrid.zulipchat.com",
    )
    logging.info(reader.load_data(reader.get_all_streams()))
1 change: 1 addition & 0 deletions loader_hub/zulip/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
zulip