Skip to content

Commit

Permalink
Loader for Macrometa GDN (run-llama#484)
Browse files Browse the repository at this point in the history
Loader for Macrometa GDN
  • Loading branch information
dain-macrometa authored Sep 29, 2023
1 parent 43cf9c3 commit a207c69
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 4 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
## Unreleased

### New Features
- Loader for Macrometa GDN (#484)

### Smaller Features + Bug Fixes
- fix: PyMuPDF Reader broken (#547)
- Add page id to extra_info (#542)
Expand Down
10 changes: 6 additions & 4 deletions llama_hub/library.json
Original file line number Diff line number Diff line change
Expand Up @@ -919,11 +919,13 @@
"ZepReader": {
"id": "zep",
"author": "zep",
"keywords": ["zep", "retriever", "memory", "storage"]
},
"MacrometaGDNReader": {
"id": "macrometa_gdn",
"author": "Dain Im",
"keywords": [
"zep",
"retriever",
"memory",
"storage"
"macrometa"
]
},
"BagelReader": {
Expand Down
17 changes: 17 additions & 0 deletions llama_hub/macrometa_gdn/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Macrometa GDN Loader

This loader takes in a Macrometa federation URL, an API key, and a list of collection names, and returns a list of documents (one per collection).

## Usage

To use this loader, you need to pass the URL and API key through the class constructor, and then load the data using an array of collection names.

```python
from llama_index import download_loader

MacrometaGDNReader = download_loader('MacrometaGDNReader')

collections = ['test_collection']
loader = MacrometaGDNReader(url="https://api-macrometa.io",apikey="test")
vectors= loader.load_data(collection_list=collections)
```
1 change: 1 addition & 0 deletions llama_hub/macrometa_gdn/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Init file."""
90 changes: 90 additions & 0 deletions llama_hub/macrometa_gdn/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
"""Macrometa GDN Reader."""

from typing import List
import requests
import json

from llama_index.readers.base import BaseReader
from llama_index.readers.schema.base import Document


class MacrometaGDNReader(BaseReader):
    """Macrometa GDN Reader.

    Reads the documents stored in Macrometa GDN collections through the
    cursor REST endpoint of the ``_system`` fabric.

    Args:
        url: Base URL of the Macrometa GDN federation. A trailing slash is
            tolerated.
        apikey: API key sent in the ``Authorization`` header.
    """

    def __init__(self, url: str, apikey: str):
        self.url = url
        self.apikey = apikey

    def load_data(self, collection_list: List[str]) -> List[Document]:
        """Load each requested collection as a single document.

        Args:
            collection_list: Names of the collections to read. A single name
                passed as a plain string is treated as a one-item list.

        Returns:
            One ``Document`` per collection, in input order. Its text is the
            stringified list of records fetched from that collection and its
            ``extra_info`` carries the ``collection_name``.

        Raises:
            ValueError: If ``collection_list`` is ``None``.
        """
        if collection_list is None:
            raise ValueError("Must specify collection name(s)")
        if isinstance(collection_list, str):
            # Iterating a bare string would query one collection per character.
            collection_list = [collection_list]

        return [
            Document(
                text=self._load_collection(collection_name),
                extra_info={"collection_name": collection_name},
            )
            for collection_name in collection_list
        ]

    def _load_collection(self, collection_name: str) -> str:
        """Load a collection from the database.

        Fetching is best-effort: when a request returns an unexpected status
        code a message is printed and whatever was collected so far is
        returned.

        Args:
            collection_name: Name of the collection to read from.

        Returns:
            ``str()`` of the list of records that were fetched.
        """
        cursor_url = self.url.rstrip("/") + "/_fabric/_system/_api/cursor"
        headers = {
            "accept": "application/json",
            "content-type": "application/json",
            "Authorization": "apikey " + self.apikey,
        }
        # The collection name is passed as a bind parameter (``@@collection``
        # in the query, ``@collection`` in bindVars) instead of being spliced
        # into the query text, so it cannot alter the query.
        payload = {
            "batchSize": 1000,
            "ttl": 60,
            "query": "FOR doc IN @@collection RETURN doc",
            "bindVars": {"@collection": collection_name},
        }
        all_documents = []

        # NOTE(review): no timeout is passed to requests, so a stalled server
        # blocks indefinitely - consider exposing one.
        response = requests.post(
            cursor_url, headers=headers, data=json.dumps(payload)
        )
        if response.status_code != 201:
            # Check the status before decoding: error bodies may not be JSON.
            print(f"Initial request failed with status code {response.status_code}")
            return str(all_documents)

        response_json = response.json()
        all_documents.extend(response_json.get("result", []))

        # Keep pulling batches from the server-side cursor until exhausted.
        while response_json.get("hasMore"):
            cursor_id = response_json.get("id")
            if cursor_id is None:
                print("Response has more results but no cursor id")
                break

            response = requests.put(f"{cursor_url}/{cursor_id}", headers=headers)
            if response.status_code != 200:
                print(f"Request failed with status code {response.status_code}")
                break

            response_json = response.json()
            all_documents.extend(response_json.get("result", []))

        return str(all_documents)


if __name__ == "__main__":
    # When run as a script, read the "test" collection and print the result.
    demo_reader = MacrometaGDNReader(
        url="https://api-anurag.eng.macrometa.io",
        apikey="test",
    )
    demo_documents = demo_reader.load_data(collection_list=["test"])
    print(demo_documents)
2 changes: 2 additions & 0 deletions llama_hub/macrometa_gdn/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
requests
json

0 comments on commit a207c69

Please sign in to comment.