Significant-Gravitas · BillSchumacher · Apr 16, 2023 · Apr 7, 2023 · Apr 7, 2023 · Apr 8, 2023
@@ -77,6 +77,27 @@ REDIS_PASSWORD=
 WIPE_REDIS_ON_START=False
 MEMORY_INDEX=auto-gpt
 
+### WEAVIATE
+# MEMORY_BACKEND - Use 'weaviate' to use Weaviate vector storage
+# WEAVIATE_HOST - Weaviate host IP
+# WEAVIATE_PORT - Weaviate host port
+# WEAVIATE_PROTOCOL - Weaviate host protocol (e.g. 'http')
+# USE_WEAVIATE_EMBEDDED - Whether to use Embedded Weaviate
+# WEAVIATE_EMBEDDED_PATH - File system path where data is persisted when running Embedded Weaviate
+# WEAVIATE_USERNAME - Weaviate username
+# WEAVIATE_PASSWORD - Weaviate password
+# WEAVIATE_API_KEY - Weaviate API key if using API-key-based authentication
+# MEMORY_INDEX - Name of index to create in Weaviate
+WEAVIATE_HOST="127.0.0.1"
+WEAVIATE_PORT=8080
+WEAVIATE_PROTOCOL="http"
+USE_WEAVIATE_EMBEDDED=False
+WEAVIATE_EMBEDDED_PATH="/home/me/.local/share/weaviate"
+WEAVIATE_USERNAME=
+WEAVIATE_PASSWORD=
+WEAVIATE_API_KEY=
+MEMORY_INDEX=AutoGpt
+
 ### MILVUS
 # MILVUS_ADDR - Milvus remote address (e.g. localhost:19530)
 # MILVUS_COLLECTION - Milvus collection, 

@@ -301,6 +301,28 @@ export PINECONE_ENV="Your pinecone region" # something like: us-east4-gcp
 export MEMORY_BACKEND="pinecone"
 ```
 
+## Weaviate Setup
+
+[Weaviate](https://weaviate.io/) is an open-source vector database. It allows you to store data objects and vector embeddings from ML models and scales seamlessly to billions of data objects. [An instance of Weaviate can be created locally (using Docker), on Kubernetes or using Weaviate Cloud Services](https://weaviate.io/developers/weaviate/quickstart).
+Although still experimental, [Embedded Weaviate](https://weaviate.io/developers/weaviate/installation/embedded) is supported, which allows the Auto-GPT process itself to start a Weaviate instance. To enable it, set `USE_WEAVIATE_EMBEDDED` to `True` and make sure you `pip install "weaviate-client>=3.15.4"`.
+
+#### Setting up environment variables
+
+In your `.env` file set the following:
+
+```
+MEMORY_BACKEND=weaviate
+WEAVIATE_HOST="127.0.0.1" # the IP or domain of the running Weaviate instance
+WEAVIATE_PORT="8080" 
+WEAVIATE_PROTOCOL="http"
+WEAVIATE_USERNAME="your username"
+WEAVIATE_PASSWORD="your password"
+WEAVIATE_API_KEY="your weaviate API key if you have one"
+WEAVIATE_EMBEDDED_PATH="/home/me/.local/share/weaviate" # this is optional and indicates where the data should be persisted when running an embedded instance
+USE_WEAVIATE_EMBEDDED=False # set to True to run Embedded Weaviate
+MEMORY_INDEX="Autogpt" # name of the index to create for the application
+```
+
 ## Setting Your Cache Type
 
 By default Auto-GPT is going to use LocalCache instead of redis or Pinecone.

@@ -62,6 +62,16 @@ def __init__(self) -> None:
         self.pinecone_api_key = os.getenv("PINECONE_API_KEY")
         self.pinecone_region = os.getenv("PINECONE_ENV")
 
+        self.weaviate_host  = os.getenv("WEAVIATE_HOST")
+        self.weaviate_port = os.getenv("WEAVIATE_PORT")
+        self.weaviate_protocol = os.getenv("WEAVIATE_PROTOCOL", "http")
+        self.weaviate_username = os.getenv("WEAVIATE_USERNAME", None)
+        self.weaviate_password = os.getenv("WEAVIATE_PASSWORD", None)
+        self.weaviate_scopes = os.getenv("WEAVIATE_SCOPES", None)
+        self.weaviate_embedded_path = os.getenv("WEAVIATE_EMBEDDED_PATH")
+        self.weaviate_api_key = os.getenv("WEAVIATE_API_KEY", None)
+        self.use_weaviate_embedded = os.getenv("USE_WEAVIATE_EMBEDDED", "False") == "True"
+
         # milvus configuration, e.g., localhost:19530.
         self.milvus_addr = os.getenv("MILVUS_ADDR", "localhost:19530")
         self.milvus_collection = os.getenv("MILVUS_COLLECTION", "autogpt")

@@ -21,6 +21,12 @@
     print("Pinecone not installed. Skipping import.")
     PineconeMemory = None
 
+# Weaviate support is optional: when the import fails, WeaviateMemory is set
+# to None so get_memory can detect that the backend is unavailable.
+# NOTE(review): the new weaviate module and its tests import from
+# `autogpt.memory...`; confirm `memory.weaviate` resolves here, otherwise the
+# ImportError is reported as "not installed" even when weaviate-client is present.
+try:
+    from memory.weaviate import WeaviateMemory
+except ImportError:
+    print("Weaviate not installed. Skipping import.")
+    WeaviateMemory = None
+
 try:
     from memory.milvus import MilvusMemory
 except ImportError:
@@ -48,6 +54,13 @@ def get_memory(cfg, init=False):
             )
         else:
             memory = RedisMemory(cfg)
+    elif cfg.memory_backend == "weaviate":
+        if not WeaviateMemory:
+            print("Error: Weaviate is not installed. Please install weaviate-client to"
+                  " use Weaviate as a memory backend.")
+        else:
+            memory = WeaviateMemory(cfg)
+
     elif cfg.memory_backend == "no_memory":
         memory = NoMemory(cfg)
     elif cfg.memory_backend == "milvus":
@@ -68,4 +81,4 @@ def get_supported_memory_backends():
     return supported_memory
 
 
-__all__ = ["get_memory", "LocalCache", "RedisMemory", "PineconeMemory", "NoMemory", "MilvusMemory"]
+__all__ = ["get_memory", "LocalCache", "RedisMemory", "PineconeMemory", "WeaviateMemory", "MilvusMemory", "NoMemory"]
@@ -0,0 +1,110 @@
+from autogpt.config import Config
+from autogpt.memory.base import MemoryProviderSingleton, get_ada_embedding
+import uuid
+import weaviate
+from weaviate import Client
+from weaviate.embedded import EmbeddedOptions
+from weaviate.util import generate_uuid5
+
+
+def default_schema(weaviate_index):
+    """Build the Weaviate class definition used to store memories.
+
+    Args:
+        weaviate_index: Name to use for the Weaviate class.
+
+    Returns:
+        A schema dict declaring the class with a single ``raw_text``
+        text property.
+    """
+    raw_text_property = {
+        "name": "raw_text",
+        "dataType": ["text"],
+        "description": "original text for the embedding",
+    }
+    return {"class": weaviate_index, "properties": [raw_text_property]}
+
+
+class WeaviateMemory(MemoryProviderSingleton):
+    """Memory provider that stores raw text and its embedding in a Weaviate class."""
+
+    def __init__(self, cfg):
+        """Connect to Weaviate and make sure the memory class exists.
+
+        Args:
+            cfg: Configuration object. The ``weaviate_*``,
+                ``use_weaviate_embedded`` and ``memory_index`` attributes
+                are read.
+        """
+        auth_credentials = self._build_auth_credentials(cfg)
+
+        url = f'{cfg.weaviate_protocol}://{cfg.weaviate_host}:{cfg.weaviate_port}'
+
+        if cfg.use_weaviate_embedded:
+            # Embedded mode: the client starts the Weaviate instance itself.
+            self.client = Client(embedded_options=EmbeddedOptions(
+                hostname=cfg.weaviate_host,
+                port=int(cfg.weaviate_port),
+                persistence_data_path=cfg.weaviate_embedded_path
+            ))
+
+            print(f"Weaviate Embedded running on: {url} with persistence path: {cfg.weaviate_embedded_path}")
+        else:
+            self.client = Client(url, auth_client_secret=auth_credentials)
+
+        self.index = cfg.memory_index
+        self._create_schema()
+
+    def _create_schema(self):
+        """Create the class for ``self.index`` unless the schema already contains it."""
+        schema = default_schema(self.index)
+        if not self.client.schema.contains(schema):
+            self.client.schema.create_class(schema)
+
+    def _build_auth_credentials(self, cfg):
+        """Return client credentials derived from ``cfg``, or None.
+
+        Username/password takes precedence over the API key; None is
+        returned when neither is configured.
+        """
+        if cfg.weaviate_username and cfg.weaviate_password:
+            # Was `weaviate_auth.AuthClientPassword`, an undefined name that
+            # raised NameError as soon as a username/password was configured.
+            return weaviate.auth.AuthClientPassword(cfg.weaviate_username, cfg.weaviate_password)
+        if cfg.weaviate_api_key:
+            return weaviate.auth.AuthApiKey(api_key=cfg.weaviate_api_key)
+        return None
+
+    def add(self, data):
+        """Embed ``data`` and store it in the index.
+
+        The object id is produced by ``generate_uuid5(data, self.index)``.
+
+        Returns:
+            A status message containing the uuid and the stored text.
+        """
+        vector = get_ada_embedding(data)
+
+        doc_uuid = generate_uuid5(data, self.index)
+        data_object = {
+            'raw_text': data
+        }
+
+        with self.client.batch as batch:
+            batch.add_data_object(
+                uuid=doc_uuid,
+                data_object=data_object,
+                class_name=self.index,
+                vector=vector
+            )
+
+        return f"Inserting data into memory at uuid: {doc_uuid}:\n data: {data}"
+
+    def get(self, data):
+        """Return a list holding at most the single most relevant stored text."""
+        return self.get_relevant(data, 1)
+
+    def clear(self):
+        """Remove every object stored in this memory index.
+
+        Returns:
+            The string ``'Obliterated'``.
+        """
+        # Drop only this index's class. The previous `schema.delete_all()`
+        # wiped every class in the Weaviate instance, including data that
+        # does not belong to this application.
+        self.client.schema.delete_class(self.index)
+
+        # Deleting the class removes its definition as well, so re-create it
+        # to leave the memory usable.
+        self._create_schema()
+
+        return 'Obliterated'
+
+    def get_relevant(self, data, num_relevant=5):
+        """Return up to ``num_relevant`` stored texts near ``data``.
+
+        Args:
+            data: Text to embed and search with.
+            num_relevant: Maximum number of results to return.
+
+        Returns:
+            A list of ``raw_text`` strings; an empty list when nothing
+            matches or when the query fails.
+        """
+        query_embedding = get_ada_embedding(data)
+        try:
+            results = self.client.query.get(self.index, ['raw_text']) \
+                          .with_near_vector({'vector': query_embedding, 'certainty': 0.7}) \
+                          .with_limit(num_relevant)  \
+                          .do()
+
+            matches = results['data']['Get'][self.index]
+            return [str(item['raw_text']) for item in matches]
+
+        except Exception as err:
+            # Best effort: a failed lookup is reported and treated as "no memories".
+            print(f'Unexpected error {err=}, {type(err)=}')
+            return []
+
+    def get_stats(self):
+        """Return the aggregate ``meta`` dict for the index, or ``{}`` if there is none."""
+        result = self.client.query.aggregate(self.index) \
+                     .with_meta_count() \
+                     .do()
+        class_data = result['data']['Aggregate'][self.index]
+
+        return class_data[0]['meta'] if class_data else {}
@@ -16,6 +16,7 @@ pymilvus==2.2.4
 redis
 orjson
 Pillow
+weaviate-client==3.15.5
 selenium
 webdriver-manager
 coverage

@@ -0,0 +1,117 @@
+import unittest
+from unittest import mock
+import sys
+import os
+
+from weaviate import Client
+from weaviate.util import get_valid_uuid
+from uuid import uuid4
+
+from autogpt.config import Config
+from autogpt.memory.weaviate import WeaviateMemory
+from autogpt.memory.base import get_ada_embedding
+
+
+@mock.patch.dict(os.environ, {
+    "WEAVIATE_HOST": "127.0.0.1",
+    "WEAVIATE_PROTOCOL": "http",
+    "WEAVIATE_PORT": "8080",
+    "WEAVIATE_USERNAME": "",
+    "WEAVIATE_PASSWORD": "",
+    "MEMORY_INDEX": "AutogptTests"
+})
+class TestWeaviateMemory(unittest.TestCase):
+    """
+    In order to run these tests you will need a local instance of
+    Weaviate running. Refer to https://weaviate.io/developers/weaviate/installation/docker-compose
+    for creating local instances using docker.
+    Alternatively in your .env file set the following environmental variables to run Weaviate embedded (see: https://weaviate.io/developers/weaviate/installation/embedded):
+
+        USE_WEAVIATE_EMBEDDED=True
+        WEAVIATE_EMBEDDED_PATH="/home/me/.local/share/weaviate"
+    """
+    # NOTE(review): mock.patch.dict used as a class decorator only wraps
+    # methods whose names start with "test", so the Config() built in
+    # setUpClass may be reading the real environment rather than the values
+    # above -- confirm.
+    cfg = None
+    client = None
+
+    @classmethod
+    def setUpClass(cls):
+        """Create the config and the raw Weaviate client once for the whole class."""
+        # only create the connection to weaviate once
+        cls.cfg = Config()
+
+        if cls.cfg.use_weaviate_embedded:
+            from weaviate.embedded import EmbeddedOptions
+
+            cls.client = Client(embedded_options=EmbeddedOptions(
+                hostname=cls.cfg.weaviate_host,
+                port=int(cls.cfg.weaviate_port),
+                persistence_data_path=cls.cfg.weaviate_embedded_path
+            ))
+        else:
+            # `self` does not exist inside a classmethod; the port must be
+            # read through `cls` like the protocol and host.
+            cls.client = Client(f"{cls.cfg.weaviate_protocol}://{cls.cfg.weaviate_host}:{cls.cfg.weaviate_port}")
+
+    def setUp(self):
+        """Start every test from an empty index."""
+        try:
+            self.client.schema.delete_class(self.cfg.memory_index)
+        except Exception:
+            # Best effort: the class may simply not exist yet on the first run.
+            pass
+
+        self.memory = WeaviateMemory(self.cfg)
+
+    def test_add(self):
+        """add() stores the document so that a raw query returns it."""
+        doc = 'You are a Titan name Thanos and you are looking for the Infinity Stones'
+        self.memory.add(doc)
+        result = self.client.query.get(self.cfg.memory_index, ['raw_text']).do()
+        actual = result['data']['Get'][self.cfg.memory_index]
+
+        self.assertEqual(len(actual), 1)
+        self.assertEqual(actual[0]['raw_text'], doc)
+
+    def test_get(self):
+        """get() returns a document that was inserted directly through the client."""
+        doc = 'You are an Avenger and swore to defend the Galaxy from a menace called Thanos'
+
+        with self.client.batch as batch:
+            batch.add_data_object(
+                uuid=get_valid_uuid(uuid4()),
+                data_object={'raw_text': doc},
+                class_name=self.cfg.memory_index,
+                vector=get_ada_embedding(doc)
+            )
+
+            batch.flush()
+
+        actual = self.memory.get(doc)
+
+        self.assertEqual(len(actual), 1)
+        self.assertEqual(actual[0], doc)
+
+    def test_get_stats(self):
+        """get_stats() reports the number of stored documents."""
+        docs = [
+            'You are now about to count the number of docs in this index',
+            'And then you about to find out if you can count correctly'
+        ]
+
+        for doc in docs:
+            self.memory.add(doc)
+
+        stats = self.memory.get_stats()
+
+        self.assertTrue(stats)
+        self.assertTrue('count' in stats)
+        self.assertEqual(stats['count'], 2)
+
+    def test_clear(self):
+        """clear() leaves the index empty."""
+        docs = [
+            'Shame this is the last test for this class',
+            'Testing is fun when someone else is doing it'
+        ]
+
+        for doc in docs:
+            self.memory.add(doc)
+
+        self.assertEqual(self.memory.get_stats()['count'], 2)
+
+        self.memory.clear()
+
+        self.assertEqual(self.memory.get_stats()['count'], 0)
+
+
+# Run the test suite when this file is executed directly.
+if __name__ == '__main__':
+    unittest.main()