Skip to content

Commit ba175b1

Browse files
Merge pull request #17 from redis-performance/dvirdu_vectorsets
Redis Vector Sets support
2 parents 92dee85 + 2f1a6fb commit ba175b1

File tree

12 files changed

+2073
-22
lines changed

12 files changed

+2073
-22
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ results/*
88
tools/custom/data.json
99

1010
*.png
11+
venv/

engine/clients/client_factory.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,12 @@
3131
WeaviateUploader,
3232
)
3333

34+
from engine.clients.vectorsets import (
35+
RedisVsetConfigurator,
36+
RedisVsetSearcher,
37+
RedisVsetUploader,
38+
)
39+
3440
ENGINE_CONFIGURATORS = {
3541
"qdrant": QdrantConfigurator,
3642
"weaviate": WeaviateConfigurator,
@@ -39,6 +45,7 @@
3945
"opensearch": OpenSearchConfigurator,
4046
"redis": RedisConfigurator,
4147
"pgvector": PgVectorConfigurator,
48+
"vectorsets": RedisVsetConfigurator,
4249
}
4350

4451
ENGINE_UPLOADERS = {
@@ -49,6 +56,7 @@
4956
"opensearch": OpenSearchUploader,
5057
"redis": RedisUploader,
5158
"pgvector": PgVectorUploader,
59+
"vectorsets": RedisVsetUploader,
5260
}
5361

5462
ENGINE_SEARCHERS = {
@@ -59,6 +67,7 @@
5967
"opensearch": OpenSearchSearcher,
6068
"redis": RedisSearcher,
6169
"pgvector": PgVectorSearcher,
70+
"vectorsets": RedisVsetSearcher,
6271
}
6372

6473

engine/clients/vectorsets/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from engine.clients.vectorsets.configure import RedisVsetConfigurator
2+
from engine.clients.vectorsets.search import RedisVsetSearcher
3+
from engine.clients.vectorsets.upload import RedisVsetUploader

engine/clients/vectorsets/config.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import os
2+
3+
REDIS_PORT = int(os.getenv("REDIS_PORT", 6379))
4+
REDIS_AUTH = os.getenv("REDIS_AUTH", None)
5+
REDIS_USER = os.getenv("REDIS_USER", None)
6+
REDIS_CLUSTER = bool(int(os.getenv("REDIS_CLUSTER", 0)))
7+
8+
# 90 seconds timeout
9+
REDIS_QUERY_TIMEOUT = int(os.getenv("REDIS_QUERY_TIMEOUT", 90 * 1000))
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import redis
2+
from redis import Redis, RedisCluster
3+
4+
from benchmark.dataset import Dataset
5+
from engine.base_client.configure import BaseConfigurator
6+
from engine.clients.vectorsets.config import (
7+
REDIS_AUTH,
8+
REDIS_CLUSTER,
9+
REDIS_PORT,
10+
REDIS_USER,
11+
)
12+
13+
14+
class RedisVsetConfigurator(BaseConfigurator):
15+
16+
def __init__(self, host, collection_params: dict, connection_params: dict):
17+
super().__init__(host, collection_params, connection_params)
18+
redis_constructor = RedisCluster if REDIS_CLUSTER else Redis
19+
self._is_cluster = True if REDIS_CLUSTER else False
20+
self.client = redis_constructor(
21+
host=host, port=REDIS_PORT, password=REDIS_AUTH, username=REDIS_USER
22+
)
23+
self.client.flushall()
24+
25+
def clean(self):
26+
conns = [self.client]
27+
if self._is_cluster:
28+
conns = [
29+
self.client.get_redis_connection(node)
30+
for node in self.client.get_primaries()
31+
]
32+
for conn in conns:
33+
index = conn.ft()
34+
try:
35+
conn.flushall()
36+
except redis.ResponseError as e:
37+
print(e)
38+
39+
def recreate(self, dataset: Dataset, collection_params):
40+
pass
41+
42+
43+
if __name__ == "__main__":
44+
pass

engine/clients/vectorsets/search.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import random
2+
from typing import List, Tuple
3+
4+
import numpy as np
5+
from redis import Redis, RedisCluster
6+
7+
8+
from engine.base_client.search import BaseSearcher
9+
from engine.clients.vectorsets.config import (
10+
REDIS_AUTH,
11+
REDIS_CLUSTER,
12+
REDIS_PORT,
13+
REDIS_QUERY_TIMEOUT,
14+
REDIS_USER,
15+
)
16+
from engine.clients.redis.parser import RedisConditionParser
17+
18+
19+
class RedisVsetSearcher(BaseSearcher):
20+
search_params = {}
21+
client = None
22+
parser = RedisConditionParser()
23+
24+
@classmethod
25+
def init_client(cls, host, distance, connection_params: dict, search_params: dict):
26+
redis_constructor = RedisCluster if REDIS_CLUSTER else Redis
27+
cls.client = redis_constructor(
28+
host=host, port=REDIS_PORT, password=REDIS_AUTH, username=REDIS_USER
29+
)
30+
cls.search_params = search_params
31+
cls._is_cluster = True if REDIS_CLUSTER else False
32+
# In the case of CLUSTER API enabled we randomly select the starting primary shard
33+
# when doing the client initialization to evenly distribute the load among the cluster
34+
cls.conns = [cls.client]
35+
if cls._is_cluster:
36+
cls.conns = [
37+
cls.client.get_redis_connection(node)
38+
for node in cls.client.get_primaries()
39+
]
40+
cls._ft = cls.conns[random.randint(0, len(cls.conns)) - 1].ft()
41+
42+
@classmethod
43+
def search_one(cls, vector, meta_conditions, top) -> List[Tuple[int, float]]:
44+
ef = cls.search_params["search_params"]["ef"]
45+
response = cls.client.execute_command("VSIM", "idx", "FP32", np.array(vector).astype(np.float32).tobytes(), "WITHSCORES", "COUNT", top, "EF", ef)
46+
# decode responses
47+
# every even cell is id, every odd is the score
48+
# scores needs to be 1 - scores since on vector sets 1 is identical, 0 is opposite vector
49+
ids = [int(response[i]) for i in range(0, len(response), 2)]
50+
scores = [1 - float(response[i]) for i in range(1, len(response), 2)]
51+
# we need to return a list of tuples
52+
# where the first element is the id and the second is the score
53+
return list(zip(ids, scores))

engine/clients/vectorsets/upload.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
from typing import List, Optional
2+
3+
import numpy as np
4+
from redis import Redis, RedisCluster
5+
6+
from engine.base_client.upload import BaseUploader
7+
from engine.clients.vectorsets.config import (
8+
REDIS_AUTH,
9+
REDIS_CLUSTER,
10+
REDIS_PORT,
11+
REDIS_USER,
12+
)
13+
from engine.clients.redis.helper import convert_to_redis_coords
14+
15+
16+
class RedisVsetUploader(BaseUploader):
17+
client = None
18+
upload_params = {}
19+
20+
@classmethod
21+
def init_client(cls, host, distance, connection_params, upload_params):
22+
redis_constructor = RedisCluster if REDIS_CLUSTER else Redis
23+
cls.client = redis_constructor(
24+
host=host, port=REDIS_PORT, password=REDIS_AUTH, username=REDIS_USER
25+
)
26+
cls.upload_params = upload_params
27+
28+
@classmethod
29+
def upload_batch(
30+
cls, ids: List[int], vectors: List[list], metadata: Optional[List[dict]]
31+
):
32+
upload_params = cls.upload_params
33+
hnsw_params = upload_params.get("hnsw_config")
34+
M = hnsw_params.get("M", 16)
35+
efc = hnsw_params.get("EF_CONSTRUCTION", 200)
36+
quant = hnsw_params.get("quant")
37+
38+
p = cls.client.pipeline(transaction=False)
39+
for i in range(len(ids)):
40+
idx = ids[i]
41+
vec = vectors[i]
42+
vec = np.array(vec).astype(np.float32).tobytes()
43+
p.execute_command("VADD", "idx", "FP32", vec, idx, quant, "M", M, "EF", efc, "CAS")
44+
p.execute()
45+
46+
@classmethod
47+
def post_upload(cls, _distance):
48+
return {}

0 commit comments

Comments
 (0)