Skip to content

Consider vectorizer the owner of dtype #267

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jan 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 30 additions & 22 deletions redisvl/extensions/llmcache/semantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,12 @@
from redisvl.index import AsyncSearchIndex, SearchIndex
from redisvl.query import RangeQuery
from redisvl.query.filter import FilterExpression
from redisvl.utils.utils import current_timestamp, serialize, validate_vector_dims
from redisvl.utils.utils import (
current_timestamp,
deprecated_argument,
serialize,
validate_vector_dims,
)
from redisvl.utils.vectorize import BaseVectorizer, HFTextVectorizer


Expand All @@ -32,6 +37,7 @@ class SemanticCache(BaseLLMCache):
_index: SearchIndex
_aindex: Optional[AsyncSearchIndex] = None

@deprecated_argument("dtype", "vectorizer")
def __init__(
self,
name: str = "llmcache",
Expand Down Expand Up @@ -86,12 +92,26 @@ def __init__(
else:
prefix = name

# Set vectorizer default
if vectorizer is None:
dtype = kwargs.get("dtype")

# Validate a provided vectorizer or set the default
if vectorizer:
if not isinstance(vectorizer, BaseVectorizer):
raise TypeError("Must provide a valid redisvl.vectorizer class.")
if dtype and vectorizer.dtype != dtype:
raise ValueError(
f"Provided dtype {dtype} does not match vectorizer dtype {vectorizer.dtype}"
)
else:
vectorizer_kwargs = {"dtype": dtype} if dtype else {}

vectorizer = HFTextVectorizer(
model="sentence-transformers/all-mpnet-base-v2"
model="sentence-transformers/all-mpnet-base-v2",
**vectorizer_kwargs,
)

self._vectorizer = vectorizer

# Process fields and other settings
self.set_threshold(distance_threshold)
self.return_fields = [
Expand All @@ -104,9 +124,8 @@ def __init__(
]

# Create semantic cache schema and index
dtype = kwargs.get("dtype", "float32")
schema = SemanticCacheIndexSchema.from_params(
name, prefix, vectorizer.dims, dtype
name, prefix, vectorizer.dims, vectorizer.dtype
)
schema = self._modify_schema(schema, filterable_fields)
self._index = SearchIndex(schema=schema)
Expand All @@ -128,20 +147,9 @@ def __init__(
"If you wish to overwrite the index schema, set overwrite=True during initialization."
)

# Create the search index
# Create the search index in Redis
self._index.create(overwrite=overwrite, drop=False)

# Initialize and validate vectorizer
if not isinstance(vectorizer, BaseVectorizer):
raise TypeError("Must provide a valid redisvl.vectorizer class.")

validate_vector_dims(
vectorizer.dims,
self._index.schema.fields[CACHE_VECTOR_FIELD_NAME].attrs.dims, # type: ignore
)
self._vectorizer = vectorizer
self._dtype = self.index.schema.fields[CACHE_VECTOR_FIELD_NAME].attrs.datatype # type: ignore[union-attr]

def _modify_schema(
self,
schema: SemanticCacheIndexSchema,
Expand Down Expand Up @@ -290,7 +298,7 @@ def _vectorize_prompt(self, prompt: Optional[str]) -> List[float]:
if not isinstance(prompt, str):
raise TypeError("Prompt must be a string.")

return self._vectorizer.embed(prompt, dtype=self._dtype)
return self._vectorizer.embed(prompt)

async def _avectorize_prompt(self, prompt: Optional[str]) -> List[float]:
"""Converts a text prompt to its vector representation using the
Expand Down Expand Up @@ -372,7 +380,7 @@ def check(
num_results=num_results,
return_score=True,
filter_expression=filter_expression,
dtype=self._dtype,
dtype=self._vectorizer.dtype,
)

# Search the cache!
Expand Down Expand Up @@ -543,7 +551,7 @@ def store(
# Load cache entry with TTL
ttl = ttl or self._ttl
keys = self._index.load(
data=[cache_entry.to_dict(self._dtype)],
data=[cache_entry.to_dict(self._vectorizer.dtype)],
ttl=ttl,
id_field=ENTRY_ID_FIELD_NAME,
)
Expand Down Expand Up @@ -607,7 +615,7 @@ async def astore(
# Load cache entry with TTL
ttl = ttl or self._ttl
keys = await aindex.load(
data=[cache_entry.to_dict(self._dtype)],
data=[cache_entry.to_dict(self._vectorizer.dtype)],
ttl=ttl,
id_field=ENTRY_ID_FIELD_NAME,
)
Expand Down
33 changes: 19 additions & 14 deletions redisvl/extensions/router/semantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from redisvl.query import RangeQuery
from redisvl.redis.utils import convert_bytes, hashify, make_dict
from redisvl.utils.log import get_logger
from redisvl.utils.utils import model_to_dict
from redisvl.utils.utils import deprecated_argument, model_to_dict
from redisvl.utils.vectorize import (
BaseVectorizer,
HFTextVectorizer,
Expand All @@ -47,6 +47,7 @@ class SemanticRouter(BaseModel):
class Config:
arbitrary_types_allowed = True

@deprecated_argument("dtype", "vectorizer")
def __init__(
self,
name: str,
Expand All @@ -72,9 +73,19 @@ def __init__(
connection_kwargs (Dict[str, Any]): The connection arguments
for the redis client. Defaults to empty {}.
"""
# Set vectorizer default
if vectorizer is None:
vectorizer = HFTextVectorizer()
dtype = kwargs.get("dtype")

# Validate a provided vectorizer or set the default
if vectorizer:
if not isinstance(vectorizer, BaseVectorizer):
raise TypeError("Must provide a valid redisvl.vectorizer class.")
if dtype and vectorizer.dtype != dtype:
raise ValueError(
f"Provided dtype {dtype} does not match vectorizer dtype {vectorizer.dtype}"
)
else:
vectorizer_kwargs = {"dtype": dtype} if dtype else {}
vectorizer = HFTextVectorizer(**vectorizer_kwargs)

if routing_config is None:
routing_config = RoutingConfig()
Expand All @@ -85,11 +96,9 @@ def __init__(
vectorizer=vectorizer,
routing_config=routing_config,
)
dtype = kwargs.get("dtype", "float32")
self._initialize_index(
redis_client, redis_url, overwrite, dtype, **connection_kwargs
)
self._initialize_index(redis_client, redis_url, overwrite, **connection_kwargs)

@deprecated_argument("dtype")
def _initialize_index(
self,
redis_client: Optional[Redis] = None,
Expand All @@ -100,7 +109,7 @@ def _initialize_index(
):
"""Initialize the search index and handle Redis connection."""
schema = SemanticRouterIndexSchema.from_params(
self.name, self.vectorizer.dims, dtype
self.name, self.vectorizer.dims, self.vectorizer.dtype
)
self._index = SearchIndex(schema=schema)

Expand Down Expand Up @@ -169,9 +178,7 @@ def _add_routes(self, routes: List[Route]):
for route in routes:
# embed route references as a single batch
reference_vectors = self.vectorizer.embed_many(
[reference for reference in route.references],
as_buffer=True,
dtype=self._index.schema.fields[ROUTE_VECTOR_FIELD_NAME].attrs.datatype, # type: ignore[union-attr]
[reference for reference in route.references], as_buffer=True
)
# set route references
for i, reference in enumerate(route.references):
Expand Down Expand Up @@ -248,7 +255,6 @@ def _classify_route(
vector_field_name=ROUTE_VECTOR_FIELD_NAME,
distance_threshold=distance_threshold,
return_fields=["route_name"],
dtype=self._index.schema.fields[ROUTE_VECTOR_FIELD_NAME].attrs.datatype, # type: ignore[union-attr]
)

aggregate_request = self._build_aggregate_request(
Expand Down Expand Up @@ -301,7 +307,6 @@ def _classify_multi_route(
vector_field_name=ROUTE_VECTOR_FIELD_NAME,
distance_threshold=distance_threshold,
return_fields=["route_name"],
dtype=self._index.schema.fields[ROUTE_VECTOR_FIELD_NAME].attrs.datatype, # type: ignore[union-attr]
)
aggregate_request = self._build_aggregate_request(
vector_range_query, aggregation_method, max_k
Expand Down
31 changes: 23 additions & 8 deletions redisvl/extensions/session_manager/semantic_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,13 @@
from redisvl.index import SearchIndex
from redisvl.query import FilterQuery, RangeQuery
from redisvl.query.filter import Tag
from redisvl.utils.utils import validate_vector_dims
from redisvl.utils.utils import deprecated_argument, validate_vector_dims
from redisvl.utils.vectorize import BaseVectorizer, HFTextVectorizer


class SemanticSessionManager(BaseSessionManager):

@deprecated_argument("dtype", "vectorizer")
def __init__(
self,
name: str,
Expand Down Expand Up @@ -70,16 +71,30 @@ def __init__(
super().__init__(name, session_tag)

prefix = prefix or name
dtype = kwargs.get("dtype")

self._vectorizer = vectorizer or HFTextVectorizer(
model="sentence-transformers/msmarco-distilbert-cos-v5"
)
# Validate a provided vectorizer or set the default
if vectorizer:
if not isinstance(vectorizer, BaseVectorizer):
raise TypeError("Must provide a valid redisvl.vectorizer class.")
if dtype and vectorizer.dtype != dtype:
raise ValueError(
f"Provided dtype {dtype} does not match vectorizer dtype {vectorizer.dtype}"
)
else:
vectorizer_kwargs = {"dtype": dtype} if dtype else {}

vectorizer = HFTextVectorizer(
model="sentence-transformers/msmarco-distilbert-cos-v5",
**vectorizer_kwargs,
)

self._vectorizer = vectorizer

self.set_distance_threshold(distance_threshold)

dtype = kwargs.get("dtype", "float32")
schema = SemanticSessionIndexSchema.from_params(
name, prefix, self._vectorizer.dims, dtype
name, prefix, self._vectorizer.dims, vectorizer.dtype
)

self._index = SearchIndex(schema=schema)
Expand Down Expand Up @@ -215,7 +230,7 @@ def get_relevant(
num_results=top_k,
return_score=True,
filter_expression=session_filter,
dtype=self._index.schema.fields[SESSION_VECTOR_FIELD_NAME].attrs.datatype, # type: ignore[union-attr]
dtype=self._vectorizer.dtype,
)
messages = self._index.query(query)

Expand Down Expand Up @@ -341,7 +356,7 @@ def add_messages(
if TOOL_FIELD_NAME in message:
chat_message.tool_call_id = message[TOOL_FIELD_NAME]

chat_messages.append(chat_message.to_dict(dtype=self._index.schema.fields[SESSION_VECTOR_FIELD_NAME].attrs.datatype)) # type: ignore[union-attr]
chat_messages.append(chat_message.to_dict(dtype=self._vectorizer.dtype))

self._index.load(data=chat_messages, id_field=ID_FIELD_NAME)

Expand Down
33 changes: 32 additions & 1 deletion redisvl/utils/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import json
from enum import Enum
from functools import wraps
from time import time
from typing import Any, Dict
from typing import Any, Callable, Dict, Optional
from uuid import uuid4
from warnings import warn

from pydantic.v1 import BaseModel

Expand Down Expand Up @@ -57,3 +59,32 @@ def serialize(data: Dict[str, Any]) -> str:
def deserialize(data: str) -> Dict[str, Any]:
"""Deserialize the input from a string."""
return json.loads(data)


def deprecated_argument(argument: str, replacement: Optional[str] = None) -> Callable:
"""
Decorator to warn if a deprecated argument is passed.

When the wrapped function is called, the decorator will warn if the
deprecated argument is passed as an argument or keyword argument.
"""

message = f"Argument {argument} is deprecated and will be removed in the next major release."
if replacement:
message += f" Use {replacement} instead."

def wrapper(func):
@wraps(func)
def inner(*args, **kwargs):
argument_names = func.__code__.co_varnames

if argument in argument_names:
warn(message, DeprecationWarning, stacklevel=2)
elif argument in kwargs:
warn(message, DeprecationWarning, stacklevel=2)

return func(*args, **kwargs)

return inner

return wrapper
40 changes: 40 additions & 0 deletions tests/integration/test_llmcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
from collections import namedtuple
from time import sleep, time
import warnings

import pytest
from pydantic.v1 import ValidationError
Expand Down Expand Up @@ -71,6 +72,13 @@ def cache_with_redis_client(vectorizer, client):
cache_instance._index.delete(True) # Clean up index


@pytest.fixture(autouse=True)
def disable_deprecation_warnings():
with warnings.catch_warnings():
warnings.simplefilter("ignore")
yield


def test_bad_ttl(cache):
with pytest.raises(ValueError):
cache.set_ttl(2.5)
Expand Down Expand Up @@ -884,3 +892,35 @@ def test_bad_dtype_connecting_to_existing_cache(redis_url):
bad_type = SemanticCache(
name="float64_cache", dtype="float16", redis_url=redis_url
)


def test_vectorizer_dtype_mismatch():
with pytest.raises(ValueError):
SemanticCache(
name="test_dtype_mismatch",
dtype="float32",
vectorizer=HFTextVectorizer(dtype="float16"),
overwrite=True,
)


def test_invalid_vectorizer():
with pytest.raises(TypeError):
SemanticCache(
name="test_invalid_vectorizer",
vectorizer="invalid_vectorizer", # type: ignore
overwrite=True,
)


def test_passes_through_dtype_to_default_vectorizer():
# The default is float32, so we should see float64 if we pass it in.
cache = SemanticCache(
name="test_pass_through_dtype", dtype="float64", overwrite=True
)
assert cache._vectorizer.dtype == "float64"


def test_deprecated_dtype_argument():
with pytest.warns(DeprecationWarning):
SemanticCache(name="test_deprecated_dtype", dtype="float32", overwrite=True)
Loading