Skip to content

Commit 54ef1c2

Browse files
fixup docstrings
1 parent b869075 commit 54ef1c2

File tree

1 file changed

+36
-9
lines changed

1 file changed

+36
-9
lines changed

redisvl/utils/vectorize/text/custom.py

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,29 @@ async def wrapper(self, *args, **kwargs):
5656

5757

5858
class CustomTextVectorizer(BaseVectorizer):
59-
"""
60-
CustomTextVectorizer handles user-provided embedding callables (sync and async).
61-
Synchronous methods are validated during initialization to determine dimensions.
62-
Asynchronous methods are validated lazily on first usage.
59+
"""The CustomTextVectorizer class wraps user-defined embedding methods to create
60+
embeddings for text data.
61+
62+
This vectorizer is designed to accept a provided callable text vectorizer and
63+
provides a class definition to allow for compatibility with RedisVL.
64+
The vectorizer may support both synchronous and asynchronous operations which
65+
allows for batch processing of texts, but at a minimum only synchronous embedding
66+
is required to satisfy the 'embed()' method.
67+
68+
.. code-block:: python
69+
70+
# Synchronous embedding of a single text
71+
vectorizer = CustomTextVectorizer(
72+
embed=my_vectorizer.generate_embedding
73+
)
74+
embedding = vectorizer.embed("Hello, world!")
75+
76+
# Asynchronous batch embedding of multiple texts
77+
embeddings = await vectorizer.aembed_many(
78+
["Hello, world!", "How are you?"],
79+
batch_size=2
80+
)
81+
6382
"""
6483

6584
# User-provided callables
@@ -80,11 +99,19 @@ def __init__(
8099
aembed_many: Optional[Callable] = None,
81100
dtype: str = "float32",
82101
):
83-
"""
84-
1. Store the provided functions for synergy or lazy usage.
85-
2. Manually validate the sync callables to discover the embedding dimension.
86-
3. Call the base initializer with the discovered dimension and provided dtype.
87-
4. Async callables remain lazy until first call.
102+
"""Initialize the Custom vectorizer.
103+
104+
Args:
105+
embed (Callable): a Callable function that accepts a string object and returns a list of floats.
106+
embed_many (Optional[Callable]): a Callable function that accepts a list of string objects and returns a list containing lists of floats. Defaults to None.
107+
aembed (Optional[Callable]): an asynchronous Callable function that accepts a string object and returns a list of floats. Defaults to None.
108+
aembed_many (Optional[Callable]): an asynchronous Callable function that accepts a list of string objects and returns a list containing lists of floats. Defaults to None.
109+
dtype (str): the default datatype to use when embedding text as byte arrays.
110+
Used when setting `as_buffer=True` in calls to embed() and embed_many().
111+
Defaults to 'float32'.
112+
113+
Raises:
114+
ValueError: if embedding validation fails.
88115
"""
89116
# Store user-provided callables
90117
self._embed = embed

0 commit comments

Comments
 (0)