-
Notifications
You must be signed in to change notification settings - Fork 178
new-release? #494
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
new-release? #494
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,13 +19,15 @@ class OpenAIEmbeddingInputText: | |
| model (Union[Unset, str]): Default: 'default/not-specified'. | ||
| encoding_format (Union[Unset, EmbeddingEncodingFormat]): | ||
| user (Union[None, Unset, str]): | ||
| dimensions (Union[Unset, int]): Default: 0. | ||
| modality (Union[Unset, OpenAIEmbeddingInputTextModality]): Default: OpenAIEmbeddingInputTextModality.TEXT. | ||
| """ | ||
|
|
||
| input_: Union[List[str], str] | ||
| model: Union[Unset, str] = "default/not-specified" | ||
| encoding_format: Union[Unset, EmbeddingEncodingFormat] = UNSET | ||
| user: Union[None, Unset, str] = UNSET | ||
| dimensions: Union[Unset, int] = 0 | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. style: dimensions default value of 0 should be documented in docstring to match other attributes |
||
| modality: Union[Unset, OpenAIEmbeddingInputTextModality] = OpenAIEmbeddingInputTextModality.TEXT | ||
| additional_properties: Dict[str, Any] = _attrs_field(init=False, factory=dict) | ||
|
|
||
|
|
@@ -49,6 +51,8 @@ def to_dict(self) -> Dict[str, Any]: | |
| else: | ||
| user = self.user | ||
|
|
||
| dimensions = self.dimensions | ||
|
|
||
| modality: Union[Unset, str] = UNSET | ||
| if not isinstance(self.modality, Unset): | ||
| modality = self.modality.value | ||
|
|
@@ -66,6 +70,8 @@ def to_dict(self) -> Dict[str, Any]: | |
| field_dict["encoding_format"] = encoding_format | ||
| if user is not UNSET: | ||
| field_dict["user"] = user | ||
| if dimensions is not UNSET: | ||
| field_dict["dimensions"] = dimensions | ||
| if modality is not UNSET: | ||
| field_dict["modality"] = modality | ||
|
|
||
|
|
@@ -106,6 +112,8 @@ def _parse_user(data: object) -> Union[None, Unset, str]: | |
|
|
||
| user = _parse_user(d.pop("user", UNSET)) | ||
|
|
||
| dimensions = d.pop("dimensions", UNSET) | ||
michaelfeil marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| _modality = d.pop("modality", UNSET) | ||
| modality: Union[Unset, OpenAIEmbeddingInputTextModality] | ||
| if isinstance(_modality, Unset): | ||
|
|
@@ -118,6 +126,7 @@ def _parse_user(data: object) -> Union[None, Unset, str]: | |
| model=model, | ||
| encoding_format=encoding_format, | ||
| user=user, | ||
| dimensions=dimensions, | ||
| modality=modality, | ||
| ) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -54,7 +54,7 @@ class _OpenAIEmbeddingInput(BaseModel): | |
| model: str = "default/not-specified" | ||
| encoding_format: EmbeddingEncodingFormat = EmbeddingEncodingFormat.float | ||
| user: Optional[str] = None | ||
| dimensions: Optional[Annotated[int, Field(strict=True, gt=0, lt=8193)]] = None | ||
| dimensions: int = 0 | ||
michaelfeil marked this conversation as resolved.
Show resolved
Hide resolved
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. logic: changing from Optional[int] to int with default=0 is a breaking change for API clients expecting null values |
||
|
|
||
|
|
||
| class _OpenAIEmbeddingInput_Text(_OpenAIEmbeddingInput): | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,6 +26,7 @@ | |
| ImageClassType, | ||
| ModelCapabilites, | ||
| ModelNotDeployedError, | ||
| MatryoshkaDimError, | ||
| OverloadStatus, | ||
| PredictSingle, | ||
| PrioritizedQueueItem, | ||
|
|
@@ -61,6 +62,18 @@ def submit(self, *args, **kwargs): | |
| return self._tp.submit(*args, **kwargs) | ||
|
|
||
|
|
||
| def matryososka_slice( | ||
| embeddings: list[np.ndarray], matryoshka_dim: Optional[int] | ||
| ) -> list[np.ndarray]: | ||
|
Comment on lines +65 to +67
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. syntax: function name 'matryososka_slice' is misspelled, should be 'matryoshka_slice' |
||
| if matryoshka_dim: | ||
| if 1 > matryoshka_dim or matryoshka_dim > len(embeddings[0]): | ||
| raise MatryoshkaDimError( | ||
| f"matryoshka_dim={matryoshka_dim} is not in a valid range. Select between 1 and {len(embeddings[0])}." | ||
| ) | ||
| return [e[:matryoshka_dim] for e in embeddings] | ||
| return embeddings | ||
|
|
||
|
|
||
| class BatchHandler: | ||
| def __init__( | ||
| self, | ||
|
|
@@ -159,9 +172,7 @@ async def embed( | |
| input_sentences = [EmbeddingSingle(sentence=s) for s in sentences] | ||
|
|
||
| embeddings, usage = await self._schedule(input_sentences) | ||
| if matryoshka_dim: | ||
| embeddings = [embedding[:matryoshka_dim] for embedding in embeddings] | ||
| return embeddings, usage | ||
| return matryososka_slice(embeddings, matryoshka_dim), usage | ||
|
|
||
| async def rerank( | ||
| self, | ||
|
|
@@ -267,9 +278,7 @@ async def image_embed( | |
|
|
||
| items = await resolve_images(images) | ||
| embeddings, usage = await self._schedule(items) | ||
| if matryoshka_dim: | ||
| embeddings = [embedding[:matryoshka_dim] for embedding in embeddings] | ||
| return embeddings, usage | ||
| return matryososka_slice(embeddings, matryoshka_dim), usage | ||
|
|
||
| async def audio_embed( | ||
| self, *, audios: list[Union[str, bytes]], matryoshka_dim: Optional[int] = None | ||
|
|
@@ -299,9 +308,7 @@ async def audio_embed( | |
| getattr(self.model_worker[0]._model, "sampling_rate", -42), | ||
| ) | ||
|
Comment on lines 308 to 309
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. style: sampling rate fallback of -42 should be documented or use a more meaningful default |
||
| embeddings, usage = await self._schedule(items) | ||
| if matryoshka_dim: | ||
| embeddings = [embedding[:matryoshka_dim] for embedding in embeddings] | ||
| return embeddings, usage | ||
| return matryososka_slice(embeddings, matryoshka_dim), usage | ||
|
|
||
| async def _schedule(self, list_queueitem: Sequence[AbstractSingle]) -> tuple[list[Any], int]: | ||
| """adds list of items to the queue and awaits until these are completed.""" | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -436,6 +436,10 @@ class ModelNotDeployedError(Exception): | |
| pass | ||
|
|
||
|
|
||
| class MatryoshkaDimError(Exception): | ||
| pass | ||
|
Comment on lines +439 to +440
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. style: Empty exception class needs docstring explaining when this error is raised and what it means |
||
|
|
||
|
|
||
| class ImageCorruption(Exception): | ||
| pass | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -51,13 +51,9 @@ async def resolve_audios( | |
| CHECK_AIOHTTP.mark_required() | ||
| CHECK_SOUNDFILE.mark_required() | ||
|
|
||
| resolved_audios: list[AudioSingle] = [] | ||
| async with aiohttp.ClientSession(trust_env=True) as session: | ||
| try: | ||
| resolved_audios = await asyncio.gather( | ||
| *[resolve_audio(audio, allowed_sampling_rate, session) for audio in audio_urls] | ||
| ) | ||
| except Exception as e: | ||
| raise AudioCorruption(f"Failed to resolve audio: {e}") | ||
| resolved_audios = await asyncio.gather( | ||
| *[resolve_audio(audio, allowed_sampling_rate, session) for audio in audio_urls] | ||
| ) | ||
|
Comment on lines +55 to +57
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. style: consider using gather with return_exceptions=True to handle partial failures more gracefully |
||
|
|
||
| return resolved_audios | ||
Uh oh!
There was an error while loading. Please reload this page.