From a8a817c17a06b6cc3993e406378de038c8c92873 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=84=8D=F0=9D=95=A0=F0=9D=95=9D=F0=9D=95=9D=F0=9D=95=A0?= =?UTF-8?q?=F0=9D=95=A8=20=F0=9D=95=84=F0=9D=95=92=F0=9D=95=9F?= Date: Mon, 26 Aug 2024 06:16:38 +0300 Subject: [PATCH] [Bugfix]: Use float32 for base64 embedding (#7855) Signed-off-by: Hollow Man --- examples/openai_embedding_client.py | 1 - tests/entrypoints/openai/test_embedding.py | 11 ++++++++++- vllm/entrypoints/openai/serving_embedding.py | 4 +++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/examples/openai_embedding_client.py b/examples/openai_embedding_client.py index b4f4c7ad6beb2..4bd7ca01d750d 100644 --- a/examples/openai_embedding_client.py +++ b/examples/openai_embedding_client.py @@ -19,7 +19,6 @@ "The best thing about vLLM is that it supports many different models" ], model=model, - encoding_format="float", ) for data in responses.data: diff --git a/tests/entrypoints/openai/test_embedding.py b/tests/entrypoints/openai/test_embedding.py index c9747339bbf15..6bf170b94c0d7 100644 --- a/tests/entrypoints/openai/test_embedding.py +++ b/tests/entrypoints/openai/test_embedding.py @@ -128,9 +128,18 @@ async def test_batch_base64_embedding(embedding_client: openai.AsyncOpenAI, for data in responses_base64.data: decoded_responses_base64_data.append( np.frombuffer(base64.b64decode(data.embedding), - dtype="float").tolist()) + dtype="float32").tolist()) assert responses_float.data[0].embedding == decoded_responses_base64_data[ 0] assert responses_float.data[1].embedding == decoded_responses_base64_data[ 1] + + # Default response is float32 decoded from base64 by OpenAI Client + responses_default = await embedding_client.embeddings.create( + input=input_texts, model=model_name) + + assert responses_float.data[0].embedding == responses_default.data[ + 0].embedding + assert responses_float.data[1].embedding == responses_default.data[ + 1].embedding diff --git a/vllm/entrypoints/openai/serving_embedding.py b/vllm/entrypoints/openai/serving_embedding.py index b0f70ff43e228..12ec6be03cd62 100644 --- a/vllm/entrypoints/openai/serving_embedding.py +++ b/vllm/entrypoints/openai/serving_embedding.py @@ -31,7 +31,9 @@ def _get_embedding( if encoding_format == "float": return output.embedding elif encoding_format == "base64": - embedding_bytes = np.array(output.embedding).tobytes() + # Force to use float32 for base64 encoding + # to match the OpenAI python client behavior + embedding_bytes = np.array(output.embedding, dtype="float32").tobytes() return base64.b64encode(embedding_bytes).decode("utf-8") assert_never(encoding_format)