Skip to content

Commit bef319a

Browse files
committed
Expanded estimate_tokens test to compare against tiktoken
1 parent 1fdb5b0 commit bef319a

File tree

1 file changed

+22
-9
lines changed

1 file changed

+22
-9
lines changed

packages/lmi/tests/test_embeddings.py

Lines changed: 22 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
import litellm
66
import pytest
7+
import tiktoken
78
from litellm.caching import Cache, InMemoryCache
89
from pytest_subtests import SubTests
910

@@ -20,17 +21,29 @@
2021
from lmi.utils import VCR_DEFAULT_MATCH_ON, encode_image_as_url
2122

2223

23-
def test_estimate_tokens(stub_png_image: bytes) -> None:
24-
# Test text-only
25-
text_only = "Hello world"
26-
assert estimate_tokens(text_only) == 2.75
24+
def test_estimate_tokens(subtests: SubTests, stub_png_image: bytes) -> None:
25+
with subtests.test(msg="text only"):
26+
text_only = "Hello world"
27+
text_only_estimated_token_count = estimate_tokens(text_only)
28+
assert text_only_estimated_token_count == 2.75, (
29+
"Expected a reasonable token estimate"
30+
)
31+
text_only_actual_token_count = len(
32+
tiktoken.get_encoding("cl100k_base").encode(text_only)
33+
)
34+
assert text_only_estimated_token_count == pytest.approx(
35+
text_only_actual_token_count, abs=1
36+
), "Estimation should be within one token of what tiktoken"
2737

2838
# Test multimodal (text + image)
29-
multimodal = [
30-
"What is in this image?",
31-
encode_image_as_url(image_type="png", image_data=stub_png_image),
32-
]
33-
assert estimate_tokens(multimodal) == 90.5
39+
with subtests.test(msg="multimodal"): # Text + image
40+
multimodal = [
41+
"What is in this image?",
42+
encode_image_as_url(image_type="png", image_data=stub_png_image),
43+
]
44+
assert estimate_tokens(multimodal) == 90.5, (
45+
"Expected a reasonable token estimate"
46+
)
3447

3548

3649
class TestLiteLLMEmbeddingModel:

0 commit comments

Comments
 (0)