Skip to content

Commit

Permalink
Test: Re-seed NumPy PRNG
Browse files Browse the repository at this point in the history
  • Loading branch information
ashvardanian committed Apr 11, 2024
1 parent 7db0c39 commit 8de87df
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 9 deletions.
1 change: 1 addition & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ cmake --build ./build_release --config Release
Similarly, to use the most recent Clang compiler version from HomeBrew on MacOS:

```sh
brew install clang++ clang cmake
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_COMPILER="/opt/homebrew/opt/llvm/bin/clang" \
Expand Down
38 changes: 29 additions & 9 deletions python/scripts/test_index.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from time import time

import pytest
import numpy as np
Expand Down Expand Up @@ -40,12 +41,18 @@
]


def reset_randomness(seed=None):
    """Re-seed NumPy's global PRNG and return the seed that was applied.

    Args:
        seed: Optional integer seed. When omitted, the current wall-clock
            time in whole seconds is used, so separate runs draw different
            random data.

    Returns:
        The integer seed passed to ``np.random.seed``. Callers may log it and
        pass it back in later to replay a failing run with identical data.
    """
    if seed is None:
        # Time-based default: the data differs between runs unless a seed is given.
        seed = int(time())
    # ``np.random.seed`` only accepts integers in the range [0, 2**32).
    seed = int(seed) % (2**32)
    np.random.seed(seed)
    return seed


@pytest.mark.parametrize("ndim", [3, 97, 256])
@pytest.mark.parametrize("metric", [MetricKind.Cos, MetricKind.L2sq])
@pytest.mark.parametrize("batch_size", [1, 7, 1024])
@pytest.mark.parametrize("quantization", [ScalarKind.F32, ScalarKind.I8])
@pytest.mark.parametrize("dtype", [np.float32, np.float64, np.float16])
def test_index_initialization_and_addition(ndim, metric, quantization, dtype, batch_size):
reset_randomness()

index = Index(ndim=ndim, metric=metric, dtype=quantization, multi=False)
keys = np.arange(batch_size)
vectors = random_vectors(count=batch_size, ndim=ndim, dtype=dtype)
Expand All @@ -59,25 +66,27 @@ def test_index_initialization_and_addition(ndim, metric, quantization, dtype, ba
@pytest.mark.parametrize("quantization", [ScalarKind.F32, ScalarKind.I8])
@pytest.mark.parametrize("dtype", [np.float32, np.float64, np.float16])
def test_index_retrieval(ndim, metric, quantization, dtype, batch_size):
reset_randomness()

index = Index(ndim=ndim, metric=metric, dtype=quantization, multi=False)
keys = np.arange(batch_size)
vectors = random_vectors(count=batch_size, ndim=ndim, dtype=dtype)
index.add(keys, vectors, threads=threads)
vectors_retrived = np.vstack(index.get(keys, dtype))
assert np.allclose(vectors_retrived, vectors, atol=0.1)
vectors_retrieved = np.vstack(index.get(keys, dtype))
assert np.allclose(vectors_retrieved, vectors, atol=0.1)

# Try retrieving all the keys
keys_retrived = index.keys
keys_retrived = np.array(keys_retrived)
assert np.all(np.sort(keys_retrived) == keys)
keys_retrieved = index.keys
keys_retrieved = np.array(keys_retrieved)
assert np.all(np.sort(keys_retrieved) == keys)

# Try retrieving all the vectors in one batch
if quantization != ScalarKind.I8:
# The returned vectors can be in a different order
vectors_batch_retrived = index.vectors
vectors_reordering = np.argsort(keys_retrived)
vectors_batch_retrived = vectors_batch_retrived[vectors_reordering]
assert np.allclose(vectors_batch_retrived, vectors, atol=0.1)
vectors_batch_retrieved = index.vectors
vectors_reordering = np.argsort(keys_retrieved)
vectors_batch_retrieved = vectors_batch_retrieved[vectors_reordering]
assert np.allclose(vectors_batch_retrieved, vectors, atol=0.1)


@pytest.mark.parametrize("ndim", [3, 97, 256])
Expand All @@ -86,6 +95,8 @@ def test_index_retrieval(ndim, metric, quantization, dtype, batch_size):
@pytest.mark.parametrize("quantization", [ScalarKind.F32, ScalarKind.I8])
@pytest.mark.parametrize("dtype", [np.float32, np.float64, np.float16])
def test_index_search(ndim, metric, quantization, dtype, batch_size):
reset_randomness()

index = Index(ndim=ndim, metric=metric, dtype=quantization, multi=False)
keys = np.arange(batch_size)
vectors = random_vectors(count=batch_size, ndim=ndim, dtype=dtype)
Expand All @@ -112,6 +123,8 @@ def test_index_self_recall(ndim: int, batch_size: int):
"""
Test self-recall evaluation scripts.
"""
reset_randomness()

index = Index(ndim=ndim, multi=False)
keys = np.arange(batch_size)
vectors = random_vectors(count=batch_size, ndim=ndim)
Expand All @@ -126,6 +139,8 @@ def test_index_self_recall(ndim: int, batch_size: int):

@pytest.mark.parametrize("batch_size", [1, 7, 1024])
def test_index_duplicates(batch_size):
reset_randomness()

ndim = 8
index = Index(ndim=ndim, multi=False)
keys = np.arange(batch_size)
Expand All @@ -147,6 +162,8 @@ def test_index_duplicates(batch_size):

@pytest.mark.parametrize("batch_size", [1, 7, 1024])
def test_index_stats(batch_size):
reset_randomness()

ndim = 8
index = Index(ndim=ndim, multi=False)
keys = np.arange(batch_size)
Expand All @@ -165,6 +182,7 @@ def test_index_stats(batch_size):
@pytest.mark.parametrize("batch_size", [0, 1, 7, 1024])
@pytest.mark.parametrize("quantization", [ScalarKind.F32, ScalarKind.I8])
def test_index_save_load_restore_copy(ndim, quantization, batch_size):
reset_randomness()
index = Index(ndim=ndim, dtype=quantization, multi=False)

if batch_size > 0:
Expand Down Expand Up @@ -213,6 +231,7 @@ def test_index_save_load_restore_copy(ndim, quantization, batch_size):

@pytest.mark.parametrize("batch_size", [32])
def test_index_contains_remove_rename(batch_size):
reset_randomness()
if batch_size <= 1:
return

Expand Down Expand Up @@ -246,6 +265,7 @@ def test_index_contains_remove_rename(batch_size):
@pytest.mark.parametrize("batch_size", [3, 17, 33])
@pytest.mark.parametrize("threads", [1, 4])
def test_index_oversubscribed_search(batch_size: int, threads: int):
reset_randomness()
if batch_size <= 1:
return

Expand Down

0 comments on commit 8de87df

Please sign in to comment.