Fixing index_prefix_path bug in python for StaticMemoryIndex (#491)
* Fixing the same bug I had in the static disk index inside of the static memory index as well.

* Unit tests, and a better understanding of why the existing unit tests passed despite this bug
daxpryce authored Nov 9, 2023
1 parent 4a57e89 commit 35f8cf7
Showing 5 changed files with 68 additions and 10 deletions.
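The root cause in both classes is a doubled os.path.join(): the prefix returned by _valid_index_prefix already appears to include the index directory, and joining it with index_directory a second time only stays invisible when the paths involved are absolute. A minimal sketch of that behaviour, using hypothetical directory and prefix names rather than anything taken from the repository:

import os

# With absolute paths, os.path.join() discards the earlier components once a later
# argument is absolute, so an accidental second join is harmless:
prefix_abs = os.path.join("/tmp/index_dir", "ann")   # "/tmp/index_dir/ann"
os.path.join("/tmp/index_dir", prefix_abs)           # still "/tmp/index_dir/ann"

# With relative paths, the same double join silently produces a path that does not exist:
prefix_rel = os.path.join("index_dir", "ann")        # "index_dir/ann"
os.path.join("index_dir", prefix_rel)                # "index_dir/index_dir/ann"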
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -11,7 +11,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "diskannpy"
version = "0.7.0rc1"
version = "0.7.0rc2"

description = "DiskANN Python extension module"
readme = "python/README.md"
6 changes: 3 additions & 3 deletions python/src/_static_disk_index.py
@@ -79,9 +79,9 @@ def __init__(
does not exist, you are required to provide it.
- **index_prefix**: The prefix of the index files. Defaults to "ann".
"""
index_prefix = _valid_index_prefix(index_directory, index_prefix)
index_prefix_path = _valid_index_prefix(index_directory, index_prefix)
vector_dtype, metric, _, _ = _ensure_index_metadata(
index_prefix,
index_prefix_path,
vector_dtype,
distance_metric,
1, # it doesn't matter because we don't need it in this context anyway
@@ -101,7 +101,7 @@ def __init__(
_index = _native_dap.StaticDiskFloatIndex
self._index = _index(
distance_metric=dap_metric,
index_path_prefix=index_prefix,
index_path_prefix=index_prefix_path,
num_threads=num_threads,
num_nodes_to_cache=num_nodes_to_cache,
cache_mechanism=cache_mechanism,
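The shape of the fix in both constructors is to resolve the prefix path once and then pass that single value to everything that derives file names from it. A rough sketch of the idea, with a hypothetical stand-in for _valid_index_prefix (the real helper presumably also validates its inputs) and a hypothetical derived file name:

import os

def resolve_index_prefix(index_directory: str, index_prefix: str) -> str:
    # Hypothetical stand-in for _valid_index_prefix: join directory and prefix exactly once.
    return os.path.join(index_directory, index_prefix)

index_prefix_path = resolve_index_prefix("index_dir", "ann")  # "index_dir/ann"
# File names are then derived from the already-resolved prefix and never re-joined
# with index_directory a second time:
metadata_file = f"{index_prefix_path}_metadata.bin"           # hypothetical file name, for illustration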
10 changes: 5 additions & 5 deletions python/src/_static_memory_index.py
@@ -77,22 +77,22 @@ def __init__(
does not exist, you are required to provide it.
- **enable_filters**: Indexes built with filters can also be used for filtered search.
"""
index_prefix = _valid_index_prefix(index_directory, index_prefix)
index_prefix_path = _valid_index_prefix(index_directory, index_prefix)
self._labels_map = {}
self._labels_metadata = {}
if enable_filters:
try:
with open(index_prefix + "_labels_map.txt", "r") as labels_map_if:
with open(f"{index_prefix_path}_labels_map.txt", "r") as labels_map_if:
for line in labels_map_if:
(key, val) = line.split("\t")
self._labels_map[key] = int(val)
with open(f"{index_prefix}_label_metadata.json", "r") as labels_metadata_if:
with open(f"{index_prefix_path}_label_metadata.json", "r") as labels_metadata_if:
self._labels_metadata = json.load(labels_metadata_if)
except: # noqa: E722
# exceptions are basically presumed to be either file not found or file not formatted correctly
raise RuntimeError("Filter labels file was unable to be processed.")
vector_dtype, metric, num_points, dims = _ensure_index_metadata(
index_prefix,
index_prefix_path,
vector_dtype,
distance_metric,
1, # it doesn't matter because we don't need it in this context anyway
@@ -119,7 +119,7 @@ def __init__(
distance_metric=dap_metric,
num_points=num_points,
dimensions=dims,
index_path=os.path.join(index_directory, index_prefix),
index_path=index_prefix_path,
num_threads=num_threads,
initial_search_complexity=initial_search_complexity,
)
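For reference, the filter-label files read in the block above are plain text and JSON keyed off the same resolved prefix. A small sketch of writing files in the shape that constructor expects, with made-up labels and metadata contents:

import json

index_prefix_path = "index_dir/ann"  # hypothetical resolved prefix

# <prefix>_labels_map.txt: one "label<TAB>numeric_id" pair per line
with open(f"{index_prefix_path}_labels_map.txt", "w") as labels_map_of:
    labels_map_of.write("red\t0\n")
    labels_map_of.write("blue\t1\n")

# <prefix>_label_metadata.json: a JSON document describing the labels (contents made up here)
with open(f"{index_prefix_path}_label_metadata.json", "w") as labels_metadata_of:
    json.dump({"num_labels": 2}, labels_metadata_of)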
32 changes: 31 additions & 1 deletion python/tests/test_static_disk_index.py
@@ -3,6 +3,7 @@

import shutil
import unittest
from pathlib import Path
from tempfile import mkdtemp

import diskannpy as dap
@@ -165,4 +166,33 @@ def test_zero_threads(self):
k = 5
ids, dists = index.batch_search(
query_vectors, k_neighbors=k, complexity=5, beam_width=2, num_threads=0
)

def test_relative_paths(self):
# Issues 483 and 491 both fixed errors that were somehow slipping past our unit tests.
# os.path.join() discards earlier components when a later argument is absolute, so joining two
# absolute paths effectively returns the second one. Since our unit tests use absolute paths via
# tempfile.mkdtemp(), the doubled os.path.join() was never caught by our tests, but it was very
# easy to trip when using relative paths.
rel_dir = "tmp"
Path(rel_dir).mkdir(exist_ok=True)
try:
tiny_index_vecs = random_vectors(20, 10, dtype=np.float32, seed=12345)
dap.build_disk_index(
data=tiny_index_vecs,
distance_metric="l2",
index_directory=rel_dir,
graph_degree=16,
complexity=32,
search_memory_maximum=0.00003,
build_memory_maximum=1,
num_threads=0,
pq_disk_bytes=0,
)
index = dap.StaticDiskIndex(
index_directory=rel_dir,
num_threads=16,
num_nodes_to_cache=10,
)

finally:
shutil.rmtree(rel_dir, ignore_errors=True)
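The new test only exercises the constructor; if we wanted it to assert the fix directly, a possible extra check inside the try block above (assuming the default "ann" index prefix mentioned in the docstring earlier, and reusing the test's rel_dir and Path import) would be:

# With the fix, StaticDiskIndex reads its files from tmp/ann* rather than looking
# for them under a doubled tmp/tmp/ann path:
assert any(Path(rel_dir).glob("ann*")), "expected index files such as tmp/ann* to exist"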
28 changes: 28 additions & 0 deletions python/tests/test_static_memory_index.py
@@ -5,6 +5,7 @@
import shutil
import unittest

from pathlib import Path
from tempfile import mkdtemp

import diskannpy as dap
@@ -191,6 +192,33 @@ def test_zero_threads(self):
k = 5
ids, dists = index.batch_search(query_vectors, k_neighbors=k, complexity=5, num_threads=0)

def test_relative_paths(self):
# Issues 483 and 491 both fixed errors that were somehow slipping past our unit tests.
# os.path.join() discards earlier components when a later argument is absolute, so joining two
# absolute paths effectively returns the second one. Since our unit tests use absolute paths via
# tempfile.mkdtemp(), the doubled os.path.join() was never caught by our tests, but it was very
# easy to trip when using relative paths.
rel_dir = "tmp"
Path(rel_dir).mkdir(exist_ok=True)
try:
tiny_index_vecs = random_vectors(20, 10, dtype=np.float32, seed=12345)
dap.build_memory_index(
data=tiny_index_vecs,
distance_metric="l2",
index_directory=rel_dir,
graph_degree=16,
complexity=32,
num_threads=0,
)
index = dap.StaticMemoryIndex(
index_directory=rel_dir,
num_threads=0,
initial_search_complexity=32,
)

finally:
shutil.rmtree(rel_dir, ignore_errors=True)
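Outside the test suite, the end-to-end flow this fix enables looks like the following sketch, mirroring the calls in the test above with made-up data and a made-up relative directory name:

from pathlib import Path

import numpy as np

import diskannpy as dap

rel_dir = "example_index"           # hypothetical relative directory
Path(rel_dir).mkdir(exist_ok=True)

rng = np.random.default_rng(12345)
vectors = rng.random((20, 10), dtype=np.float32)  # hypothetical data

dap.build_memory_index(
    data=vectors,
    distance_metric="l2",
    index_directory=rel_dir,        # a relative path now resolves correctly
    graph_degree=16,
    complexity=32,
    num_threads=0,
)
index = dap.StaticMemoryIndex(
    index_directory=rel_dir,
    num_threads=0,
    initial_search_complexity=32,
)
ids, dists = index.batch_search(vectors[:5], k_neighbors=5, complexity=32, num_threads=0)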



class TestFilteredStaticMemoryIndex(unittest.TestCase):
def test_simple_scenario(self):
