Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Soft deprecation of load methods requiring metadata parameters + Fix TypedIndex constructor #112

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
104 changes: 67 additions & 37 deletions cpp/src/TypedIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,13 +144,17 @@ class TypedIndex : public Index {
*
* This constructor is only used to load a V0-type index from file.
*/
TypedIndex(const std::string &indexFilename, const SpaceType space,
const int dimensions, bool searchOnly = false)
[[deprecated("Use the TypedIndex constructor without metadata parameters "
"instead.")]] TypedIndex(const std::string &indexFilename,
const SpaceType space,
const int dimensions,
bool searchOnly = false)
: TypedIndex(space, dimensions, /* M */ 12, /* efConstruction */ 200,
/* randomSeed */ 1, /* maxElements */ 1,
/* enableOrderPreservingTransform */ false) {
auto inputStream = std::make_shared<FileInputStream>(indexFilename);
algorithmImpl = std::make_unique<hnswlib::HierarchicalNSW<dist_t, data_t>>(
spaceImpl.get(), indexFilename, 0, searchOnly);
spaceImpl.get(), inputStream, 0, searchOnly);
Comment on lines +155 to +157
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

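The constructor for HierarchicalNSW takes a `std::shared_ptr<InputStream>` rather than a filename, so the file has to be wrapped in a `FileInputStream` before it is passed in. Its signature is: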

HierarchicalNSW(Space<dist_t, data_t> *s,
                  std::shared_ptr<InputStream> inputStream,
                  size_t max_elements = 0, bool search_only = false)

currentLabel = algorithmImpl->cur_element_count;
}

Expand All @@ -160,8 +164,11 @@ class TypedIndex : public Index {
*
* This constructor is only used to load a V0-type index from a stream.
*/
TypedIndex(std::shared_ptr<InputStream> inputStream, const SpaceType space,
const int dimensions, bool searchOnly = false)
[[deprecated(
"Use the TypedIndex constructor without metadata parameters "
"instead.")]] TypedIndex(std::shared_ptr<InputStream> inputStream,
const SpaceType space, const int dimensions,
bool searchOnly = false)
: TypedIndex(space, dimensions, /* M */ 12, /* efConstruction */ 200,
/* randomSeed */ 1, /* maxElements */ 1,
/* enableOrderPreservingTransform */ false) {
Expand All @@ -187,11 +194,11 @@ class TypedIndex : public Index {
currentLabel = algorithmImpl->cur_element_count;
}

int getNumDimensions() const { return dimensions; }
int getNumDimensions() const override { return dimensions; }

SpaceType getSpace() const { return space; }
SpaceType getSpace() const override { return space; }

std::string getSpaceName() const {
std::string getSpaceName() const override {
// TODO: Use magic_enum?
switch (space) {
case SpaceType::Euclidean:
Expand All @@ -205,35 +212,38 @@ class TypedIndex : public Index {
}
}

StorageDataType getStorageDataType() const {
StorageDataType getStorageDataType() const override {
return storageDataType<data_t>();
}

std::string getStorageDataTypeName() const {
std::string getStorageDataTypeName() const override {
return storageDataTypeName<data_t>();
}

void setEF(size_t ef) {
void setEF(size_t ef) override {
defaultEF = ef;
if (algorithmImpl)
algorithmImpl->ef_ = ef;
}

void setNumThreads(int numThreads) { numThreadsDefault = numThreads; }
void setNumThreads(int numThreads) override {
numThreadsDefault = numThreads;
}

void loadIndex(const std::string &pathToIndex, bool searchOnly = false) {
void loadIndex(const std::string &pathToIndex,
bool searchOnly = false) override {
throw std::runtime_error("Not implemented.");
}

void loadIndex(std::shared_ptr<InputStream> inputStream,
bool searchOnly = false) {
bool searchOnly = false) override {
throw std::runtime_error("Not implemented.");
}

/**
* Save this index to the provided file path on disk.
*/
void saveIndex(const std::string &pathToIndex) {
void saveIndex(const std::string &pathToIndex) override {
algorithmImpl->saveIndex(pathToIndex);
saveIndex(std::make_shared<FileOutputStream>(pathToIndex));
}
Expand All @@ -243,14 +253,14 @@ class TypedIndex : public Index {
* The bytes written to the given output stream can be passed to the
* TypedIndex constructor to reload this index.
*/
void saveIndex(std::shared_ptr<OutputStream> outputStream) {
void saveIndex(std::shared_ptr<OutputStream> outputStream) override {
metadata->setMaxNorm(max_norm);
metadata->setUseOrderPreservingTransform(useOrderPreservingTransform);
metadata->serializeToStream(outputStream);
algorithmImpl->saveIndex(outputStream);
}

float getDistance(std::vector<float> _a, std::vector<float> _b) {
float getDistance(std::vector<float> _a, std::vector<float> _b) override {
if ((int)_a.size() != dimensions || (int)_b.size() != dimensions) {
throw std::runtime_error("Index has " + std::to_string(dimensions) +
" dimensions, but received vectors of size: " +
Expand Down Expand Up @@ -285,7 +295,7 @@ class TypedIndex : public Index {
}

hnswlib::labeltype addItem(std::vector<float> vector,
std::optional<hnswlib::labeltype> id) {
std::optional<hnswlib::labeltype> id) override {
std::vector<size_t> ids;

if (id) {
Expand All @@ -297,13 +307,15 @@ class TypedIndex : public Index {

std::vector<hnswlib::labeltype>
addItems(const std::vector<std::vector<float>> vectors,
std::vector<hnswlib::labeltype> ids = {}, int numThreads = -1) {
std::vector<hnswlib::labeltype> ids = {},
int numThreads = -1) override {
return addItems(vectorsToNDArray(vectors), ids, numThreads);
}

std::vector<hnswlib::labeltype>
addItems(NDArray<float, 2> floatInput,
std::vector<hnswlib::labeltype> ids = {}, int numThreads = -1) {
std::vector<hnswlib::labeltype> ids = {},
int numThreads = -1) override {
if (numThreads <= 0)
numThreads = numThreadsDefault;

Expand Down Expand Up @@ -477,13 +489,13 @@ class TypedIndex : public Index {
return algorithmImpl->getDataByLabel(id);
}

std::vector<float> getVector(hnswlib::labeltype id) {
std::vector<float> getVector(hnswlib::labeltype id) override {
std::vector<data_t> rawData = getRawVector(id);
NDArray<data_t, 2> output(rawData.data(), {1, (int)dimensions});
return dataTypeToFloat<data_t, scalefactor>(output).data;
}

NDArray<float, 2> getVectors(std::vector<hnswlib::labeltype> ids) {
NDArray<float, 2> getVectors(std::vector<hnswlib::labeltype> ids) override {
NDArray<float, 2> output = NDArray<float, 2>({(int)ids.size(), dimensions});

for (unsigned long i = 0; i < ids.size(); i++) {
Expand All @@ -495,7 +507,7 @@ class TypedIndex : public Index {
return output;
}

std::vector<hnswlib::labeltype> getIDs() const {
std::vector<hnswlib::labeltype> getIDs() const override {
std::vector<hnswlib::labeltype> ids;
ids.reserve(algorithmImpl->label_lookup_.size());

Expand All @@ -506,22 +518,24 @@ class TypedIndex : public Index {
return ids;
}

long long getIDsCount() const { return algorithmImpl->label_lookup_.size(); }
long long getIDsCount() const override {
return algorithmImpl->label_lookup_.size();
}

const std::unordered_map<hnswlib::labeltype, hnswlib::tableint> &
getIDsMap() const {
getIDsMap() const override {
return algorithmImpl->label_lookup_;
}

std::tuple<NDArray<hnswlib::labeltype, 2>, NDArray<dist_t, 2>>
query(std::vector<std::vector<float>> floatQueryVectors, int k = 1,
int numThreads = -1, long queryEf = -1) {
int numThreads = -1, long queryEf = -1) override {
return query(vectorsToNDArray(floatQueryVectors), k, numThreads, queryEf);
}

std::tuple<NDArray<hnswlib::labeltype, 2>, NDArray<dist_t, 2>>
query(NDArray<float, 2> floatQueryVectors, int k = 1, int numThreads = -1,
long queryEf = -1) {
long queryEf = -1) override {
if (queryEf > 0 && queryEf < k) {
throw std::runtime_error("queryEf must be equal to or greater than the "
"requested number of neighbors");
Expand Down Expand Up @@ -636,7 +650,8 @@ class TypedIndex : public Index {
}

std::tuple<std::vector<hnswlib::labeltype>, std::vector<float>>
query(std::vector<float> floatQueryVector, int k = 1, long queryEf = -1) {
query(std::vector<float> floatQueryVector, int k = 1,
long queryEf = -1) override {
if (queryEf > 0 && queryEf < k) {
throw std::runtime_error("queryEf must be equal to or greater than the "
"requested number of neighbors");
Expand Down Expand Up @@ -710,32 +725,40 @@ class TypedIndex : public Index {
return {labels, distances};
}

void markDeleted(hnswlib::labeltype label) {
void markDeleted(hnswlib::labeltype label) override {
algorithmImpl->markDelete(label);
}

void unmarkDeleted(hnswlib::labeltype label) {
void unmarkDeleted(hnswlib::labeltype label) override {
algorithmImpl->unmarkDelete(label);
}

void resizeIndex(size_t new_size) { algorithmImpl->resizeIndex(new_size); }
void resizeIndex(size_t new_size) override {
algorithmImpl->resizeIndex(new_size);
}

size_t getMaxElements() const { return algorithmImpl->max_elements_; }
size_t getMaxElements() const override {
return algorithmImpl->max_elements_;
}

size_t getNumElements() const { return algorithmImpl->cur_element_count; }
size_t getNumElements() const override {
return algorithmImpl->cur_element_count;
}

int getEF() const {
int getEF() const override {
if (algorithmImpl)
return algorithmImpl->ef_;
else
return defaultEF;
}

int getNumThreads() { return numThreadsDefault; }
int getNumThreads() override { return numThreadsDefault; }

size_t getEfConstruction() const { return algorithmImpl->ef_construction_; }
size_t getEfConstruction() const override {
return algorithmImpl->ef_construction_;
}

size_t getM() const { return algorithmImpl->M_; }
size_t getM() const override { return algorithmImpl->M_; }
};

std::unique_ptr<Index>
Expand Down Expand Up @@ -782,3 +805,10 @@ loadTypedIndexFromStream(std::shared_ptr<InputStream> inputStream) {
return loadTypedIndexFromMetadata(
voyager::Metadata::loadFromStream(inputStream), inputStream);
}

std::unique_ptr<Index>
loadTypedIndexFromFile(const std::string &indexFilename) {
auto inputStream = std::make_shared<FileInputStream>(indexFilename);
return loadTypedIndexFromMetadata(
voyager::Metadata::loadFromStream(inputStream), inputStream);
}
7 changes: 4 additions & 3 deletions docs/java/allclasses-index.html
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
<!DOCTYPE HTML>
<html lang="en">
<head>
<!-- Generated by javadoc (17) on Tue Dec 17 13:17:12 EST 2024 -->
<!-- Generated by javadoc (17) on Tue Feb 25 13:08:03 EST 2025 -->
<title>All Classes and Interfaces (voyager 2.1.0 API)</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="dc.created" content="2024-12-17">
<meta name="dc.created" content="2025-02-25">
<meta name="description" content="class index">
<meta name="generator" content="javadoc/AllClassesIndexWriter">
<link rel="stylesheet" type="text/css" href="stylesheet.css" title="Style">
Expand Down Expand Up @@ -37,6 +37,7 @@
<li>Class</li>
<li>Use</li>
<li><a href="overview-tree.html">Tree</a></li>
<li><a href="deprecated-list.html">Deprecated</a></li>
<li><a href="index-all.html">Index</a></li>
<li><a href="help-doc.html#all-classes">Help</a></li>
</ul>
Expand Down Expand Up @@ -104,7 +105,7 @@ <h1 title="All Classes and Interfaces" class="title">All Classes and Interfaces<
</main>
<footer role="contentinfo">
<hr>
<p class="legal-copy"><small>Copyright &#169; 2024. All rights reserved.</small></p>
<p class="legal-copy"><small>Copyright &#169; 2025. All rights reserved.</small></p>
</footer>
</div>
</div>
Expand Down
7 changes: 4 additions & 3 deletions docs/java/allpackages-index.html
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
<!DOCTYPE HTML>
<html lang="en">
<head>
<!-- Generated by javadoc (17) on Tue Dec 17 13:17:12 EST 2024 -->
<!-- Generated by javadoc (17) on Tue Feb 25 13:08:03 EST 2025 -->
<title>All Packages (voyager 2.1.0 API)</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="dc.created" content="2024-12-17">
<meta name="dc.created" content="2025-02-25">
<meta name="description" content="package index">
<meta name="generator" content="javadoc/AllPackagesIndexWriter">
<link rel="stylesheet" type="text/css" href="stylesheet.css" title="Style">
Expand Down Expand Up @@ -33,6 +33,7 @@
<li>Class</li>
<li>Use</li>
<li><a href="overview-tree.html">Tree</a></li>
<li><a href="deprecated-list.html">Deprecated</a></li>
<li><a href="index-all.html">Index</a></li>
<li><a href="help-doc.html#all-packages">Help</a></li>
</ul>
Expand Down Expand Up @@ -72,7 +73,7 @@ <h1 title="All&amp;nbsp;Packages" class="title">All&nbsp;Packages</h1>
</main>
<footer role="contentinfo">
<hr>
<p class="legal-copy"><small>Copyright &#169; 2024. All rights reserved.</small></p>
<p class="legal-copy"><small>Copyright &#169; 2025. All rights reserved.</small></p>
</footer>
</div>
</div>
Expand Down
7 changes: 4 additions & 3 deletions docs/java/com/spotify/voyager/jni/Index.QueryResults.html
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
<!DOCTYPE HTML>
<html lang="en">
<head>
<!-- Generated by javadoc (17) on Tue Dec 17 13:17:12 EST 2024 -->
<!-- Generated by javadoc (17) on Tue Feb 25 13:08:03 EST 2025 -->
<title>Index.QueryResults (voyager 2.1.0 API)</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="dc.created" content="2024-12-17">
<meta name="dc.created" content="2025-02-25">
<meta name="description" content="declaration: package: com.spotify.voyager.jni, class: Index, class: QueryResults">
<meta name="generator" content="javadoc/ClassWriterImpl">
<link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style">
Expand Down Expand Up @@ -37,6 +37,7 @@
<li class="nav-bar-cell1-rev">Class</li>
<li><a href="class-use/Index.QueryResults.html">Use</a></li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../index-all.html">Index</a></li>
<li><a href="../../../../help-doc.html#class">Help</a></li>
</ul>
Expand Down Expand Up @@ -253,7 +254,7 @@ <h3>getDistances</h3>
</main>
<footer role="contentinfo">
<hr>
<p class="legal-copy"><small>Copyright &#169; 2024. All rights reserved.</small></p>
<p class="legal-copy"><small>Copyright &#169; 2025. All rights reserved.</small></p>
</footer>
</div>
</div>
Expand Down
7 changes: 4 additions & 3 deletions docs/java/com/spotify/voyager/jni/Index.SpaceType.html
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
<!DOCTYPE HTML>
<html lang="en">
<head>
<!-- Generated by javadoc (17) on Tue Dec 17 13:17:12 EST 2024 -->
<!-- Generated by javadoc (17) on Tue Feb 25 13:08:03 EST 2025 -->
<title>Index.SpaceType (voyager 2.1.0 API)</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="dc.created" content="2024-12-17">
<meta name="dc.created" content="2025-02-25">
<meta name="description" content="declaration: package: com.spotify.voyager.jni, class: Index, enum: SpaceType">
<meta name="generator" content="javadoc/ClassWriterImpl">
<link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style">
Expand Down Expand Up @@ -37,6 +37,7 @@
<li class="nav-bar-cell1-rev">Class</li>
<li><a href="class-use/Index.SpaceType.html">Use</a></li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../index-all.html">Index</a></li>
<li><a href="../../../../help-doc.html#class">Help</a></li>
</ul>
Expand Down Expand Up @@ -236,7 +237,7 @@ <h3>valueOf</h3>
</main>
<footer role="contentinfo">
<hr>
<p class="legal-copy"><small>Copyright &#169; 2024. All rights reserved.</small></p>
<p class="legal-copy"><small>Copyright &#169; 2025. All rights reserved.</small></p>
</footer>
</div>
</div>
Expand Down
Loading
Loading