-
Notifications
You must be signed in to change notification settings - Fork 722
Add cpp examples #435
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Add cpp examples #435
Changes from 4 commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,185 @@ | ||
# C++ examples | ||
|
||
An example of creating an index, inserting elements, searching, and serializing the index: | ||
```cpp | ||
#include "../../hnswlib/hnswlib.h" | ||
|
||
|
||
int main() { | ||
int dim = 16; // Dimension of the elements | ||
int max_elements = 10000; // Maximum number of elements, should be known beforehand | ||
int M = 16; // Tightly connected with internal dimensionality of the data | ||
// strongly affects the memory consumption | ||
int ef_construction = 200; // Controls index search speed/build speed tradeoff | ||
|
||
// Initing index | ||
hnswlib::L2Space space(dim); | ||
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction); | ||
|
||
// Generate random data | ||
std::mt19937 rng; | ||
rng.seed(47); | ||
std::uniform_real_distribution<> distrib_real; | ||
float* data = new float[dim * max_elements]; | ||
for (int i = 0; i < dim * max_elements; i++) { | ||
data[i] = distrib_real(rng); | ||
} | ||
|
||
// Add data to index | ||
for (int i = 0; i < max_elements; i++) { | ||
alg_hnsw->addPoint(data + i * dim, i); | ||
} | ||
|
||
// Query the elements for themselves and measure recall | ||
float correct = 0; | ||
for (int i = 0; i < max_elements; i++) { | ||
std::priority_queue<std::pair<float, hnswlib::labeltype>> result = alg_hnsw->searchKnn(data + i * dim, 1); | ||
hnswlib::labeltype label = result.top().second; | ||
if (label == i) correct++; | ||
} | ||
float recall = correct / max_elements; | ||
std::cout << "Recall: " << recall << "\n"; | ||
|
||
// Serialize index | ||
std::string hnsw_path = "hnsw.bin"; | ||
alg_hnsw->saveIndex(hnsw_path); | ||
delete alg_hnsw; | ||
|
||
// Deserialize index and check recall | ||
alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, hnsw_path); | ||
correct = 0; | ||
for (int i = 0; i < max_elements; i++) { | ||
std::priority_queue<std::pair<float, hnswlib::labeltype>> result = alg_hnsw->searchKnn(data + i * dim, 1); | ||
hnswlib::labeltype label = result.top().second; | ||
if (label == i) correct++; | ||
} | ||
recall = (float)correct / max_elements; | ||
std::cout << "Recall of deserialized index: " << recall << "\n"; | ||
|
||
delete[] data; | ||
delete alg_hnsw; | ||
return 0; | ||
} | ||
``` | ||
|
||
An example of filtering results during the search: | ||
dyashuni marked this conversation as resolved.
Show resolved
Hide resolved
|
||
```cpp | ||
#include "../../hnswlib/hnswlib.h" | ||
|
||
|
||
// Filter that allows labels divisible by divisor | ||
class PickDivisibleIds: public hnswlib::BaseFilterFunctor { | ||
unsigned int divisor = 1; | ||
public: | ||
PickDivisibleIds(unsigned int divisor): divisor(divisor) { | ||
assert(divisor != 0); | ||
} | ||
bool operator()(hnswlib::labeltype label_id) { | ||
return label_id % divisor == 0; | ||
} | ||
}; | ||
|
||
|
||
int main() { | ||
int dim = 16; // Dimension of the elements | ||
int max_elements = 10000; // Maximum number of elements, should be known beforehand | ||
int M = 16; // Tightly connected with internal dimensionality of the data | ||
// strongly affects the memory consumption | ||
int ef_construction = 200; // Controls index search speed/build speed tradeoff | ||
|
||
// Initing index | ||
hnswlib::L2Space space(dim); | ||
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction); | ||
|
||
// Generate random data | ||
std::mt19937 rng; | ||
rng.seed(47); | ||
std::uniform_real_distribution<> distrib_real; | ||
float* data = new float[dim * max_elements]; | ||
for (int i = 0; i < dim * max_elements; i++) { | ||
data[i] = distrib_real(rng); | ||
} | ||
|
||
// Add data to index | ||
for (int i = 0; i < max_elements; i++) { | ||
alg_hnsw->addPoint(data + i * dim, i); | ||
} | ||
|
||
// Create filter that allows only even labels | ||
PickDivisibleIds pickIdsDivisibleByTwo(2); | ||
|
||
// Query the elements for themselves with filter and check returned labels | ||
int k = 10; | ||
for (int i = 0; i < max_elements; i++) { | ||
std::vector<std::pair<float, hnswlib::labeltype>> result = alg_hnsw->searchKnnCloserFirst(data + i * dim, k, &pickIdsDivisibleByTwo); | ||
for (auto item: result) { | ||
if (item.second % 2 == 1) std::cout << "Error: found odd label\n"; | ||
} | ||
} | ||
|
||
delete[] data; | ||
delete alg_hnsw; | ||
return 0; | ||
} | ||
``` | ||
|
||
An example of reusing the memory of deleted elements when new elements are added (via the `allow_replace_deleted` flag): | ||
```cpp | ||
#include "../../hnswlib/hnswlib.h" | ||
|
||
|
||
int main() { | ||
int dim = 16; // Dimension of the elements | ||
int max_elements = 10000; // Maximum number of elements, should be known beforehand | ||
int M = 16; // Tightly connected with internal dimensionality of the data | ||
// strongly affects the memory consumption | ||
int ef_construction = 200; // Controls index search speed/build speed tradeoff | ||
|
||
// Initing index | ||
hnswlib::L2Space space(dim); | ||
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction, 100, true); | ||
|
||
// Generate random data | ||
std::mt19937 rng; | ||
rng.seed(47); | ||
std::uniform_real_distribution<> distrib_real; | ||
float* data = new float[dim * max_elements]; | ||
for (int i = 0; i < dim * max_elements; i++) { | ||
data[i] = distrib_real(rng); | ||
} | ||
|
||
// Add data to index | ||
for (int i = 0; i < max_elements; i++) { | ||
alg_hnsw->addPoint(data + i * dim, i); | ||
} | ||
|
||
// Mark first half of elements as deleted | ||
int num_deleted = max_elements / 2; | ||
for (int i = 0; i < num_deleted; i++) { | ||
alg_hnsw->markDelete(i); | ||
} | ||
|
||
float* add_data = new float[dim * num_deleted]; | ||
for (int i = 0; i < dim * num_deleted; i++) { | ||
add_data[i] = distrib_real(rng); | ||
} | ||
|
||
// Replace deleted data with new elements | ||
// Maximum number of elements is reached therefore we cannot add new items, | ||
// but we can replace the deleted ones by using replace_deleted=true | ||
for (int i = 0; i < num_deleted; i++) { | ||
int label = max_elements + i; | ||
alg_hnsw->addPoint(add_data + i * dim, label, true); | ||
} | ||
|
||
delete[] data; | ||
delete[] add_data; | ||
delete alg_hnsw; | ||
return 0; | ||
} | ||
``` | ||
|
||
Multithreaded examples: | ||
* Creating index, inserting elements, searching [example_mt_search.cpp](example_mt_search.cpp) | ||
* Filtering during the search [example_mt_filter.cpp](example_mt_filter.cpp) | ||
* Reusing the memory of the deleted elements when new elements are being added [example_mt_replace_deleted.cpp](example_mt_replace_deleted.cpp) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
#include "../../hnswlib/hnswlib.h" | ||
|
||
|
||
// Filter that allows labels divisible by divisor | ||
class PickDivisibleIds: public hnswlib::BaseFilterFunctor { | ||
unsigned int divisor = 1; | ||
public: | ||
PickDivisibleIds(unsigned int divisor): divisor(divisor) { | ||
assert(divisor != 0); | ||
} | ||
bool operator()(hnswlib::labeltype label_id) { | ||
return label_id % divisor == 0; | ||
} | ||
}; | ||
|
||
|
||
int main() { | ||
int dim = 16; // Dimension of the elements | ||
int max_elements = 10000; // Maximum number of elements, should be known beforehand | ||
int M = 16; // Tightly connected with internal dimensionality of the data | ||
// strongly affects the memory consumption | ||
int ef_construction = 200; // Controls index search speed/build speed tradeoff | ||
|
||
// Initing index | ||
hnswlib::L2Space space(dim); | ||
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction); | ||
|
||
// Generate random data | ||
std::mt19937 rng; | ||
rng.seed(47); | ||
std::uniform_real_distribution<> distrib_real; | ||
float* data = new float[dim * max_elements]; | ||
for (int i = 0; i < dim * max_elements; i++) { | ||
data[i] = distrib_real(rng); | ||
} | ||
|
||
// Add data to index | ||
for (int i = 0; i < max_elements; i++) { | ||
alg_hnsw->addPoint(data + i * dim, i); | ||
} | ||
|
||
// Create filter that allows only even labels | ||
PickDivisibleIds pickIdsDivisibleByTwo(2); | ||
|
||
// Query the elements for themselves with filter and check returned labels | ||
int k = 10; | ||
for (int i = 0; i < max_elements; i++) { | ||
std::vector<std::pair<float, hnswlib::labeltype>> result = alg_hnsw->searchKnnCloserFirst(data + i * dim, k, &pickIdsDivisibleByTwo); | ||
for (auto item: result) { | ||
if (item.second % 2 == 1) std::cout << "Error: found odd label\n"; | ||
} | ||
} | ||
|
||
delete[] data; | ||
delete alg_hnsw; | ||
return 0; | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.