Skip to content

Add cpp examples #435

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jan 30, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
- name: Test
timeout-minutes: 15
run: |
python -m unittest discover -v --start-directory examples --pattern "example*.py"
python -m unittest discover -v --start-directory examples/python --pattern "example*.py"
python -m unittest discover -v --start-directory tests/python --pattern "bindings_test*.py"

test_cpp:
Expand Down Expand Up @@ -61,6 +61,12 @@ jobs:
if [ "$RUNNER_OS" == "Windows" ]; then
cp ./Release/* ./
fi
./example_search
./example_filter
./example_replace_deleted
./example_mt_search
./example_mt_filter
./example_mt_replace_deleted
./searchKnnCloserFirst_test
./searchKnnWithFilter_test
./multiThreadLoad_test
Expand Down
2 changes: 1 addition & 1 deletion ALGO_PARAMS.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,5 @@ ef_construction leads to longer construction, but better index quality. At some
not improve the quality of the index. One way to check if the selection of ef_construction was ok is to measure a recall
for M nearest neighbor search when ```ef``` =```ef_construction```: if the recall is lower than 0.9, than there is room
for improvement.
* ```num_elements``` - defines the maximum number of elements in the index. The index can be extened by saving/loading(load_index
* ```num_elements``` - defines the maximum number of elements in the index. The index can be extended by saving/loading (load_index
function has a parameter which defines the new maximum number of elements).
20 changes: 20 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,26 @@ if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
SET( CMAKE_CXX_FLAGS "-Ofast -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -openmp -march=native -fpic -w -fopenmp -ftree-vectorize" )
endif()

# examples
add_executable(example_search examples/cpp/example_search.cpp)
target_link_libraries(example_search hnswlib)

add_executable(example_filter examples/cpp/example_filter.cpp)
target_link_libraries(example_filter hnswlib)

add_executable(example_replace_deleted examples/cpp/example_replace_deleted.cpp)
target_link_libraries(example_replace_deleted hnswlib)

add_executable(example_mt_search examples/cpp/example_mt_search.cpp)
target_link_libraries(example_mt_search hnswlib)

add_executable(example_mt_filter examples/cpp/example_mt_filter.cpp)
target_link_libraries(example_mt_filter hnswlib)

add_executable(example_mt_replace_deleted examples/cpp/example_mt_replace_deleted.cpp)
target_link_libraries(example_mt_replace_deleted hnswlib)

# tests
add_executable(test_updates tests/cpp/updates_test.cpp)
target_link_libraries(test_updates hnswlib)

Expand Down
15 changes: 14 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,12 @@ Properties of `hnswlib.Index` that support reading and writing:


#### Python bindings examples
[See more examples here](examples/EXAMPLES.md)
[See more examples here](examples/python/EXAMPLES.md):
* creating index, inserting elements, searching, serialization/deserialization
* filtering during the search
* delete elements and reusing the memory of the deleted elements when new elements are being added

An example of creating index, inserting elements, searching and pickle serialization:
```python
import hnswlib
import numpy as np
Expand Down Expand Up @@ -230,6 +235,14 @@ labels, distances = p.knn_query(data, k=1)
print("Recall for two batches:", np.mean(labels.reshape(-1) == np.arange(len(data))), "\n")
```

#### C++ examples
[See examples here](examples/cpp/EXAMPLES.md):
* creating index, inserting elements, searching, serialization/deserialization
* filtering during the search
* delete elements and reusing the memory of the deleted elements when new elements are being added
* multithreaded usage


### Bindings installation

You can install from sources:
Expand Down
185 changes: 185 additions & 0 deletions examples/cpp/EXAMPLES.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
# C++ examples

Creating index, inserting elements, searching and serialization
```cpp
#include "../../hnswlib/hnswlib.h"


int main() {
int dim = 16; // Dimension of the elements
int max_elements = 10000; // Maximum number of elements, should be known beforehand
int M = 16; // Tightly connected with internal dimensionality of the data
// strongly affects the memory consumption
int ef_construction = 200; // Controls index search speed/build speed tradeoff

// Initing index
hnswlib::L2Space space(dim);
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction);

// Generate random data
std::mt19937 rng;
rng.seed(47);
std::uniform_real_distribution<> distrib_real;
float* data = new float[dim * max_elements];
for (int i = 0; i < dim * max_elements; i++) {
data[i] = distrib_real(rng);
}

// Add data to index
for (int i = 0; i < max_elements; i++) {
alg_hnsw->addPoint(data + i * dim, i);
}

// Query the elements for themselves and measure recall
float correct = 0;
for (int i = 0; i < max_elements; i++) {
std::priority_queue<std::pair<float, hnswlib::labeltype>> result = alg_hnsw->searchKnn(data + i * dim, 1);
hnswlib::labeltype label = result.top().second;
if (label == i) correct++;
}
float recall = correct / max_elements;
std::cout << "Recall: " << recall << "\n";

// Serialize index
std::string hnsw_path = "hnsw.bin";
alg_hnsw->saveIndex(hnsw_path);
delete alg_hnsw;

// Deserialize index and check recall
alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, hnsw_path);
correct = 0;
for (int i = 0; i < max_elements; i++) {
std::priority_queue<std::pair<float, hnswlib::labeltype>> result = alg_hnsw->searchKnn(data + i * dim, 1);
hnswlib::labeltype label = result.top().second;
if (label == i) correct++;
}
recall = (float)correct / max_elements;
std::cout << "Recall of deserialized index: " << recall << "\n";

delete[] data;
delete alg_hnsw;
return 0;
}
```

An example with a filter during the search:
```cpp
#include "../../hnswlib/hnswlib.h"


// Filter that allows labels divisible by divisor
class PickDivisibleIds: public hnswlib::BaseFilterFunctor {
unsigned int divisor = 1;
public:
PickDivisibleIds(unsigned int divisor): divisor(divisor) {
assert(divisor != 0);
}
bool operator()(hnswlib::labeltype label_id) {
return label_id % divisor == 0;
}
};


int main() {
int dim = 16; // Dimension of the elements
int max_elements = 10000; // Maximum number of elements, should be known beforehand
int M = 16; // Tightly connected with internal dimensionality of the data
// strongly affects the memory consumption
int ef_construction = 200; // Controls index search speed/build speed tradeoff

// Initing index
hnswlib::L2Space space(dim);
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction);

// Generate random data
std::mt19937 rng;
rng.seed(47);
std::uniform_real_distribution<> distrib_real;
float* data = new float[dim * max_elements];
for (int i = 0; i < dim * max_elements; i++) {
data[i] = distrib_real(rng);
}

// Add data to index
for (int i = 0; i < max_elements; i++) {
alg_hnsw->addPoint(data + i * dim, i);
}

// Create filter that allows only even labels
PickDivisibleIds pickIdsDivisibleByTwo(2);

// Query the elements for themselves with filter and check returned labels
int k = 10;
for (int i = 0; i < max_elements; i++) {
std::vector<std::pair<float, hnswlib::labeltype>> result = alg_hnsw->searchKnnCloserFirst(data + i * dim, k, &pickIdsDivisibleByTwo);
for (auto item: result) {
if (item.second % 2 == 1) std::cout << "Error: found odd label\n";
}
}

delete[] data;
delete alg_hnsw;
return 0;
}
```

An example with reusing the memory of the deleted elements when new elements are being added (via `allow_replace_deleted` flag):
```cpp
#include "../../hnswlib/hnswlib.h"


int main() {
int dim = 16; // Dimension of the elements
int max_elements = 10000; // Maximum number of elements, should be known beforehand
int M = 16; // Tightly connected with internal dimensionality of the data
// strongly affects the memory consumption
int ef_construction = 200; // Controls index search speed/build speed tradeoff

// Initing index
hnswlib::L2Space space(dim);
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction, 100, true);

// Generate random data
std::mt19937 rng;
rng.seed(47);
std::uniform_real_distribution<> distrib_real;
float* data = new float[dim * max_elements];
for (int i = 0; i < dim * max_elements; i++) {
data[i] = distrib_real(rng);
}

// Add data to index
for (int i = 0; i < max_elements; i++) {
alg_hnsw->addPoint(data + i * dim, i);
}

// Mark first half of elements as deleted
int num_deleted = max_elements / 2;
for (int i = 0; i < num_deleted; i++) {
alg_hnsw->markDelete(i);
}

float* add_data = new float[dim * num_deleted];
for (int i = 0; i < dim * num_deleted; i++) {
add_data[i] = distrib_real(rng);
}

// Replace deleted data with new elements
// Maximum number of elements is reached therefore we cannot add new items,
// but we can replace the deleted ones by using replace_deleted=true
for (int i = 0; i < num_deleted; i++) {
int label = max_elements + i;
alg_hnsw->addPoint(add_data + i * dim, label, true);
}

delete[] data;
delete[] add_data;
delete alg_hnsw;
return 0;
}
```

Multithreaded examples:
* Creating index, inserting elements, searching [example_mt_search.cpp](example_mt_search.cpp)
* Filtering during the search [example_mt_filter.cpp](example_mt_filter.cpp)
* Reusing the memory of the deleted elements when new elements are being added [example_mt_replace_deleted.cpp](example_mt_replace_deleted.cpp)
57 changes: 57 additions & 0 deletions examples/cpp/example_filter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#include "../../hnswlib/hnswlib.h"


// Filter that allows labels divisible by divisor
class PickDivisibleIds: public hnswlib::BaseFilterFunctor {
unsigned int divisor = 1;
public:
PickDivisibleIds(unsigned int divisor): divisor(divisor) {
assert(divisor != 0);
}
bool operator()(hnswlib::labeltype label_id) {
return label_id % divisor == 0;
}
};


int main() {
int dim = 16; // Dimension of the elements
int max_elements = 10000; // Maximum number of elements, should be known beforehand
int M = 16; // Tightly connected with internal dimensionality of the data
// strongly affects the memory consumption
int ef_construction = 200; // Controls index search speed/build speed tradeoff

// Initing index
hnswlib::L2Space space(dim);
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction);

// Generate random data
std::mt19937 rng;
rng.seed(47);
std::uniform_real_distribution<> distrib_real;
float* data = new float[dim * max_elements];
for (int i = 0; i < dim * max_elements; i++) {
data[i] = distrib_real(rng);
}

// Add data to index
for (int i = 0; i < max_elements; i++) {
alg_hnsw->addPoint(data + i * dim, i);
}

// Create filter that allows only even labels
PickDivisibleIds pickIdsDivisibleByTwo(2);

// Query the elements for themselves with filter and check returned labels
int k = 10;
for (int i = 0; i < max_elements; i++) {
std::vector<std::pair<float, hnswlib::labeltype>> result = alg_hnsw->searchKnnCloserFirst(data + i * dim, k, &pickIdsDivisibleByTwo);
for (auto item: result) {
if (item.second % 2 == 1) std::cout << "Error: found odd label\n";
}
}

delete[] data;
delete alg_hnsw;
return 0;
}
Loading