Commit 0189fbf

Merge pull request #390 from unum-cloud/main-dev

Concurrency & Docs

ashvardanian authored Apr 10, 2024
2 parents 7917489 + f4df7ac commit 0189fbf
Showing 8 changed files with 64 additions and 17 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/release.yml
@@ -778,6 +778,10 @@ jobs:
      npm install -g jsdoc typedoc
    - name: Install USearch from PyPi
      run: pip install usearch
+   - name: Build CJS JS
+     run: |
+       npm ci --ignore-scripts
+       npm run build-js
    - name: Build documentation
      run: cd docs && doxygen conf.dox && make html
    - name: Copy assets
33 changes: 26 additions & 7 deletions README.md
@@ -94,9 +94,10 @@ USearch is compact and broadly compatible without sacrificing performance, prima
| Supported metrics ² | 9 fixed metrics | any metric | extendible |
| Supported languages ³ | C++, Python | 10 languages | portable |
| Supported ID types ⁴ | 32-bit, 64-bit | 32-bit, 40-bit, 64-bit | efficient |
- | Required dependencies ⁵ | BLAS, OpenMP | - | light-weight |
- | Bindings ⁶ | SWIG | Native | low-latency |
- | Python binding size ⁷ | [~ 10 MB][faiss-weight] | [< 1 MB][usearch-weight] | deployable |
+ | Filtering ⁵ | ban-lists | any predicates | composable |
+ | Required dependencies ⁶ | BLAS, OpenMP | - | light-weight |
+ | Bindings ⁷ | SWIG | Native | low-latency |
+ | Python binding size ⁸ | [~ 10 MB][faiss-weight] | [< 1 MB][usearch-weight] | deployable |

[sloc]: https://en.wikipedia.org/wiki/Source_lines_of_code
[faiss-weight]: https://pypi.org/project/faiss-cpu/#files
@@ -107,9 +108,10 @@ USearch is compact and broadly compatible without sacrificing performance, prima
> ² User-defined metrics allow you to customize your search for various applications, from GIS to creating custom metrics for composite embeddings from multiple AI models or hybrid full-text and semantic search.
> ³ With USearch, you can reuse the same preconstructed index in various programming languages.
> ⁴ The 40-bit integer allows you to store 4B+ vectors without allocating 8 bytes for every neighbor reference in the proximity graph.
- > ⁵ Lack of obligatory dependencies makes USearch much more portable.
- > ⁶ Native bindings introduce lower call latencies than more straightforward approaches.
- > ⁷ Lighter bindings make downloads and deployments faster.
+ > ⁵ With USearch, the index can be combined with arbitrary external containers, like Bloom filters or third-party databases, to filter out irrelevant keys during index traversal.
+ > ⁶ Lack of obligatory dependencies makes USearch much more portable.
+ > ⁷ Native bindings introduce lower call latencies than more straightforward approaches.
+ > ⁸ Lighter bindings make downloads and deployments faster.
[intel-benchmarks]: https://www.unum.cloud/blog/2023-11-07-scaling-vector-search-with-intel

@@ -192,7 +194,7 @@ When compared to FAISS's `IndexFlatL2` in Google Colab, __[USearch may offer up
- `faiss.IndexFlatL2`: __55.3 ms__.
- `usearch.index.search`: __2.54 ms__.
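For intuition, both numbers measure exact (brute-force) search: scoring every vector against the query and keeping the best few. A minimal, library-free sketch of that baseline in NumPy — the shapes and seed are illustrative, not the benchmark setup:

```python
import numpy as np

# Hypothetical setup: 100k 256-dimensional float32 vectors and one query.
rng = np.random.default_rng(42)
dataset = rng.random((100_000, 256), dtype=np.float32)
query = rng.random(256, dtype=np.float32)

# Exact L2 search: score every vector, then select the 10 closest.
distances = np.linalg.norm(dataset - query, axis=1)
top10 = np.argpartition(distances, 10)[:10]        # unordered 10 smallest
top10 = top10[np.argsort(distances[top10])]        # order the shortlist by distance
```

Optimized libraries win on exactly this workload by vectorizing the distance kernels and the top-k selection, which is where the gap between the two timings comes from.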

- ## User-Defined Functions
+ ## User-Defined Metrics

While most vector search packages concentrate on just two metrics, "Inner Product distance" and "Euclidean distance", USearch allows arbitrary user-defined metrics.
This flexibility allows you to customize your search for various applications, from computing geospatial coordinates with the rare [Haversine][haversine] distance to creating custom metrics for composite embeddings from multiple AI models, like joint image-text embeddings.
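To make the geospatial case concrete, here is a library-free sketch of the Haversine distance used in a brute-force nearest-neighbor lookup. The city coordinates are illustrative; in USearch the same function would be handed to the index as the metric instead of being called in a Python loop:

```python
import math

def haversine(a, b):
    """Great-circle distance between two (lat, lon) points, in kilometers."""
    lat1, lon1, lat2, lon2 = map(math.radians, (a[0], a[1], b[0], b[1]))
    dlat, dlon = lat2 - lat1, lon2 - lon1
    h = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
    return 2 * 6371.0 * math.asin(math.sqrt(h))  # 6371 km ~ Earth's mean radius

# Brute-force stand-in for an index with a custom metric:
cities = {"Paris": (48.8566, 2.3522), "Warsaw": (52.2297, 21.0122), "Lisbon": (38.7223, -9.1393)}
berlin = (52.5200, 13.4050)
nearest = min(cities, key=lambda name: haversine(berlin, cities[name]))
```

Any function with this shape — two vectors in, one scalar out — can serve as a metric, which is what makes the approach extend naturally to composite embeddings as well.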
@@ -228,6 +230,23 @@ So you can apply it in [obscure][obscure] applications, like searching for simil
[cppyy]: https://cppyy.readthedocs.io/en/latest/
[peachpy]: https://github.com/Maratyszcza/PeachPy

## Filtering and Predicate Functions

Sometimes you may want to cross-reference search results against an external database, or filter them based on some criteria.
In most engines, you'd have to manually perform paging requests, successively filtering the results.
In USearch you can simply pass a predicate function to the search method, and it will be applied directly during graph traversal.
In Rust that would look like this:

```rust
let is_odd = |key: Key| key % 2 == 1;
let query = vec![0.2, 0.1, 0.2, 0.1, 0.3];
let results = index.filtered_search(&query, 10, is_odd).unwrap();
assert!(
    results.keys.iter().all(|&key| key % 2 == 1),
    "All keys must be odd"
);
```

## Memory Efficiency, Downcasting, and Quantization

Training a quantization model and dimension-reduction is a common approach to accelerate vector search.
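As a rough illustration of the memory-vs-precision trade-off — a hand-rolled scalar-quantization sketch, not USearch's internal scheme:

```python
import numpy as np

rng = np.random.default_rng(0)
vectors = rng.uniform(-1.0, 1.0, (1_000, 64)).astype(np.float32)

# Symmetric linear quantization of f32 values in [-1, 1] into int8,
# one illustrative flavor of the downcasting this section describes.
scale = 127.0
quantized = np.clip(np.round(vectors * scale), -127, 127).astype(np.int8)

# 4x smaller in memory...
assert quantized.nbytes * 4 == vectors.nbytes

# ...at the cost of a bounded rounding error per dimension.
reconstructed = quantized.astype(np.float32) / scale
max_error = np.abs(vectors - reconstructed).max()  # at most ~0.5 / 127 ≈ 0.004
```

Smaller scalars also mean more values per cache line and per SIMD register, which is why downcasting tends to speed up distance computations, not just shrink the index.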
9 changes: 6 additions & 3 deletions c/README.md
@@ -42,7 +42,7 @@ int main() {

    // Search:
    usearch_key_t found_keys[10];
-   float found_distances[10];
+   usearch_distance_t found_distances[10];
    size_t found_count = usearch_search(
        index, &vector[0], usearch_scalar_f32_k, 10,
        &found_keys[0], &found_distances[0], &error);
@@ -113,7 +113,8 @@ simsimd_distance_t callback(void const* a, void const* b, void* state) {
// Your custom metric implementation here
}

- usearch_change_metric(index, callback, NULL, usearch_metric_unknown_k, &error);
+ void* callback_state = NULL;
+ usearch_change_metric(index, callback, callback_state, usearch_metric_unknown_k, &error);
```
You can always revert back to one of the native metrics by calling:
@@ -133,8 +134,10 @@ int is_odd(usearch_key_t key, void* state) {
return key % 2;
}

+ usearch_key_t found_keys[10];
+ usearch_distance_t found_distances[10];
  usearch_filtered_search(
-     index, &vector[0], usearch_scalar_f32_k, 10,
+     index, &query[0], usearch_scalar_f32_k, 10,
      &is_odd, NULL, // no state needed for this callback
      &found_keys[0], &found_distances[0], &error);
```
13 changes: 12 additions & 1 deletion cpp/README.md
@@ -71,8 +71,19 @@ Instead of spawning additional threads within USearch, we focus on the thread sa
During initialization, we allocate enough temporary memory for all the cores on the machine.
On the call, the user can supply the identifier of the current thread, making this library easy to integrate with OpenMP and similar tools.
  Here is how parallel indexing may look, when dealing with the low-level engine:
- Moreover, you can take advantage of one of the provided "executors" to parallelize the search:
```cpp
std::size_t executor_threads = std::thread::hardware_concurrency() * 4;
executor_default_t executor(executor_threads);
index.reserve(index_limits_t {vectors.size(), executor.size()});
executor.fixed(vectors.size(), [&](std::size_t thread, std::size_t task) {
    index.add(task, vectors[task].data(), index_update_config_t { .thread = thread });
});
```

Aside from the `executor_default_t`, you can take advantage of one of the provided "executors" to parallelize the search:

- `executor_openmp_t`, which uses OpenMP under the hood.
- `executor_stl_t`, which spawns `std::thread` instances.
5 changes: 3 additions & 2 deletions cpp/test.cpp
@@ -114,8 +114,9 @@ void test_cosine(index_at& index, std::vector<std::vector<scalar_at>> const& vec
expect(std::equal(vector_second, vector_second + dimensions, vec_recovered_from_load.data()));
}

- // Try batch requests
- executor_default_t executor;
+ // Try batch requests, heavily oversubscribing the CPU cores
+ std::size_t executor_threads = std::thread::hardware_concurrency() * 4;
+ executor_default_t executor(executor_threads);
index.reserve({vectors.size(), executor.size()});
executor.fixed(vectors.size() - 3, [&](std::size_t thread, std::size_t task) {
if constexpr (punned_ak) {
9 changes: 6 additions & 3 deletions docs/conf.py
@@ -52,6 +52,9 @@
breathe_projects = {"USearch": "../build/xml"}
breathe_default_project = "USearch"

- js_language = "typescript"
- js_source_path = "../javascript/usearch.ts"
- jsdoc_config_path = "../javascript/tsconfig-cjs.json"
+ # To switch to TypeScript, uncomment the following lines:
+ #
+ # js_language = "typescript"
+ # js_source_path = "../javascript/usearch.ts"
+ # jsdoc_config_path = "../javascript/tsconfig-cjs.json"
+ js_source_path = "../javascript/dist/cjs/usearch.js"
5 changes: 5 additions & 0 deletions include/usearch/index_dense.hpp
@@ -742,6 +742,11 @@ class index_dense_gt {
  unique_lock_t lock(slot_lookup_mutex_);
  slot_lookup_.reserve(limits.members);
  vectors_lookup_.resize(limits.members);

+ // During reserve, no insertions may be happening, so we can safely overwrite the whole collection.
+ std::unique_lock<std::mutex> available_threads_lock(available_threads_mutex_);
+ available_threads_.resize(limits.threads());
+ std::iota(available_threads_.begin(), available_threads_.end(), 0ul);
}
return typed_->reserve(limits);
}
3 changes: 2 additions & 1 deletion rust/README.md
@@ -124,8 +124,9 @@ let weighted_distance = Box::new(move |a: *const f32, b: *const f32| unsafe {

    let image_similarity = f32::cosine(a_slice[0..image_dimensions], b_slice[0..image_dimensions]);
    let text_similarity = f32::cosine(a_slice[image_dimensions..], b_slice[image_dimensions..]);
+   let similarity = (image_weights * image_similarity + text_weights * text_similarity) / (image_weights + text_weights);

-   (1.0 - image_similarity) * image_weights + (1.0 - text_similarity) * text_weights
+   1.0 - similarity
});
index.change_metric(weighted_distance);
```
