From 5ace1387584cd12f0fc2fd2e118b11bb393e9f33 Mon Sep 17 00:00:00 2001 From: AleksandrKent <44079602+AleksandrKent@users.noreply.github.com> Date: Tue, 8 Aug 2023 20:02:43 +0400 Subject: [PATCH 01/70] Improve: Index finalizer method --- python/usearch/index.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/usearch/index.py b/python/usearch/index.py index c76611d7..d8cd5afb 100644 --- a/python/usearch/index.py +++ b/python/usearch/index.py @@ -829,6 +829,9 @@ def level_stats(self, level: int) -> _CompiledIndexStats: """ return self._compiled.level_stats(level) + def __del__(self): + self.reset() + def __repr__(self) -> str: f = "usearch.Index({} x {}, {}, expansion: {} & {}, {} vectors in {} levels)" return f.format( From 962990e3b60aa14131be37e496b9a4d369a76361 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 9 Aug 2023 11:17:56 +0400 Subject: [PATCH 02/70] Make: Avoid OpenMP flags when not needed --- cpp/CMakeLists.txt | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index e73a45ef..7844e295 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -19,18 +19,24 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -Wextra -Wno-conversion -Wno-unknown-pragmas") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmax-errors=1") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic") + if(${USEARCH_USE_OPENMP}) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") + endif() + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 -g") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address -fsanitize=leak -fsanitize=alignment -fsanitize=undefined") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wfatal-errors") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic") + if(${USEARCH_USE_OPENMP}) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") + endif() + elseif(CMAKE_CXX_COMPILER_ID STREQUAL "NVIDIA" OR CMAKE_CXX_COMPILER_ID STREQUAL "NVHPC") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --extended-lambda") From be4c87d128b7b6252bcdc7f7dfc37f75fd0f9059 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 9 Aug 2023 11:18:32 +0400 Subject: [PATCH 03/70] Make: Override Clang flags --- c/CMakeLists.txt | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt index 5315ec80..f673ada7 100644 --- a/c/CMakeLists.txt +++ b/c/CMakeLists.txt @@ -11,14 +11,20 @@ set(USEARCH_PUNNED_INCLUDE_DIRS # This article discusses a better way to allow building either static or shared libraries # https://alexreinking.com/blog/building-a-dual-shared-and-static-library-with-cmake.html if (${USEARCH_BUILD_STATIC}) - set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -static-libstdc++") - add_library(usearch_c STATIC lib.cpp) -else () - add_library(usearch_c SHARED lib.cpp) + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -static-libstdc++") + elseif(CMAKE_CXX_COMPILER_ID 
MATCHES "Clang") + set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -static") + endif() + + add_library(usearch_c STATIC "${CMAKE_CURRENT_SOURCE_DIR}/lib.cpp") +else() + add_library(usearch_c SHARED "${CMAKE_CURRENT_SOURCE_DIR}/lib.cpp") endif() set_target_properties(usearch_c PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(usearch_c PROPERTIES CXX_STANDARD 11) +set_target_properties(usearch_c PROPERTIES C_STANDARD 99) target_include_directories(usearch_c PRIVATE ${USEARCH_PUNNED_INCLUDE_DIRS}) set_target_properties(usearch_c PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) @@ -41,8 +47,7 @@ if (${USEARCH_LOOKUP_LABEL}) endif() if(${USEARCH_BUILD_TEST}) - add_executable(test_c test.c external_retreival_testutils.c) + add_executable(test_c "${CMAKE_CURRENT_SOURCE_DIR}/test.c") target_link_libraries(test_c usearch_c) set_target_properties(test_c PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) - endif() From 4463f0d7c6bfcf8582c777fb3004204fd57d7fe2 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 9 Aug 2023 11:29:18 +0400 Subject: [PATCH 04/70] Make: GoLang tests --- .github/workflows/prerelease.yml | 18 +++++++++++++++++- c/Makefile | 18 +++++++++++------- docs/compilation.md | 5 +++-- golang/lib.go | 2 +- 4 files changed, 32 insertions(+), 11 deletions(-) diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index df06b94b..fe8fcfe1 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -122,6 +122,22 @@ jobs: toolchain: stable override: true + test_golang: + name: Test GoLang + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - run: git submodule update --init --recursive + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version: '1.15' + - name: Build C library for cGo + run: | + make -C ./c libusearch_c.so + mv ./c/libusearch_c.so ./golang/libusearch_c.so + cd golang && ls && go test -v + test_java: name: Test Java runs-on: ubuntu-latest @@ -160,7 +176,7 @@ jobs: sudo apt install -y nodejs git clone https://github.com/emscripten-core/emsdk.git ./emsdk/emsdk install latest - - name: Build USearch by Emscripten + - name: Build USearch using Emscripten run: | ./emsdk/emsdk activate latest && source ./emsdk/emsdk_env.sh emcmake cmake -DUSEARCH_BUILD_BENCHMARK=0 -DUSEARCH_BUILD_WASM=1 -B ./build -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -s TOTAL_MEMORY=64MB" && emmake make -C ./build diff --git a/c/Makefile b/c/Makefile index 5973f0e7..c752edee 100644 --- a/c/Makefile +++ b/c/Makefile @@ -4,14 +4,18 @@ CXX = g++ C_FLAGS = -std=c99 CXX_FLAGS = -std=c++11 -CXX_FLAGS += -Wall -Wextra -Wno-conversion -Wno-unknown-pragmas -O3 -march=native -pedantic +CXX_FLAGS += -Wall -Wextra -Wno-conversion -Wno-unknown-pragmas -pedantic HEADER_INCLUDES = -I. -I ../include/ -I ../fp16/include/ -I ../robin-map/include/ -.PHONY: build -build: - $(CXX) $(CXX_FLAGS) -o libusearch.so -O3 lib.cpp $(HEADER_INCLUDES) -shared -fPIC +.PHONY: libusearch_c.a +libusearch_c.a: + $(CXX) $(CXX_FLAGS) -o libusearch_c.a -O3 lib.cpp $(HEADER_INCLUDES) -static -.PHONY: test -test: - $(CC) $(C_FLAGS) test.c -L. -lusearch -Wl,-rpath,. -o test +.PHONY: libusearch_c.so +libusearch_c.so: + $(CXX) $(CXX_FLAGS) -o libusearch_c.so -O3 lib.cpp $(HEADER_INCLUDES) -shared -fPIC + +.PHONY: test_c +test_c: + $(CC) $(C_FLAGS) test.c -L. -lusearch_c -Wl,-rpath,. 
-o test diff --git a/docs/compilation.md b/docs/compilation.md index 6d00aff0..51d3e24e 100644 --- a/docs/compilation.md +++ b/docs/compilation.md @@ -168,7 +168,7 @@ cmake -B ./build_release -DUSEARCH_BUILD_C=1 && make -C ./build_release -j Linux: ```sh -g++ -std=c++11 -shared -fPIC c/lib.cpp -I ./include/ -I ./fp16/include/ -I ./robin-map/include/ -o libusearch.so +g++ -std=c++11 -shared -fPIC c/lib.cpp -I ./include/ -I ./fp16/include/ -I ./robin-map/include/ -o libusearch_c.a ``` @@ -178,7 +178,8 @@ GoLang bindings are based on C. So one should first compile the C library, link it with GoLang, and only then run tests. ```sh -cd golang && make -C ../c build && mv ../c/libusearch.so libusearch.so && go test -v +make -C ./c libusearch_c.so && mv ./c/libusearch_c.so ./golang/libusearch_c.so +cd golang && go test -v ; cd .. ``` ## Wolfram diff --git a/golang/lib.go b/golang/lib.go index 08890f91..39739e84 100644 --- a/golang/lib.go +++ b/golang/lib.go @@ -7,7 +7,7 @@ import ( /* #cgo CFLAGS: -I${SRCDIR}/../c/ -#cgo LDFLAGS: -L./ -lusearch +#cgo LDFLAGS: -L${SRCDIR}/. -Wl,-rpath,$SRCDIR/. -lusearch_c #include #include */ From e89aa8e5a92cf9768f5855a1464ae4cfebfc0416 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 9 Aug 2023 18:48:19 +0400 Subject: [PATCH 05/70] Make: Drop Pandas dependency --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 7215d0e4..f60903dc 100644 --- a/setup.py +++ b/setup.py @@ -94,7 +94,6 @@ ext_modules=ext_modules, install_requires=[ "numpy", - "pandas", "tqdm", 'ucall; python_version >= "3.9"', ], From 4b17ded156c7b514a3cdf6d04236a83adc64f132 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 9 Aug 2023 20:17:52 +0400 Subject: [PATCH 06/70] Improve: Multi-threading and error handling --- cpp/test.cpp | 5 +++++ include/usearch/index.hpp | 4 ++++ include/usearch/index_plugins.hpp | 29 ++++++++++++++++++++--------- python/lib.cpp | 3 +++ python/scripts/test.py | 16 +++++++++++++++- 5 files changed, 47 insertions(+), 10 deletions(-) diff --git a/cpp/test.cpp b/cpp/test.cpp index 82075c02..0c2edd0b 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -39,6 +39,11 @@ void test_cosine(index_at& index, std::vector> const& vec index.reserve(10); index.add(key_first, vector_first, args...); + if constexpr (punned_ak) { + auto result = index.add(key_first, vector_first, args...); + expect(!result); + } + // Default approximate search key_t matched_labels[10] = {0}; distance_t matched_distances[10] = {0}; diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 3be878f0..39931d3e 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -324,6 +324,9 @@ class error_t { message_ = message; return *this; } + + error_t(error_t const&) = delete; + error_t& operator=(error_t const&) = delete; error_t(error_t&& other) noexcept : message_(exchange(other.message_, nullptr)) {} error_t& operator=(error_t&& other) noexcept { std::swap(message_, other.message_); @@ -331,6 +334,7 @@ class error_t { } explicit operator bool() const noexcept { return message_ != nullptr; } char const* what() const noexcept { return message_; } + char const* release() noexcept { return exchange(message_, nullptr); } #if defined(__cpp_exceptions) || defined(__EXCEPTIONS) ~error_t() noexcept(false) { diff --git a/include/usearch/index_plugins.hpp b/include/usearch/index_plugins.hpp index 6d7e2b04..7eb5f3a8 100644 --- 
a/include/usearch/index_plugins.hpp +++ b/include/usearch/index_plugins.hpp @@ -429,6 +429,20 @@ inline f16_bits_t::f16_bits_t(i8_bits_t v) noexcept : f16_bits_t(float(v)) {} class executor_stl_t { std::size_t threads_count_{}; + struct jthread_t { + std::thread native_; + + jthread_t() = default; + jthread_t(jthread_t&&) = default; + jthread_t(jthread_t const&) = delete; + template jthread_t(callable_at&& func) : native_([=]() { func(); }) {} + + ~jthread_t() { + if (native_.joinable()) + native_.join(); + } + }; + public: /** * @param threads_count The number of threads to be used for parallel execution. @@ -449,11 +463,12 @@ class executor_stl_t { */ template void execute_bulk(std::size_t tasks, thread_aware_function_at&& thread_aware_function) noexcept(false) { - std::vector threads_pool; + std::vector threads_pool; std::size_t tasks_per_thread = tasks; - if (threads_count_ > 1) { - tasks_per_thread = (tasks / threads_count_) + ((tasks % threads_count_) != 0); - for (std::size_t thread_idx = 1; thread_idx < threads_count_; ++thread_idx) { + std::size_t threads_count = (std::min)(threads_count_, tasks); + if (threads_count > 1) { + tasks_per_thread = (tasks / threads_count) + ((tasks % threads_count) != 0); + for (std::size_t thread_idx = 1; thread_idx < threads_count; ++thread_idx) { threads_pool.emplace_back([=]() { for (std::size_t task_idx = thread_idx * tasks_per_thread; task_idx < (std::min)(tasks, thread_idx * tasks_per_thread + tasks_per_thread); ++task_idx) @@ -463,8 +478,6 @@ class executor_stl_t { } for (std::size_t task_idx = 0; task_idx < (std::min)(tasks, tasks_per_thread); ++task_idx) thread_aware_function(0, task_idx); - for (std::thread& thread : threads_pool) - thread.join(); } /** @@ -476,12 +489,10 @@ class executor_stl_t { void execute_bulk(thread_aware_function_at&& thread_aware_function) noexcept(false) { if (threads_count_ == 1) return thread_aware_function(0); - std::vector threads_pool; + std::vector threads_pool; for (std::size_t thread_idx = 1; thread_idx < threads_count_; ++thread_idx) threads_pool.emplace_back([=]() { thread_aware_function(thread_idx); }); thread_aware_function(0); - for (std::thread& thread : threads_pool) - thread.join(); } }; diff --git a/python/lib.cpp b/python/lib.cpp index b238d65d..b06a9cd5 100644 --- a/python/lib.cpp +++ b/python/lib.cpp @@ -12,7 +12,10 @@ * * @copyright Copyright (c) 2023 */ +#if !defined(__cpp_exceptions) #define __cpp_exceptions 1 +#endif + #include // `std::numeric_limits` #include // `std::thread` diff --git a/python/scripts/test.py b/python/scripts/test.py index 9ec0ea9c..06437512 100644 --- a/python/scripts/test.py +++ b/python/scripts/test.py @@ -108,7 +108,7 @@ def test_exact_search(rows: int, cols: int): @pytest.mark.parametrize("index_type", index_types) @pytest.mark.parametrize("numpy_type", numpy_types) @pytest.mark.parametrize("connectivity", connectivity_options) -def test_index( +def test_minimal_index( ndim: int, metric: MetricKind, index_type: ScalarKind, @@ -129,6 +129,16 @@ def test_index( vector = random_vectors(count=1, ndim=ndim, dtype=numpy_type).flatten() index.add(42, vector) + # Ban vectors with a wrong number of dimensions + with pytest.raises(Exception): + index.add( + 42, random_vectors(count=1, ndim=(ndim * 2), dtype=numpy_type).flatten() + ) + + # Ban duplicates unless explicitly allowed + with pytest.raises(Exception): + index.add(42, vector) + assert len(index) == 1, "Size after addition" assert 42 in index, "Presence in the index" assert 42 in index.keys, "Presence among keys" @@ 
-239,6 +249,10 @@ def test_index_batch( assert len(index) == batch_size assert np.allclose(index.get_vectors(keys).astype(numpy_type), vectors, atol=0.1) + # Ban duplicates unless explicitly allowed + with pytest.raises(Exception): + index.add(keys, vectors, threads=2) + matches: BatchMatches = index.search(vectors, 10, threads=2) assert matches.keys.shape[0] == matches.distances.shape[0] assert len(matches) == batch_size From a0c705bfa2df07999488df44dd64f1ca86f64756 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 9 Aug 2023 20:18:18 +0400 Subject: [PATCH 07/70] Fix: Normalizing path type in Python --- python/usearch/index.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/usearch/index.py b/python/usearch/index.py index d8cd5afb..a9719024 100644 --- a/python/usearch/index.py +++ b/python/usearch/index.py @@ -456,6 +456,7 @@ def __init__( self.path = path if path and os.path.exists(path): + path = os.fspath(path) if view: self._compiled.view(path) else: @@ -463,6 +464,7 @@ def __init__( @staticmethod def metadata(path: os.PathLike) -> Optional[dict]: + path = os.fspath(path) if not os.path.exists(path): return None try: @@ -472,6 +474,7 @@ def metadata(path: os.PathLike) -> Optional[dict]: @staticmethod def restore(path: os.PathLike, view: bool = False) -> Optional[Index]: + path = os.fspath(path) meta = Index.metadata(path) if not meta: return None From 9163d79e76de4b22e1ca7156e81574a871ec657f Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Thu, 10 Aug 2023 14:42:18 +0400 Subject: [PATCH 08/70] Fix: Concurrent interruptions & error handling --- c/lib.cpp | 12 +-- cpp/test.cpp | 5 +- include/usearch/index.hpp | 23 +++-- include/usearch/index_dense.hpp | 2 +- include/usearch/index_plugins.hpp | 75 ++++++++++++++-- javascript/lib.cpp | 38 ++++++-- objc/USearchObjective.mm | 22 ++--- python/lib.cpp | 138 ++++++++++++++++++++++-------- 8 files changed, 235 insertions(+), 80 deletions(-) diff --git a/c/lib.cpp b/c/lib.cpp index 76b9c703..282bcf29 100644 --- a/c/lib.cpp +++ b/c/lib.cpp @@ -111,7 +111,7 @@ USEARCH_EXPORT void usearch_save(usearch_index_t index, char const* path, usearc assert(index && path && error); serialization_result_t result = reinterpret_cast(index)->save(path); if (!result) - *error = result.error.what(); + *error = result.error.release(); } USEARCH_EXPORT void usearch_load(usearch_index_t index, char const* path, usearch_error_t* error) { @@ -119,7 +119,7 @@ USEARCH_EXPORT void usearch_load(usearch_index_t index, char const* path, usearc assert(index && path && error); serialization_result_t result = reinterpret_cast(index)->load(path); if (!result) - *error = result.error.what(); + *error = result.error.release(); } USEARCH_EXPORT void usearch_view(usearch_index_t index, char const* path, usearch_error_t* error) { @@ -127,7 +127,7 @@ USEARCH_EXPORT void usearch_view(usearch_index_t index, char const* path, usearc assert(index && path && error); serialization_result_t result = reinterpret_cast(index)->view(path); if (!result) - *error = result.error.what(); + *error = result.error.release(); } USEARCH_EXPORT size_t usearch_size(usearch_index_t index, usearch_error_t*) { // @@ -159,7 +159,7 @@ USEARCH_EXPORT void usearch_add( assert(index && vector && error); add_result_t result = add_(reinterpret_cast(index), key, vector, to_native_scalar(kind)); if (!result) - *error = result.error.what(); + *error = result.error.release(); } USEARCH_EXPORT bool 
usearch_contains(usearch_index_t index, usearch_key_t key, usearch_error_t*) { @@ -175,7 +175,7 @@ USEARCH_EXPORT size_t usearch_search( search_result_t result = search_(reinterpret_cast(index), vector, to_native_scalar(kind), results_limit); if (!result) { - *error = result.error.what(); + *error = result.error.release(); return 0; } @@ -195,7 +195,7 @@ USEARCH_EXPORT bool usearch_remove(usearch_index_t index, usearch_key_t key, use assert(index && error); labeling_result_t result = reinterpret_cast(index)->remove(key); if (!result) - *error = result.error.what(); + *error = result.error.release(); return result.completed; } } diff --git a/cpp/test.cpp b/cpp/test.cpp index 0c2edd0b..8d73abc7 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -42,6 +42,7 @@ void test_cosine(index_at& index, std::vector> const& vec if constexpr (punned_ak) { auto result = index.add(key_first, vector_first, args...); expect(!result); + result.error.release(); } // Default approximate search @@ -100,7 +101,7 @@ void test_cosine(index_at& index, std::vector> const& vec // Try batch requests executor_default_t executor; index.reserve({vectors.size(), executor.size()}); - executor.execute_bulk(vectors.size() - 3, [&](std::size_t thread, std::size_t task) { + executor.fixed(vectors.size() - 3, [&](std::size_t thread, std::size_t task) { index_update_config_t config; config.thread = thread; index.add(key_max - task - 3, vectors[task + 3].data(), args..., config); @@ -208,7 +209,7 @@ template void test_tanimoto(std::size_t dime std::generate(scalars.begin(), scalars.end(), [] { return static_cast(std::rand()); }); index.reserve({batch_size + index.size(), executor.size()}); - executor.execute_bulk(batch_size, [&](std::size_t thread, std::size_t task) { + executor.fixed(batch_size, [&](std::size_t thread, std::size_t task) { index_update_config_t config; config.thread = thread; index.add(task + 25000, scalars.data() + index.scalar_words() * task, config); diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 39931d3e..5f2709bd 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1082,9 +1082,9 @@ struct dummy_prefetch_t { /** * @brief An example of what a USearch-compatible executor (thread-pool) should look like. * - * It's expected to have `execute_bulk(callback)` API to schedule one task per thread; - * an identical `execute_bulk(count, callback)` overload that also accepts the number - * of tasks, and somehow schedules them between threads; as well as `size()` to + * It's expected to have `parallel(callback)` API to schedule one task per thread; + * an identical `fixed(count, callback)` and `dynamic(count, callback)` overloads that also accepts + * the number of tasks, and somehow schedules them between threads; as well as `size()` to * determine the number of available threads. 
*/ struct dummy_executor_t { @@ -1092,13 +1092,20 @@ struct dummy_executor_t { std::size_t size() const noexcept { return 1; } template - void execute_bulk(std::size_t tasks, thread_aware_function_at&& thread_aware_function) noexcept { + void fixed(std::size_t tasks, thread_aware_function_at&& thread_aware_function) noexcept { for (std::size_t task_idx = 0; task_idx != tasks; ++task_idx) thread_aware_function(0, task_idx); } template - void execute_bulk(thread_aware_function_at&& thread_aware_function) noexcept { + void dynamic(std::size_t tasks, thread_aware_function_at&& thread_aware_function) noexcept { + for (std::size_t task_idx = 0; task_idx != tasks; ++task_idx) + if (!thread_aware_function(0, task_idx)) + break; + } + + template + void parallel(thread_aware_function_at&& thread_aware_function) noexcept { thread_aware_function(0); } }; @@ -2605,7 +2612,7 @@ class index_gt { buffer_gt slots_and_levels(size()); // For every bottom level node, determine its parent cluster - executor.execute_bulk(slots_and_levels.size(), [&](std::size_t thread_idx, std::size_t old_slot) { + executor.fixed(slots_and_levels.size(), [&](std::size_t thread_idx, std::size_t old_slot) { context_t& context = contexts_[thread_idx]; std::size_t cluster = search_for_one_( // values[citerator_at(old_slot)], // @@ -2684,7 +2691,7 @@ class index_gt { // Erase all the incoming links std::size_t nodes_count = size(); - executor.execute_bulk(nodes_count, [&](std::size_t, std::size_t node_idx) { + executor.fixed(nodes_count, [&](std::size_t, std::size_t node_idx) { node_t node = node_at_(node_idx); for (level_t level = 0; level <= node.level(); ++level) { neighbors_ref_t neighbors = neighbors_(node, level); @@ -3294,7 +3301,7 @@ static join_result_t join( // std::atomic visited_members{0}; // Concurrently process all the men - executor.execute_bulk([&](std::size_t thread_idx) { + executor.parallel([&](std::size_t thread_idx) { index_search_config_t search_config; search_config.expansion = config.expansion; search_config.exact = config.exact; diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 38c0ab23..6effbb37 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -617,7 +617,7 @@ class index_dense_gt { * @brief Saves serialized binary index representation to a stream. */ template - serialization_result_t stream(output_callback_at&& callback, serialization_config_t config = {}) const noexcept { + serialization_result_t stream(output_callback_at&& callback, serialization_config_t config = {}) const { serialization_result_t result; std::uint64_t matrix_rows = 0; diff --git a/include/usearch/index_plugins.hpp b/include/usearch/index_plugins.hpp index 7eb5f3a8..567ada5d 100644 --- a/include/usearch/index_plugins.hpp +++ b/include/usearch/index_plugins.hpp @@ -456,13 +456,13 @@ class executor_stl_t { std::size_t size() const noexcept { return threads_count_; } /** - * @brief Executes tasks in bulk using the specified thread-aware function. + * @brief Executes a fixed number of tasks using the specified thread-aware function. * @param tasks The total number of tasks to be executed. * @param thread_aware_function The thread-aware function to be called for each thread index and task index. * @throws If an exception occurs during execution of the thread-aware function. 
*/ template - void execute_bulk(std::size_t tasks, thread_aware_function_at&& thread_aware_function) noexcept(false) { + void fixed(std::size_t tasks, thread_aware_function_at&& thread_aware_function) noexcept(false) { std::vector threads_pool; std::size_t tasks_per_thread = tasks; std::size_t threads_count = (std::min)(threads_count_, tasks); @@ -480,13 +480,44 @@ class executor_stl_t { thread_aware_function(0, task_idx); } + /** + * @brief Executes limited number of tasks using the specified thread-aware function. + * @param tasks The upper bound on the number of tasks. + * @param thread_aware_function The thread-aware function to be called for each thread index and task index. + * @throws If an exception occurs during execution of the thread-aware function. + */ + template + void dynamic(std::size_t tasks, thread_aware_function_at&& thread_aware_function) noexcept(false) { + std::vector threads_pool; + std::size_t tasks_per_thread = tasks; + std::size_t threads_count = (std::min)(threads_count_, tasks); + std::atomic_bool stop{false}; + if (threads_count > 1) { + tasks_per_thread = (tasks / threads_count) + ((tasks % threads_count) != 0); + for (std::size_t thread_idx = 1; thread_idx < threads_count; ++thread_idx) { + threads_pool.emplace_back([=, &stop]() { + for (std::size_t task_idx = thread_idx * tasks_per_thread; + task_idx < (std::min)(tasks, thread_idx * tasks_per_thread + tasks_per_thread) && + !stop.load(std::memory_order_relaxed); + ++task_idx) + if (!thread_aware_function(thread_idx, task_idx)) + stop.store(true, std::memory_order_relaxed); + }); + } + } + for (std::size_t task_idx = 0; + task_idx < (std::min)(tasks, tasks_per_thread) && !stop.load(std::memory_order_relaxed); ++task_idx) + if (!thread_aware_function(0, task_idx)) + stop.store(true, std::memory_order_relaxed); + } + /** * @brief Saturates every available thread with the given workload, until they finish. * @param thread_aware_function The thread-aware function to be called for each thread index. * @throws If an exception occurs during execution of the thread-aware function. */ template - void execute_bulk(thread_aware_function_at&& thread_aware_function) noexcept(false) { + void parallel(thread_aware_function_at&& thread_aware_function) noexcept(false) { if (threads_count_ == 1) return thread_aware_function(0); std::vector threads_pool; @@ -523,20 +554,52 @@ class executor_openmp_t { * @throws If an exception occurs during execution of the thread-aware function. */ template - void execute_bulk(std::size_t tasks, thread_aware_function_at&& thread_aware_function) noexcept(false) { -#pragma omp parallel for schedule(dynamic) + void fixed(std::size_t tasks, thread_aware_function_at&& thread_aware_function) noexcept(false) { +#pragma omp parallel for schedule(dynamic, 1) for (std::size_t i = 0; i != tasks; ++i) { thread_aware_function(omp_get_thread_num(), i); } } + /** + * @brief Executes tasks in bulk using the specified thread-aware function. + * @param tasks The total number of tasks to be executed. + * @param thread_aware_function The thread-aware function to be called for each thread index and task index. + * @throws If an exception occurs during execution of the thread-aware function. + * + * Uses OpenMP cancellation points, if `OMP_CANCELLATION` environment variable is set. 
+ * http://jakascorner.com/blog/2016/08/omp-cancel.html + */ + template + void dynamic(std::size_t tasks, thread_aware_function_at&& thread_aware_function) noexcept(false) { + if (omp_get_cancellation()) { +#pragma omp parallel for schedule(dynamic, 1) + for (std::size_t i = 0; i != tasks; ++i) { +#pragma omp cancellation point for + if (!thread_aware_function(omp_get_thread_num(), i)) { +#pragma omp cancel for + } + } + } else { + bool stop{false}; +#pragma omp parallel for schedule(dynamic, 1) shared(stop) + for (std::size_t i = 0; i != tasks; ++i) { +#pragma omp atomic read + bool local_stop = stop; + if (!local_stop && !thread_aware_function(omp_get_thread_num(), i)) +#pragma omp atomic write + stop = true; + } + } + } + /** * @brief Saturates every available thread with the given workload, until they finish. * @param thread_aware_function The thread-aware function to be called for each thread index. * @throws If an exception occurs during execution of the thread-aware function. */ template - void execute_bulk(thread_aware_function_at&& thread_aware_function) noexcept(false) { + void parallel(thread_aware_function_at&& thread_aware_function) noexcept(false) { #pragma omp parallel { thread_aware_function(omp_get_thread_num()); } } diff --git a/javascript/lib.cpp b/javascript/lib.cpp index afdb1bb6..95778007 100644 --- a/javascript/lib.cpp +++ b/javascript/lib.cpp @@ -106,7 +106,7 @@ Index::Index(Napi::CallbackInfo const& ctx) : Napi::ObjectWrap(ctx) { std::string quantization_str = params.Get("quantization").As().Utf8Value(); expected_gt expected = scalar_kind_from_name(quantization_str.c_str(), quantization_str.size()); if (!expected) { - Napi::TypeError::New(env, expected.error.what()).ThrowAsJavaScriptException(); + Napi::TypeError::New(env, expected.error.release()).ThrowAsJavaScriptException(); return; } quantization = *expected; @@ -118,7 +118,7 @@ Index::Index(Napi::CallbackInfo const& ctx) : Napi::ObjectWrap(ctx) { std::string metric_str = params.Get("metric").As().Utf8Value(); expected_gt expected = metric_from_name(metric_str.c_str(), metric_str.size()); if (!expected) { - Napi::TypeError::New(env, expected.error.what()).ThrowAsJavaScriptException(); + Napi::TypeError::New(env, expected.error.release()).ThrowAsJavaScriptException(); return; } metric_kind = *expected; @@ -154,7 +154,10 @@ void Index::Save(Napi::CallbackInfo const& ctx) { try { std::string path = ctx[0].As(); - native_->save(path.c_str()); + auto result = native_->save(path.c_str()); + if (!result) + return Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); + } catch (...) { Napi::TypeError::New(env, "Serialization failed").ThrowAsJavaScriptException(); } @@ -171,7 +174,10 @@ void Index::Load(Napi::CallbackInfo const& ctx) { try { std::string path = ctx[0].As(); - native_->load(path.c_str()); + auto result = native_->load(path.c_str()); + if (!result) + return Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); + } catch (...) { Napi::TypeError::New(env, "Loading failed").ThrowAsJavaScriptException(); } @@ -188,7 +194,10 @@ void Index::View(Napi::CallbackInfo const& ctx) { try { std::string path = ctx[0].As(); - native_->view(path.c_str()); + auto result = native_->view(path.c_str()); + if (!result) + return Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); + } catch (...) 
{ Napi::TypeError::New(env, "Memory-mapping failed").ThrowAsJavaScriptException(); } @@ -216,7 +225,10 @@ void Index::Add(Napi::CallbackInfo const& ctx) { return Napi::TypeError::New(env, "Wrong number of dimensions").ThrowAsJavaScriptException(); try { - native_->add(key, vector); + auto result = native_->add(key, vector); + if (!result) + return Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); + } catch (std::bad_alloc const&) { return Napi::TypeError::New(env, "Out of memory").ThrowAsJavaScriptException(); } catch (...) { @@ -234,7 +246,8 @@ void Index::Add(Napi::CallbackInfo const& ctx) { .ThrowAsJavaScriptException(); if (native_->size() + length >= native_->capacity()) - native_->reserve(ceil2(native_->size() + length)); + if (!native_->reserve(ceil2(native_->size() + length))) + return Napi::TypeError::New(env, "Out of memory!").ThrowAsJavaScriptException(); for (std::size_t i = 0; i < length; i++) { Napi::Value key_js = keys_js[i]; @@ -280,7 +293,14 @@ Napi::Value Index::Search(Napi::CallbackInfo const& ctx) { static_assert(std::is_same::value, "Matches.key interface expects BigUint64Array"); Napi::Float32Array distances_js = Napi::Float32Array::New(env, wanted); try { - std::uint64_t count = native_->search(vector, wanted).dump_to(matches_js.Data(), distances_js.Data()); + + auto result = native_->search(vector, wanted); + if (!result) { + Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); + return {}; + } + + std::uint64_t count = result.dump_to(matches_js.Data(), distances_js.Data()); Napi::Object result_js = Napi::Object::New(env); result_js.Set("keys", matches_js); result_js.Set("distances", distances_js); @@ -313,7 +333,7 @@ Napi::Value Index::Remove(Napi::CallbackInfo const& ctx) { try { auto result = native_->remove(key); if (!result) { - Napi::TypeError::New(env, "Removal has failed").ThrowAsJavaScriptException(); + Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); return {}; } return Napi::Boolean::New(env, result.completed); diff --git a/objc/USearchObjective.mm b/objc/USearchObjective.mm index bc2160bf..ac8315f4 100644 --- a/objc/USearchObjective.mm +++ b/objc/USearchObjective.mm @@ -128,7 +128,7 @@ - (void)addSingle:(USearchKey)key if (!result) { @throw [NSException exceptionWithName:@"Can't add to index" - reason:[NSString stringWithUTF8String:result.error.what()] + reason:[NSString stringWithUTF8String:result.error.release()] userInfo:nil]; } } @@ -141,7 +141,7 @@ - (UInt32)searchSingle:(Float32 const *_Nonnull)vector if (!result) { @throw [NSException exceptionWithName:@"Can't find in index" - reason:[NSString stringWithUTF8String:result.error.what()] + reason:[NSString stringWithUTF8String:result.error.release()] userInfo:nil]; } @@ -155,7 +155,7 @@ - (void)addDouble:(USearchKey)key if (!result) { @throw [NSException exceptionWithName:@"Can't add to index" - reason:[NSString stringWithUTF8String:result.error.what()] + reason:[NSString stringWithUTF8String:result.error.release()] userInfo:nil]; } } @@ -168,7 +168,7 @@ - (UInt32)searchDouble:(Float64 const *_Nonnull)vector if (!result) { @throw [NSException exceptionWithName:@"Can't find in index" - reason:[NSString stringWithUTF8String:result.error.what()] + reason:[NSString stringWithUTF8String:result.error.release()] userInfo:nil]; } @@ -182,7 +182,7 @@ - (void)addHalf:(USearchKey)key if (!result) { @throw [NSException exceptionWithName:@"Can't add to index" - reason:[NSString stringWithUTF8String:result.error.what()] + 
reason:[NSString stringWithUTF8String:result.error.release()] userInfo:nil]; } } @@ -195,7 +195,7 @@ - (UInt32)searchHalf:(void const *_Nonnull)vector if (!result) { @throw [NSException exceptionWithName:@"Can't find in index" - reason:[NSString stringWithUTF8String:result.error.what()] + reason:[NSString stringWithUTF8String:result.error.release()] userInfo:nil]; } @@ -224,7 +224,7 @@ - (void)remove:(USearchKey)key { if (!result) { @throw [NSException exceptionWithName:@"Can't remove an entry" - reason:[NSString stringWithUTF8String:result.error.what()] + reason:[NSString stringWithUTF8String:result.error.release()] userInfo:nil]; } } @@ -234,7 +234,7 @@ - (void)rename:(USearchKey)key to:(USearchKey)to { if (!result) { @throw [NSException exceptionWithName:@"Can't rename the entry" - reason:[NSString stringWithUTF8String:result.error.what()] + reason:[NSString stringWithUTF8String:result.error.release()] userInfo:nil]; } } @@ -252,7 +252,7 @@ - (void)save:(NSString *)path { if (!result) { @throw [NSException exceptionWithName:@"Can't save to disk" - reason:[NSString stringWithUTF8String:result.error.what()] + reason:[NSString stringWithUTF8String:result.error.release()] userInfo:nil]; } } @@ -270,7 +270,7 @@ - (void)load:(NSString *)path { if (!result) { @throw [NSException exceptionWithName:@"Can't load from disk" - reason:[NSString stringWithUTF8String:result.error.what()] + reason:[NSString stringWithUTF8String:result.error.release()] userInfo:nil]; } } @@ -288,7 +288,7 @@ - (void)view:(NSString *)path { if (!result) { @throw [NSException exceptionWithName:@"Can't view from disk" - reason:[NSString stringWithUTF8String:result.error.what()] + reason:[NSString stringWithUTF8String:result.error.release()] userInfo:nil]; } } diff --git a/python/lib.cpp b/python/lib.cpp index b06a9cd5..9da0dfbc 100644 --- a/python/lib.cpp +++ b/python/lib.cpp @@ -77,15 +77,16 @@ struct dense_indexes_py_t { shards_.reserve(shards_.size() + paths.size()); std::mutex shards_mutex; - executor_default_t{threads}.execute_bulk(paths.size(), [&](std::size_t, std::size_t task_idx) { + executor_default_t{threads}.dynamic(paths.size(), [&](std::size_t, std::size_t task_idx) { index_dense_t index = index_dense_t::make(paths[task_idx].c_str(), view); if (!index) - return; + return false; auto shared_index = std::make_shared(std::move(index)); std::unique_lock lock(shards_mutex); shards_.push_back(shared_index); if (PyErr_CheckSignals() != 0) throw py::error_already_set(); + return true; }); } @@ -180,6 +181,18 @@ scalar_kind_t numpy_string_to_kind(std::string const& name) { return scalar_kind_t::unknown_k; } +template void forward_error(result_at&& result) { + + if (!result) + throw std::invalid_argument(result.error.release()); + + int signals = PyErr_CheckSignals(); + if (signals != 0) + throw py::error_already_set(); +} + +using atomic_error_t = std::atomic; + template static void add_typed_to_index( // dense_index_py_t& index, // @@ -189,18 +202,33 @@ static void add_typed_to_index( // Py_ssize_t vectors_count = vectors_info.shape[0]; byte_t const* vectors_data = reinterpret_cast(vectors_info.ptr); byte_t const* keys_data = reinterpret_cast(keys_info.ptr); + atomic_error_t atomic_error{nullptr}; - executor_default_t{threads}.execute_bulk(vectors_count, [&](std::size_t thread_idx, std::size_t task_idx) { + executor_default_t{threads}.dynamic(vectors_count, [&](std::size_t thread_idx, std::size_t task_idx) { index_dense_update_config_t config; config.force_vector_copy = force_copy; config.thread = thread_idx; key_t 
key = *reinterpret_cast(keys_data + task_idx * keys_info.strides[0]); scalar_at const* vector = reinterpret_cast(vectors_data + task_idx * vectors_info.strides[0]); dense_add_result_t result = index.add(key, vector, config); - result.error.raise(); - if (PyErr_CheckSignals() != 0) - throw py::error_already_set(); + if (!result) { + atomic_error = result.error.release(); + return false; + } + + // We don't want to check for signals from multiple threads + if (thread_idx == 0) + if (PyErr_CheckSignals() != 0) + return false; + return true; }); + + // Raise the error from a single thread + auto error = atomic_error.load(); + if (error) { + PyErr_SetString(PyExc_RuntimeError, error); + throw py::error_already_set(); + } } template @@ -265,21 +293,37 @@ static void search_typed( // if (!index.reserve(index_limits_t(index.size(), threads))) throw std::invalid_argument("Out of memory!"); - executor_default_t{threads}.execute_bulk(vectors_count, [&](std::size_t thread_idx, std::size_t task_idx) { + atomic_error_t atomic_error{nullptr}; + executor_default_t{threads}.dynamic(vectors_count, [&](std::size_t thread_idx, std::size_t task_idx) { index_search_config_t config; config.thread = thread_idx; config.exact = exact; scalar_at const* vector = (scalar_at const*)(vectors_data + task_idx * vectors_info.strides[0]); dense_search_result_t result = index.search(vector, wanted, config); - result.error.raise(); + if (!result) { + atomic_error = result.error.release(); + return false; + } + counts_py1d(task_idx) = static_cast(result.dump_to(&keys_py2d(task_idx, 0), &distances_py2d(task_idx, 0))); stats_visited_members += result.visited_members; stats_computed_distances += result.computed_distances; - if (PyErr_CheckSignals() != 0) - throw py::error_already_set(); + + // We don't want to check for signals from multiple threads + if (thread_idx == 0) + if (PyErr_CheckSignals() != 0) + return false; + return true; }); + + // Raise the error from a single thread + auto error = atomic_error.load(); + if (error) { + PyErr_SetString(PyExc_RuntimeError, error); + throw py::error_already_set(); + } } template @@ -305,15 +349,18 @@ static void search_typed( // if (!query_mutexes) throw std::bad_alloc(); - executor_default_t{threads}.execute_bulk(indexes.shards_.size(), [&](std::size_t, std::size_t task_idx) { + atomic_error_t atomic_error{nullptr}; + executor_default_t{threads}.dynamic(indexes.shards_.size(), [&](std::size_t thread_idx, std::size_t task_idx) { dense_index_py_t& index = *indexes.shards_[task_idx].get(); index_limits_t limits; limits.members = index.size(); limits.threads_add = 0; limits.threads_search = 1; - if (!index.reserve(limits)) - throw std::bad_alloc(); + if (!index.reserve(limits)) { + atomic_error = "Out of memory!"; + return false; + } index_search_config_t config; config.thread = 0; @@ -322,7 +369,11 @@ static void search_typed( // for (std::size_t vector_idx = 0; vector_idx != static_cast(vectors_count); ++vector_idx) { scalar_at const* vector = (scalar_at const*)(vectors_data + vector_idx * vectors_info.strides[0]); dense_search_result_t result = index.search(vector, wanted, config); - result.error.raise(); + if (!result) { + atomic_error = result.error.release(); + return false; + } + { auto lock = query_mutexes.lock(vector_idx); counts_py1d(vector_idx) = static_cast(result.merge_into( // @@ -334,10 +385,21 @@ static void search_typed( // stats_visited_members += result.visited_members; stats_computed_distances += result.computed_distances; - if (PyErr_CheckSignals() != 0) - throw 
py::error_already_set(); + + // We don't want to check for signals from multiple threads + if (thread_idx == 0) + if (PyErr_CheckSignals() != 0) + return false; + return true; } }); + + // Raise the error from a single thread + auto error = atomic_error.load(); + if (error) { + PyErr_SetString(PyExc_RuntimeError, error); + throw py::error_already_set(); + } } /** @@ -421,7 +483,7 @@ static void search_typed_brute_force( // if (!query_mutexes) throw std::bad_alloc(); - executor_default_t{threads}.execute_bulk(tasks_count, [&](std::size_t, std::size_t task_idx) { + executor_default_t{threads}.dynamic(tasks_count, [&](std::size_t thread_idx, std::size_t task_idx) { // std::size_t dataset_idx = task_idx / queries_count; std::size_t query_idx = task_idx % queries_count; @@ -437,7 +499,7 @@ static void search_typed_brute_force( // std::size_t& matches = reinterpret_cast(counts_py1d(query_idx)); if (matches == wanted) if (distances[wanted - 1] <= distance) - return; + return true; std::size_t offset = std::lower_bound(distances, distances + matches, distance) - distances; @@ -449,8 +511,11 @@ static void search_typed_brute_force( // matches += matches != wanted; } - if (PyErr_CheckSignals() != 0) - throw py::error_already_set(); + // We don't want to check for signals from multiple threads + if (thread_idx == 0) + if (PyErr_CheckSignals() != 0) + return false; + return true; }); } @@ -526,7 +591,7 @@ static std::unordered_map join_index( // std::size_t threads = (std::min)(a.limits().threads(), b.limits().threads()); executor_default_t executor{threads}; join_result_t result = a.join(b, config, a_to_b, b_to_a, executor); - result.error.raise(); + forward_error(result); return a_to_b; } @@ -536,7 +601,7 @@ static dense_index_py_t copy_index(dense_index_py_t const& index) { using copy_result_t = typename dense_index_py_t::copy_result_t; index_copy_config_t config; copy_result_t result = index.copy(config); - result.error.raise(); + forward_error(result); return std::move(result.index); } @@ -662,7 +727,8 @@ PYBIND11_MODULE(compiled, m) { m.def("index_dense_metadata", [](std::string const& path) -> py::dict { index_dense_metadata_result_t meta = index_dense_metadata(path.c_str()); - meta.error.raise(); + forward_error(meta); + index_dense_head_t const& head = meta.head; py::dict result; @@ -731,7 +797,7 @@ PYBIND11_MODULE(compiled, m) { "rename", [](dense_index_py_t& index, key_t from, key_t to) -> bool { dense_labeling_result_t result = index.rename(from, to); - result.error.raise(); + forward_error(result); return result.completed; }, py::arg("from"), py::arg("to")); @@ -740,7 +806,7 @@ PYBIND11_MODULE(compiled, m) { "remove", [](dense_index_py_t& index, key_t key, bool compact, std::size_t threads) -> bool { dense_labeling_result_t result = index.remove(key); - result.error.raise(); + forward_error(result); if (!compact) return result.completed; @@ -758,7 +824,7 @@ PYBIND11_MODULE(compiled, m) { "remove", [](dense_index_py_t& index, std::vector const& keys, bool compact, std::size_t threads) -> std::size_t { dense_labeling_result_t result = index.remove(keys.begin(), keys.end()); - result.error.raise(); + forward_error(result); if (!compact) return result.completed; @@ -781,8 +847,7 @@ PYBIND11_MODULE(compiled, m) { i.def_property_readonly( // "dtype", [](dense_index_py_t const& index) -> scalar_kind_t { return index.scalar_kind(); }); i.def_property_readonly( // - "memory_usage", [](dense_index_py_t const& index) -> std::size_t { return index.memory_usage(); }, - py::call_guard()); + 
"memory_usage", [](dense_index_py_t const& index) -> std::size_t { return index.memory_usage(); }); i.def_property("expansion_add", &dense_index_py_t::expansion_add, &dense_index_py_t::change_expansion_add); i.def_property("expansion_search", &dense_index_py_t::expansion_search, &dense_index_py_t::change_expansion_search); @@ -810,15 +875,14 @@ PYBIND11_MODULE(compiled, m) { i.def("__contains__", &dense_index_py_t::contains); i.def("__getitem__", &get_member, py::arg("key"), py::arg("dtype") = scalar_kind_t::f32_k); - i.def("save", &save_index, py::arg("path"), py::call_guard()); - i.def("load", &load_index, py::arg("path"), py::call_guard()); - i.def("view", &view_index, py::arg("path"), py::call_guard()); - i.def("reset", &reset_index, py::call_guard()); - i.def("clear", &clear_index, py::call_guard()); - i.def("copy", ©_index, py::call_guard()); - i.def("compact", &compact_index, py::call_guard()); - i.def("join", &join_index, py::arg("other"), py::arg("max_proposals") = 0, py::arg("exact") = false, - py::call_guard()); + i.def("save", &save_index, py::arg("path")); + i.def("load", &load_index, py::arg("path")); + i.def("view", &view_index, py::arg("path")); + i.def("reset", &reset_index); + i.def("clear", &clear_index); + i.def("copy", ©_index); + i.def("compact", &compact_index); + i.def("join", &join_index, py::arg("other"), py::arg("max_proposals") = 0, py::arg("exact") = false); using punned_index_stats_t = typename dense_index_py_t::stats_t; auto i_stats = py::class_(m, "IndexStats"); From bc26b4e68560c65f992ea1b02979b40cdf65a3bd Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Thu, 10 Aug 2023 14:42:18 +0400 Subject: [PATCH 09/70] Fix: Concurrent interruptions & error handling --- c/lib.cpp | 12 +-- cpp/test.cpp | 5 +- include/usearch/index.hpp | 23 +++-- include/usearch/index_dense.hpp | 2 +- include/usearch/index_plugins.hpp | 72 ++++++++++++++-- javascript/lib.cpp | 38 ++++++-- objc/USearchObjective.mm | 22 ++--- python/lib.cpp | 138 ++++++++++++++++++++++-------- 8 files changed, 232 insertions(+), 80 deletions(-) diff --git a/c/lib.cpp b/c/lib.cpp index 76b9c703..282bcf29 100644 --- a/c/lib.cpp +++ b/c/lib.cpp @@ -111,7 +111,7 @@ USEARCH_EXPORT void usearch_save(usearch_index_t index, char const* path, usearc assert(index && path && error); serialization_result_t result = reinterpret_cast(index)->save(path); if (!result) - *error = result.error.what(); + *error = result.error.release(); } USEARCH_EXPORT void usearch_load(usearch_index_t index, char const* path, usearch_error_t* error) { @@ -119,7 +119,7 @@ USEARCH_EXPORT void usearch_load(usearch_index_t index, char const* path, usearc assert(index && path && error); serialization_result_t result = reinterpret_cast(index)->load(path); if (!result) - *error = result.error.what(); + *error = result.error.release(); } USEARCH_EXPORT void usearch_view(usearch_index_t index, char const* path, usearch_error_t* error) { @@ -127,7 +127,7 @@ USEARCH_EXPORT void usearch_view(usearch_index_t index, char const* path, usearc assert(index && path && error); serialization_result_t result = reinterpret_cast(index)->view(path); if (!result) - *error = result.error.what(); + *error = result.error.release(); } USEARCH_EXPORT size_t usearch_size(usearch_index_t index, usearch_error_t*) { // @@ -159,7 +159,7 @@ USEARCH_EXPORT void usearch_add( assert(index && vector && error); add_result_t result = add_(reinterpret_cast(index), key, vector, to_native_scalar(kind)); if (!result) - *error = 
result.error.what(); + *error = result.error.release(); } USEARCH_EXPORT bool usearch_contains(usearch_index_t index, usearch_key_t key, usearch_error_t*) { @@ -175,7 +175,7 @@ USEARCH_EXPORT size_t usearch_search( search_result_t result = search_(reinterpret_cast(index), vector, to_native_scalar(kind), results_limit); if (!result) { - *error = result.error.what(); + *error = result.error.release(); return 0; } @@ -195,7 +195,7 @@ USEARCH_EXPORT bool usearch_remove(usearch_index_t index, usearch_key_t key, use assert(index && error); labeling_result_t result = reinterpret_cast(index)->remove(key); if (!result) - *error = result.error.what(); + *error = result.error.release(); return result.completed; } } diff --git a/cpp/test.cpp b/cpp/test.cpp index 0c2edd0b..8d73abc7 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -42,6 +42,7 @@ void test_cosine(index_at& index, std::vector> const& vec if constexpr (punned_ak) { auto result = index.add(key_first, vector_first, args...); expect(!result); + result.error.release(); } // Default approximate search @@ -100,7 +101,7 @@ void test_cosine(index_at& index, std::vector> const& vec // Try batch requests executor_default_t executor; index.reserve({vectors.size(), executor.size()}); - executor.execute_bulk(vectors.size() - 3, [&](std::size_t thread, std::size_t task) { + executor.fixed(vectors.size() - 3, [&](std::size_t thread, std::size_t task) { index_update_config_t config; config.thread = thread; index.add(key_max - task - 3, vectors[task + 3].data(), args..., config); @@ -208,7 +209,7 @@ template void test_tanimoto(std::size_t dime std::generate(scalars.begin(), scalars.end(), [] { return static_cast(std::rand()); }); index.reserve({batch_size + index.size(), executor.size()}); - executor.execute_bulk(batch_size, [&](std::size_t thread, std::size_t task) { + executor.fixed(batch_size, [&](std::size_t thread, std::size_t task) { index_update_config_t config; config.thread = thread; index.add(task + 25000, scalars.data() + index.scalar_words() * task, config); diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 39931d3e..5f2709bd 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1082,9 +1082,9 @@ struct dummy_prefetch_t { /** * @brief An example of what a USearch-compatible executor (thread-pool) should look like. * - * It's expected to have `execute_bulk(callback)` API to schedule one task per thread; - * an identical `execute_bulk(count, callback)` overload that also accepts the number - * of tasks, and somehow schedules them between threads; as well as `size()` to + * It's expected to have `parallel(callback)` API to schedule one task per thread; + * an identical `fixed(count, callback)` and `dynamic(count, callback)` overloads that also accepts + * the number of tasks, and somehow schedules them between threads; as well as `size()` to * determine the number of available threads. 
*/ struct dummy_executor_t { @@ -1092,13 +1092,20 @@ struct dummy_executor_t { std::size_t size() const noexcept { return 1; } template - void execute_bulk(std::size_t tasks, thread_aware_function_at&& thread_aware_function) noexcept { + void fixed(std::size_t tasks, thread_aware_function_at&& thread_aware_function) noexcept { for (std::size_t task_idx = 0; task_idx != tasks; ++task_idx) thread_aware_function(0, task_idx); } template - void execute_bulk(thread_aware_function_at&& thread_aware_function) noexcept { + void dynamic(std::size_t tasks, thread_aware_function_at&& thread_aware_function) noexcept { + for (std::size_t task_idx = 0; task_idx != tasks; ++task_idx) + if (!thread_aware_function(0, task_idx)) + break; + } + + template + void parallel(thread_aware_function_at&& thread_aware_function) noexcept { thread_aware_function(0); } }; @@ -2605,7 +2612,7 @@ class index_gt { buffer_gt slots_and_levels(size()); // For every bottom level node, determine its parent cluster - executor.execute_bulk(slots_and_levels.size(), [&](std::size_t thread_idx, std::size_t old_slot) { + executor.fixed(slots_and_levels.size(), [&](std::size_t thread_idx, std::size_t old_slot) { context_t& context = contexts_[thread_idx]; std::size_t cluster = search_for_one_( // values[citerator_at(old_slot)], // @@ -2684,7 +2691,7 @@ class index_gt { // Erase all the incoming links std::size_t nodes_count = size(); - executor.execute_bulk(nodes_count, [&](std::size_t, std::size_t node_idx) { + executor.fixed(nodes_count, [&](std::size_t, std::size_t node_idx) { node_t node = node_at_(node_idx); for (level_t level = 0; level <= node.level(); ++level) { neighbors_ref_t neighbors = neighbors_(node, level); @@ -3294,7 +3301,7 @@ static join_result_t join( // std::atomic visited_members{0}; // Concurrently process all the men - executor.execute_bulk([&](std::size_t thread_idx) { + executor.parallel([&](std::size_t thread_idx) { index_search_config_t search_config; search_config.expansion = config.expansion; search_config.exact = config.exact; diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 38c0ab23..6effbb37 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -617,7 +617,7 @@ class index_dense_gt { * @brief Saves serialized binary index representation to a stream. */ template - serialization_result_t stream(output_callback_at&& callback, serialization_config_t config = {}) const noexcept { + serialization_result_t stream(output_callback_at&& callback, serialization_config_t config = {}) const { serialization_result_t result; std::uint64_t matrix_rows = 0; diff --git a/include/usearch/index_plugins.hpp b/include/usearch/index_plugins.hpp index 7eb5f3a8..7cb0dbdd 100644 --- a/include/usearch/index_plugins.hpp +++ b/include/usearch/index_plugins.hpp @@ -456,13 +456,13 @@ class executor_stl_t { std::size_t size() const noexcept { return threads_count_; } /** - * @brief Executes tasks in bulk using the specified thread-aware function. + * @brief Executes a fixed number of tasks using the specified thread-aware function. * @param tasks The total number of tasks to be executed. * @param thread_aware_function The thread-aware function to be called for each thread index and task index. * @throws If an exception occurs during execution of the thread-aware function. 
*/ template - void execute_bulk(std::size_t tasks, thread_aware_function_at&& thread_aware_function) noexcept(false) { + void fixed(std::size_t tasks, thread_aware_function_at&& thread_aware_function) noexcept(false) { std::vector threads_pool; std::size_t tasks_per_thread = tasks; std::size_t threads_count = (std::min)(threads_count_, tasks); @@ -480,13 +480,44 @@ class executor_stl_t { thread_aware_function(0, task_idx); } + /** + * @brief Executes limited number of tasks using the specified thread-aware function. + * @param tasks The upper bound on the number of tasks. + * @param thread_aware_function The thread-aware function to be called for each thread index and task index. + * @throws If an exception occurs during execution of the thread-aware function. + */ + template + void dynamic(std::size_t tasks, thread_aware_function_at&& thread_aware_function) noexcept(false) { + std::vector threads_pool; + std::size_t tasks_per_thread = tasks; + std::size_t threads_count = (std::min)(threads_count_, tasks); + std::atomic_bool stop{false}; + if (threads_count > 1) { + tasks_per_thread = (tasks / threads_count) + ((tasks % threads_count) != 0); + for (std::size_t thread_idx = 1; thread_idx < threads_count; ++thread_idx) { + threads_pool.emplace_back([=, &stop]() { + for (std::size_t task_idx = thread_idx * tasks_per_thread; + task_idx < (std::min)(tasks, thread_idx * tasks_per_thread + tasks_per_thread) && + !stop.load(std::memory_order_relaxed); + ++task_idx) + if (!thread_aware_function(thread_idx, task_idx)) + stop.store(true, std::memory_order_relaxed); + }); + } + } + for (std::size_t task_idx = 0; + task_idx < (std::min)(tasks, tasks_per_thread) && !stop.load(std::memory_order_relaxed); ++task_idx) + if (!thread_aware_function(0, task_idx)) + stop.store(true, std::memory_order_relaxed); + } + /** * @brief Saturates every available thread with the given workload, until they finish. * @param thread_aware_function The thread-aware function to be called for each thread index. * @throws If an exception occurs during execution of the thread-aware function. */ template - void execute_bulk(thread_aware_function_at&& thread_aware_function) noexcept(false) { + void parallel(thread_aware_function_at&& thread_aware_function) noexcept(false) { if (threads_count_ == 1) return thread_aware_function(0); std::vector threads_pool; @@ -523,20 +554,49 @@ class executor_openmp_t { * @throws If an exception occurs during execution of the thread-aware function. */ template - void execute_bulk(std::size_t tasks, thread_aware_function_at&& thread_aware_function) noexcept(false) { -#pragma omp parallel for schedule(dynamic) + void fixed(std::size_t tasks, thread_aware_function_at&& thread_aware_function) noexcept(false) { +#pragma omp parallel for schedule(dynamic, 1) for (std::size_t i = 0; i != tasks; ++i) { thread_aware_function(omp_get_thread_num(), i); } } + /** + * @brief Executes tasks in bulk using the specified thread-aware function. + * @param tasks The total number of tasks to be executed. + * @param thread_aware_function The thread-aware function to be called for each thread index and task index. + * @throws If an exception occurs during execution of the thread-aware function. + * + * Uses OpenMP cancellation points, if `OMP_CANCELLATION` environment variable is set. 
+ * http://jakascorner.com/blog/2016/08/omp-cancel.html + */ + template + void dynamic(std::size_t tasks, thread_aware_function_at&& thread_aware_function) noexcept(false) { + if (omp_get_cancellation()) { +#pragma omp parallel for schedule(dynamic, 1) + for (std::size_t i = 0; i != tasks; ++i) { +#pragma omp cancellation point for + if (!thread_aware_function(omp_get_thread_num(), i)) { +#pragma omp cancel for + } + } + } else { + std::atomic_bool stop{false}; +#pragma omp parallel for schedule(dynamic, 1) shared(stop) + for (std::size_t i = 0; i != tasks; ++i) { + if (!stop.load(std::memory_order_relaxed) && !thread_aware_function(omp_get_thread_num(), i)) + stop.store(true, std::memory_order_relaxed); + } + } + } + /** * @brief Saturates every available thread with the given workload, until they finish. * @param thread_aware_function The thread-aware function to be called for each thread index. * @throws If an exception occurs during execution of the thread-aware function. */ template - void execute_bulk(thread_aware_function_at&& thread_aware_function) noexcept(false) { + void parallel(thread_aware_function_at&& thread_aware_function) noexcept(false) { #pragma omp parallel { thread_aware_function(omp_get_thread_num()); } } diff --git a/javascript/lib.cpp b/javascript/lib.cpp index afdb1bb6..95778007 100644 --- a/javascript/lib.cpp +++ b/javascript/lib.cpp @@ -106,7 +106,7 @@ Index::Index(Napi::CallbackInfo const& ctx) : Napi::ObjectWrap(ctx) { std::string quantization_str = params.Get("quantization").As().Utf8Value(); expected_gt expected = scalar_kind_from_name(quantization_str.c_str(), quantization_str.size()); if (!expected) { - Napi::TypeError::New(env, expected.error.what()).ThrowAsJavaScriptException(); + Napi::TypeError::New(env, expected.error.release()).ThrowAsJavaScriptException(); return; } quantization = *expected; @@ -118,7 +118,7 @@ Index::Index(Napi::CallbackInfo const& ctx) : Napi::ObjectWrap(ctx) { std::string metric_str = params.Get("metric").As().Utf8Value(); expected_gt expected = metric_from_name(metric_str.c_str(), metric_str.size()); if (!expected) { - Napi::TypeError::New(env, expected.error.what()).ThrowAsJavaScriptException(); + Napi::TypeError::New(env, expected.error.release()).ThrowAsJavaScriptException(); return; } metric_kind = *expected; @@ -154,7 +154,10 @@ void Index::Save(Napi::CallbackInfo const& ctx) { try { std::string path = ctx[0].As(); - native_->save(path.c_str()); + auto result = native_->save(path.c_str()); + if (!result) + return Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); + } catch (...) { Napi::TypeError::New(env, "Serialization failed").ThrowAsJavaScriptException(); } @@ -171,7 +174,10 @@ void Index::Load(Napi::CallbackInfo const& ctx) { try { std::string path = ctx[0].As(); - native_->load(path.c_str()); + auto result = native_->load(path.c_str()); + if (!result) + return Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); + } catch (...) { Napi::TypeError::New(env, "Loading failed").ThrowAsJavaScriptException(); } @@ -188,7 +194,10 @@ void Index::View(Napi::CallbackInfo const& ctx) { try { std::string path = ctx[0].As(); - native_->view(path.c_str()); + auto result = native_->view(path.c_str()); + if (!result) + return Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); + } catch (...) 
{ Napi::TypeError::New(env, "Memory-mapping failed").ThrowAsJavaScriptException(); } @@ -216,7 +225,10 @@ void Index::Add(Napi::CallbackInfo const& ctx) { return Napi::TypeError::New(env, "Wrong number of dimensions").ThrowAsJavaScriptException(); try { - native_->add(key, vector); + auto result = native_->add(key, vector); + if (!result) + return Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); + } catch (std::bad_alloc const&) { return Napi::TypeError::New(env, "Out of memory").ThrowAsJavaScriptException(); } catch (...) { @@ -234,7 +246,8 @@ void Index::Add(Napi::CallbackInfo const& ctx) { .ThrowAsJavaScriptException(); if (native_->size() + length >= native_->capacity()) - native_->reserve(ceil2(native_->size() + length)); + if (!native_->reserve(ceil2(native_->size() + length))) + return Napi::TypeError::New(env, "Out of memory!").ThrowAsJavaScriptException(); for (std::size_t i = 0; i < length; i++) { Napi::Value key_js = keys_js[i]; @@ -280,7 +293,14 @@ Napi::Value Index::Search(Napi::CallbackInfo const& ctx) { static_assert(std::is_same::value, "Matches.key interface expects BigUint64Array"); Napi::Float32Array distances_js = Napi::Float32Array::New(env, wanted); try { - std::uint64_t count = native_->search(vector, wanted).dump_to(matches_js.Data(), distances_js.Data()); + + auto result = native_->search(vector, wanted); + if (!result) { + Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); + return {}; + } + + std::uint64_t count = result.dump_to(matches_js.Data(), distances_js.Data()); Napi::Object result_js = Napi::Object::New(env); result_js.Set("keys", matches_js); result_js.Set("distances", distances_js); @@ -313,7 +333,7 @@ Napi::Value Index::Remove(Napi::CallbackInfo const& ctx) { try { auto result = native_->remove(key); if (!result) { - Napi::TypeError::New(env, "Removal has failed").ThrowAsJavaScriptException(); + Napi::TypeError::New(env, result.error.release()).ThrowAsJavaScriptException(); return {}; } return Napi::Boolean::New(env, result.completed); diff --git a/objc/USearchObjective.mm b/objc/USearchObjective.mm index bc2160bf..ac8315f4 100644 --- a/objc/USearchObjective.mm +++ b/objc/USearchObjective.mm @@ -128,7 +128,7 @@ - (void)addSingle:(USearchKey)key if (!result) { @throw [NSException exceptionWithName:@"Can't add to index" - reason:[NSString stringWithUTF8String:result.error.what()] + reason:[NSString stringWithUTF8String:result.error.release()] userInfo:nil]; } } @@ -141,7 +141,7 @@ - (UInt32)searchSingle:(Float32 const *_Nonnull)vector if (!result) { @throw [NSException exceptionWithName:@"Can't find in index" - reason:[NSString stringWithUTF8String:result.error.what()] + reason:[NSString stringWithUTF8String:result.error.release()] userInfo:nil]; } @@ -155,7 +155,7 @@ - (void)addDouble:(USearchKey)key if (!result) { @throw [NSException exceptionWithName:@"Can't add to index" - reason:[NSString stringWithUTF8String:result.error.what()] + reason:[NSString stringWithUTF8String:result.error.release()] userInfo:nil]; } } @@ -168,7 +168,7 @@ - (UInt32)searchDouble:(Float64 const *_Nonnull)vector if (!result) { @throw [NSException exceptionWithName:@"Can't find in index" - reason:[NSString stringWithUTF8String:result.error.what()] + reason:[NSString stringWithUTF8String:result.error.release()] userInfo:nil]; } @@ -182,7 +182,7 @@ - (void)addHalf:(USearchKey)key if (!result) { @throw [NSException exceptionWithName:@"Can't add to index" - reason:[NSString stringWithUTF8String:result.error.what()] + 
reason:[NSString stringWithUTF8String:result.error.release()] userInfo:nil]; } } @@ -195,7 +195,7 @@ - (UInt32)searchHalf:(void const *_Nonnull)vector if (!result) { @throw [NSException exceptionWithName:@"Can't find in index" - reason:[NSString stringWithUTF8String:result.error.what()] + reason:[NSString stringWithUTF8String:result.error.release()] userInfo:nil]; } @@ -224,7 +224,7 @@ - (void)remove:(USearchKey)key { if (!result) { @throw [NSException exceptionWithName:@"Can't remove an entry" - reason:[NSString stringWithUTF8String:result.error.what()] + reason:[NSString stringWithUTF8String:result.error.release()] userInfo:nil]; } } @@ -234,7 +234,7 @@ - (void)rename:(USearchKey)key to:(USearchKey)to { if (!result) { @throw [NSException exceptionWithName:@"Can't rename the entry" - reason:[NSString stringWithUTF8String:result.error.what()] + reason:[NSString stringWithUTF8String:result.error.release()] userInfo:nil]; } } @@ -252,7 +252,7 @@ - (void)save:(NSString *)path { if (!result) { @throw [NSException exceptionWithName:@"Can't save to disk" - reason:[NSString stringWithUTF8String:result.error.what()] + reason:[NSString stringWithUTF8String:result.error.release()] userInfo:nil]; } } @@ -270,7 +270,7 @@ - (void)load:(NSString *)path { if (!result) { @throw [NSException exceptionWithName:@"Can't load from disk" - reason:[NSString stringWithUTF8String:result.error.what()] + reason:[NSString stringWithUTF8String:result.error.release()] userInfo:nil]; } } @@ -288,7 +288,7 @@ - (void)view:(NSString *)path { if (!result) { @throw [NSException exceptionWithName:@"Can't view from disk" - reason:[NSString stringWithUTF8String:result.error.what()] + reason:[NSString stringWithUTF8String:result.error.release()] userInfo:nil]; } } diff --git a/python/lib.cpp b/python/lib.cpp index b06a9cd5..9da0dfbc 100644 --- a/python/lib.cpp +++ b/python/lib.cpp @@ -77,15 +77,16 @@ struct dense_indexes_py_t { shards_.reserve(shards_.size() + paths.size()); std::mutex shards_mutex; - executor_default_t{threads}.execute_bulk(paths.size(), [&](std::size_t, std::size_t task_idx) { + executor_default_t{threads}.dynamic(paths.size(), [&](std::size_t, std::size_t task_idx) { index_dense_t index = index_dense_t::make(paths[task_idx].c_str(), view); if (!index) - return; + return false; auto shared_index = std::make_shared(std::move(index)); std::unique_lock lock(shards_mutex); shards_.push_back(shared_index); if (PyErr_CheckSignals() != 0) throw py::error_already_set(); + return true; }); } @@ -180,6 +181,18 @@ scalar_kind_t numpy_string_to_kind(std::string const& name) { return scalar_kind_t::unknown_k; } +template void forward_error(result_at&& result) { + + if (!result) + throw std::invalid_argument(result.error.release()); + + int signals = PyErr_CheckSignals(); + if (signals != 0) + throw py::error_already_set(); +} + +using atomic_error_t = std::atomic; + template static void add_typed_to_index( // dense_index_py_t& index, // @@ -189,18 +202,33 @@ static void add_typed_to_index( // Py_ssize_t vectors_count = vectors_info.shape[0]; byte_t const* vectors_data = reinterpret_cast(vectors_info.ptr); byte_t const* keys_data = reinterpret_cast(keys_info.ptr); + atomic_error_t atomic_error{nullptr}; - executor_default_t{threads}.execute_bulk(vectors_count, [&](std::size_t thread_idx, std::size_t task_idx) { + executor_default_t{threads}.dynamic(vectors_count, [&](std::size_t thread_idx, std::size_t task_idx) { index_dense_update_config_t config; config.force_vector_copy = force_copy; config.thread = thread_idx; key_t 
key = *reinterpret_cast(keys_data + task_idx * keys_info.strides[0]); scalar_at const* vector = reinterpret_cast(vectors_data + task_idx * vectors_info.strides[0]); dense_add_result_t result = index.add(key, vector, config); - result.error.raise(); - if (PyErr_CheckSignals() != 0) - throw py::error_already_set(); + if (!result) { + atomic_error = result.error.release(); + return false; + } + + // We don't want to check for signals from multiple threads + if (thread_idx == 0) + if (PyErr_CheckSignals() != 0) + return false; + return true; }); + + // Raise the error from a single thread + auto error = atomic_error.load(); + if (error) { + PyErr_SetString(PyExc_RuntimeError, error); + throw py::error_already_set(); + } } template @@ -265,21 +293,37 @@ static void search_typed( // if (!index.reserve(index_limits_t(index.size(), threads))) throw std::invalid_argument("Out of memory!"); - executor_default_t{threads}.execute_bulk(vectors_count, [&](std::size_t thread_idx, std::size_t task_idx) { + atomic_error_t atomic_error{nullptr}; + executor_default_t{threads}.dynamic(vectors_count, [&](std::size_t thread_idx, std::size_t task_idx) { index_search_config_t config; config.thread = thread_idx; config.exact = exact; scalar_at const* vector = (scalar_at const*)(vectors_data + task_idx * vectors_info.strides[0]); dense_search_result_t result = index.search(vector, wanted, config); - result.error.raise(); + if (!result) { + atomic_error = result.error.release(); + return false; + } + counts_py1d(task_idx) = static_cast(result.dump_to(&keys_py2d(task_idx, 0), &distances_py2d(task_idx, 0))); stats_visited_members += result.visited_members; stats_computed_distances += result.computed_distances; - if (PyErr_CheckSignals() != 0) - throw py::error_already_set(); + + // We don't want to check for signals from multiple threads + if (thread_idx == 0) + if (PyErr_CheckSignals() != 0) + return false; + return true; }); + + // Raise the error from a single thread + auto error = atomic_error.load(); + if (error) { + PyErr_SetString(PyExc_RuntimeError, error); + throw py::error_already_set(); + } } template @@ -305,15 +349,18 @@ static void search_typed( // if (!query_mutexes) throw std::bad_alloc(); - executor_default_t{threads}.execute_bulk(indexes.shards_.size(), [&](std::size_t, std::size_t task_idx) { + atomic_error_t atomic_error{nullptr}; + executor_default_t{threads}.dynamic(indexes.shards_.size(), [&](std::size_t thread_idx, std::size_t task_idx) { dense_index_py_t& index = *indexes.shards_[task_idx].get(); index_limits_t limits; limits.members = index.size(); limits.threads_add = 0; limits.threads_search = 1; - if (!index.reserve(limits)) - throw std::bad_alloc(); + if (!index.reserve(limits)) { + atomic_error = "Out of memory!"; + return false; + } index_search_config_t config; config.thread = 0; @@ -322,7 +369,11 @@ static void search_typed( // for (std::size_t vector_idx = 0; vector_idx != static_cast(vectors_count); ++vector_idx) { scalar_at const* vector = (scalar_at const*)(vectors_data + vector_idx * vectors_info.strides[0]); dense_search_result_t result = index.search(vector, wanted, config); - result.error.raise(); + if (!result) { + atomic_error = result.error.release(); + return false; + } + { auto lock = query_mutexes.lock(vector_idx); counts_py1d(vector_idx) = static_cast(result.merge_into( // @@ -334,10 +385,21 @@ static void search_typed( // stats_visited_members += result.visited_members; stats_computed_distances += result.computed_distances; - if (PyErr_CheckSignals() != 0) - throw 
py::error_already_set(); + + // We don't want to check for signals from multiple threads + if (thread_idx == 0) + if (PyErr_CheckSignals() != 0) + return false; + return true; } }); + + // Raise the error from a single thread + auto error = atomic_error.load(); + if (error) { + PyErr_SetString(PyExc_RuntimeError, error); + throw py::error_already_set(); + } } /** @@ -421,7 +483,7 @@ static void search_typed_brute_force( // if (!query_mutexes) throw std::bad_alloc(); - executor_default_t{threads}.execute_bulk(tasks_count, [&](std::size_t, std::size_t task_idx) { + executor_default_t{threads}.dynamic(tasks_count, [&](std::size_t thread_idx, std::size_t task_idx) { // std::size_t dataset_idx = task_idx / queries_count; std::size_t query_idx = task_idx % queries_count; @@ -437,7 +499,7 @@ static void search_typed_brute_force( // std::size_t& matches = reinterpret_cast(counts_py1d(query_idx)); if (matches == wanted) if (distances[wanted - 1] <= distance) - return; + return true; std::size_t offset = std::lower_bound(distances, distances + matches, distance) - distances; @@ -449,8 +511,11 @@ static void search_typed_brute_force( // matches += matches != wanted; } - if (PyErr_CheckSignals() != 0) - throw py::error_already_set(); + // We don't want to check for signals from multiple threads + if (thread_idx == 0) + if (PyErr_CheckSignals() != 0) + return false; + return true; }); } @@ -526,7 +591,7 @@ static std::unordered_map join_index( // std::size_t threads = (std::min)(a.limits().threads(), b.limits().threads()); executor_default_t executor{threads}; join_result_t result = a.join(b, config, a_to_b, b_to_a, executor); - result.error.raise(); + forward_error(result); return a_to_b; } @@ -536,7 +601,7 @@ static dense_index_py_t copy_index(dense_index_py_t const& index) { using copy_result_t = typename dense_index_py_t::copy_result_t; index_copy_config_t config; copy_result_t result = index.copy(config); - result.error.raise(); + forward_error(result); return std::move(result.index); } @@ -662,7 +727,8 @@ PYBIND11_MODULE(compiled, m) { m.def("index_dense_metadata", [](std::string const& path) -> py::dict { index_dense_metadata_result_t meta = index_dense_metadata(path.c_str()); - meta.error.raise(); + forward_error(meta); + index_dense_head_t const& head = meta.head; py::dict result; @@ -731,7 +797,7 @@ PYBIND11_MODULE(compiled, m) { "rename", [](dense_index_py_t& index, key_t from, key_t to) -> bool { dense_labeling_result_t result = index.rename(from, to); - result.error.raise(); + forward_error(result); return result.completed; }, py::arg("from"), py::arg("to")); @@ -740,7 +806,7 @@ PYBIND11_MODULE(compiled, m) { "remove", [](dense_index_py_t& index, key_t key, bool compact, std::size_t threads) -> bool { dense_labeling_result_t result = index.remove(key); - result.error.raise(); + forward_error(result); if (!compact) return result.completed; @@ -758,7 +824,7 @@ PYBIND11_MODULE(compiled, m) { "remove", [](dense_index_py_t& index, std::vector const& keys, bool compact, std::size_t threads) -> std::size_t { dense_labeling_result_t result = index.remove(keys.begin(), keys.end()); - result.error.raise(); + forward_error(result); if (!compact) return result.completed; @@ -781,8 +847,7 @@ PYBIND11_MODULE(compiled, m) { i.def_property_readonly( // "dtype", [](dense_index_py_t const& index) -> scalar_kind_t { return index.scalar_kind(); }); i.def_property_readonly( // - "memory_usage", [](dense_index_py_t const& index) -> std::size_t { return index.memory_usage(); }, - py::call_guard()); + 
"memory_usage", [](dense_index_py_t const& index) -> std::size_t { return index.memory_usage(); }); i.def_property("expansion_add", &dense_index_py_t::expansion_add, &dense_index_py_t::change_expansion_add); i.def_property("expansion_search", &dense_index_py_t::expansion_search, &dense_index_py_t::change_expansion_search); @@ -810,15 +875,14 @@ PYBIND11_MODULE(compiled, m) { i.def("__contains__", &dense_index_py_t::contains); i.def("__getitem__", &get_member, py::arg("key"), py::arg("dtype") = scalar_kind_t::f32_k); - i.def("save", &save_index, py::arg("path"), py::call_guard()); - i.def("load", &load_index, py::arg("path"), py::call_guard()); - i.def("view", &view_index, py::arg("path"), py::call_guard()); - i.def("reset", &reset_index, py::call_guard()); - i.def("clear", &clear_index, py::call_guard()); - i.def("copy", ©_index, py::call_guard()); - i.def("compact", &compact_index, py::call_guard()); - i.def("join", &join_index, py::arg("other"), py::arg("max_proposals") = 0, py::arg("exact") = false, - py::call_guard()); + i.def("save", &save_index, py::arg("path")); + i.def("load", &load_index, py::arg("path")); + i.def("view", &view_index, py::arg("path")); + i.def("reset", &reset_index); + i.def("clear", &clear_index); + i.def("copy", ©_index); + i.def("compact", &compact_index); + i.def("join", &join_index, py::arg("other"), py::arg("max_proposals") = 0, py::arg("exact") = false); using punned_index_stats_t = typename dense_index_py_t::stats_t; auto i_stats = py::class_(m, "IndexStats"); From e686a3dd66e2be5f7b376a56e779de90ce15c969 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Thu, 10 Aug 2023 11:39:05 +0000 Subject: [PATCH 10/70] Fix: Loading empty index Closes #195 --- include/usearch/index.hpp | 10 ++++++++++ python/scripts/test.py | 6 ++++++ 2 files changed, 16 insertions(+) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 5f2709bd..e0685418 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -2468,6 +2468,11 @@ class index_gt { if (!result) return result; + if (!header.size) { + reset(); + return result; + } + // Allocate some dynamic memory to read all the levels using levels_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; buffer_gt levels(header.size); @@ -2526,6 +2531,11 @@ class index_gt { return result.failed("File is corrupted and lacks a header"); std::memcpy(&header, file.data() + offset, sizeof(header)); + if (!header.size) { + reset(); + return result; + } + // Precompute offsets of every node, but before that we need to update the configs // This could have been done with `std::exclusive_scan`, but it's only available from C++17. 
using offsets_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; diff --git a/python/scripts/test.py b/python/scripts/test.py index 06437512..2d16556e 100644 --- a/python/scripts/test.py +++ b/python/scripts/test.py @@ -224,6 +224,12 @@ def test_minimal_index( assert meta is None index = Index.restore(temporary_usearch_filename) + # Try saving and opening and empty index + index_copy.reset() + index_copy.save(temporary_usearch_filename) + assert Index.restore(temporary_usearch_filename, view=False) is not None + assert Index.restore(temporary_usearch_filename, view=True) is not None + assert index is None os.remove(temporary_usearch_filename) From 531d2bc9e7baa0246ca134730b5280b48a9d580b Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Thu, 10 Aug 2023 17:10:37 +0400 Subject: [PATCH 11/70] Fix: Counting nodes per level --- include/usearch/index.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 5f2709bd..869145b5 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -2336,7 +2336,6 @@ class index_gt { stats_t stats(std::size_t level) const noexcept { stats_t result{}; - result.nodes = size(); std::size_t neighbors_bytes = !level ? pre_.neighbors_base_bytes : pre_.neighbors_bytes; for (std::size_t i = 0; i != result.nodes; ++i) { @@ -2344,6 +2343,7 @@ class index_gt { if (static_cast(node.level()) < level) continue; + ++result.nodes; result.edges += neighbors_(node, level).size(); result.allocated_bytes += node_head_bytes_() + neighbors_bytes; } From 7723ce587a8fa8480a9ea1d29e37f9193a40442e Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Thu, 10 Aug 2023 17:12:49 +0400 Subject: [PATCH 12/70] Fix: MetricKind name collision --- python/lib.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/lib.cpp b/python/lib.cpp index 9da0dfbc..590601ed 100644 --- a/python/lib.cpp +++ b/python/lib.cpp @@ -468,7 +468,6 @@ static void search_typed_brute_force( // std::size_t dataset_count = static_cast(dataset_info.shape[0]); std::size_t queries_count = static_cast(queries_info.shape[0]); - std::size_t dimensions = static_cast(dataset_info.shape[1]); byte_t const* dataset_data = reinterpret_cast(dataset_info.ptr); byte_t const* queries_data = reinterpret_cast(queries_info.ptr); @@ -693,7 +692,7 @@ PYBIND11_MODULE(compiled, m) { py::enum_(m, "MetricKind") .value("Unknown", metric_kind_t::unknown_k) - .value("IP", metric_kind_t::cos_k) + .value("IP", metric_kind_t::ip_k) .value("Cos", metric_kind_t::cos_k) .value("L2sq", metric_kind_t::l2sq_k) @@ -705,7 +704,7 @@ PYBIND11_MODULE(compiled, m) { .value("Sorensen", metric_kind_t::sorensen_k) .value("Cosine", metric_kind_t::cos_k) - .value("InnerProduct", metric_kind_t::cos_k); + .value("InnerProduct", metric_kind_t::ip_k); py::enum_(m, "ScalarKind") .value("Unknown", scalar_kind_t::unknown_k) From a823d0647f18bb9cbac87d50b3df67eb8e283436 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Thu, 10 Aug 2023 17:13:40 +0400 Subject: [PATCH 13/70] Fix: Pretty-printing metadata --- include/usearch/index.hpp | 15 ++++++++------- python/lib.cpp | 4 ++++ python/usearch/index.py | 24 ++++++++++++++++++------ 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 869145b5..28bdcbae 100644 --- 
a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -2311,22 +2311,23 @@ class index_gt { #pragma region Metadata struct stats_t { - std::size_t nodes; - std::size_t edges; - std::size_t max_edges; - std::size_t allocated_bytes; + std::size_t nodes{}; + std::size_t edges{}; + std::size_t max_edges{}; + std::size_t allocated_bytes{}; }; stats_t stats() const noexcept { stats_t result{}; - result.nodes = size(); - for (std::size_t i = 0; i != result.nodes; ++i) { + + for (std::size_t i = 0; i != size(); ++i) { node_t node = node_at_(i); std::size_t max_edges = node.level() * config_.connectivity + config_.connectivity_base; std::size_t edges = 0; for (level_t level = 0; level <= node.level(); ++level) edges += neighbors_(node, level).size(); + ++result.nodes; result.allocated_bytes += node_bytes_(node).size(); result.edges += edges; result.max_edges += max_edges; @@ -2338,7 +2339,7 @@ class index_gt { stats_t result{}; std::size_t neighbors_bytes = !level ? pre_.neighbors_base_bytes : pre_.neighbors_bytes; - for (std::size_t i = 0; i != result.nodes; ++i) { + for (std::size_t i = 0; i != size(); ++i) { node_t node = node_at_(i); if (static_cast(node.level()) < level) continue; diff --git a/python/lib.cpp b/python/lib.cpp index 590601ed..71484bc1 100644 --- a/python/lib.cpp +++ b/python/lib.cpp @@ -868,6 +868,10 @@ PYBIND11_MODULE(compiled, m) { py::arg("metric_pointer") = 0 // ); + i.def_property_readonly("hardware_acceleration", [](dense_index_py_t const& index) -> py::str { + return isa_name(index.metric().isa_kind()); + }); + i.def_property_readonly("keys", &get_all_keys); i.def("get_keys", &get_keys, py::arg("offset") = 0, py::arg("limit") = std::numeric_limits::max()); diff --git a/python/usearch/index.py b/python/usearch/index.py index a9719024..cd0ad2e6 100644 --- a/python/usearch/index.py +++ b/python/usearch/index.py @@ -639,6 +639,10 @@ def __getitem__(self, key: int) -> np.ndarray: def jit(self) -> bool: return self._metric_jit is not None + @property + def hardware_acceleration(self) -> str: + return self._compiled.hardware_acceleration + @property def size(self) -> int: return self._compiled.size @@ -847,25 +851,33 @@ def __repr__(self) -> str: self.max_level + 1, ) - def _repr_pretty_(self) -> str: + def _repr_pretty_(self, printer, cycle) -> str: level_stats = [ - f"--- {i}. {self.level_stats(i).nodes} nodes" for i in range(self.max_level) + f"--- {i}. 
{self.level_stats(i).nodes:,} nodes"
+            for i in range(self.max_level)
         ]
-        return "\n".join(
+        lines = "\n".join(
             [
                 "usearch.Index",
-                "- config" f"-- data type: {self.dtype}",
+                "- config",
+                f"-- data type: {self.dtype}",
                 f"-- dimensions: {self.ndim}",
                 f"-- metric: {self.metric}",
                 f"-- expansion on addition:{self.expansion_add} candidates",
                 f"-- expansion on search: {self.expansion_search} candidates",
+                "- binary",
+                f"-- uses OpenMP: {USES_OPENMP}",
+                f"-- uses SimSIMD: {USES_SIMSIMD}",
+                f"-- supports half-precision: {USES_NATIVE_F16}",
+                f"-- uses hardware acceleration: {self.hardware_acceleration}",
                 "- state",
-                f"-- size: {self.size} vectors",
-                f"-- memory usage: {self.memory_usage} bytes",
+                f"-- size: {self.size:,} vectors",
+                f"-- memory usage: {self.memory_usage:,} bytes",
                 f"-- max level: {self.max_level}",
                 *level_stats,
             ]
         )
+        printer.text(lines)


 class Indexes:
From ddf1afa95663cfdf3862c47999dbe81ef06b1c48 Mon Sep 17 00:00:00 2001
From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com>
Date: Thu, 10 Aug 2023 21:29:53 +0400
Subject: [PATCH 14/70] Add: Clustering functionality

---
 include/usearch/index.hpp       |  76 ++++++++++++++++-
 include/usearch/index_dense.hpp | 136 ++++++++++++++++--------------
 python/lib.cpp                  | 144 ++++++++++++++++++++++++++++----
 python/scripts/test.py          |   5 ++
 python/usearch/index.py         |  58 +++++++++++--
 5 files changed, 334 insertions(+), 85 deletions(-)

diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp
index 28bdcbae..fc118824 100644
--- a/include/usearch/index.hpp
+++ b/include/usearch/index.hpp
@@ -1004,6 +1004,16 @@ struct index_search_config_t {
     bool exact = false;
 };

+struct index_cluster_config_t {
+    /// @brief Hyper-parameter controlling the quality of search.
+    ///        Defaults to 16 in FAISS and 10 in hnswlib.
+    ///        > It is called `ef` in the paper.
+    std::size_t expansion = default_expansion_search();
+
+    /// @brief Optional thread identifier for multi-threaded clustering.
+    std::size_t thread = 0;
+};
+
 struct index_copy_config_t {};

 struct index_join_config_t {
@@ -1817,7 +1827,7 @@ class index_gt {
         }
     };

-    copy_result_t copy(index_copy_config_t /*config*/ = {}) const noexcept {
+    copy_result_t copy(index_copy_config_t config = {}) const noexcept {
         copy_result_t result;
         index_gt& other = result.index;
         other = index_gt(config_, dynamic_allocator_, tape_allocator_);
@@ -1832,6 +1842,9 @@ class index_gt {
         other.nodes_count_ = nodes_count_.load();
         other.max_level_ = max_level_;
         other.entry_slot_ = entry_slot_;
+
+        // This controls nothing for now :)
+        (void)config;
         return result;
     }

@@ -2057,6 +2070,19 @@ class index_gt {
         }
     };

+    struct cluster_result_t {
+        error_t error{};
+        std::size_t visited_members{};
+        std::size_t computed_distances{};
+        match_t cluster{{nullptr}, 0};
+
+        explicit operator bool() const noexcept { return !error; }
+        cluster_result_t failed(error_t message) noexcept {
+            error = std::move(message);
+            return std::move(*this);
+        }
+    };
+
     /**
      *  @brief  Inserts a new entry into the index. Thread-safe. Supports @b heterogeneous lookups.
      *          Expects needed capacity to be reserved ahead of time: `size() < capacity()`.
@@ -2251,7 +2277,7 @@
      *  @param[in] wanted The upper bound for the number of results to return.
      *  @param[in] config Configuration options for this specific operation.
      *  @param[in] predicate Optional filtering predicate for `member_cref_t`.
-     *  @return Smart object referencing temporary memory. Valid until next `search()` or `add()`.
+     *  @return Smart object referencing temporary memory.
Valid until next `search()`, `add()`, or `cluster()`. */ template < // typename value_at, // @@ -2306,6 +2332,52 @@ class index_gt { return result; } + /** + * @brief Identifies the closest cluster to the gived ::query. Thread-safe. + * + * @param[in] query Content that will be compared against other entries in the index. + * @param[in] level The index level to target. Higher means lower resolution. + * @param[in] config Configuration options for this specific operation. + * @param[in] predicate Optional filtering predicate for `member_cref_t`. + * @return Smart object referencing temporary memory. Valid until next `search()`, `add()`, or `cluster()`. + */ + template < // + typename value_at, // + typename metric_at, // + typename predicate_at = dummy_predicate_t, // + typename prefetch_at = dummy_prefetch_t // + > + cluster_result_t cluster( // + value_at&& query, // + std::size_t level, // + metric_at&& metric, // + index_cluster_config_t config = {}, // + predicate_at&& predicate = predicate_at{}, // + prefetch_at&& prefetch = prefetch_at{}) const noexcept { + + context_t& context = contexts_[config.thread]; + cluster_result_t result; + if (!nodes_count_) + return result.failed("No clusters to identify"); + + // Go down the level, tracking only the closest match + result.computed_distances = context.computed_distances_count; + result.visited_members = context.iteration_cycles; + + next_candidates_t& next = context.next_candidates; + std::size_t expansion = config.expansion; + if (!next.reserve(expansion)) + return result.failed("Out of memory!"); + + result.cluster.member = at(search_for_one_(query, metric, prefetch, entry_slot_, max_level_, level, context)); + result.cluster.distance = context.measure(query, result.cluster.member, metric); + + // Normalize stats + result.computed_distances = context.computed_distances_count - result.computed_distances; + result.visited_members = context.iteration_cycles - result.visited_members; + return result; + } + #pragma endregion #pragma region Metadata diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 6effbb37..2b9cc414 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -114,13 +114,6 @@ struct index_dense_serialization_config_t { bool use_64_bit_dimensions = false; }; -struct index_dense_update_config_t : public index_update_config_t { - bool force_vector_copy = false; - - index_dense_update_config_t() = default; - index_dense_update_config_t(index_update_config_t base) noexcept : index_update_config_t(base) {} -}; - struct index_dense_copy_config_t : public index_copy_config_t { bool force_vector_copy = true; @@ -364,6 +357,7 @@ class index_dense_gt { public: using search_result_t = typename index_t::search_result_t; + using cluster_result_t = typename index_t::cluster_result_t; using add_result_t = typename index_t::add_result_t; using stats_t = typename index_t::stats_t; using match_t = typename index_t::match_t; @@ -513,30 +507,26 @@ class index_dense_gt { vectors_tape_allocator_.total_allocated(); } + static constexpr std::size_t any_thread() { return std::numeric_limits::max(); } + // clang-format off - add_result_t add(key_t key, b1x8_t const* vector) { return add_(key, vector, casts_.from_b1x8); } - add_result_t add(key_t key, i8_bits_t const* vector) { return add_(key, vector, casts_.from_i8); } - add_result_t add(key_t key, f16_t const* vector) { return add_(key, vector, casts_.from_f16); } - add_result_t add(key_t key, f32_t const* vector) { return add_(key, 
vector, casts_.from_f32); } - add_result_t add(key_t key, f64_t const* vector) { return add_(key, vector, casts_.from_f64); } - - add_result_t add(key_t key, b1x8_t const* vector, index_dense_update_config_t config) { return add_(key, vector, config, casts_.from_b1x8); } - add_result_t add(key_t key, i8_bits_t const* vector, index_dense_update_config_t config) { return add_(key, vector, config, casts_.from_i8); } - add_result_t add(key_t key, f16_t const* vector, index_dense_update_config_t config) { return add_(key, vector, config, casts_.from_f16); } - add_result_t add(key_t key, f32_t const* vector, index_dense_update_config_t config) { return add_(key, vector, config, casts_.from_f32); } - add_result_t add(key_t key, f64_t const* vector, index_dense_update_config_t config) { return add_(key, vector, config, casts_.from_f64); } - - search_result_t search(b1x8_t const* vector, std::size_t wanted) const { return search_(vector, wanted, casts_.from_b1x8); } - search_result_t search(i8_bits_t const* vector, std::size_t wanted) const { return search_(vector, wanted, casts_.from_i8); } - search_result_t search(f16_t const* vector, std::size_t wanted) const { return search_(vector, wanted, casts_.from_f16); } - search_result_t search(f32_t const* vector, std::size_t wanted) const { return search_(vector, wanted, casts_.from_f32); } - search_result_t search(f64_t const* vector, std::size_t wanted) const { return search_(vector, wanted, casts_.from_f64); } - - search_result_t search(b1x8_t const* vector, std::size_t wanted, index_search_config_t config) const { return search_(vector, wanted, config, casts_.from_b1x8); } - search_result_t search(i8_bits_t const* vector, std::size_t wanted, index_search_config_t config) const { return search_(vector, wanted, config, casts_.from_i8); } - search_result_t search(f16_t const* vector, std::size_t wanted, index_search_config_t config) const { return search_(vector, wanted, config, casts_.from_f16); } - search_result_t search(f32_t const* vector, std::size_t wanted, index_search_config_t config) const { return search_(vector, wanted, config, casts_.from_f32); } - search_result_t search(f64_t const* vector, std::size_t wanted, index_search_config_t config) const { return search_(vector, wanted, config, casts_.from_f64); } + add_result_t add(key_t key, b1x8_t const* vector, std::size_t thread = any_thread(), bool force_vector_copy = true) { return add_(key, vector, thread, force_vector_copy, casts_.from_b1x8); } + add_result_t add(key_t key, i8_bits_t const* vector, std::size_t thread = any_thread(), bool force_vector_copy = true) { return add_(key, vector, thread, force_vector_copy, casts_.from_i8); } + add_result_t add(key_t key, f16_t const* vector, std::size_t thread = any_thread(), bool force_vector_copy = true) { return add_(key, vector, thread, force_vector_copy, casts_.from_f16); } + add_result_t add(key_t key, f32_t const* vector, std::size_t thread = any_thread(), bool force_vector_copy = true) { return add_(key, vector, thread, force_vector_copy, casts_.from_f32); } + add_result_t add(key_t key, f64_t const* vector, std::size_t thread = any_thread(), bool force_vector_copy = true) { return add_(key, vector, thread, force_vector_copy, casts_.from_f64); } + + search_result_t search(b1x8_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, thread, exact, casts_.from_b1x8); } + search_result_t search(i8_bits_t const* vector, std::size_t wanted, std::size_t thread = 
any_thread(), bool exact = false) const { return search_(vector, wanted, thread, exact, casts_.from_i8); } + search_result_t search(f16_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, thread, exact, casts_.from_f16); } + search_result_t search(f32_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, thread, exact, casts_.from_f32); } + search_result_t search(f64_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, thread, exact, casts_.from_f64); } + + cluster_result_t cluster(b1x8_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from_b1x8); } + cluster_result_t cluster(i8_bits_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from_i8); } + cluster_result_t cluster(f16_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from_f16); } + cluster_result_t cluster(f32_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from_f32); } + cluster_result_t cluster(f64_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from_f64); } bool get(key_t key, b1x8_t* vector) const { return get_(key, vector, casts_.to_b1x8); } bool get(key_t key, i8_bits_t* vector) const { return get_(key, vector, casts_.to_i8); } @@ -1177,16 +1167,23 @@ class index_dense_gt { struct thread_lock_t { index_dense_gt const& parent; std::size_t thread_id; + bool engaged; - ~thread_lock_t() { parent.thread_unlock_(thread_id); } + ~thread_lock_t() { + if (engaged) + parent.thread_unlock_(thread_id); + } }; - thread_lock_t thread_lock_() const { + thread_lock_t thread_lock_(std::size_t thread_id) const { + if (thread_id != any_thread()) + return {*this, thread_id, false}; + available_threads_mutex_.lock(); - std::size_t thread_id = available_threads_.back(); + thread_id = available_threads_.back(); available_threads_.pop_back(); available_threads_mutex_.unlock(); - return {*this, thread_id}; + return {*this, thread_id, true}; } void thread_unlock_(std::size_t thread_id) const { @@ -1196,16 +1193,19 @@ class index_dense_gt { } template - add_result_t add_(key_t key, scalar_at const* vector, index_dense_update_config_t config, cast_t const& cast) { + add_result_t add_( // + key_t key, scalar_at const* vector, // + std::size_t thread, bool force_vector_copy, cast_t const& cast) { if (!config_.allow_key_collisions && contains(key)) return add_result_t{}.failed("Duplicate keys not allowed in high-level wrappers"); // Cast the vector, if needed for compatibility with `metric_` - bool copy_vector = !config_.exclude_vectors || config.force_vector_copy; + thread_lock_t lock = thread_lock_(thread); + bool copy_vector = !config_.exclude_vectors || force_vector_copy; byte_t const* vector_data = reinterpret_cast(vector); { - byte_t* casted_data = cast_buffer_.data() + metric_.bytes_per_vector() * config.thread; + byte_t* casted_data = cast_buffer_.data() + metric_.bytes_per_vector() * lock.thread_id; bool casted = cast(vector_data, dimensions(), casted_data); if (casted) vector_data = casted_data, copy_vector = true; @@ -1231,28 +1231,61 @@ class 
index_dense_gt { vectors_lookup_[member.slot] = (byte_t*)vector_data; }; + index_update_config_t update_config; + update_config.thread = lock.thread_id; + update_config.expansion = config_.expansion_add; + metric_proxy_t metric{*this}; return reuse_node // - ? typed_->update(typed_->iterator_at(free_slot), key, vector_data, metric, config, on_success) - : typed_->add(key, vector_data, metric, config, on_success); + ? typed_->update(typed_->iterator_at(free_slot), key, vector_data, metric, update_config, on_success) + : typed_->add(key, vector_data, metric, update_config, on_success); } template search_result_t search_( // scalar_at const* vector, std::size_t wanted, // - index_search_config_t config, cast_t const& cast) const { + std::size_t thread, bool exact, cast_t const& cast) const { // Cast the vector, if needed for compatibility with `metric_` + thread_lock_t lock = thread_lock_(thread); byte_t const* vector_data = reinterpret_cast(vector); { - byte_t* casted_data = cast_buffer_.data() + metric_.bytes_per_vector() * config.thread; + byte_t* casted_data = cast_buffer_.data() + metric_.bytes_per_vector() * lock.thread_id; bool casted = cast(vector_data, dimensions(), casted_data); if (casted) vector_data = casted_data; } + index_search_config_t search_config; + search_config.thread = lock.thread_id; + search_config.expansion = config_.expansion_search; + search_config.exact = exact; + auto allow = [=](member_cref_t const& member) noexcept { return member.key != free_key_; }; - return typed_->search(vector_data, wanted, metric_proxy_t{*this}, config, allow); + return typed_->search(vector_data, wanted, metric_proxy_t{*this}, search_config, allow); + } + + template + cluster_result_t cluster_( // + scalar_at const* vector, std::size_t level, // + std::size_t thread, cast_t const& cast) const { + + // Cast the vector, if needed for compatibility with `metric_` + thread_lock_t lock = thread_lock_(thread); + byte_t const* vector_data = reinterpret_cast(vector); + { + byte_t* casted_data = cast_buffer_.data() + metric_.bytes_per_vector() * lock.thread_id; + bool casted = cast(vector_data, dimensions(), casted_data); + if (casted) + vector_data = casted_data; + } + + index_cluster_config_t cluster_config; + cluster_config.thread = lock.thread_id; + cluster_config.expansion = config_.expansion_search; + + auto allow = [=](member_cref_t const& member) noexcept { return member.key != free_key_; }; + return typed_->cluster(vector_data, level, metric_proxy_t{*this}, cluster_config, allow); } compressed_slot_t lookup_id_(key_t key) const { @@ -1304,25 +1337,6 @@ class index_dense_gt { return true; } - template add_result_t add_(key_t key, scalar_at const* vector, cast_t const& cast) { - thread_lock_t lock = thread_lock_(); - index_dense_update_config_t update_config; - update_config.thread = lock.thread_id; - update_config.expansion = config_.expansion_add; - return add_(key, vector, update_config, cast); - } - - template - search_result_t search_( // - scalar_at const* vector, std::size_t wanted, // - cast_t const& cast) const { - thread_lock_t lock = thread_lock_(); - index_search_config_t search_config; - search_config.thread = lock.thread_id; - search_config.expansion = config_.expansion_search; - return search_(vector, wanted, search_config, cast); - } - template static casts_t make_casts_() { casts_t result; diff --git a/python/lib.cpp b/python/lib.cpp index 71484bc1..1889a637 100644 --- a/python/lib.cpp +++ b/python/lib.cpp @@ -51,6 +51,7 @@ using distance_t = distance_punned_t; using 
dense_add_result_t = typename index_dense_t::add_result_t; using dense_search_result_t = typename index_dense_t::search_result_t; using dense_labeling_result_t = typename index_dense_t::labeling_result_t; +using dense_cluster_result_t = typename index_dense_t::cluster_result_t; struct dense_index_py_t : public index_dense_t { using native_t = index_dense_t; @@ -205,12 +206,9 @@ static void add_typed_to_index( // atomic_error_t atomic_error{nullptr}; executor_default_t{threads}.dynamic(vectors_count, [&](std::size_t thread_idx, std::size_t task_idx) { - index_dense_update_config_t config; - config.force_vector_copy = force_copy; - config.thread = thread_idx; key_t key = *reinterpret_cast(keys_data + task_idx * keys_info.strides[0]); scalar_at const* vector = reinterpret_cast(vectors_data + task_idx * vectors_info.strides[0]); - dense_add_result_t result = index.add(key, vector, config); + dense_add_result_t result = index.add(key, vector, thread_idx, force_copy); if (!result) { atomic_error = result.error.release(); return false; @@ -295,11 +293,8 @@ static void search_typed( // atomic_error_t atomic_error{nullptr}; executor_default_t{threads}.dynamic(vectors_count, [&](std::size_t thread_idx, std::size_t task_idx) { - index_search_config_t config; - config.thread = thread_idx; - config.exact = exact; scalar_at const* vector = (scalar_at const*)(vectors_data + task_idx * vectors_info.strides[0]); - dense_search_result_t result = index.search(vector, wanted, config); + dense_search_result_t result = index.search(vector, wanted, thread_idx, exact); if (!result) { atomic_error = result.error.release(); return false; @@ -362,13 +357,9 @@ static void search_typed( // return false; } - index_search_config_t config; - config.thread = 0; - config.exact = exact; - for (std::size_t vector_idx = 0; vector_idx != static_cast(vectors_count); ++vector_idx) { scalar_at const* vector = (scalar_at const*)(vectors_data + vector_idx * vectors_info.strides[0]); - dense_search_result_t result = index.search(vector, wanted, config); + dense_search_result_t result = index.search(vector, wanted, 0, exact); if (!result) { atomic_error = result.error.release(); return false; @@ -409,7 +400,9 @@ static void search_typed( // * @return Tuple with: * 1. matrix of neighbors, * 2. matrix of distances, - * 3. array with match counts. + * 3. array with match counts, + * 4. number of visited nodes, + * 4. number of computed pairwise distances. 
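+ *
+ *  On the Python side these five values are expected to be folded into a single
+ *  `BatchMatches` object, exposing `keys`, `distances`, `counts`, `visited_members`,
+ *  and `computed_distances` fields.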
*/ template static py::tuple search_many_in_index( // @@ -575,6 +568,115 @@ static py::tuple search_many_brute_force( // return results; } +template +static void cluster_typed( // + dense_index_py_t& index, py::buffer_info& vectors_info, // + std::size_t level, std::size_t threads, // + py::array_t& keys_py, py::array_t& distances_py, // + std::atomic& stats_visited_members, std::atomic& stats_computed_distances) { + + auto keys_py1d = keys_py.template mutable_unchecked<1>(); + auto distances_py1d = distances_py.template mutable_unchecked<1>(); + + Py_ssize_t vectors_count = vectors_info.shape[0]; + byte_t const* vectors_data = reinterpret_cast(vectors_info.ptr); + + if (!threads) + threads = std::thread::hardware_concurrency(); + if (!index.reserve(index_limits_t(index.size(), threads))) + throw std::invalid_argument("Out of memory!"); + + atomic_error_t atomic_error{nullptr}; + executor_default_t{threads}.dynamic(vectors_count, [&](std::size_t thread_idx, std::size_t task_idx) { + scalar_at const* vector = (scalar_at const*)(vectors_data + task_idx * vectors_info.strides[0]); + dense_cluster_result_t result = index.cluster(vector, level, thread_idx); + if (!result) { + atomic_error = result.error.release(); + return false; + } + + keys_py1d(task_idx) = result.cluster.member.key; + distances_py1d(task_idx) = result.cluster.distance; + + stats_visited_members += result.visited_members; + stats_computed_distances += result.computed_distances; + + // We don't want to check for signals from multiple threads + if (thread_idx == 0) + if (PyErr_CheckSignals() != 0) + return false; + return true; + }); + + // Raise the error from a single thread + auto error = atomic_error.load(); + if (error) { + PyErr_SetString(PyExc_RuntimeError, error); + throw py::error_already_set(); + } +} + +/** + * @param vectors Matrix of vectors to search for. + * @param level Graph level to query. + * + * @return Tuple with: + * 1. vector of cluster IDs, + * 2. vector of distances to those clusters, + * 3. array with match counts, set to all ones, + * 4. number of visited nodes, + * 4. number of computed pairwise distances. 
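+ *
+ *  A sketch of the per-vector call this binding parallelizes (illustrative only;
+ *  assumes an already populated `index_dense_t index` and a raw `float* vector`
+ *  of matching dimensionality; `handle` is a placeholder):
+ *
+ *      auto result = index.cluster(vector, 1); // level 1, let the index pick a thread
+ *      if (result)
+ *          handle(result.cluster.member.key, result.cluster.distance);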
+ */ +template +static py::tuple cluster_many_in_index( // + index_at& index, py::buffer vectors, std::size_t level, std::size_t threads) { + + if (level == 0) + return py::tuple(5); + + if (index.limits().threads_search < threads) + throw std::invalid_argument("Can't use that many threads!"); + + py::buffer_info vectors_info = vectors.request(); + if (vectors_info.ndim != 2) + throw std::invalid_argument("Expects a matrix of vectors to add!"); + + Py_ssize_t vectors_count = vectors_info.shape[0]; + Py_ssize_t vectors_dimensions = vectors_info.shape[1]; + if (vectors_dimensions != static_cast(index.scalar_words())) + throw std::invalid_argument("The number of vector dimensions doesn't match!"); + + py::array_t keys_py(vectors_count); + py::array_t distances_py(vectors_count); + py::array_t counts_py(vectors_count); + std::atomic stats_visited_members(0); + std::atomic stats_computed_distances(0); + + // Those would be set for one for al entries, in case of success + auto counts_py1d = counts_py.template mutable_unchecked<1>(); + for (Py_ssize_t vector_idx = 0; vector_idx != vectors_count; ++vector_idx) + counts_py1d(vector_idx) = 0; + + // clang-format off + switch (numpy_string_to_kind(vectors_info.format)) { + case scalar_kind_t::b1x8_k: cluster_typed(index, vectors_info, level, threads, keys_py, distances_py, stats_visited_members, stats_computed_distances); break; + case scalar_kind_t::i8_k: cluster_typed(index, vectors_info, level, threads, keys_py, distances_py, stats_visited_members, stats_computed_distances); break; + case scalar_kind_t::f16_k: cluster_typed(index, vectors_info, level, threads, keys_py, distances_py, stats_visited_members, stats_computed_distances); break; + case scalar_kind_t::f32_k: cluster_typed(index, vectors_info, level, threads, keys_py, distances_py, stats_visited_members, stats_computed_distances); break; + case scalar_kind_t::f64_k: cluster_typed(index, vectors_info, level, threads, keys_py, distances_py, stats_visited_members, stats_computed_distances); break; + default: throw std::invalid_argument("Incompatible scalars in the query matrix: " + vectors_info.format); + } + // clang-format on + + py::tuple results(5); + results[0] = keys_py; + results[1] = distances_py; + results[2] = counts_py; + results[3] = stats_visited_members.load(); + results[4] = stats_computed_distances.load(); + return results; +} + static std::unordered_map join_index( // dense_index_py_t const& a, dense_index_py_t const& b, // std::size_t max_proposals, bool exact) { @@ -595,10 +697,11 @@ static std::unordered_map join_index( // return a_to_b; } -static dense_index_py_t copy_index(dense_index_py_t const& index) { +static dense_index_py_t copy_index(dense_index_py_t const& index, bool force_copy) { using copy_result_t = typename dense_index_py_t::copy_result_t; - index_copy_config_t config; + index_dense_copy_config_t config; + config.force_vector_copy = force_copy; copy_result_t result = index.copy(config); forward_error(result); return std::move(result.index); @@ -792,6 +895,13 @@ PYBIND11_MODULE(compiled, m) { py::arg("threads") = 0 // ); + i.def( // + "cluster", &cluster_many_in_index, // + py::arg("query"), // + py::arg("level") = 1, // + py::arg("threads") = 0 // + ); + i.def( "rename", [](dense_index_py_t& index, key_t from, key_t to) -> bool { @@ -883,7 +993,7 @@ PYBIND11_MODULE(compiled, m) { i.def("view", &view_index, py::arg("path")); i.def("reset", &reset_index); i.def("clear", &clear_index); - i.def("copy", ©_index); + i.def("copy", ©_index, py::kw_only(), 
py::arg("copy") = true); i.def("compact", &compact_index); i.def("join", &join_index, py::arg("other"), py::arg("max_proposals") = 0, py::arg("exact") = false); diff --git a/python/scripts/test.py b/python/scripts/test.py index 06437512..0801c0f8 100644 --- a/python/scripts/test.py +++ b/python/scripts/test.py @@ -287,6 +287,11 @@ def test_index_batch( for idx in range(len(matches_viewed)): assert np.all(matches_viewed[idx].keys == matches[idx].keys) + # Test clustering + if batch_size > 1: + clusters: BatchMatches = index.cluster(vectors, 1, threads=2) + assert len(clusters.keys) == batch_size + # Cleanup index.reset() os.remove(temporary_usearch_filename) diff --git a/python/usearch/index.py b/python/usearch/index.py index cd0ad2e6..13d799a2 100644 --- a/python/usearch/index.py +++ b/python/usearch/index.py @@ -162,9 +162,17 @@ def distil_batch( pbar.close() return distil_batch( BatchMatches( - keys=np.vstack([m.keys for m in tasks_matches]), - distances=np.vstack([m.distances for m in tasks_matches]), + # Keys array can be 1 or 2 dimensional + keys=np.vstack([m.keys for m in tasks_matches]) + if tasks_matches[0].keys.ndim == 2 + else np.concatenate([m.keys for m in tasks_matches], axis=None), + # Distances array can be 1 or 2 dimensional + distances=np.vstack([m.distances for m in tasks_matches]) + if tasks_matches[0].distances.ndim == 2 + else np.concatenate([m.distances for m in tasks_matches], axis=None), counts=np.concatenate([m.counts for m in tasks_matches], axis=None), + visited_members=sum([m.visited_members for m in tasks_matches]), + computed_distances=sum([m.computed_distances for m in tasks_matches]), ) ) @@ -574,6 +582,49 @@ def search( batch_size=batch_size, ) + def cluster( + self, + vectors, + level: int = 1, + *, + threads: int = 0, + log: Union[str, bool] = False, + batch_size: int = 0, + ) -> Union[Matches, BatchMatches]: + """ + Performs approximate nearest neighbors search for one or more queries. + + :param vectors: Query vector or vectors. 
+ :type vectors: np.ndarray + :param level: Graph level to target - higher means coarse, defaults to 1 + :type level: int, optional + :param threads: Optimal number of cores to use, defaults to 0 + :type threads: int, optional + :param log: Whether to print the progress bar, default to False + :type log: Union[str, bool], optional + :param batch_size: Number of vectors to process at once, defaults to 0 + :type batch_size: int, optional + :return: Matches for one or more queries + :rtype: Union[Matches, BatchMatches] + """ + + compiled_clustering = self._compiled.cluster + + class WrappedDataset: + def search(self, query, k, **kwargs): + kwargs.pop("exact") + return compiled_clustering(query, k, **kwargs) + + return _search_in_compiled( + compiled=WrappedDataset(), + vectors=vectors, + k=level, + exact=False, + threads=threads, + log=log, + batch_size=batch_size, + ) + def remove( self, keys: Union[int, Iterable[int]], @@ -994,9 +1045,6 @@ def search( raise ValueError("The `metric` must be a `CompiledMetric` or a `MetricKind`") class WrappedDataset: - def __init__(self) -> None: - pass - def search(self, query, k, **kwargs): kwargs.pop("exact") kwargs.update( From d9bc92b1efbc243ddd4ace143b40af6eef3bef33 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Fri, 11 Aug 2023 12:05:13 +0400 Subject: [PATCH 15/70] Refactor: Args to top-level interface --- cpp/bench.cpp | 4 ++-- cpp/test.cpp | 21 ++++++++++++--------- include/usearch/index.hpp | 4 ++-- include/usearch/index_plugins.hpp | 19 ++++++++++++++++++- javascript/usearch.d.ts | 10 +++++++++- setup.py | 2 +- 6 files changed, 44 insertions(+), 16 deletions(-) diff --git a/cpp/bench.cpp b/cpp/bench.cpp index be9c21dc..3ed6ed43 100644 --- a/cpp/bench.cpp +++ b/cpp/bench.cpp @@ -291,7 +291,7 @@ void index_many(index_at& index, std::size_t n, vector_id_at const* ids, real_at config.thread = omp_get_thread_num(); #endif float_span_t vector{vectors + dims * i, dims}; - index.add(ids[i], vector, config); + index.add(ids[i], vector, config.thread); printer.progress++; if (config.thread == 0) printer.refresh(); @@ -315,7 +315,7 @@ void search_many( // config.thread = omp_get_thread_num(); #endif float_span_t vector{vectors + dims * i, dims}; - index.search(vector, wanted, config).dump_to(ids + wanted * i, distances + wanted * i); + index.search(vector, wanted, config.thread).dump_to(ids + wanted * i, distances + wanted * i); printer.progress++; if (config.thread == 0) printer.refresh(); diff --git a/cpp/test.cpp b/cpp/test.cpp index 8d73abc7..4fff6272 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -55,14 +55,12 @@ void test_cosine(index_at& index, std::vector> const& vec expect(std::abs(matched_distances[0]) < 0.01); // Add more entries - index_search_config_t search_config; - search_config.exact = true; index.add(key_second, vector_second, args...); index.add(key_third, vector_third, args...); expect(index.size() == 3); // Perform exact search - matched_count = index.search(vector_first, 5, args..., search_config).dump_to(matched_labels, matched_distances); + matched_count = index.search(vector_first, 5, args...).dump_to(matched_labels, matched_distances); // Validate scans std::size_t count = 0; @@ -74,6 +72,9 @@ void test_cosine(index_at& index, std::vector> const& vec expect((count == 3)); expect((index.stats(0).nodes == 3)); + // Check if clustering endpoint compiles + index.cluster(vector_first, 0, args...); + // Try removals and replacements if constexpr (punned_ak) { using labeling_result_t = 
typename index_t::labeling_result_t; @@ -102,9 +103,13 @@ void test_cosine(index_at& index, std::vector> const& vec executor_default_t executor; index.reserve({vectors.size(), executor.size()}); executor.fixed(vectors.size() - 3, [&](std::size_t thread, std::size_t task) { - index_update_config_t config; - config.thread = thread; - index.add(key_max - task - 3, vectors[task + 3].data(), args..., config); + if constexpr (punned_ak) { + index.add(key_max - task - 3, vectors[task + 3].data(), args...); + } else { + index_update_config_t config; + config.thread = thread; + index.add(key_max - task - 3, vectors[task + 3].data(), args..., config); + } }); // Search again over mapped index @@ -210,9 +215,7 @@ template void test_tanimoto(std::size_t dime index.reserve({batch_size + index.size(), executor.size()}); executor.fixed(batch_size, [&](std::size_t thread, std::size_t task) { - index_update_config_t config; - config.thread = thread; - index.add(task + 25000, scalars.data() + index.scalar_words() * task, config); + index.add(task + 25000, scalars.data() + index.scalar_words() * task, thread); }); } diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 5d5f16e0..f0e66b8d 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -19,7 +19,7 @@ #define USEARCH_DEFINED_CPP17 #endif -// Inferring target OS +// Inferring target OS: Windows, MacOS, or Linux #if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(__NT__) #define USEARCH_DEFINED_WINDOWS #elif defined(__APPLE__) && defined(__MACH__) @@ -28,7 +28,7 @@ #define USEARCH_DEFINED_LINUX #endif -// Inferring the compiler +// Inferring the compiler: Clang vs GCC #if defined(__clang__) #define USEARCH_DEFINED_CLANG #elif defined(__GNUC__) diff --git a/include/usearch/index_plugins.hpp b/include/usearch/index_plugins.hpp index 7cb0dbdd..89c0317b 100644 --- a/include/usearch/index_plugins.hpp +++ b/include/usearch/index_plugins.hpp @@ -30,7 +30,6 @@ #include #endif #else -#define USEARCH_USE_NATIVE_F16 0 #include #endif @@ -196,6 +195,9 @@ inline char const* isa_name(isa_kind_t isa_kind) noexcept { } inline bool hardware_supports(isa_kind_t isa_kind) noexcept { + + // On Linux Arm machines the `getauxval` can be queried to check + // if SVE extensions are available. Arm Neon has no separate capability check. #if defined(USEARCH_DEFINED_ARM) && defined(USEARCH_DEFINED_LINUX) unsigned long capabilities = getauxval(AT_HWCAP); switch (isa_kind) { @@ -205,6 +207,8 @@ inline bool hardware_supports(isa_kind_t isa_kind) noexcept { } #endif + // When compiling with GCC, one may use the "built-ins", including ones + // designed for CPU capability detection. #if defined(USEARCH_DEFINED_X86) && defined(USEARCH_DEFINED_GCC) __builtin_cpu_init(); switch (isa_kind) { @@ -214,6 +218,19 @@ inline bool hardware_supports(isa_kind_t isa_kind) noexcept { } #endif + // On Apple we can expect Arm devices to support Neon extesions, + // and the x86 machines to support AVX2 extensions. 
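+    // Note: this is a compile-time decision - builds for Apple Silicon (M-series)
+    // machines take the Arm/Neon branch below, while builds for Intel-based Macs
+    // take the x86/AVX2 branch.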
+#if defined(USEARCH_DEFINED_APPLE) + switch (isa_kind) { +#if defined(USEARCH_DEFINED_ARM) + case isa_kind_t::neon_k: return true; +#else + case isa_kind_t::avx2_k: return true; +#endif + default: return false; + } +#endif + (void)isa_kind; return false; } diff --git a/javascript/usearch.d.ts b/javascript/usearch.d.ts index bd7a6bd1..a4bf7fcb 100644 --- a/javascript/usearch.d.ts +++ b/javascript/usearch.d.ts @@ -23,7 +23,15 @@ export class Index { * @param {bigint} expansion_add * @param {bigint} expansion_search */ - constructor(...args); + constructor( + dimensions: bigint, + metric: string, + quantization: string, + capacity: bigint, + connectivity: bigint, + expansion_add: bigint, + expansion_search: bigint + ); /** * Returns the dimensionality of vectors. diff --git a/setup.py b/setup.py index f60903dc..a6fa3182 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ compile_args.append("-g") # Simplify debugging compile_args.append("-Wno-unknown-pragmas") - # Linking OpenMP requires additional preparion in CIBuildWheel + # Linking OpenMP requires additional preparation in CIBuildWheel # macros_args.append(("USEARCH_USE_OPENMP", "1")) # compile_args.append("-Xpreprocessor -fopenmp") # link_args.append("-Xpreprocessor -lomp") From 886e29f3a44ea058fc9b174ba7c749866cec7cb2 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Fri, 11 Aug 2023 12:34:41 +0400 Subject: [PATCH 16/70] Refactor: Bindings settgins and docs --- Package.swift | 2 + c/lib.cpp | 10 ++++ c/usearch.h | 9 +++ docs/compilation.md | 6 +- golang/lib.go | 2 +- java/cloud/unum/usearch/Index.java | 1 + rust/lib.cpp | 30 ++-------- rust/lib.hpp | 4 +- rust/lib.rs | 89 ++++++++++++++++++++++++++++++ 9 files changed, 122 insertions(+), 31 deletions(-) diff --git a/Package.swift b/Package.swift index b9a898d9..37158ecc 100644 --- a/Package.swift +++ b/Package.swift @@ -27,12 +27,14 @@ let package = Package( name: "USearch", dependencies: ["USearchObjective"], path: "swift", + exclude: ["README.md", "Test.swift"], sources: ["USearch.swift", "Index+Sugar.swift"] ), .testTarget( name: "USearchTests", dependencies: ["USearch"], path: "swift", + exclude: ["USearch.swift", "Index+Sugar.swift", "README.md"], sources: ["Test.swift"] ) ], diff --git a/c/lib.cpp b/c/lib.cpp index 282bcf29..9570fd87 100644 --- a/c/lib.cpp +++ b/c/lib.cpp @@ -198,4 +198,14 @@ USEARCH_EXPORT bool usearch_remove(usearch_index_t index, usearch_key_t key, use *error = result.error.release(); return result.completed; } + +USEARCH_EXPORT bool usearch_rename(usearch_index_t index, usearch_key_t from, usearch_key_t to, + usearch_error_t* error) { + + assert(index && error); + labeling_result_t result = reinterpret_cast(index)->rename(from, to); + if (!result) + *error = result.error.release(); + return result.completed; +} } diff --git a/c/usearch.h b/c/usearch.h index 6f9cf7a6..1498a9ac 100644 --- a/c/usearch.h +++ b/c/usearch.h @@ -185,6 +185,15 @@ USEARCH_EXPORT bool usearch_get( // */ USEARCH_EXPORT bool usearch_remove(usearch_index_t, usearch_key_t key, usearch_error_t* error); +/** + * @brief Renames the vector to map to a different key. + * @param[in] from The key of the vector to be renamed. + * @param[in] to New key for found entry. + * @param[out] error Pointer to a string where the error message will be stored, if an error occurs. + * @return `true` if the vector is successfully removed, `false` if the vector is not found. 
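+ *
+ * A minimal call sketch (illustrative; assumes `index` was created earlier and
+ * already holds an entry under key 42):
+ *
+ *      usearch_error_t error = NULL;
+ *      usearch_rename(index, 42, 43, &error);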
+ */ +USEARCH_EXPORT bool usearch_rename(usearch_index_t, usearch_key_t from, usearch_key_t to, usearch_error_t* error); + #ifdef __cplusplus } #endif diff --git a/docs/compilation.md b/docs/compilation.md index 51d3e24e..cdf948a9 100644 --- a/docs/compilation.md +++ b/docs/compilation.md @@ -156,19 +156,19 @@ There are a few ways to compile the C 99 USearch SDK. Using the Makefile: ```sh -make -C ./c build +make -C ./c make -C ./c libusearch_c.so ``` Using CMake: ```sh -cmake -B ./build_release -DUSEARCH_BUILD_C=1 && make -C ./build_release -j +cmake -B ./build_release -DUSEARCH_BUILD_CLIB=1 && make -C ./build_release -j ``` Linux: ```sh -g++ -std=c++11 -shared -fPIC c/lib.cpp -I ./include/ -I ./fp16/include/ -I ./robin-map/include/ -o libusearch_c.a +g++ -std=c++11 -shared -fPIC c/lib.cpp -I ./include/ -I ./fp16/include/ -I ./robin-map/include/ -o libusearch_c.so ``` diff --git a/golang/lib.go b/golang/lib.go index 39739e84..f13df247 100644 --- a/golang/lib.go +++ b/golang/lib.go @@ -7,7 +7,7 @@ import ( /* #cgo CFLAGS: -I${SRCDIR}/../c/ -#cgo LDFLAGS: -L${SRCDIR}/. -Wl,-rpath,$SRCDIR/. -lusearch_c +#cgo LDFLAGS: -L${SRCDIR}/. -Wl,-rpath,\$ORIGIN/ -lusearch_c #include #include */ diff --git a/java/cloud/unum/usearch/Index.java b/java/cloud/unum/usearch/Index.java index 3b234720..731bd305 100644 --- a/java/cloud/unum/usearch/Index.java +++ b/java/cloud/unum/usearch/Index.java @@ -135,6 +135,7 @@ public Config expansion_search(long _expansion_search) { public static void main(String[] args) { Index index = new Index.Config().metric("cos").dimensions(100).build(); index.size(); + System.out.println("Java tests passed!"); } private static native long c_create(// diff --git a/rust/lib.cpp b/rust/lib.cpp index 603cc5b7..97f42a9d 100644 --- a/rust/lib.cpp +++ b/rust/lib.cpp @@ -11,30 +11,6 @@ using labeling_result_t = typename index_t::labeling_result_t; Index::Index(std::unique_ptr index) : index_(std::move(index)) {} -void Index::add_in_thread(key_t key, rust::Slice vector, size_t thread) const { - index_update_config_t config; - config.thread = thread; - config.expansion = index_->expansion_add(); - index_->add(key, vector.data(), config).error.raise(); -} - -Matches Index::search_in_thread(rust::Slice vector, size_t count, size_t thread) const { - Matches matches; - matches.keys.reserve(count); - matches.distances.reserve(count); - for (size_t i = 0; i != count; ++i) - matches.keys.push_back(0), matches.distances.push_back(0); - index_search_config_t config; - config.thread = thread; - config.expansion = index_->expansion_search(); - search_result_t result = index_->search(vector.data(), count, config); - result.error.raise(); - count = result.dump_to(matches.keys.data(), matches.distances.data()); - matches.keys.truncate(count); - matches.distances.truncate(count); - return matches; -} - void Index::add(key_t key, rust::Slice vector) const { index_->add(key, vector.data()).error.raise(); } bool Index::remove(key_t key) const { @@ -43,6 +19,12 @@ bool Index::remove(key_t key) const { return result.completed; } +bool Index::rename(key_t from, key_t to) const { + labeling_result_t result = index_->rename(from, to); + result.error.raise(); + return result.completed; +} + bool Index::contains(key_t key) const { return index_->contains(key); } Matches Index::search(rust::Slice vector, size_t count) const { diff --git a/rust/lib.hpp b/rust/lib.hpp index 705f0d17..e3422219 100644 --- a/rust/lib.hpp +++ b/rust/lib.hpp @@ -22,13 +22,11 @@ class Index { void reserve(size_t) const; void add(key_t key, 
rust::Slice vector) const; - void add_in_thread(key_t key, rust::Slice vector, size_t thread) const; - Matches search(rust::Slice vector, size_t count) const; - Matches search_in_thread(rust::Slice vector, size_t count, size_t thread) const; bool remove(key_t key) const; bool contains(key_t key) const; + bool rename(key_t from, key_t to) const; size_t dimensions() const; size_t connectivity() const; diff --git a/rust/lib.rs b/rust/lib.rs index 1c9eb9d2..0ae28b3d 100644 --- a/rust/lib.rs +++ b/rust/lib.rs @@ -39,25 +39,114 @@ pub mod ffi { unsafe extern "C++" { include!("lib.hpp"); + /// Represents the USearch index. type Index; + /// Initializes a new instance of the index with the provided options. + /// + /// # Arguments + /// + /// * `options` - A reference to the `IndexOptions` structure containing initialization options. + /// + /// # Returns + /// + /// A `Result` which is `Ok` if the index is successfully initialized, or `Err` if an error occurs. pub fn new_index(options: &IndexOptions) -> Result>; + /// Reserves memory for a specified number of incoming vectors. + /// + /// # Arguments + /// + /// * `capacity` - The desired total capacity including the current size. pub fn reserve(self: &Index, capacity: usize) -> Result<()>; + /// Retrieves the number of dimensions in the vectors indexed. pub fn dimensions(self: &Index) -> usize; + + /// Retrieves the connectivity parameter that limits connections-per-node in the graph. pub fn connectivity(self: &Index) -> usize; + + /// Retrieves the current number of vectors in the index. pub fn size(self: &Index) -> usize; + + /// Retrieves the total capacity of the index, including reserved space. pub fn capacity(self: &Index) -> usize; + /// Adds a vector with a specified key to the index. + /// + /// # Arguments + /// + /// * `key` - The key associated with the vector. + /// * `vector` - A slice containing the vector data. pub fn add(self: &Index, key: u64, vector: &[f32]) -> Result<()>; + + /// Performs k-Approximate Nearest Neighbors (kANN) Search for closest vectors to the provided query. + /// + /// # Arguments + /// + /// * `query` - A slice containing the query vector data. + /// * `count` - The maximum number of neighbors to search for. + /// + /// # Returns + /// + /// A `Result` containing the matches found. pub fn search(self: &Index, query: &[f32], count: usize) -> Result; + + /// Removes the vector associated with the given key from the index. + /// + /// # Arguments + /// + /// * `key` - The key of the vector to be removed. + /// + /// # Returns + /// + /// `true` if the vector is successfully removed, `false` otherwise. pub fn remove(self: &Index, key: u64) -> Result; + + /// Renames the vector under a certain key. + /// + /// # Arguments + /// + /// * `from` - The key of the vector to be renamed. + /// * `to` - The new name. + /// + /// # Returns + /// + /// `true` if the vector is successfully renamed, `false` otherwise. + pub fn rename(self: &Index, from: u64, to: u64) -> Result; + + /// Checks if the index contains a vector with a specified key. + /// + /// # Arguments + /// + /// * `key` - The key to be checked. + /// + /// # Returns + /// + /// `true` if the index contains the vector with the given key, `false` otherwise. pub fn contains(self: &Index, key: u64) -> bool; + /// Saves the index to a specified file. + /// + /// # Arguments + /// + /// * `path` - The file path where the index will be saved. pub fn save(self: &Index, path: &str) -> Result<()>; + + /// Loads the index from a specified file. 
+ /// + /// # Arguments + /// + /// * `path` - The file path from where the index will be loaded. pub fn load(self: &Index, path: &str) -> Result<()>; + + /// Creates a view of the index from a file without loading it into memory. + /// + /// # Arguments + /// + /// * `path` - The file path from where the view will be created. pub fn view(self: &Index, path: &str) -> Result<()>; + } } From a1b9b3883785cf8f16e02b932b7ddf00c7058645 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Fri, 11 Aug 2023 17:40:46 +0400 Subject: [PATCH 17/70] Refactor: Prepare to move GoLang builds --- .github/workflows/prerelease.yml | 16 -------------- golang/README.md | 38 ++++++++++++++++++++++++++------ 2 files changed, 31 insertions(+), 23 deletions(-) diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index fe8fcfe1..b6c030cb 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -122,22 +122,6 @@ jobs: toolchain: stable override: true - test_golang: - name: Test GoLang - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - run: git submodule update --init --recursive - - name: Set up Go - uses: actions/setup-go@v4 - with: - go-version: '1.15' - - name: Build C library for cGo - run: | - make -C ./c libusearch_c.so - mv ./c/libusearch_c.so ./golang/libusearch_c.so - cd golang && ls && go test -v - test_java: name: Test Java runs-on: ubuntu-latest diff --git a/golang/README.md b/golang/README.md index 16b4c19c..b623ad46 100644 --- a/golang/README.md +++ b/golang/README.md @@ -4,7 +4,7 @@ ```golang import ( - "github.com/unum-cloud/usearch/golang" + "github.com/unum-cloud/usearch/golang-go" ) ``` @@ -15,14 +15,38 @@ package main import ( "fmt" - "github.com/unum-cloud/usearch/golang" + "github.com/unum-cloud/usearch/golang-go" ) func main() { - conf := usearch.DefaultConfig(128) - index := usearch.NewIndex(conf) - v := make([]float32, 128) - index.Add(42, v) - results := index.Search(v, 1) + dim := uint(128) + conf := DefaultConfig(dim) + ind, err := NewIndex(conf) + if err != nil { + panic("Failed to construct the index: %s", err) + } + defer ind.Destroy() + + err = ind.Reserve(100) + if err != nil { + panic("Failed to reserve capacity: %s", err) + } + + vec := make([]float32, dim) + vec[0] = 40.0 + vec[1] = 2.0 + + err = ind.Add(42, vec) + if err != nil { + panic("Failed to insert: %s", err) + } + + keys, distances, err := ind.Search(vec, 10) + if err != nil { + panic("Failed to search: %s", err) + } + if keys[0] != 42 || distances[0] != 0.0 { + panic("Expected result 42 with distance 0, got key %d with distance %f", keys[0], distances[0]) + } } ``` From 1b3ae10dfef275b3a7ca906e1faca016992a6fd8 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Fri, 11 Aug 2023 18:02:30 +0400 Subject: [PATCH 18/70] Docs: Mention integratiosn --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 5102e9bc..8c394bf9 100644 --- a/README.md +++ b/README.md @@ -319,10 +319,10 @@ matches = index.search(fingerprints, 10) ## Integrations -- [x] GPT-Cache. -- [x] LangChain. -- [ ] ClickHouse. -- [ ] Microsoft Semantic Kernel. +- [x] GPTCache: [Python](https://github.com/zilliztech/GPTCache/releases/tag/0.1.29). +- [x] LangChain: [Python](https://github.com/langchain-ai/langchain/releases/tag/v0.0.257) and [JavaScipt](https://github.com/hwchase17/langchainjs/releases/tag/0.0.125). 
+- [ ] Microsoft Semantic Kernel: [Python](https://github.com/microsoft/semantic-kernel/pull/2358) and C#. +- [ ] ClickHouse: C++. ## Citations @@ -332,8 +332,8 @@ doi = {10.5281/zenodo.7949416}, author = {Vardanian, Ash}, title = {{USearch by Unum Cloud}}, url = {https://github.com/unum-cloud/usearch}, -version = {0.13.0}, -year = {2022} +version = {1.0.0}, +year = {2022}, month = jun, } ``` From 1227ff0fc0636e88de5caa05d6d79202448f162a Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sun, 13 Aug 2023 18:28:08 +0400 Subject: [PATCH 19/70] Add: Rename, remove, count and check in batches --- include/usearch/index_dense.hpp | 60 ++-- python/lib.cpp | 269 ++++++++++----- python/scripts/test.py | 30 +- python/usearch/eval.py | 2 +- python/usearch/index.py | 592 +++++++++++++++++++------------- 5 files changed, 595 insertions(+), 358 deletions(-) diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 2b9cc414..89babbc8 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -96,7 +96,7 @@ struct index_dense_config_t : public index_config_t { std::size_t expansion_add = default_expansion_add(); std::size_t expansion_search = default_expansion_search(); bool exclude_vectors = false; - bool allow_key_collisions = false; + bool ban_collisions = false; index_dense_config_t(index_config_t base) noexcept : index_config_t(base) {} @@ -528,11 +528,11 @@ class index_dense_gt { cluster_result_t cluster(f32_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from_f32); } cluster_result_t cluster(f64_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from_f64); } - bool get(key_t key, b1x8_t* vector) const { return get_(key, vector, casts_.to_b1x8); } - bool get(key_t key, i8_bits_t* vector) const { return get_(key, vector, casts_.to_i8); } - bool get(key_t key, f16_t* vector) const { return get_(key, vector, casts_.to_f16); } - bool get(key_t key, f32_t* vector) const { return get_(key, vector, casts_.to_f32); } - bool get(key_t key, f64_t* vector) const { return get_(key, vector, casts_.to_f64); } + bool get(key_t key, b1x8_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to_b1x8); } + bool get(key_t key, i8_bits_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to_i8); } + bool get(key_t key, f16_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to_f16); } + bool get(key_t key, f32_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to_f32); } + bool get(key_t key, f64_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to_f64); } // clang-format on /** @@ -1197,7 +1197,7 @@ class index_dense_gt { key_t key, scalar_at const* vector, // std::size_t thread, bool force_vector_copy, cast_t const& cast) { - if (!config_.allow_key_collisions && contains(key)) + if (config_.ban_collisions && contains(key)) return add_result_t{}.failed("Duplicate keys not allowed in high-level wrappers"); // Cast the vector, if needed for compatibility with `metric_` @@ -1319,22 +1319,40 @@ class index_dense_gt { } } - template bool get_(key_t key, scalar_at* reconstructed, cast_t const& cast) const { - compressed_slot_t slot; - // Find the matching 
ID - { + template + std::size_t get_(key_t key, scalar_at* reconstructed, std::size_t vectors_limit, cast_t const& cast) const { + + if (config_.ban_collisions) { + compressed_slot_t slot; + // Find the matching ID + { + shared_lock_t lock(slot_lookup_mutex_); + auto it = slot_lookup_.find(key); + if (it == slot_lookup_.end()) + return false; + slot = (*it).slot; + } + // Export the entry + byte_t const* punned_vector = reinterpret_cast(vectors_lookup_[slot]); + bool casted = cast(punned_vector, dimensions(), (byte_t*)reconstructed); + if (!casted) + std::memcpy(reconstructed, punned_vector, metric_.bytes_per_vector()); + return true; + } else { shared_lock_t lock(slot_lookup_mutex_); - auto it = slot_lookup_.find(key); - if (it == slot_lookup_.end()) - return false; - slot = (*it).slot; + auto equal_range_pair = slot_lookup_.equal_range(key); + std::size_t count_exported = 0; + for (auto begin = equal_range_pair.first; + begin != equal_range_pair.second && count_exported != vectors_limit; ++begin, ++count_exported) { + compressed_slot_t slot = (*begin).slot; + byte_t const* punned_vector = reinterpret_cast(vectors_lookup_[slot]); + byte_t* reconstructed_vector = (byte_t*)reconstructed + metric_.bytes_per_vector() * count_exported; + bool casted = cast(punned_vector, dimensions(), reconstructed_vector); + if (!casted) + std::memcpy(reconstructed_vector, punned_vector, metric_.bytes_per_vector()); + } + return count_exported; } - // Export the entry - byte_t const* punned_vector = reinterpret_cast(vectors_lookup_[slot]); - bool casted = cast(punned_vector, dimensions(), (byte_t*)reconstructed); - if (!casted) - std::memcpy(reconstructed, punned_vector, metric_.bytes_per_vector()); - return true; } template static casts_t make_casts_() { diff --git a/python/lib.cpp b/python/lib.cpp index 1889a637..3d96d281 100644 --- a/python/lib.cpp +++ b/python/lib.cpp @@ -67,12 +67,12 @@ struct dense_index_py_t : public index_dense_t { struct dense_indexes_py_t { std::vector> shards_; - void add(std::shared_ptr shard) { shards_.push_back(shard); } + void merge(std::shared_ptr shard) { shards_.push_back(shard); } std::size_t bytes_per_vector() const noexcept { return shards_.empty() ? 0 : shards_[0]->bytes_per_vector(); } std::size_t scalar_words() const noexcept { return shards_.empty() ? 
0 : shards_[0]->scalar_words(); } index_limits_t limits() const noexcept { return {size(), std::numeric_limits::max()}; } - void add_paths(std::vector const& paths, bool view = true, std::size_t threads = 0) { + void merge_paths(std::vector const& paths, bool view = true, std::size_t threads = 0) { if (!threads) threads = std::thread::hardware_concurrency(); @@ -100,7 +100,7 @@ struct dense_indexes_py_t { }; template -metric_t typed_udf( // +metric_t wrap_typed_user_defined_metric( // metric_kind_t kind, metric_signature_t signature, std::uintptr_t metric_uintptr, // scalar_kind_t scalar_kind, std::size_t dimensions) { // @@ -134,16 +134,21 @@ metric_t typed_udf( return metric_t(stl_function, dimensions, kind, scalar_kind); } -metric_t udf( // +metric_t wrap_user_defined_metric( // metric_kind_t kind, metric_signature_t signature, std::uintptr_t metric_uintptr, // scalar_kind_t scalar_kind, std::size_t dimensions) { switch (scalar_kind) { - case scalar_kind_t::b1x8_k: return typed_udf(kind, signature, metric_uintptr, scalar_kind, dimensions); - case scalar_kind_t::i8_k: return typed_udf(kind, signature, metric_uintptr, scalar_kind, dimensions); - case scalar_kind_t::f16_k: return typed_udf(kind, signature, metric_uintptr, scalar_kind, dimensions); - case scalar_kind_t::f32_k: return typed_udf(kind, signature, metric_uintptr, scalar_kind, dimensions); - case scalar_kind_t::f64_k: return typed_udf(kind, signature, metric_uintptr, scalar_kind, dimensions); + case scalar_kind_t::b1x8_k: + return wrap_typed_user_defined_metric(kind, signature, metric_uintptr, scalar_kind, dimensions); + case scalar_kind_t::i8_k: + return wrap_typed_user_defined_metric(kind, signature, metric_uintptr, scalar_kind, dimensions); + case scalar_kind_t::f16_k: + return wrap_typed_user_defined_metric(kind, signature, metric_uintptr, scalar_kind, dimensions); + case scalar_kind_t::f32_k: + return wrap_typed_user_defined_metric(kind, signature, metric_uintptr, scalar_kind, dimensions); + case scalar_kind_t::f64_k: + return wrap_typed_user_defined_metric(kind, signature, metric_uintptr, scalar_kind, dimensions); default: return {}; } } @@ -156,12 +161,15 @@ static dense_index_py_t make_index( // std::size_t expansion_search, // metric_kind_t metric_kind, // metric_signature_t metric_signature, // - std::uintptr_t metric_uintptr) { + std::uintptr_t metric_uintptr, // + bool multi) { index_dense_config_t config(connectivity, expansion_add, expansion_search); + config.ban_collisions = !multi; + metric_t metric = // metric_uintptr // - ? udf(metric_kind, metric_signature, metric_uintptr, scalar_kind, dimensions) + ? wrap_user_defined_metric(metric_kind, metric_signature, metric_uintptr, scalar_kind, dimensions) : metric_t(dimensions, metric_kind, scalar_kind); return index_dense_t::make(metric, config); } @@ -241,7 +249,7 @@ static void add_many_to_index( // throw std::invalid_argument("Incompatible key type!"); if (keys_info.ndim != 1) - throw std::invalid_argument("Labels must be placed in a single-dimensional array!"); + throw std::invalid_argument("Keys must be placed in a single-dimensional array!"); if (vectors_info.ndim != 2) throw std::invalid_argument("Expects a matrix of vectors to add!"); @@ -545,7 +553,7 @@ static py::tuple search_many_brute_force( // std::size_t dimensions = static_cast(queries_dimensions); metric_t metric = // metric_uintptr // - ? udf(metric_kind, metric_signature, metric_uintptr, queries_kind, dimensions) + ? 
wrap_user_defined_metric(metric_kind, metric_signature, metric_uintptr, queries_kind, dimensions) : metric_t(dimensions, metric_kind, queries_kind); // clang-format off @@ -575,8 +583,8 @@ static void cluster_typed( // py::array_t& keys_py, py::array_t& distances_py, // std::atomic& stats_visited_members, std::atomic& stats_computed_distances) { - auto keys_py1d = keys_py.template mutable_unchecked<1>(); - auto distances_py1d = distances_py.template mutable_unchecked<1>(); + auto keys_py2d = keys_py.template mutable_unchecked<2>(); + auto distances_py2d = distances_py.template mutable_unchecked<2>(); Py_ssize_t vectors_count = vectors_info.shape[0]; byte_t const* vectors_data = reinterpret_cast(vectors_info.ptr); @@ -595,8 +603,8 @@ static void cluster_typed( // return false; } - keys_py1d(task_idx) = result.cluster.member.key; - distances_py1d(task_idx) = result.cluster.distance; + keys_py2d(task_idx, 0) = result.cluster.member.key; + distances_py2d(task_idx, 0) = result.cluster.distance; stats_visited_members += result.visited_members; stats_computed_distances += result.computed_distances; @@ -646,8 +654,8 @@ static py::tuple cluster_many_in_index( // if (vectors_dimensions != static_cast(index.scalar_words())) throw std::invalid_argument("The number of vector dimensions doesn't match!"); - py::array_t keys_py(vectors_count); - py::array_t distances_py(vectors_count); + py::array_t keys_py({vectors_count, Py_ssize_t(1)}); + py::array_t distances_py({vectors_count, Py_ssize_t(1)}); py::array_t counts_py(vectors_count); std::atomic stats_visited_members(0); std::atomic stats_computed_distances(0); @@ -729,53 +737,49 @@ template typename index_at::stats_t compute_level_stats(inde // clang-format on template -py::object get_typed_member(index_at const& index, key_t key) { - py::array_t result_py(static_cast(index.scalar_words())); - auto result_py1d = result_py.template mutable_unchecked<1>(); - if (!index.get(key, (internal_at*)&result_py1d(0))) - return py::none(); - return py::object(result_py); +static py::tuple get_typed_vectors_for_keys(index_at const& index, py::buffer keys) { + + py::buffer_info keys_info = keys.request(); + if (keys_info.ndim != 1) + throw std::invalid_argument("Keys must be placed in a single-dimensional array!"); + + Py_ssize_t keys_count = keys_info.shape[0]; + byte_t const* keys_data = reinterpret_cast(keys_info.ptr); + py::tuple results(keys_count); + + for (Py_ssize_t task_idx = 0; task_idx != keys_count; ++task_idx) { + key_t key = *reinterpret_cast(keys_data + task_idx * keys_info.strides[0]); + std::size_t vectors_count = index.count(key); + if (!vectors_count) { + results[task_idx] = py::none(); + continue; + } + + py::array_t result_py({static_cast(vectors_count), // + static_cast(index.scalar_words())}); + auto result_py2d = result_py.template mutable_unchecked<2>(); + index.get(key, (internal_at*)&result_py2d(0, 0), vectors_count); + results[task_idx] = result_py; + } + + return results; } -template py::object get_member(index_at const& index, key_t key, scalar_kind_t scalar_kind) { +template py::tuple get_many(index_at const& index, py::buffer keys, scalar_kind_t scalar_kind) { if (scalar_kind == scalar_kind_t::f32_k) - return get_typed_member(index, key); + return get_typed_vectors_for_keys(index, keys); else if (scalar_kind == scalar_kind_t::f64_k) - return get_typed_member(index, key); + return get_typed_vectors_for_keys(index, keys); else if (scalar_kind == scalar_kind_t::f16_k) - return get_typed_member(index, key); + return 
get_typed_vectors_for_keys(index, keys); else if (scalar_kind == scalar_kind_t::i8_k) - return get_typed_member(index, key); + return get_typed_vectors_for_keys(index, keys); else if (scalar_kind == scalar_kind_t::b1x8_k) - return get_typed_member(index, key); + return get_typed_vectors_for_keys(index, keys); else throw std::invalid_argument("Incompatible scalars in the query matrix!"); } -template py::array_t get_keys(index_at const& index, std::size_t offset, std::size_t limit) { - limit = std::min(index.size(), limit); - py::array_t result_py(static_cast(limit)); - auto result_py1d = result_py.template mutable_unchecked<1>(); - index.export_keys(&result_py1d(0), offset, limit); - return result_py; -} - -template py::array_t get_all_keys(index_at const& index) { - return get_keys(index, 0, index.size()); -} - -template bool has_duplicates(element_at const* begin, element_at const* end) { - if (begin == end) - return false; - element_at const* last = begin; - begin++; - for (; begin != end; ++begin, ++last) { - if (*begin == *last) - return true; - } - return false; -} - PYBIND11_MODULE(compiled, m) { m.doc() = "Smaller & Faster Single-File Vector Search Engine from Unum"; @@ -875,44 +879,74 @@ PYBIND11_MODULE(compiled, m) { py::arg("expansion_search") = default_expansion_search(), // py::arg("metric_kind") = metric_kind_t::cos_k, // py::arg("metric_signature") = metric_signature_t::array_array_k, // - py::arg("metric_pointer") = 0 // + py::arg("metric_pointer") = 0, // + py::arg("multi") = false // ); - i.def( // - "add", &add_many_to_index, // - py::arg("keys"), // - py::arg("vectors"), // - py::kw_only(), // - py::arg("copy") = true, // - py::arg("threads") = 0 // + i.def( // + "add_many", &add_many_to_index, // + py::arg("keys"), // + py::arg("vectors"), // + py::kw_only(), // + py::arg("copy") = true, // + py::arg("threads") = 0 // ); - i.def( // - "search", &search_many_in_index, // - py::arg("query"), // - py::arg("count") = 10, // - py::arg("exact") = false, // - py::arg("threads") = 0 // + i.def( // + "search_many", &search_many_in_index, // + py::arg("query"), // + py::arg("count") = 10, // + py::arg("exact") = false, // + py::arg("threads") = 0 // ); - i.def( // - "cluster", &cluster_many_in_index, // - py::arg("query"), // - py::arg("level") = 1, // - py::arg("threads") = 0 // + i.def( // + "cluster_many", &cluster_many_in_index, // + py::arg("query"), // + py::arg("level") = 1, // + py::arg("threads") = 0 // ); i.def( - "rename", + "rename_one_to_one", [](dense_index_py_t& index, key_t from, key_t to) -> bool { dense_labeling_result_t result = index.rename(from, to); forward_error(result); return result.completed; }, - py::arg("from"), py::arg("to")); + py::arg("from_"), py::arg("to")); + + i.def( + "rename_many_to_many", + [](dense_index_py_t& index, std::vector const& from, std::vector const& to) -> std::vector { + if (from.size() != to.size()) + throw std::invalid_argument("Sizes of `from` and `to` arrays don't match!"); + + std::vector results(from.size(), false); + for (std::size_t i = 0; i != from.size(); ++i) { + dense_labeling_result_t result = index.rename(from[i], to[i]); + results[i] = result.completed; + forward_error(result); + } + return results; + }, + py::arg("from_"), py::arg("to")); i.def( - "remove", + "rename_many_to_one", + [](dense_index_py_t& index, std::vector const& from, key_t to) -> std::vector { + std::vector results(from.size(), false); + for (std::size_t i = 0; i != from.size(); ++i) { + dense_labeling_result_t result = index.rename(from[i], to); 
+ results[i] = result.completed; + forward_error(result); + } + return results; + }, + py::arg("from_"), py::arg("to")); + + i.def( + "remove_one", [](dense_index_py_t& index, key_t key, bool compact, std::size_t threads) -> bool { dense_labeling_result_t result = index.remove(key); forward_error(result); @@ -930,7 +964,7 @@ PYBIND11_MODULE(compiled, m) { py::arg("key"), py::arg("compact"), py::arg("threads")); i.def( - "remove", + "remove_many", [](dense_index_py_t& index, std::vector const& keys, bool compact, std::size_t threads) -> std::size_t { dense_labeling_result_t result = index.remove(keys.begin(), keys.end()); forward_error(result); @@ -969,7 +1003,7 @@ PYBIND11_MODULE(compiled, m) { std::size_t dimensions = index.dimensions(); metric_t metric = // metric_uintptr // - ? udf(metric_kind, metric_signature, metric_uintptr, scalar_kind, dimensions) + ? wrap_user_defined_metric(metric_kind, metric_signature, metric_uintptr, scalar_kind, dimensions) : metric_t(dimensions, metric_kind, scalar_kind); index.change_metric(std::move(metric)); }, @@ -982,11 +1016,62 @@ PYBIND11_MODULE(compiled, m) { return isa_name(index.metric().isa_kind()); }); - i.def_property_readonly("keys", &get_all_keys); - i.def("get_keys", &get_keys, py::arg("offset") = 0, - py::arg("limit") = std::numeric_limits::max()); - i.def("__contains__", &dense_index_py_t::contains); - i.def("__getitem__", &get_member, py::arg("key"), py::arg("dtype") = scalar_kind_t::f32_k); + i.def("contains_one", &dense_index_py_t::contains); + i.def("count_one", &dense_index_py_t::count); + + i.def( // + "contains_many", [](dense_index_py_t const& index, py::array_t const& keys_py) -> py::array_t { + py::array_t results_py(keys_py.size()); + auto results_py1d = results_py.template mutable_unchecked<1>(); + auto keys_py1d = keys_py.template unchecked<1>(); + for (Py_ssize_t task_idx = 0; task_idx != keys_py.size(); ++task_idx) + results_py1d(task_idx) = index.contains(keys_py1d(task_idx)); + return results_py; + }); + + i.def( // + "count_many", [](dense_index_py_t const& index, py::array_t const& keys_py) -> py::array_t { + py::array_t results_py(keys_py.size()); + auto results_py1d = results_py.template mutable_unchecked<1>(); + auto keys_py1d = keys_py.template unchecked<1>(); + for (Py_ssize_t task_idx = 0; task_idx != keys_py.size(); ++task_idx) + results_py1d(task_idx) = index.count(keys_py1d(task_idx)); + return results_py; + }); + + i.def("get_many", &get_many, py::arg("keys"), py::arg("dtype") = scalar_kind_t::f32_k); + + i.def( + "get_keys_in_slice", + [](dense_index_py_t const& index, std::size_t offset, std::size_t limit) -> py::array_t { + limit = std::min(index.size(), limit); + py::array_t result_py(static_cast(limit)); + auto result_py1d = result_py.template mutable_unchecked<1>(); + index.export_keys(&result_py1d(0), offset, limit); + return result_py; + }, + py::arg("offset") = 0, py::arg("limit") = std::numeric_limits::max()); + + i.def( + "get_keys_at_offsets", + [](dense_index_py_t const& index, py::array_t const& offsets_py) -> py::array_t { + py::array_t result_py(offsets_py.size()); + auto result_py1d = result_py.template mutable_unchecked<1>(); + auto offsets_py1d = offsets_py.template unchecked<1>(); + for (Py_ssize_t task_idx = 0; task_idx != offsets_py.size(); ++task_idx) + index.export_keys(&result_py1d(task_idx), offsets_py1d(task_idx), 1); + return result_py; + }, + py::arg("offsets")); + + i.def( + "get_key_at_offset", + [](dense_index_py_t const& index, std::size_t offset) -> key_t { + key_t result; + 
index.export_keys(&result, offset, 1); + return result; + }, + py::arg("offset")); i.def("save", &save_index, py::arg("path")); i.def("load", &load_index, py::arg("path")); @@ -1011,14 +1096,14 @@ PYBIND11_MODULE(compiled, m) { auto is = py::class_(m, "Indexes"); is.def(py::init()); is.def("__len__", &dense_indexes_py_t::size); - is.def("add", &dense_indexes_py_t::add); - is.def("add_paths", &dense_indexes_py_t::add_paths, py::arg("paths"), py::arg("view") = true, + is.def("merge", &dense_indexes_py_t::merge); + is.def("merge_paths", &dense_indexes_py_t::merge_paths, py::arg("paths"), py::arg("view") = true, py::arg("threads") = 0); - is.def( // - "search", &search_many_in_index, // - py::arg("query"), // - py::arg("count") = 10, // - py::arg("exact") = false, // - py::arg("threads") = 0 // + is.def( // + "search_many", &search_many_in_index, // + py::arg("query"), // + py::arg("count") = 10, // + py::arg("exact") = false, // + py::arg("threads") = 0 // ); } diff --git a/python/scripts/test.py b/python/scripts/test.py index 4102361b..19519633 100644 --- a/python/scripts/test.py +++ b/python/scripts/test.py @@ -122,6 +122,7 @@ def test_minimal_index( connectivity=connectivity, expansion_add=DEFAULT_EXPANSION_ADD, expansion_search=DEFAULT_EXPANSION_SEARCH, + multi=False, ) assert index.ndim == ndim assert index.connectivity == connectivity @@ -145,7 +146,7 @@ def test_minimal_index( assert 43 not in index, "Presence in the index, false positive" assert index[42] is not None, "Vector recovery" assert index[43] is None, "Vector recovery, false positive" - assert len(index[42]) == ndim + assert len(index[42].flatten()) == ndim if numpy_type != np.byte: assert np.allclose(index[42], vector, atol=0.1) @@ -189,20 +190,20 @@ def test_minimal_index( index_copy = index.copy() assert len(index_copy) == 1 - assert len(index_copy[42]) == ndim + assert len(index_copy[42].flatten()) == ndim matches_copy: Matches = index_copy.search(vector, 10) assert np.all(matches_copy.keys == matches.keys) index.load(temporary_usearch_filename) assert len(index) == 1 - assert len(index[42]) == ndim + assert len(index[42].flatten()) == ndim matches_loaded: Matches = index.search(vector, 10) assert np.all(matches_loaded.keys == matches.keys) index = Index.restore(temporary_usearch_filename, view=True) assert len(index) == 1 - assert len(index[42]) == ndim + assert len(index[42].flatten()) == ndim matches_viewed: Matches = index.search(vector, 10) assert np.all(matches_viewed.keys == matches.keys) @@ -246,14 +247,21 @@ def test_index_batch( index_type: ScalarKind, numpy_type: str, ): - index = Index(ndim=ndim, metric=metric, dtype=index_type) + index = Index( + ndim=ndim, + metric=metric, + dtype=index_type, + multi=False, + ) keys = np.arange(batch_size) vectors = random_vectors(count=batch_size, ndim=ndim, dtype=numpy_type) index.add(keys, vectors, threads=2) assert len(index) == batch_size - assert np.allclose(index.get_vectors(keys).astype(numpy_type), vectors, atol=0.1) + + vectors_retrived = np.vstack(index.get(keys)) + assert np.allclose(vectors_retrived.astype(numpy_type), vectors, atol=0.1) # Ban duplicates unless explicitly allowed with pytest.raises(Exception): @@ -277,7 +285,7 @@ def test_index_batch( index.load(temporary_usearch_filename) assert len(index) == batch_size - assert len(index[0]) == ndim + assert len(index[0].flatten()) == ndim if batch_size > 1: matches_loaded: BatchMatches = index.search(vectors, 10, threads=2) @@ -286,7 +294,7 @@ def test_index_batch( index = 
Index.restore(temporary_usearch_filename, view=True) assert len(index) == batch_size - assert len(index[0]) == ndim + assert len(index[0].flatten()) == ndim if batch_size > 1: matches_viewed: BatchMatches = index.search(vectors, 10, threads=2) @@ -295,7 +303,7 @@ def test_index_batch( # Test clustering if batch_size > 1: - clusters: BatchMatches = index.cluster(vectors, 1, threads=2) + clusters: BatchMatches = index.cluster(vectors, threads=2) assert len(clusters.keys) == batch_size # Cleanup @@ -380,7 +388,9 @@ def test_bitwise_index( bit_vectors = np.packbits(byte_vectors, axis=1) index.add(keys, bit_vectors) - assert np.all(index.get_vectors(keys, ScalarKind.B1) == bit_vectors) + + byte_vectors_retrieved = np.vstack(index.get(keys, ScalarKind.B1)) + assert np.all(byte_vectors_retrieved == bit_vectors) index.search(bit_vectors, 10) diff --git a/python/usearch/eval.py b/python/usearch/eval.py index c4c884bd..412a7f90 100644 --- a/python/usearch/eval.py +++ b/python/usearch/eval.py @@ -110,7 +110,7 @@ def self_recall(index: Index, sample: float = 1, **kwargs) -> SearchStats: if sample != 1: keys = np.random.choice(keys, int(ceil(len(keys) * sample))) - queries = index.get_vectors(keys, index.dtype) + queries = index.get(keys, index.dtype) matches: BatchMatches = index.search(queries, **kwargs) count_matches: float = matches.count_matches(keys) return SearchStats( diff --git a/python/usearch/index.py b/python/usearch/index.py index 13d799a2..b5541615 100644 --- a/python/usearch/index.py +++ b/python/usearch/index.py @@ -6,8 +6,17 @@ # into the primary `Index` class, connecting USearch with Numba. import os import math -from typing import Optional, Union, NamedTuple, List, Iterable from dataclasses import dataclass +from typing import ( + Optional, + Union, + NamedTuple, + List, + Iterable, + Tuple, + Dict, + Callable, +) import numpy as np from tqdm import tqdm @@ -34,8 +43,25 @@ MetricKind.Sorensen, ) + +class CompiledMetric(NamedTuple): + pointer: int + kind: MetricKind + signature: MetricSignature + + +os.PathLike + Key = np.uint64 +KeyOrKeysLike = Union[Key, Iterable[Key], int, Iterable[int], np.ndarray, memoryview] + +VectorOrVectorsLike = Union[np.ndarray, Iterable[np.ndarray], memoryview] + +DTypeLike = Union[str, ScalarKind] + +MetricLike = Union[str, MetricKind, CompiledMetric] + def _normalize_dtype(dtype, metric: MetricKind = MetricKind.Cos) -> ScalarKind: if dtype is None or dtype == "": @@ -86,7 +112,7 @@ def _to_numpy_dtype(dtype: ScalarKind): return _normalize[dtype] -def _normalize_metric(metric): +def _normalize_metric(metric) -> MetricKind: if metric is None: return MetricKind.Cos @@ -111,14 +137,12 @@ def _normalize_metric(metric): def _search_in_compiled( - *, - compiled: Union[_CompiledIndex, _CompiledIndexes], + compiled_callable: Callable, vectors: np.ndarray, - k: int, - threads: int, - exact: bool, + *, log: Union[str, bool], batch_size: int, + **kwargs, ) -> Union[Matches, BatchMatches]: # assert isinstance(vectors, np.ndarray), "Expects a NumPy array" @@ -150,26 +174,15 @@ def distil_batch( disable=log is False, ) for vectors in tasks: - tuple_ = compiled.search( - vectors, - k, - exact=exact, - threads=threads, - ) + tuple_ = compiled_callable(vectors, **kwargs) tasks_matches.append(BatchMatches(*tuple_)) pbar.update(vectors.shape[0]) pbar.close() return distil_batch( BatchMatches( - # Keys array can be 1 or 2 dimensional - keys=np.vstack([m.keys for m in tasks_matches]) - if tasks_matches[0].keys.ndim == 2 - else np.concatenate([m.keys for m in tasks_matches], 
axis=None), - # Distances array can be 1 or 2 dimensional - distances=np.vstack([m.distances for m in tasks_matches]) - if tasks_matches[0].distances.ndim == 2 - else np.concatenate([m.distances for m in tasks_matches], axis=None), + keys=np.vstack([m.keys for m in tasks_matches]), + distances=np.vstack([m.distances for m in tasks_matches]), counts=np.concatenate([m.counts for m in tasks_matches], axis=None), visited_members=sum([m.visited_members for m in tasks_matches]), computed_distances=sum([m.computed_distances for m in tasks_matches]), @@ -177,18 +190,13 @@ def distil_batch( ) else: - tuple_ = compiled.search( - vectors, - k, - exact=exact, - threads=threads, - ) + tuple_ = compiled_callable(vectors, **kwargs) return distil_batch(BatchMatches(*tuple_)) def _add_to_compiled( - *, compiled, + *, keys, vectors, copy: bool, @@ -239,13 +247,13 @@ def _add_to_compiled( disable=log is False, ) for keys, vectors in tasks: - compiled.add(keys, vectors, copy=copy, threads=threads) + compiled.add_many(keys, vectors, copy=copy, threads=threads) pbar.update(len(keys)) pbar.close() else: - compiled.add(keys, vectors, copy=copy, threads=threads) + compiled.add_many(keys, vectors, copy=copy, threads=threads) return keys @@ -323,56 +331,82 @@ def to_list(self) -> List[List[tuple]]: list_of_matches = [self.__getitem__(row) for row in range(self.__len__())] return [match.to_list() for matches in list_of_matches for match in matches] - def mean_recall(self, expected: np.ndarray, k: Optional[int] = None) -> float: + def mean_recall(self, expected: np.ndarray, count: Optional[int] = None) -> float: """Measures recall [0, 1] as of `Matches` that contain the corresponding `expected` entry anywhere among results.""" - return self.count_matches(expected, k=k) / len(expected) + return self.count_matches(expected, count=count) / len(expected) - def count_matches(self, expected: np.ndarray, k: Optional[int] = None) -> int: + def count_matches(self, expected: np.ndarray, count: Optional[int] = None) -> int: """Measures recall [0, len(expected)] as of `Matches` that contain the corresponding `expected` entry anywhere among results. 
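+        For example, with `count=1` only the first (closest) match of every query
+        is compared against the corresponding `expected` key.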
""" assert len(expected) == len(self) recall = 0 - if k is None: - k = self.keys.shape[1] + if count is None: + count = self.keys.shape[1] - if k == 1: + if count == 1: recall = np.sum(self.keys[:, 0] == expected) else: for i in range(len(self)): - recall += expected[i] in self.keys[i, :k] + recall += expected[i] in self.keys[i, :count] return recall def __repr__(self) -> str: return f"usearch.BatchMatches({np.sum(self.counts)} across {len(self)} queries)" -class CompiledMetric(NamedTuple): - pointer: int - kind: MetricKind - signature: MetricSignature +class IndexedKeys: + """Smart-reference for the range of keys present in a specific `Index`""" + + def __init__(self, index: Index) -> None: + self.index = index + + def __len__(self) -> int: + return len(self.index) + + def __getitem__( + self, + offset_offsets_or_slice: Union[int, np.ndarray, slice], + ) -> Union[Key, np.ndarray]: + if isinstance(offset_offsets_or_slice, slice): + start, stop, step = offset_offsets_or_slice.indices(len(self)) + if step: + raise + return self.index._compiled.get_keys_in_slice(start, stop - start) + + elif isinstance(offset_offsets_or_slice, Iterable): + offsets = np.array(offset_offsets_or_slice) + return self.index._compiled.get_keys_at_offsets(offsets) + + else: + offset = int(offset_offsets_or_slice) + return self.index._compiled.get_key_at_offset(offset) + + def __array__(self, dtype=None) -> np.ndarray: + return self.index._compiled.get_keys_in_slice().astype(dtype) class Index: - """Fast JIT-compiled vector-search index for dense equi-dimensional embeddings. + """Fast vector-search engine for dense equi-dimensional embeddings. Vector keys must be integers. Vectors must have the same number of dimensions within the index. - Supports Inner Product, Cosine Distance, Ln measures - like the Euclidean metric, as well as automatic downcasting - and quantization. + Supports Inner Product, Cosine Distance, L^n measures like the Euclidean metric, + as well as automatic downcasting to low-precision floating-point and integral + representations. """ def __init__( self, *, ndim: int = 0, - metric: Union[str, MetricKind, CompiledMetric] = MetricKind.Cos, - dtype: Optional[Union[str, ScalarKind]] = None, + metric: MetricLike = MetricKind.Cos, + dtype: Optional[DTypeLike] = None, connectivity: Optional[int] = None, expansion_add: Optional[int] = None, expansion_search: Optional[int] = None, + multi: bool = True, path: Optional[os.PathLike] = None, view: bool = False, ) -> None: @@ -385,41 +419,43 @@ def __init__( coordinates. Angular (Cos) and Euclidean (L2sq), obviously, apply to vectors with arbitrary number of dimensions. - :param metric: Distance function, defaults to MetricKind.Cos - :type metric: Union[MetricKind, Callable, str], optional + :param metric: Distance function + :type metric: MetricLike, defaults to MetricKind.Cos Kind of the distance function, or the Numba `cfunc` JIT-compiled object. Possible `MetricKind` values: IP, Cos, L2sq, Haversine, Pearson, Hamming, Tanimoto, Sorensen. - :param dtype: Scalar type for internal vector storage, defaults to None - :type dtype: Optional[Union[str, ScalarKind]], optional + :param dtype: Scalar type for internal vector storage + :type dtype: Optional[DTypeLike], defaults to None For continuous metrics can be: f16, f32, f64, or i8. For bitwise metrics it's implementation-defined, and can't change. Example: you can use the `f16` index with `f32` vectors in Euclidean space, which will be automatically downcasted. 
- :param connectivity: Connections per node in HNSW, defaults to None - :type connectivity: Optional[int], optional + :param connectivity: Connections per node in HNSW + :type connectivity: Optional[int], defaults to None Hyper-parameter for the number of Graph connections per layer of HNSW. The original paper calls it "M". Optional, but can't be changed after construction. - :param expansion_add: Traversal depth on insertions, defaults to None - :type expansion_add: Optional[int], optional + :param expansion_add: Traversal depth on insertions + :type expansion_add: Optional[int], defaults to None Hyper-parameter for the search depth when inserting new vectors. The original paper calls it "efConstruction". Can be changed afterwards, as the `.expansion_add`. - :param expansion_search: Traversal depth on queries, defaults to None - :type expansion_search: Optional[int], optional + :param expansion_search: Traversal depth on queries + :type expansion_search: Optional[int], defaults to None Hyper-parameter for the search depth when querying nearest neighbors. The original paper calls it "ef". Can be changed afterwards, as the `.expansion_search`. - :param path: Where to store the index, defaults to None - :type path: Optional[os.PathLike], optional - :param view: Are we simply viewing an immutable index, defaults to False - :type view: bool, optional + :param multi: Allow multiple vectors with the same key + :type multi: bool, defaults to True + :param path: Where to store the index + :type path: Optional[os.PathLike], defaults to None + :param view: Are we simply viewing an immutable index + :type view: bool, defaults to False """ if connectivity is None: @@ -457,6 +493,7 @@ def __init__( connectivity=connectivity, expansion_add=expansion_add, expansion_search=expansion_search, + multi=multi, metric_kind=self._metric_kind, metric_pointer=self._metric_pointer, metric_signature=self._metric_signature, @@ -494,10 +531,13 @@ def restore(path: os.PathLike, view: bool = False) -> Optional[Index]: view=view, ) + def __len__(self) -> int: + return self._compiled.__len__() + def add( self, - keys, - vectors, + keys: KeyOrKeysLike, + vectors: VectorOrVectorsLike, *, copy: bool = True, threads: int = 0, @@ -518,23 +558,23 @@ def add( pass `copy=False`, if you can guarantee the lifetime of the primary vectors store during the process of construction. 
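+        A minimal sketch of the single-vector and the batch forms (dimensions and
+        keys are illustrative)::
+
+            index.add(42, np.random.rand(index.ndim).astype(np.float32))
+            index.add(
+                np.array([43, 44], dtype=np.uint64),
+                np.random.rand(2, index.ndim).astype(np.float32),
+            )
+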
- :param keys: Unique identifier(s) for passed vectors, optional - :type keys: np.ndarray + :param keys: Unique identifier(s) for passed vectors + :type keys: Optional[KeyOrKeysLike], can be `None` :param vectors: Vector or a row-major matrix - :type vectors: np.ndarray - :param copy: Should the index store a copy of vectors, defaults to True - :type copy: bool, optional - :param threads: Optimal number of cores to use, defaults to 0 - :type threads: int, optional - :param log: Whether to print the progress bar, default to False - :type log: Union[str, bool], optional - :param batch_size: Number of vectors to process at once, defaults to 0 - :type batch_size: int, optional + :type vectors: VectorOrVectorsLike + :param copy: Should the index store a copy of vectors + :type copy: bool, defaults to True + :param threads: Optimal number of cores to use + :type threads: int, defaults to 0 + :param log: Whether to print the progress bar + :type log: Union[str, bool], defaults to False + :param batch_size: Number of vectors to process at once + :type batch_size: int, defaults to 0 :return: Inserted key or keys :type: Union[int, np.ndarray] """ return _add_to_compiled( - compiled=self._compiled, + self._compiled, keys=keys, vectors=vectors, copy=copy, @@ -545,8 +585,9 @@ def add( def search( self, - vectors, - k: int = 10, + vectors: VectorOrVectorsLike, + count: int = 10, + radius: float = math.inf, *, threads: int = 0, exact: bool = False, @@ -557,81 +598,110 @@ def search( Performs approximate nearest neighbors search for one or more queries. :param vectors: Query vector or vectors. - :type vectors: np.ndarray - :param k: Upper limit on the number of matches to find, defaults to 10 - :type k: int, optional - :param threads: Optimal number of cores to use, defaults to 0 - :type threads: int, optional - :param exact: Perform exhaustive linear-time exact search, defaults to False - :type exact: bool, optional + :type vectors: VectorOrVectorsLike + :param count: Upper count on the number of matches to find + :type count: int, defaults to 10 + :param threads: Optimal number of cores to use + :type threads: int, defaults to 0 + :param exact: Perform exhaustive linear-time exact search + :type exact: bool, defaults to False :param log: Whether to print the progress bar, default to False :type log: Union[str, bool], optional - :param batch_size: Number of vectors to process at once, defaults to 0 - :type batch_size: int, optional + :param batch_size: Number of vectors to process at once + :type batch_size: int, defaults to 0 :return: Matches for one or more queries :rtype: Union[Matches, BatchMatches] """ return _search_in_compiled( - compiled=self._compiled, - vectors=vectors, - k=k, - exact=exact, - threads=threads, + self._compiled.search_many, + vectors, + # Batch scheduling: log=log, batch_size=batch_size, + # Search constraints: + count=count, + exact=exact, + threads=threads, ) - def cluster( - self, - vectors, - level: int = 1, - *, - threads: int = 0, - log: Union[str, bool] = False, - batch_size: int = 0, - ) -> Union[Matches, BatchMatches]: - """ - Performs approximate nearest neighbors search for one or more queries. + def contains(self, keys: KeyOrKeysLike) -> Union[bool, np.ndarray]: + if isinstance(keys, Iterable): + return self._compiled.contains_many(np.array(keys, dtype=Key)) + else: + return self._compiled.contains_one(int(keys)) - :param vectors: Query vector or vectors. 
- :type vectors: np.ndarray - :param level: Graph level to target - higher means coarse, defaults to 1 - :type level: int, optional - :param threads: Optimal number of cores to use, defaults to 0 - :type threads: int, optional - :param log: Whether to print the progress bar, default to False - :type log: Union[str, bool], optional - :param batch_size: Number of vectors to process at once, defaults to 0 - :type batch_size: int, optional - :return: Matches for one or more queries - :rtype: Union[Matches, BatchMatches] + def __contains__(self, keys: KeyOrKeysLike) -> Union[bool, np.ndarray]: + return self.contains(keys) + + def count(self, keys: KeyOrKeysLike) -> Union[int, np.ndarray]: + if isinstance(keys, Iterable): + return self._compiled.count_many(np.array(keys, dtype=Key)) + else: + return self._compiled.count_one(int(keys)) + + def get( + self, + keys: KeyOrKeysLike, + dtype: Optional[DTypeLike] = None, + ) -> Union[Optional[np.ndarray], Tuple[Optional[np.ndarray]]]: + """Looks up one or more keys from the `Index`, retrieving corresponding vectors. + + Returns `None`, if one key is requested, and its not present. + Returns a (row) vector, if the key maps into a single vector. + Returns a (row-major) matrix, if the key maps into a multiple vectors. + If multiple keys are requested, composes many such responses into a `tuple`. + + :param keys: One or more keys to lookup + :type keys: KeyOrKeysLike + :return: One or more keys lookup results + :rtype: Union[Optional[np.ndarray], Tuple[Optional[np.ndarray]]] """ + if not dtype: + dtype = self.dtype - compiled_clustering = self._compiled.cluster + get_dtype = _to_numpy_compatible_dtype(dtype) + view_dtype = _to_numpy_dtype(dtype) - class WrappedDataset: - def search(self, query, k, **kwargs): - kwargs.pop("exact") - return compiled_clustering(query, k, **kwargs) + def cast(result): + if result is not None: + return result.view(view_dtype) + return result - return _search_in_compiled( - compiled=WrappedDataset(), - vectors=vectors, - k=level, - exact=False, - threads=threads, - log=log, - batch_size=batch_size, - ) + is_one = not isinstance(keys, Iterable) + if is_one: + keys = [keys] + if not isinstance(keys, np.ndarray): + keys = np.array(keys, dtype=Key) + + results = self._compiled.get_many(keys, get_dtype) + results = [cast(result) for result in results] + return results[0] if is_one else results + + def __getitem__( + self, keys: KeyOrKeysLike + ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: + """Looks up one or more keys from the `Index`, retrieving corresponding vectors. + + Returns `None`, if one key is requested, and its not present. + Returns a (row) vector, if the key maps into a single vector. + Returns a (row-major) matrix, if the key maps into a multiple vectors. + If multiple keys are requested, composes many such responses into a `tuple`. + + :param keys: One or more keys to lookup + :type keys: KeyOrKeysLike + :return: One or more keys lookup results + :rtype: Union[Optional[np.ndarray], Tuple[Optional[np.ndarray]]] + """ + return self.get(keys) def remove( self, - keys: Union[int, Iterable[int]], + keys: KeyOrKeysLike, *, compact: bool = False, threads: int = 0, - ) -> Union[bool, int]: + ) -> Union[int, np.ndarray]: """Removes one or move vectors from the index. When working with extremely large indexes, you may want to @@ -639,59 +709,70 @@ def remove( In other cases, rebuilding - is the recommended approach. 
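+        A minimal sketch, dropping a single key and then a batch of keys
+        (keys are illustrative)::
+
+            index.remove(42)
+            index.remove([43, 44])
+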
:param keys: Unique identifier for passed vectors, optional - :type keys: np.ndarray + :type keys: KeyOrKeysLike :param compact: Removes links to removed nodes (expensive), defaults to False :type compact: bool, optional :param threads: Optimal number of cores to use, defaults to 0 :type threads: int, optional - :return: Number of removed entries - :type: Union[bool, int] + :return: Array of integers for the number of removed vectors per key + :type: Union[int, np.ndarray] """ - return self._compiled.remove(keys, compact=compact, threads=threads) - - def rename(self, label_from: int, label_to: int) -> bool: - """Relabel existing entry""" - return self._compiled.rename(label_from, label_to) - - @property - def specs(self) -> dict: - return { - "Class": "usearch.Index", - "Connectivity": self.connectivity, - "Size": self.size, - "Dimensions": self.ndim, - "Expansion@Add": self.expansion_add, - "Expansion@Search": self.expansion_search, - "OpenMP": USES_OPENMP, - "SimSIMD": USES_SIMSIMD, - "NativeF16": USES_NATIVE_F16, - "JIT": self.jit, - "DType": self.dtype, - "Path": self.path, - } + if not isinstance(keys, Iterable): + return self._compiled.remove_one(keys, compact=compact, threads=threads) + else: + keys = np.array(keys, dtype=Key) + return self._compiled.remove_many(keys, compact=compact, threads=threads) - def __len__(self) -> int: - return self._compiled.__len__() + def __delitem__(self, keys: KeyOrKeysLike) -> Union[int, np.ndarray]: + raise self.remove(keys) - def __delitem__(self, key: int) -> bool: - raise self.remove(key) + def rename( + self, + from_: KeyOrKeysLike, + to: KeyOrKeysLike, + ) -> Union[int, np.ndarray]: + """Rename existing member vector or vectors. + + May be used in iterative clustering procedures, where one would iteratively + relabel every vector with the name of the cluster an entry belongs to, until + the system converges. + + :param from_: One or more keys to be renamed + :type from_: KeyOrKeysLike + :param to: New name or names (of identical length as `from_`) + :type to: KeyOrKeysLike + :return: Number of vectors that were found and renamed + :rtype: int + """ + if isinstance(from_, Iterable): + from_ = np.array(from_, dtype=Key) + if isinstance(to, Iterable): + to = np.array(to, dtype=Key) + return self._compiled.rename_many_to_many(from_, to) - def __contains__(self, key: int) -> bool: - return self._compiled.__contains__(key) + else: + return self._compiled.rename_many_to_one(from_, int(to)) - def __getitem__(self, key: int) -> np.ndarray: - dtype = self.dtype - get_dtype = _to_numpy_compatible_dtype(dtype) - vector = self._compiled.__getitem__(key, get_dtype) - view_dtype = _to_numpy_dtype(dtype) - return None if vector is None else vector.view(view_dtype) + else: + return self._compiled.rename_one_to_one(int(from_), int(to)) @property def jit(self) -> bool: + """ + :return: True, if the provided `metric` was JIT-ed + :rtype: bool + """ return self._metric_jit is not None @property def hardware_acceleration(self) -> str: + """Describes the kind of hardware-acceleration support used in + that exact instance of the `Index`, for that metric kind, and + the given number of dimensions. 
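A quick sketch of the batched `remove` and `rename` semantics described above, assuming NumPy key arrays as suggested by the `KeyOrKeysLike` hints (illustration only):

```py
import numpy as np
from usearch.index import Index

index = Index(ndim=3)
keys = np.arange(5)
index.add(keys, np.random.rand(5, 3).astype(np.float32))

removed_counts = index.remove(keys[:2])                 # per-key removal counts
renamed_count = index.rename(keys[2:], keys[2:] + 10)   # many-to-many relabeling
assert len(index) == 3
```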
+ + :return: "auto", if nothing is available, ISA subset name otherwise + :rtype: str + """ return self._compiled.hardware_acceleration @property @@ -707,7 +788,7 @@ def metric(self) -> Union[MetricKind, CompiledMetric]: return self._metric_jit if self._metric_jit else self._metric_kind @metric.setter - def metric(self, metric: Union[str, MetricKind, CompiledMetric]): + def metric(self, metric: MetricLike): metric = _normalize_metric(metric) if isinstance(metric, MetricKind): metric_kind = metric @@ -786,6 +867,9 @@ def reset(self): """Erases all members from index, closing files, and returning RAM to OS.""" self._compiled.reset() + def __del__(self): + self.reset() + def copy(self) -> Index: result = Index( ndim=self.ndim, @@ -799,7 +883,12 @@ def copy(self) -> Index: result._compiled = self._compiled.copy() return result - def join(self, other: Index, max_proposals: int = 0, exact: bool = False) -> dict: + def join( + self, + other: Index, + max_proposals: int = 0, + exact: bool = False, + ) -> Dict[Key, Key]: """Performs "Semantic Join" or pairwise matching between `self` & `other` index. Is different from `search`, as no collisions are allowed in resulting pairs. Uses the concept of "Stable Marriages" from Combinatorics, famous for the 2012 @@ -812,7 +901,7 @@ def join(self, other: Index, max_proposals: int = 0, exact: bool = False) -> dic :param exact: Controls if underlying `search` should be exact, defaults to False :type exact: bool, optional :return: Mapping from keys of `self` to keys of `other` - :rtype: dict + :rtype: Dict[Key, Key] """ return self._compiled.join( other=other._compiled, @@ -820,39 +909,56 @@ def join(self, other: Index, max_proposals: int = 0, exact: bool = False) -> dic exact=exact, ) - def get_keys(self, offset: int = 0, limit: int = 0) -> np.ndarray: - if limit == 0: - limit = 2**63 - 1 - return self._compiled.get_keys(offset, limit) - - @property - def keys(self) -> np.ndarray: - """Retrieves the keys of all vectors present in `self` + def cluster( + self, + vectors: np.ndarray, + *, + count: Optional[int] = None, + level: Optional[int] = None, + threads: int = 0, + log: Union[str, bool] = False, + batch_size: int = 0, + ) -> Union[Matches, BatchMatches]: + """ + Performs approximate nearest neighbors search for one or more queries. - :return: Array of keys - :rtype: np.ndarray + :param vectors: Query vector or vectors. 
+ :type vectors: VectorOrVectorsLike + :param count: Number of clusters to produce, can be inferred from `level` + :type count: Optional[int], defaults to None + :param level: Graph level to target - higher means coarse, can be inferred from `count` + :type level: Optional[int], defaults to None + + :param threads: Optimal number of cores to use, + :type threads: int, defaults to 0 + :param log: Whether to print the progress bar + :type log: Union[str, bool], defaults to False + :param batch_size: Number of vectors to process at once, defaults to 0 + :type batch_size: int, defaults to 0 + :return: Matches for one or more queries + :rtype: Union[Matches, BatchMatches] """ - return self._compiled.keys + if level is None and count is None: + level = 1 - def get_vectors( - self, - keys: np.ndarray, - dtype: ScalarKind = ScalarKind.F32, - ) -> np.ndarray: - """Retrieves vectors associated with given `keys` + return _search_in_compiled( + self._compiled.cluster_many, + vectors, + # Batch scheduling: + log=log, + batch_size=batch_size, + # Search constraints: + level=level, + threads=threads, + ) - :return: Matrix of vectors (row-major) - :rtype: np.ndarray - """ - dtype = _normalize_dtype(dtype, self._metric_kind) - get_dtype = _to_numpy_compatible_dtype(dtype) - vectors = np.vstack([self._compiled.__getitem__(l, get_dtype) for l in keys]) - view_dtype = _to_numpy_dtype(dtype) - return vectors.view(view_dtype) + @property + def keys(self) -> IndexedKeys: + return IndexedKeys(self) @property def vectors(self) -> np.ndarray: - return self.get_vectors(self.keys, self.dtype) + return self.get(self.keys, vstack=True) @property def max_level(self) -> int: @@ -887,8 +993,22 @@ def level_stats(self, level: int) -> _CompiledIndexStats: """ return self._compiled.level_stats(level) - def __del__(self): - self.reset() + @property + def specs(self) -> Dict[str, Union[str, int, bool]]: + return { + "Class": "usearch.Index", + "Connectivity": self.connectivity, + "Size": self.size, + "Dimensions": self.ndim, + "Expansion@Add": self.expansion_add, + "Expansion@Search": self.expansion_search, + "OpenMP": USES_OPENMP, + "SimSIMD": USES_SIMSIMD, + "NativeF16": USES_NATIVE_F16, + "JIT": self.jit, + "DType": self.dtype, + "Path": self.path, + } def __repr__(self) -> str: f = "usearch.Index({} x {}, {}, expansion: {} & {}, {} vectors in {} levels)" @@ -941,11 +1061,14 @@ def __init__( ) -> None: self._compiled = _CompiledIndexes() for index in indexes: - self._compiled.add(index._compiled) - self._compiled.add_paths(paths, view=view, threads=threads) + self._compiled.merge(index._compiled) + self._compiled.merge_paths(paths, view=view, threads=threads) + + def merge(self, index: Index): + self._compiled.merge(index._compiled) - def add(self, index: Index): - self._compiled.add(index._compiled) + def merge_path(self, path: os.PathLike): + self._compiled.merge_path(os.fspath(path)) def __len__(self) -> int: return self._compiled.__len__() @@ -953,27 +1076,29 @@ def __len__(self) -> int: def search( self, vectors, - k: int = 10, + count: int = 10, *, threads: int = 0, exact: bool = False, ): return _search_in_compiled( - compiled=self._compiled, - vectors=vectors, - k=k, - exact=exact, - threads=threads, + self._compiled.search_many, + vectors, + # Batch scheduling: log=False, batch_size=None, + # Search constraints: + count=count, + exact=exact, + threads=threads, ) def search( dataset: np.ndarray, query: np.ndarray, - k: int = 10, - metric: Union[str, MetricKind, CompiledMetric] = MetricKind.Cos, + count: int = 10, + 
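Since `Indexes.add` becomes `merge` in this patch, a short sketch of building and querying a sharded collection may help (illustrative only, mirroring the `test_multi_index` test below):

```py
import numpy as np
from usearch.index import Index, Indexes

shard_a, shard_b = Index(ndim=8), Index(ndim=8)
shard_a.add(1, np.random.rand(8).astype(np.float32))
shard_b.add(2, np.random.rand(8).astype(np.float32))

shards = Indexes([shard_a, shard_b])   # or attach shards later with `shards.merge(...)`
matches = shards.search(np.random.rand(8).astype(np.float32), count=2)
assert len(matches) == 2
```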
metric: MetricLike = MetricKind.Cos, *, exact: bool = False, threads: int = 0, @@ -988,11 +1113,11 @@ def search( :param query: Query vector or vectors (also row-major), to find in `dataset`. :type query: np.ndarray - :param k: Upper limit on the number of matches to find, defaults to 10 - :type k: int, optional + :param count: Upper count on the number of matches to find, defaults to 10 + :type count: int, optional - :param metric: Distance function, defaults to MetricKind.Cos - :type metric: Union[MetricKind, Callable, str], optional + :param metric: Distance function + :type metric: MetricLike, defaults to MetricKind.Cos Kind of the distance function, or the Numba `cfunc` JIT-compiled object. Possible `MetricKind` values: IP, Cos, L2sq, Haversine, Pearson, Hamming, Tanimoto, Sorensen. @@ -1026,7 +1151,7 @@ def search( ) return index.search( query, - k, + count, threads=threads, log=log, batch_size=batch_size, @@ -1044,28 +1169,27 @@ def search( else: raise ValueError("The `metric` must be a `CompiledMetric` or a `MetricKind`") - class WrappedDataset: - def search(self, query, k, **kwargs): - kwargs.pop("exact") - kwargs.update( - dict( - metric_kind=metric_kind, - metric_pointer=metric_pointer, - metric_signature=metric_signature, - ) - ) - assert dataset.shape[1] == query.shape[1], "Number of dimensions differs" - if dataset.dtype != query.dtype: - query = query.astype(dataset.dtype) + def search_batch(query, **kwargs): + assert dataset.shape[1] == query.shape[1], "Number of dimensions differs" + if dataset.dtype != query.dtype: + query = query.astype(dataset.dtype) - return _exact_search(dataset, query, k, **kwargs) + return _exact_search( + dataset, + query, + metric_kind=metric_kind, + metric_pointer=metric_pointer, + metric_signature=metric_signature, + **kwargs, + ) return _search_in_compiled( - compiled=WrappedDataset(), - vectors=query, - k=k, - threads=threads, - exact=True, + search_batch, + query, + # Batch scheduling: log=log, batch_size=batch_size, + # Search constraints: + count=count, + threads=threads, ) From ae82b6519f6c2f4097db4d6c23d26ff8832b2f50 Mon Sep 17 00:00:00 2001 From: Arman Ghazaryan <91345263+Arman-Ghazaryan@users.noreply.github.com> Date: Mon, 14 Aug 2023 11:18:57 +0400 Subject: [PATCH 20/70] Add: impl. 
of rename and remove funcs --- java/cloud/unum/usearch/Index.java | 12 ++++++++++++ .../unum/usearch/cloud_unum_usearch_Index.cpp | 9 +++++++++ .../unum/usearch/cloud_unum_usearch_Index.h | 16 ++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/java/cloud/unum/usearch/Index.java b/java/cloud/unum/usearch/Index.java index 3b234720..ff76c306 100644 --- a/java/cloud/unum/usearch/Index.java +++ b/java/cloud/unum/usearch/Index.java @@ -69,6 +69,14 @@ public void view(String path) { c_view(c_ptr, path); } + public boolean remove(int key) { + return c_remove(c_ptr, key); + } + + public boolean rename(int from, int to) { + return c_rename(c_ptr, from, to); + } + public static class Config { private String _metric = "ip"; private String _quantization = "f32"; @@ -163,4 +171,8 @@ private static native long c_create(// private static native void c_load(long ptr, String path); private static native void c_view(long ptr, String path); + + private static native boolean c_remove(long ptr, int key); + + private static native boolean c_rename(long ptr, int from, int to); } \ No newline at end of file diff --git a/java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp b/java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp index 3f8d4d3b..12662beb 100644 --- a/java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp +++ b/java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp @@ -164,3 +164,12 @@ JNIEXPORT jintArray JNICALL Java_cloud_unum_usearch_Index_c_1search( // std::free(matches_data); return matches; } + +JNIEXPORT bool JNICALL Java_cloud_unum_usearch_Index_c_1remove(JNIEnv*, jclass, jlong c_ptr, jlong key) { + return reinterpret_cast(c_ptr)->remove(static_cast(key)).completed; +} + +JNIEXPORT bool JNICALL Java_cloud_unum_usearch_Index_c_1rename(JNIEnv*, jclass, jlong c_ptr, jlong from, jlong to) { + using key_t = typename index_dense_t::key_t; + return reinterpret_cast(c_ptr)->rename(static_cast(from),static_cast(to)).completed; +} \ No newline at end of file diff --git a/java/cloud/unum/usearch/cloud_unum_usearch_Index.h b/java/cloud/unum/usearch/cloud_unum_usearch_Index.h index 9b8b6482..a79a87f2 100644 --- a/java/cloud/unum/usearch/cloud_unum_usearch_Index.h +++ b/java/cloud/unum/usearch/cloud_unum_usearch_Index.h @@ -103,6 +103,22 @@ JNIEXPORT void JNICALL Java_cloud_unum_usearch_Index_c_1load JNIEXPORT void JNICALL Java_cloud_unum_usearch_Index_c_1view (JNIEnv *, jclass, jlong, jstring); +/* + * Class: cloud_unum_usearch_Index + * Method: c_remove + * Signature: (JLjava/lang/String;)V + */ +JNIEXPORT bool JNICALL Java_cloud_unum_usearch_Index_c_1remove + (JNIEnv *, jclass, jlong); + +/* + * Class: cloud_unum_usearch_Index + * Method: c_rename + * Signature: (JLjava/lang/String;)V + */ +JNIEXPORT bool JNICALL Java_cloud_unum_usearch_Index_c_1rename + (JNIEnv *, jclass, jlong, jlong); + #ifdef __cplusplus } #endif From f2f6b46b34fd78ef112e92e90fa82985a6e30e57 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Mon, 14 Aug 2023 12:28:48 +0400 Subject: [PATCH 21/70] Refactor: Split Index tests for key collisions --- cpp/test.cpp | 22 +- docs/compilation.md | 2 +- include/usearch/index_dense.hpp | 3 +- python/scripts/test.py | 399 -------------------------------- python/scripts/test_index.py | 211 +++++++++++++++++ python/scripts/test_sparse.py | 42 ++-- python/scripts/test_tooling.py | 111 +++++++++ python/usearch/index.py | 7 +- 8 files changed, 364 insertions(+), 433 deletions(-) delete mode 100644 python/scripts/test.py create mode 100644 
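The new Java `remove`/`rename` wrappers map directly onto the native calls; a usage sketch follows. Only `remove` and `rename` come from this patch, while the `Index.Config` builder chain and `add` call are assumed from the existing Java bindings:

```java
import cloud.unum.usearch.Index;

public class RemoveRenameExample {
    public static void main(String[] args) {
        Index index = new Index.Config()
                .metric("cos")
                .quantization("f32")
                .dimensions(3)
                .build();

        index.add(42, new float[] {0.1f, 0.2f, 0.3f});
        boolean renamed = index.rename(42, 43); // `completed` flag from the C++ side
        boolean removed = index.remove(43);
        System.out.println(renamed + ", " + removed);
    }
}
```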
python/scripts/test_index.py create mode 100644 python/scripts/test_tooling.py diff --git a/cpp/test.cpp b/cpp/test.cpp index 4fff6272..e7e67e55 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -41,8 +41,11 @@ void test_cosine(index_at& index, std::vector> const& vec if constexpr (punned_ak) { auto result = index.add(key_first, vector_first, args...); - expect(!result); + expect(!!result == index.multi()); result.error.release(); + + std::size_t first_key_count = index.count(key_first); + expect(first_key_count == (1ul + index.multi())); } // Default approximate search @@ -187,13 +190,16 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) { } // Type-punned: - for (std::size_t connectivity : {3, 13, 50}) { - std::printf("- punned with connectivity %zu \n", connectivity); - using index_t = index_dense_gt; - metric_punned_t metric(dimensions, metric_kind_t::cos_k, scalar_kind()); - index_config_t config(connectivity); - index_t index = index_t::make(metric, config); - test_cosine(index, matrix); + for (bool ban_collisions : {false, true}) { + for (std::size_t connectivity : {3, 13, 50}) { + std::printf("- punned with connectivity %zu \n", connectivity); + using index_t = index_dense_gt; + metric_punned_t metric(dimensions, metric_kind_t::cos_k, scalar_kind()); + index_dense_config_t config(connectivity); + config.ban_collisions = ban_collisions; + index_t index = index_t::make(metric, config); + test_cosine(index, matrix); + } } } diff --git a/docs/compilation.md b/docs/compilation.md index cdf948a9..4765d527 100644 --- a/docs/compilation.md +++ b/docs/compilation.md @@ -63,7 +63,7 @@ The `-s` option will disable capturing the logs. The `-x` option will exit after first failure to simplify debugging. ```sh -pip install -e . && pytest python/scripts/test.py -s -x +pip install -e . 
&& pytest python/scripts/test_index.py -s -x ``` Linting: diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 89babbc8..fd955a49 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -467,6 +467,7 @@ class index_dense_gt { std::size_t max_level() const noexcept { return typed_->max_level(); } index_dense_config_t const& config() const { return config_; } index_limits_t const& limits() const { return typed_->limits(); } + bool multi() const { return !config_.ban_collisions; } // The metric and its properties metric_t const& metric() const { return metric_; } @@ -1222,7 +1223,7 @@ class index_dense_gt { bool reuse_node = free_slot != default_free_value(); auto on_success = [&](member_ref_t member) { unique_lock_t slot_lock(slot_lookup_mutex_); - slot_lookup_.insert({key, static_cast(member.slot)}); + slot_lookup_.insert(key_and_slot_t{key, static_cast(member.slot)}); if (copy_vector) { if (!reuse_node) vectors_lookup_[member.slot] = vectors_tape_allocator_.allocate(metric_.bytes_per_vector()); diff --git a/python/scripts/test.py b/python/scripts/test.py deleted file mode 100644 index 19519633..00000000 --- a/python/scripts/test.py +++ /dev/null @@ -1,399 +0,0 @@ -import os - -import pytest -import numpy as np - -from usearch.io import load_matrix, save_matrix -from usearch.eval import random_vectors -from usearch.index import search - -from usearch.index import ( - Index, - Indexes, - MetricKind, - ScalarKind, - Match, - Matches, - BatchMatches, -) -from usearch.index import ( - DEFAULT_CONNECTIVITY, - DEFAULT_EXPANSION_ADD, - DEFAULT_EXPANSION_SEARCH, -) - -# When running the tests concurrently in different VMs, one can get access violation errors. -# To avoid that, we embed the OS name into the temporary files name. -# https://github.com/unum-cloud/usearch/actions/runs/5705359698/job/15459945738 -temporary_filename = f"tmp-{os.name}" -temporary_usearch_filename = temporary_filename + ".usearch" - -dimensions = [3, 97, 256] -batch_sizes = [1, 77] -index_types = [ - ScalarKind.F32, - ScalarKind.F64, - ScalarKind.F16, - ScalarKind.I8, -] -numpy_types = [np.float32, np.float64, np.float16] - -connectivity_options = [3, 13, 50, DEFAULT_CONNECTIVITY] -continuous_metrics = [ - MetricKind.Cos, - MetricKind.L2sq, -] -hash_metrics = [ - MetricKind.Hamming, - MetricKind.Tanimoto, - MetricKind.Sorensen, -] - - -@pytest.mark.parametrize("rows", batch_sizes) -@pytest.mark.parametrize("cols", dimensions) -def test_serializing_fbin_matrix(rows: int, cols: int): - """ - Test the serialization of floating point binary matrix. - - :param int rows: The number of rows in the matrix. - :param int cols: The number of columns in the matrix. - """ - original = np.random.rand(rows, cols).astype(np.float32) - save_matrix(original, temporary_filename + ".fbin") - reconstructed = load_matrix(temporary_filename + ".fbin") - assert np.allclose(original, reconstructed) - os.remove(temporary_filename + ".fbin") - - -@pytest.mark.parametrize("rows", batch_sizes) -@pytest.mark.parametrize("cols", dimensions) -def test_serializing_ibin_matrix(rows: int, cols: int): - """ - Test the serialization of integer binary matrix. - - :param int rows: The number of rows in the matrix. - :param int cols: The number of columns in the matrix. 
- """ - original = np.random.randint(0, rows + 1, size=(rows, cols)).astype(np.int32) - save_matrix(original, temporary_filename + ".ibin") - reconstructed = load_matrix(temporary_filename + ".ibin") - assert np.allclose(original, reconstructed) - os.remove(temporary_filename + ".ibin") - - -@pytest.mark.parametrize("rows", batch_sizes) -@pytest.mark.parametrize("cols", dimensions) -def test_exact_search(rows: int, cols: int): - """ - Test exact search. - - :param int rows: The number of rows in the matrix. - :param int cols: The number of columns in the matrix. - """ - original = np.random.rand(rows, cols) - matches: BatchMatches = search(original, original, 10, exact=True) - top_matches = ( - [int(m.keys[0]) for m in matches] if rows > 1 else int(matches.keys[0]) - ) - assert np.all(top_matches == np.arange(rows)) - - matches: Matches = search(original, original[0], 10, exact=True) - top_match = int(matches.keys[0]) - assert top_match == 0 - - -@pytest.mark.parametrize("ndim", dimensions) -@pytest.mark.parametrize("metric", continuous_metrics) -@pytest.mark.parametrize("index_type", index_types) -@pytest.mark.parametrize("numpy_type", numpy_types) -@pytest.mark.parametrize("connectivity", connectivity_options) -def test_minimal_index( - ndim: int, - metric: MetricKind, - index_type: ScalarKind, - numpy_type: str, - connectivity: int, -): - index = Index( - metric=metric, - ndim=ndim, - dtype=index_type, - connectivity=connectivity, - expansion_add=DEFAULT_EXPANSION_ADD, - expansion_search=DEFAULT_EXPANSION_SEARCH, - multi=False, - ) - assert index.ndim == ndim - assert index.connectivity == connectivity - - vector = random_vectors(count=1, ndim=ndim, dtype=numpy_type).flatten() - index.add(42, vector) - - # Ban vectors with a wrong number of dimensions - with pytest.raises(Exception): - index.add( - 42, random_vectors(count=1, ndim=(ndim * 2), dtype=numpy_type).flatten() - ) - - # Ban duplicates unless explicitly allowed - with pytest.raises(Exception): - index.add(42, vector) - - assert len(index) == 1, "Size after addition" - assert 42 in index, "Presence in the index" - assert 42 in index.keys, "Presence among keys" - assert 43 not in index, "Presence in the index, false positive" - assert index[42] is not None, "Vector recovery" - assert index[43] is None, "Vector recovery, false positive" - assert len(index[42].flatten()) == ndim - if numpy_type != np.byte: - assert np.allclose(index[42], vector, atol=0.1) - - matches: Matches = index.search(vector, 10) - assert len(matches.keys) == 1, "Number of matches" - assert len(matches.keys) == len(matches.distances), "Symmetric match sub-arrays" - assert len({match.key for match in matches}) == 1, "Iteration over matches" - assert matches[0].key == 42 - assert matches[0].distance == pytest.approx(0, abs=1e-3) - assert matches.computed_distances != 0 - assert matches.visited_members != 0 - - # Validating the index structure and metadata: - assert index.max_level >= 0 - assert index.levels_stats.nodes >= 1 - assert index.level_stats(0).nodes == 1 - assert str(index).startswith("usearch.") - - # Try removals - other_vector = random_vectors(count=1, ndim=ndim, dtype=numpy_type).flatten() - index.add(43, other_vector) - assert len(index) == 2 - index.remove(43) - assert len(index) == 1 - - # Try inserting back - index.add(43, other_vector) - assert len(index) == 2 - index.remove(43) - assert len(index) == 1 - - index.save(temporary_usearch_filename) - - # Re-populate cleared index - index.clear() - assert len(index) == 0 - index.add(42, vector) - 
assert len(index) == 1 - matches: Matches = index.search(vector, 10) - assert len(matches) == 1 - - index_copy = index.copy() - assert len(index_copy) == 1 - assert len(index_copy[42].flatten()) == ndim - matches_copy: Matches = index_copy.search(vector, 10) - assert np.all(matches_copy.keys == matches.keys) - - index.load(temporary_usearch_filename) - assert len(index) == 1 - assert len(index[42].flatten()) == ndim - - matches_loaded: Matches = index.search(vector, 10) - assert np.all(matches_loaded.keys == matches.keys) - - index = Index.restore(temporary_usearch_filename, view=True) - assert len(index) == 1 - assert len(index[42].flatten()) == ndim - - matches_viewed: Matches = index.search(vector, 10) - assert np.all(matches_viewed.keys == matches.keys) - - # Cleanup - index.reset() - os.remove(temporary_usearch_filename) - - # Try opening a missing file - meta = Index.metadata(temporary_usearch_filename) - assert meta is None - index = Index.restore(temporary_usearch_filename) - assert index is None - - # Try opening a corrupt file - with open(temporary_usearch_filename, "w") as file: - file.write("Some random string") - meta = Index.metadata(temporary_usearch_filename) - assert meta is None - index = Index.restore(temporary_usearch_filename) - - # Try saving and opening and empty index - index_copy.reset() - index_copy.save(temporary_usearch_filename) - assert Index.restore(temporary_usearch_filename, view=False) is not None - assert Index.restore(temporary_usearch_filename, view=True) is not None - - assert index is None - os.remove(temporary_usearch_filename) - - -@pytest.mark.parametrize("ndim", dimensions) -@pytest.mark.parametrize("metric", continuous_metrics) -@pytest.mark.parametrize("batch_size", batch_sizes) -@pytest.mark.parametrize("index_type", index_types) -@pytest.mark.parametrize("numpy_type", numpy_types) -def test_index_batch( - ndim: int, - metric: MetricKind, - batch_size: int, - index_type: ScalarKind, - numpy_type: str, -): - index = Index( - ndim=ndim, - metric=metric, - dtype=index_type, - multi=False, - ) - - keys = np.arange(batch_size) - vectors = random_vectors(count=batch_size, ndim=ndim, dtype=numpy_type) - - index.add(keys, vectors, threads=2) - assert len(index) == batch_size - - vectors_retrived = np.vstack(index.get(keys)) - assert np.allclose(vectors_retrived.astype(numpy_type), vectors, atol=0.1) - - # Ban duplicates unless explicitly allowed - with pytest.raises(Exception): - index.add(keys, vectors, threads=2) - - matches: BatchMatches = index.search(vectors, 10, threads=2) - assert matches.keys.shape[0] == matches.distances.shape[0] - assert len(matches) == batch_size - assert np.all(np.sort(index.keys) == np.sort(keys)) - - if batch_size > 1: - assert index.max_level >= 1 - else: - assert index.max_level >= 0 - assert index.levels_stats.nodes >= batch_size - assert index.level_stats(0).nodes == batch_size - - index.save(temporary_usearch_filename) - index.clear() - assert len(index) == 0 - - index.load(temporary_usearch_filename) - assert len(index) == batch_size - assert len(index[0].flatten()) == ndim - - if batch_size > 1: - matches_loaded: BatchMatches = index.search(vectors, 10, threads=2) - for idx in range(len(matches_loaded)): - assert np.all(matches_loaded[idx].keys == matches[idx].keys) - - index = Index.restore(temporary_usearch_filename, view=True) - assert len(index) == batch_size - assert len(index[0].flatten()) == ndim - - if batch_size > 1: - matches_viewed: BatchMatches = index.search(vectors, 10, threads=2) - for idx in 
range(len(matches_viewed)): - assert np.all(matches_viewed[idx].keys == matches[idx].keys) - - # Test clustering - if batch_size > 1: - clusters: BatchMatches = index.cluster(vectors, threads=2) - assert len(clusters.keys) == batch_size - - # Cleanup - index.reset() - os.remove(temporary_usearch_filename) - - -@pytest.mark.parametrize("metric", [MetricKind.L2sq]) -@pytest.mark.parametrize("batch_size", batch_sizes) -@pytest.mark.parametrize("index_type", index_types) -@pytest.mark.parametrize("numpy_type", numpy_types) -def test_exact_recall( - metric: MetricKind, - batch_size: int, - index_type: ScalarKind, - numpy_type: str, -): - ndim: int = batch_size - index = Index(ndim=ndim, metric=metric, dtype=index_type) - assert index.ndim == ndim - - vectors = np.zeros((batch_size, ndim), dtype=numpy_type) - for i in range(batch_size): - vectors[i, i] = 1 - keys = np.arange(batch_size) - index.add(keys, vectors) - assert len(index) == batch_size - - # Search one at a time - for i in range(batch_size): - matches: Matches = index.search(vectors[i], 10, exact=True) - found_labels = matches.keys - assert found_labels[0] == i - assert matches.computed_distances == len(index) - assert matches.visited_members == 0, "Exact search won't traverse the graph" - - # Search the whole batch - if batch_size > 1: - matches: BatchMatches = index.search(vectors, 10, exact=True) - assert matches.computed_distances == len(index) * len(vectors) - assert matches.visited_members == 0, "Exact search won't traverse the graph" - - found_labels = matches.keys - for i in range(batch_size): - assert found_labels[i, 0] == i - - # Match entries against themselves - index_copy: Index = index.copy() - mapping: dict = index.join(index_copy, exact=True) - for man, woman in mapping.items(): - assert man == woman, "Stable marriage failed" - - -def test_indexes(): - ndim = 10 - index_a = Index(ndim=ndim) - index_b = Index(ndim=ndim) - - vectors = random_vectors(count=3, ndim=ndim) - index_a.add(42, vectors[0]) - index_b.add(43, vectors[1]) - - indexes = Indexes([index_a, index_b]) - matches = indexes.search(vectors[2], 10) - assert len(matches) == 2 - - -@pytest.mark.parametrize("bits", dimensions) -@pytest.mark.parametrize("metric", hash_metrics) -@pytest.mark.parametrize("connectivity", connectivity_options) -@pytest.mark.parametrize("batch_size", batch_sizes) -def test_bitwise_index( - bits: int, - metric: MetricKind, - connectivity: int, - batch_size: int, -): - index = Index(ndim=bits, metric=metric, connectivity=connectivity) - - keys = np.arange(batch_size) - byte_vectors = np.random.randint(2, size=(batch_size, bits)) - bit_vectors = np.packbits(byte_vectors, axis=1) - - index.add(keys, bit_vectors) - - byte_vectors_retrieved = np.vstack(index.get(keys, ScalarKind.B1)) - assert np.all(byte_vectors_retrieved == bit_vectors) - - index.search(bit_vectors, 10) - - -if __name__ == "__main__": - pytest.main(args=["python/scripts/test.py", "-s", "-x", "-v"]) diff --git a/python/scripts/test_index.py b/python/scripts/test_index.py new file mode 100644 index 00000000..06a4a49b --- /dev/null +++ b/python/scripts/test_index.py @@ -0,0 +1,211 @@ +import os + +import pytest +import numpy as np + +from usearch.io import load_matrix, save_matrix +from usearch.eval import random_vectors +from usearch.index import search + +from usearch.index import ( + Index, + Indexes, + MetricKind, + ScalarKind, + Match, + Matches, + BatchMatches, +) +from usearch.index import ( + DEFAULT_CONNECTIVITY, +) + + +ndims = [3, 97, 256] +batch_sizes = [1, 11, 
77] +quantizations = [ + ScalarKind.F32, + ScalarKind.F64, + ScalarKind.F16, + ScalarKind.I8, +] +dtypes = [np.float32, np.float64, np.float16] +threads = 2 + +connectivity_options = [3, 13, 50, DEFAULT_CONNECTIVITY] +continuous_metrics = [MetricKind.Cos, MetricKind.L2sq] +hash_metrics = [ + MetricKind.Hamming, + MetricKind.Tanimoto, + MetricKind.Sorensen, +] + + +@pytest.mark.parametrize("ndim", [3, 97, 256]) +@pytest.mark.parametrize("metric", [MetricKind.Cos, MetricKind.L2sq]) +@pytest.mark.parametrize("batch_size", [1, 7, 1024]) +@pytest.mark.parametrize("quantization", [ScalarKind.F32, ScalarKind.I8]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64, np.float16]) +def test_index_initialization_and_addition( + ndim, metric, quantization, dtype, batch_size +): + index = Index(ndim=ndim, metric=metric, dtype=quantization, multi=False) + keys = np.arange(batch_size) + vectors = random_vectors(count=batch_size, ndim=ndim, dtype=dtype) + index.add(keys, vectors, threads=threads) + assert len(index) == batch_size + + +@pytest.mark.parametrize("ndim", [3, 97, 256]) +@pytest.mark.parametrize("metric", [MetricKind.Cos, MetricKind.L2sq]) +@pytest.mark.parametrize("batch_size", [1, 7, 1024]) +@pytest.mark.parametrize("quantization", [ScalarKind.F32, ScalarKind.I8]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64, np.float16]) +def test_index_retrieval(ndim, metric, quantization, dtype, batch_size): + index = Index(ndim=ndim, metric=metric, dtype=quantization, multi=False) + keys = np.arange(batch_size) + vectors = random_vectors(count=batch_size, ndim=ndim, dtype=dtype) + index.add(keys, vectors, threads=threads) + vectors_retrived = np.vstack(index.get(keys)) + assert np.allclose(vectors_retrived.astype(dtype), vectors, atol=0.1) + + +@pytest.mark.parametrize("ndim", [3, 97, 256]) +@pytest.mark.parametrize("metric", [MetricKind.Cos, MetricKind.L2sq]) +@pytest.mark.parametrize("batch_size", [1, 7, 1024]) +@pytest.mark.parametrize("quantization", [ScalarKind.F32, ScalarKind.I8]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64, np.float16]) +def test_index_search(ndim, metric, quantization, dtype, batch_size): + index = Index(ndim=ndim, metric=metric, dtype=quantization, multi=False) + keys = np.arange(batch_size) + vectors = random_vectors(count=batch_size, ndim=ndim, dtype=dtype) + index.add(keys, vectors, threads=threads) + + if batch_size == 1: + matches: Matches = index.search(vectors, 10, threads=threads) + assert matches.keys.ndim == 1 + assert matches.keys.shape[0] == matches.distances.shape[0] + assert len(matches) == batch_size + assert np.all(np.sort(index.keys) == np.sort(keys)) + + else: + matches: BatchMatches = index.search(vectors, 10, threads=threads) + assert matches.keys.ndim == 2 + assert matches.keys.shape[0] == matches.distances.shape[0] + assert len(matches) == batch_size + assert np.all(np.sort(index.keys) == np.sort(keys)) + + +@pytest.mark.parametrize("batch_size", [1, 7, 1024]) +def test_index_duplicates(batch_size): + ndim = 8 + index = Index(ndim=ndim, multi=False) + keys = np.arange(batch_size) + vectors = random_vectors(count=batch_size, ndim=ndim) + index.add(keys, vectors, threads=threads) + with pytest.raises(Exception): + index.add(keys, vectors, threads=threads) + + index = Index(ndim=ndim, multi=True) + keys = np.arange(batch_size) + vectors = random_vectors(count=batch_size, ndim=ndim) + index.add(keys, vectors, threads=threads) + index.add(keys, vectors, threads=threads) + assert len(index) == batch_size * 2 + + two_per_key = 
index.get(keys) + print(two_per_key) + if batch_size == 1: + assert two_per_key.shape == (2, ndim) + else: + assert np.vstack(two_per_key).shape == (2 * batch_size, ndim) + + +@pytest.mark.parametrize("batch_size", [1, 7, 1024]) +def test_index_stats(batch_size): + ndim = 8 + index = Index(ndim=ndim, multi=False) + keys = np.arange(batch_size) + vectors = random_vectors(count=batch_size, ndim=ndim) + index.add(keys, vectors, threads=threads) + + assert index.max_level >= 0 + assert index.levels_stats.nodes >= batch_size + assert index.level_stats(0).nodes == batch_size + + +@pytest.mark.parametrize("ndim", [1, 3, 8, 32, 256, 4096]) +@pytest.mark.parametrize("batch_size", [1, 7, 1024]) +@pytest.mark.parametrize("quantization", [ScalarKind.F32, ScalarKind.I8]) +def test_index_save_load_restore_copy(ndim, quantization, batch_size): + index = Index(ndim=ndim, dtype=quantization, multi=False) + keys = np.arange(batch_size) + vectors = random_vectors(count=batch_size, ndim=ndim) + index.add(keys, vectors, threads=threads) + + index.save("tmp.usearch") + index.clear() + assert len(index) == 0 + + index.load("tmp.usearch") + assert len(index) == batch_size + assert len(index[0].flatten()) == ndim + + index = Index.restore("tmp.usearch", view=True) + assert len(index) == batch_size + assert len(index[0].flatten()) == ndim + + copied_index = index.copy() + assert len(copied_index) == len(index) + assert np.allclose(np.vstack(copied_index.get(keys)), np.vstack(index.get(keys))) + + os.remove("tmp.usearch") + + +@pytest.mark.parametrize("batch_size", [32]) +def test_index_contains_remove_rename(batch_size): + if batch_size <= 1: + return + + ndim = 8 + index = Index(ndim=ndim, multi=False) + keys = np.arange(batch_size) + vectors = random_vectors(count=batch_size, ndim=ndim) + + index.add(keys, vectors, threads=threads) + assert np.all(index.contains(keys)) + assert np.all(index.count(keys) == np.ones(batch_size)) + + removed_keys = keys[: batch_size // 2] + remaining_keys = keys[batch_size // 2 :] + index.remove(removed_keys) + assert len(index) == (len(keys) - len(removed_keys)) + assert np.sum(index.contains(keys)) == len(remaining_keys) + assert np.sum(index.count(keys)) == len(remaining_keys) + assert np.sum(index.count(removed_keys)) == 0 + + assert keys[0] not in index + assert keys[-1] in index + + renamed_counts = index.rename(removed_keys, removed_keys) + assert np.sum(index.count(renamed_counts)) == 0 + + renamed_counts = index.rename(remaining_keys, removed_keys) + assert np.sum(index.count(removed_keys)) == len(index) + + +@pytest.mark.parametrize("ndim", [3, 97, 256]) +@pytest.mark.parametrize("metric", [MetricKind.Cos, MetricKind.L2sq]) +@pytest.mark.parametrize("batch_size", [10, 1024]) +@pytest.mark.parametrize("quantization", [ScalarKind.F32, ScalarKind.I8]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64, np.float16]) +def test_index_clustering(ndim, metric, quantization, dtype, batch_size): + if batch_size <= 1: + return + + index = Index(ndim=ndim, metric=metric, dtype=quantization, multi=False) + keys = np.arange(batch_size) + vectors = random_vectors(count=batch_size, ndim=ndim, dtype=dtype) + index.add(keys, vectors, threads=threads) + clusters: BatchMatches = index.cluster(vectors, threads=threads) + assert len(clusters.keys) == batch_size diff --git a/python/scripts/test_sparse.py b/python/scripts/test_sparse.py index bb77a395..8dcc4ccb 100644 --- a/python/scripts/test_sparse.py +++ b/python/scripts/test_sparse.py @@ -1,32 +1,32 @@ -import os - import pytest import 
numpy as np from usearch.index import ( - SparseIndex, -) -from usearch.index import ( - DEFAULT_CONNECTIVITY, - DEFAULT_EXPANSION_ADD, - DEFAULT_EXPANSION_SEARCH, + Index, + MetricKind, + ScalarKind, ) -dimensions = [3, 97, 256] -batch_sizes = [1, 77] -connectivity_options = [3, 13, 50, DEFAULT_CONNECTIVITY] +@pytest.mark.parametrize("bits", [3, 97, 256, 4097]) +@pytest.mark.parametrize("metric", [MetricKind.Tanimoto]) +@pytest.mark.parametrize("connectivity", [3, 13, 50]) +@pytest.mark.parametrize("batch_size", [3, 77]) +def test_bitwise_index( + bits: int, + metric: MetricKind, + connectivity: int, + batch_size: int, +): + index = Index(ndim=bits, metric=metric, connectivity=connectivity) + keys = np.arange(batch_size) + byte_vectors = np.random.randint(2, size=(batch_size, bits)) + bit_vectors = np.packbits(byte_vectors, axis=1) -@pytest.mark.parametrize("connectivity", connectivity_options) -@pytest.mark.skipif(os.name == "nt", reason="Spurious behaviour on windows") -def test_sets_index(connectivity: int): - index = SparseIndex(connectivity=connectivity) - index.add(10, np.array([10, 12, 15], dtype=np.uint32)) - index.add(11, np.array([11, 12, 15, 16], dtype=np.uint32)) - results = index.search(np.array([12, 15], dtype=np.uint32), 10) - assert list(results) == [10, 11] + index.add(keys, bit_vectors) + byte_vectors_retrieved = np.vstack(index.get(keys, ScalarKind.B1)) + assert np.all(byte_vectors_retrieved == bit_vectors) -if __name__ == "__main__": - pytest.main(args=["python/scripts/test.py", "-s", "-x", "-v"]) + index.search(bit_vectors, 10) diff --git a/python/scripts/test_tooling.py b/python/scripts/test_tooling.py new file mode 100644 index 00000000..101898cb --- /dev/null +++ b/python/scripts/test_tooling.py @@ -0,0 +1,111 @@ +import os + +import pytest +import numpy as np + +from usearch.io import load_matrix, save_matrix +from usearch.index import search +from usearch.eval import random_vectors + +from usearch.index import Match, Matches, BatchMatches, Index, Indexes + + +dimensions = [3, 97, 256] +batch_sizes = [1, 77] + + +@pytest.mark.parametrize("rows", batch_sizes) +@pytest.mark.parametrize("cols", dimensions) +def test_serializing_fbin_matrix(rows: int, cols: int): + """ + Test the serialization of floating point binary matrix. + + :param int rows: The number of rows in the matrix. + :param int cols: The number of columns in the matrix. + """ + original = np.random.rand(rows, cols).astype(np.float32) + save_matrix(original, "tmp.fbin") + reconstructed = load_matrix("tmp.fbin") + assert np.allclose(original, reconstructed) + os.remove("tmp.fbin") + + +@pytest.mark.parametrize("rows", batch_sizes) +@pytest.mark.parametrize("cols", dimensions) +def test_serializing_ibin_matrix(rows: int, cols: int): + """ + Test the serialization of integer binary matrix. + + :param int rows: The number of rows in the matrix. + :param int cols: The number of columns in the matrix. + """ + original = np.random.randint(0, rows + 1, size=(rows, cols)).astype(np.int32) + save_matrix(original, "tmp.ibin") + reconstructed = load_matrix("tmp.ibin") + assert np.allclose(original, reconstructed) + os.remove("tmp.ibin") + + +@pytest.mark.parametrize("rows", batch_sizes) +@pytest.mark.parametrize("cols", dimensions) +def test_exact_search(rows: int, cols: int): + """ + Test exact search. + + :param int rows: The number of rows in the matrix. + :param int cols: The number of columns in the matrix. 
+ """ + original = np.random.rand(rows, cols) + matches: BatchMatches = search(original, original, 10, exact=True) + top_matches = ( + [int(m.keys[0]) for m in matches] if rows > 1 else int(matches.keys[0]) + ) + assert np.all(top_matches == np.arange(rows)) + + matches: Matches = search(original, original[0], 10, exact=True) + top_match = int(matches.keys[0]) + assert top_match == 0 + + +def test_matches_creation_and_methods(): + matches = Matches( + keys=np.array([1, 2]), + distances=np.array([0.5, 0.6]), + visited_members=2, + computed_distances=2, + ) + assert len(matches) == 2 + assert matches[0] == Match(key=1, distance=0.5) + assert matches.to_list() == [(1, 0.5), (2, 0.6)] + + +def test_batch_matches_creation_and_methods(): + keys = np.array([[1, 2], [3, 4]]) + distances = np.array([[0.5, 0.6], [0.7, 0.8]]) + counts = np.array([2, 2]) + batch_matches = BatchMatches( + keys=keys, + distances=distances, + counts=counts, + visited_members=2, + computed_distances=2, + ) + + assert len(batch_matches) == 2 + assert batch_matches[0].keys.tolist() == [1, 2] + assert batch_matches[0].distances.tolist() == [0.5, 0.6] + assert batch_matches.to_list() == [(1, 0.5), (2, 0.6), (3, 0.7), (4, 0.8)] + + +def test_multi_index(): + ndim = 10 + index_a = Index(ndim=ndim) + index_b = Index(ndim=ndim) + + vectors = random_vectors(count=3, ndim=ndim) + index_a.add(42, vectors[0]) + index_b.add(43, vectors[1]) + + indexes = Indexes([index_a, index_b]) + matches = indexes.search(vectors[2], 10) + assert len(matches) == 2 diff --git a/python/usearch/index.py b/python/usearch/index.py index b5541615..e1ce1115 100644 --- a/python/usearch/index.py +++ b/python/usearch/index.py @@ -50,8 +50,6 @@ class CompiledMetric(NamedTuple): signature: MetricSignature -os.PathLike - Key = np.uint64 KeyOrKeysLike = Union[Key, Iterable[Key], int, Iterable[int], np.ndarray, memoryview] @@ -265,6 +263,9 @@ class Match: key: int distance: float + def to_tuple(self) -> tuple: + return self.key, self.distance + @dataclass class Matches: @@ -329,7 +330,7 @@ def __getitem__(self, index: int) -> Matches: def to_list(self) -> List[List[tuple]]: """Convert the result for each query to the list of tuples with information about its matches.""" list_of_matches = [self.__getitem__(row) for row in range(self.__len__())] - return [match.to_list() for matches in list_of_matches for match in matches] + return [match.to_tuple() for matches in list_of_matches for match in matches] def mean_recall(self, expected: np.ndarray, count: Optional[int] = None) -> float: """Measures recall [0, 1] as of `Matches` that contain the corresponding From 27b214fdfaf18880380c80652492a795b0cdec62 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Mon, 14 Aug 2023 12:29:30 +0400 Subject: [PATCH 22/70] Make: Update version in `README.md` --- .github/workflows/update_version.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/update_version.sh b/.github/workflows/update_version.sh index 89471ba9..e19ce873 100755 --- a/.github/workflows/update_version.sh +++ b/.github/workflows/update_version.sh @@ -9,5 +9,5 @@ echo $1 > VERSION && sed -i "s/^\(#define USEARCH_VERSION_MINOR \).*/\1$(echo "$1" | cut -d. -f2)/" ./include/usearch/index.hpp && sed -i "s/^\(#define USEARCH_VERSION_PATCH \).*/\1$(echo "$1" | cut -d. 
-f3)/" ./include/usearch/index.hpp && sed -i "s/[0-9]\+\.[0-9]\+\.[0-9]\+/$1/" README.md && - sed -i "s/version = {0\.[0-9]\+\.[0-9]\+}/version = {$1}/" README.md && + sed -i "s/version = {[0-9]\+\.[0-9]\+\.[0-9]\+}/version = {$1}/" README.md && sed -i "s/version=\".*\"/version=\"$1\"/" wasmer.toml From 619d9f19d55245ede90c18546bcc1f00d4c78787 Mon Sep 17 00:00:00 2001 From: Arman Ghazaryan <91345263+Arman-Ghazaryan@users.noreply.github.com> Date: Mon, 14 Aug 2023 12:30:34 +0400 Subject: [PATCH 23/70] Add: Error handling for remove and rename --- .../unum/usearch/cloud_unum_usearch_Index.cpp | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp b/java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp index 12662beb..071513a1 100644 --- a/java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp +++ b/java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp @@ -165,11 +165,26 @@ JNIEXPORT jintArray JNICALL Java_cloud_unum_usearch_Index_c_1search( // return matches; } -JNIEXPORT bool JNICALL Java_cloud_unum_usearch_Index_c_1remove(JNIEnv*, jclass, jlong c_ptr, jlong key) { - return reinterpret_cast(c_ptr)->remove(static_cast(key)).completed; +JNIEXPORT bool JNICALL Java_cloud_unum_usearch_Index_c_1remove(JNIEnv* env, jclass, jlong c_ptr, jlong key) { + using key_t = typename index_dense_t::key_t; + using labeling_result_t = typename index_dense_t::labeling_result_t; + labeling_result_t result = reinterpret_cast(c_ptr)->remove(static_cast(key)); + if (!result) { + jclass jc = (*env).FindClass("java/lang/Error"); + if (jc) + (*env).ThrowNew(jc, "Failed to remove in vector index!"); + } + return result.completed; } -JNIEXPORT bool JNICALL Java_cloud_unum_usearch_Index_c_1rename(JNIEnv*, jclass, jlong c_ptr, jlong from, jlong to) { +JNIEXPORT bool JNICALL Java_cloud_unum_usearch_Index_c_1rename(JNIEnv* env, jclass, jlong c_ptr, jlong from, jlong to) { using key_t = typename index_dense_t::key_t; - return reinterpret_cast(c_ptr)->rename(static_cast(from),static_cast(to)).completed; + using labeling_result_t = typename index_dense_t::labeling_result_t; + labeling_result_t result = reinterpret_cast(c_ptr)->rename(static_cast(from), static_cast(to)); + if (!result) { + jclass jc = (*env).FindClass("java/lang/Error"); + if (jc) + (*env).ThrowNew(jc, "Failed to rename in vector index!"); + } + return result.completed; } \ No newline at end of file From 7fbf72fde49c61f9bf2302607b5653de91c459c0 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Mon, 14 Aug 2023 09:38:17 +0100 Subject: [PATCH 24/70] Fix: `stream_length` to include levels length --- include/usearch/index.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index e0685418..cf4dc3cf 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -2443,7 +2443,7 @@ class index_gt { std::size_t stream_length() const noexcept { std::size_t neighbors_length = 0; for (std::size_t i = 0; i != size(); ++i) - neighbors_length += node_bytes_(node_at_(i).level()); + neighbors_length += node_bytes_(node_at_(i).level()) + sizeof(level_t); return sizeof(index_serialized_header_t) + neighbors_length; } From 552e1b73ae90aeced98cdafe212e3986c074aea8 Mon Sep 17 00:00:00 2001 From: Arman Ghazaryan <91345263+Arman-Ghazaryan@users.noreply.github.com> Date: Mon, 14 Aug 2023 13:02:51 +0400 Subject: [PATCH 25/70] Refactor: use result error as error message --- 
.../unum/usearch/cloud_unum_usearch_Index.cpp | 31 ++++++++++++------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp b/java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp index 071513a1..0439a828 100644 --- a/java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp +++ b/java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp @@ -58,30 +58,33 @@ JNIEXPORT jlong JNICALL Java_cloud_unum_usearch_Index_c_1create( // JNIEXPORT void JNICALL Java_cloud_unum_usearch_Index_c_1save(JNIEnv* env, jclass, jlong c_ptr, jstring path) { char const* path_cstr = (*env).GetStringUTFChars(path, 0); - if (!reinterpret_cast(c_ptr)->save(path_cstr)) { + serialization_result_t result = reinterpret_cast(c_ptr)->save(path_cstr); + if (!result) { jclass jc = (*env).FindClass("java/lang/Error"); if (jc) - (*env).ThrowNew(jc, "Failed to dump vector index to path!"); + (*env).ThrowNew(jc, result.error.release()); } (*env).ReleaseStringUTFChars(path, path_cstr); } JNIEXPORT void JNICALL Java_cloud_unum_usearch_Index_c_1load(JNIEnv* env, jclass, jlong c_ptr, jstring path) { char const* path_cstr = (*env).GetStringUTFChars(path, 0); - if (!reinterpret_cast(c_ptr)->load(path_cstr)) { + serialization_result_t result = reinterpret_cast(c_ptr)->load(path_cstr); + if (!result) { jclass jc = (*env).FindClass("java/lang/Error"); if (jc) - (*env).ThrowNew(jc, "Failed to load vector index from path!"); + (*env).ThrowNew(jc, result.error.release()); } (*env).ReleaseStringUTFChars(path, path_cstr); } JNIEXPORT void JNICALL Java_cloud_unum_usearch_Index_c_1view(JNIEnv* env, jclass, jlong c_ptr, jstring path) { char const* path_cstr = (*env).GetStringUTFChars(path, 0); - if (!reinterpret_cast(c_ptr)->view(path_cstr)) { + serialization_result_t result = reinterpret_cast(c_ptr)->view(path_cstr); + if (!result) { jclass jc = (*env).FindClass("java/lang/Error"); if (jc) - (*env).ThrowNew(jc, "Failed to view vector index from path!"); + (*env).ThrowNew(jc, result.error.release()); } (*env).ReleaseStringUTFChars(path, path_cstr); } @@ -122,10 +125,13 @@ JNIEXPORT void JNICALL Java_cloud_unum_usearch_Index_c_1add( // float_span_t vector_span = float_span_t{vector_data, static_cast(vector_dims)}; using key_t = typename index_dense_t::key_t; - if (!reinterpret_cast(c_ptr)->add(static_cast(key), vector_span)) { + using add_result_t = typename index_dense_t::add_result_t; + + add_result_t result = reinterpret_cast(c_ptr)->add(static_cast(key), vector_span); + if (!result) { jclass jc = (*env).FindClass("java/lang/Error"); if (jc) - (*env).ThrowNew(jc, "Failed to insert a new point in vector index!"); + (*env).ThrowNew(jc, result.error.release()); } (*env).ReleaseFloatArrayElements(vector, vector_data, 0); } @@ -157,7 +163,7 @@ JNIEXPORT jintArray JNICALL Java_cloud_unum_usearch_Index_c_1search( // } else { jclass jc = (*env).FindClass("java/lang/Error"); if (jc) - (*env).ThrowNew(jc, "Failed to find in vector index!"); + (*env).ThrowNew(jc, result.error.release()); } (*env).ReleaseFloatArrayElements(vector, vector_data, 0); @@ -172,7 +178,7 @@ JNIEXPORT bool JNICALL Java_cloud_unum_usearch_Index_c_1remove(JNIEnv* env, jcla if (!result) { jclass jc = (*env).FindClass("java/lang/Error"); if (jc) - (*env).ThrowNew(jc, "Failed to remove in vector index!"); + (*env).ThrowNew(jc, result.error.release()); } return result.completed; } @@ -180,11 +186,12 @@ JNIEXPORT bool JNICALL Java_cloud_unum_usearch_Index_c_1remove(JNIEnv* env, jcla JNIEXPORT bool JNICALL 
Java_cloud_unum_usearch_Index_c_1rename(JNIEnv* env, jclass, jlong c_ptr, jlong from, jlong to) { using key_t = typename index_dense_t::key_t; using labeling_result_t = typename index_dense_t::labeling_result_t; - labeling_result_t result = reinterpret_cast(c_ptr)->rename(static_cast(from), static_cast(to)); + labeling_result_t result = + reinterpret_cast(c_ptr)->rename(static_cast(from), static_cast(to)); if (!result) { jclass jc = (*env).FindClass("java/lang/Error"); if (jc) - (*env).ThrowNew(jc, "Failed to rename in vector index!"); + (*env).ThrowNew(jc, result.error.release()); } return result.completed; } \ No newline at end of file From 7e5f6a7697b5eb82d3f03955fe3eb63539cc34fa Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Mon, 14 Aug 2023 18:54:50 +0400 Subject: [PATCH 26/70] Add: Support multiple vectors per key --- .gitmodules | 5 +- cpp/test.cpp | 36 +++++----- include/usearch/index.hpp | 4 +- include/usearch/index_dense.hpp | 114 +++++++++++++++++++------------- python/scripts/test_index.py | 6 +- 5 files changed, 91 insertions(+), 74 deletions(-) diff --git a/.gitmodules b/.gitmodules index 527e224d..e556527e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,7 +3,4 @@ url = https://github.com/ashvardanian/simsimd [submodule "fp16"] path = fp16 - url = https://github.com/maratyszcza/fp16 -[submodule "robin-map"] - path = robin-map - url = https://github.com/tessil/robin-map + url = https://github.com/maratyszcza/fp16 \ No newline at end of file diff --git a/cpp/test.cpp b/cpp/test.cpp index e7e67e55..3d896138 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -39,22 +39,13 @@ void test_cosine(index_at& index, std::vector> const& vec index.reserve(10); index.add(key_first, vector_first, args...); - if constexpr (punned_ak) { - auto result = index.add(key_first, vector_first, args...); - expect(!!result == index.multi()); - result.error.release(); - - std::size_t first_key_count = index.count(key_first); - expect(first_key_count == (1ul + index.multi())); - } - // Default approximate search - key_t matched_labels[10] = {0}; + key_t matched_keys[10] = {0}; distance_t matched_distances[10] = {0}; - std::size_t matched_count = index.search(vector_first, 5, args...).dump_to(matched_labels, matched_distances); + std::size_t matched_count = index.search(vector_first, 5, args...).dump_to(matched_keys, matched_distances); expect(matched_count == 1); - expect(matched_labels[0] == key_first); + expect(matched_keys[0] == key_first); expect(std::abs(matched_distances[0]) < 0.01); // Add more entries @@ -63,7 +54,7 @@ void test_cosine(index_at& index, std::vector> const& vec expect(index.size() == 3); // Perform exact search - matched_count = index.search(vector_first, 5, args...).dump_to(matched_labels, matched_distances); + matched_count = index.search(vector_first, 5, args...).dump_to(matched_keys, matched_distances); // Validate scans std::size_t count = 0; @@ -91,9 +82,9 @@ void test_cosine(index_at& index, std::vector> const& vec // Search again over reconstructed index index.save("tmp.usearch"); index.load("tmp.usearch"); - matched_count = index.search(vector_first, 5, args...).dump_to(matched_labels, matched_distances); + matched_count = index.search(vector_first, 5, args...).dump_to(matched_keys, matched_distances); expect(matched_count == 3); - expect(matched_labels[0] == key_first); + expect(matched_keys[0] == key_first); expect(std::abs(matched_distances[0]) < 0.01); if constexpr (punned_ak) { @@ -115,13 +106,24 @@ void 
test_cosine(index_at& index, std::vector> const& vec } }); + // Check for duplicates + if constexpr (punned_ak) { + index.reserve({vectors.size() + 1u, executor.size()}); + auto result = index.add(key_first, vector_first, args...); + expect(!!result == index.multi()); + result.error.release(); + + std::size_t first_key_count = index.count(key_first); + expect(first_key_count == (1ul + index.multi())); + } + // Search again over mapped index // file_head_result_t head = index_dense_metadata("tmp.usearch"); // expect(head.size == 3); index.view("tmp.usearch"); - matched_count = index.search(vector_first, 5, args...).dump_to(matched_labels, matched_distances); + matched_count = index.search(vector_first, 5, args...).dump_to(matched_keys, matched_distances); expect(matched_count == 3); - expect(matched_labels[0] == key_first); + expect(matched_keys[0] == key_first); expect(std::abs(matched_distances[0]) < 0.01); if constexpr (punned_ak) { diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index f0e66b8d..ac1a1c7f 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -885,7 +885,9 @@ class ring_gt { bool reserve(std::size_t n) noexcept { if (n < size()) return false; // prevent data loss - n = (std::max)(n, 64u); + if (n <= capacity()) + return true; + n = (std::max)(ceil2(n), 64u); element_t* elements = allocator_.allocate(n); if (!elements) return false; diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index fd955a49..e967fd6d 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -1,11 +1,12 @@ #pragma once #include // `aligned_alloc` -#include // `std::function` -#include // `std::iota` -#include // `std::shared_mutex` -#include // `std::thread` -#include // `std::vector` +#include // `std::function` +#include // `std::iota` +#include // `std::shared_mutex` +#include // `std::thread` +#include // `std::unordered_multiset` +#include // `std::vector` #include #include @@ -323,8 +324,13 @@ class index_dense_gt { struct key_and_slot_t { key_t key; compressed_slot_t slot; + + bool any_slot() const { return slot == default_free_value(); } + static key_and_slot_t any_slot(key_t key) { return {key, default_free_value()}; } }; + key_and_slot_t key_and_any_slot() {} + struct lookup_key_hash_t { using is_transparent = void; std::size_t operator()(key_and_slot_t const& k) const noexcept { return std::hash{}(k.key); } @@ -336,12 +342,12 @@ class index_dense_gt { bool operator()(key_and_slot_t const& a, key_t const& b) const noexcept { return a.key == b; } bool operator()(key_t const& a, key_and_slot_t const& b) const noexcept { return a == b.key; } bool operator()(key_and_slot_t const& a, key_and_slot_t const& b) const noexcept { - return a.key == b.key && a.slot == b.slot; + return (!a.any_slot() & !b.any_slot()) ? a.key == b.key && a.slot == b.slot : a.key == b.key; } }; /// @brief Multi-Map from keys to IDs, and allocated vectors. - tsl::robin_set slot_lookup_; + std::unordered_multiset slot_lookup_; /// @brief Mutex, controlling concurrent access to `slot_lookup_`. 
mutable shared_mutex_t slot_lookup_mutex_; @@ -855,7 +861,7 @@ class index_dense_gt { */ bool contains(key_t key) const { shared_lock_t lock(slot_lookup_mutex_); - return slot_lookup_.contains(key); + return slot_lookup_.find(key_and_slot_t::any_slot(key)) != slot_lookup_.end(); } /** @@ -864,7 +870,7 @@ class index_dense_gt { */ std::size_t count(key_t key) const { shared_lock_t lock(slot_lookup_mutex_); - return slot_lookup_.count(key); + return slot_lookup_.count(key_and_slot_t::any_slot(key)); } struct labeling_result_t { @@ -890,65 +896,70 @@ class index_dense_gt { labeling_result_t result; unique_lock_t lookup_lock(slot_lookup_mutex_); - auto labeled_iterator = slot_lookup_.find(key); - if (labeled_iterator == slot_lookup_.end()) + auto matching_slots = slot_lookup_.equal_range(key_and_slot_t::any_slot(key)); + if (matching_slots.first == matching_slots.second) return result; // Grow the removed entries ring, if needed + std::size_t matching_count = std::distance(matching_slots.first, matching_slots.second); std::unique_lock free_lock(free_keys_mutex_); - if (free_keys_.size() == free_keys_.capacity()) - if (!free_keys_.reserve((std::max)(free_keys_.capacity() * 2, 64ul))) - return result.failed("Can't allocate memory for a free-list"); + if (!free_keys_.reserve(free_keys_.size() + matching_count)) + return result.failed("Can't allocate memory for a free-list"); // A removed entry would be: // - present in `free_keys_` // - missing in the `slot_lookup_` // - marked in the `typed_` index with a `free_key_` - compressed_slot_t slot = (*labeled_iterator).slot; - free_keys_.push(slot); - slot_lookup_.erase(labeled_iterator); - typed_->at(slot).key = free_key_; - result.completed = true; + for (auto slots_it = matching_slots.first; slots_it != matching_slots.second; ++slots_it) { + compressed_slot_t slot = (*slots_it).slot; + free_keys_.push(slot); + typed_->at(slot).key = free_key_; + } + slot_lookup_.erase(matching_slots.first, matching_slots.second); + result.completed = matching_count; return result; } /** * @brief Removes multiple entries with the specified keys from the index. - * @param[in] labels_begin The beginning of the keys range. - * @param[in] labels_end The ending of the keys range. + * @param[in] keys_begin The beginning of the keys range. + * @param[in] keys_end The ending of the keys range. * @return The ::labeling_result_t indicating the result of the removal operation. * `result.completed` will contain the number of keys that were successfully removed. * `result.error` will contain an error message if an error occurred during the removal operation. 
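     * Removal never shrinks the graph: freed slots are pushed onto an internal
     * free-list and the corresponding nodes are re-labeled with `free_key_`, so
     * subsequent insertions can reuse the same storage.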
*/ template - labeling_result_t remove(labels_iterator_at&& labels_begin, labels_iterator_at&& labels_end) { + labeling_result_t remove(labels_iterator_at keys_begin, labels_iterator_at keys_end) { labeling_result_t result; unique_lock_t lookup_lock(slot_lookup_mutex_); std::unique_lock free_lock(free_keys_mutex_); - // Grow the removed entries ring, if needed - std::size_t count_requests = std::distance(labels_begin, labels_end); - if (!free_keys_.reserve(free_keys_.size() + count_requests)) + std::size_t matching_count = 0; + for (auto keys_it = keys_begin; keys_it != keys_end; ++keys_it) + matching_count += slot_lookup_.count(key_and_slot_t::any_slot(*keys_it)); + + if (!free_keys_.reserve(free_keys_.size() + matching_count)) return result.failed("Can't allocate memory for a free-list"); // Remove them one-by-one - for (auto label_it = labels_begin; label_it != labels_end; ++label_it) { - key_t key = *label_it; - auto labeled_iterator = slot_lookup_.find(key); - if (labeled_iterator == slot_lookup_.end()) - continue; - + for (auto keys_it = keys_begin; keys_it != keys_end; ++keys_it) { + key_t key = *keys_it; + auto matching_slots = slot_lookup_.equal_range(key_and_slot_t::any_slot(key)); // A removed entry would be: // - present in `free_keys_` // - missing in the `slot_lookup_` // - marked in the `typed_` index with a `free_key_` - compressed_slot_t slot = (*labeled_iterator).slot; - free_keys_.push(slot); - slot_lookup_.erase(labeled_iterator); - typed_->at(slot).key = free_key_; - result.completed += 1; + for (auto slots_it = matching_slots.first; slots_it != matching_slots.second; ++slots_it) { + compressed_slot_t slot = (*slots_it).slot; + free_keys_.push(slot); + typed_->at(slot).key = free_key_; + } + + matching_count = std::distance(matching_slots.first, matching_slots.second); + slot_lookup_.erase(matching_slots.first, matching_slots.second); + result.completed += matching_count; } return result; @@ -965,16 +976,24 @@ class index_dense_gt { labeling_result_t rename(key_t from, key_t to) { labeling_result_t result; unique_lock_t lookup_lock(slot_lookup_mutex_); - auto labeled_iterator = slot_lookup_.find(from); - if (labeled_iterator == slot_lookup_.end()) - return result; - compressed_slot_t slot = (*labeled_iterator).slot; - key_and_slot_t key_and_slot{to, slot}; - slot_lookup_.erase(labeled_iterator); - slot_lookup_.insert(key_and_slot); - typed_->at(slot).key = to; - result.completed = true; + if (!multi() && slot_lookup_.count(key_and_slot_t::any_slot(to))) + return result.failed("Renaming impossible, the key is already in use"); + + // The `from` may map to multiple entries + while (true) { + auto slots_it = slot_lookup_.find(key_and_slot_t::any_slot(from)); + if (slots_it == slot_lookup_.end()) + break; + + compressed_slot_t slot = (*slots_it).slot; + key_and_slot_t key_and_slot{to, slot}; + slot_lookup_.erase(slots_it); + slot_lookup_.insert(key_and_slot); + typed_->at(slot).key = to; + ++result.completed; + } + return result; } @@ -1328,7 +1347,7 @@ class index_dense_gt { // Find the matching ID { shared_lock_t lock(slot_lookup_mutex_); - auto it = slot_lookup_.find(key); + auto it = slot_lookup_.find(key_and_slot_t::any_slot(key)); if (it == slot_lookup_.end()) return false; slot = (*it).slot; @@ -1341,10 +1360,11 @@ class index_dense_gt { return true; } else { shared_lock_t lock(slot_lookup_mutex_); - auto equal_range_pair = slot_lookup_.equal_range(key); + auto equal_range_pair = slot_lookup_.equal_range(key_and_slot_t::any_slot(key)); std::size_t count_exported = 
0; for (auto begin = equal_range_pair.first; begin != equal_range_pair.second && count_exported != vectors_limit; ++begin, ++count_exported) { + // compressed_slot_t slot = (*begin).slot; byte_t const* punned_vector = reinterpret_cast(vectors_lookup_[slot]); byte_t* reconstructed_vector = (byte_t*)reconstructed + metric_.bytes_per_vector() * count_exported; diff --git a/python/scripts/test_index.py b/python/scripts/test_index.py index 06a4a49b..c60cde45 100644 --- a/python/scripts/test_index.py +++ b/python/scripts/test_index.py @@ -114,11 +114,7 @@ def test_index_duplicates(batch_size): assert len(index) == batch_size * 2 two_per_key = index.get(keys) - print(two_per_key) - if batch_size == 1: - assert two_per_key.shape == (2, ndim) - else: - assert np.vstack(two_per_key).shape == (2 * batch_size, ndim) + assert np.vstack(two_per_key).shape == (2 * batch_size, ndim) @pytest.mark.parametrize("batch_size", [1, 7, 1024]) From 89a0b753fee56389edaae6fd2758d5fb3df3e855 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Mon, 14 Aug 2023 18:57:38 +0400 Subject: [PATCH 27/70] Make: Remove `robin-map` dependency --- .github/workflows/prerelease.yml | 4 ++-- Package.swift | 1 - binding.gyp | 28 ++++++++++++++++------------ build.gradle | 2 +- build.rs | 1 - c/CMakeLists.txt | 1 - cpp/CMakeLists.txt | 1 - docs/compilation.md | 3 +-- include/usearch/index_dense.hpp | 2 -- pyproject.toml | 2 +- robin-map | 1 - setup.py | 2 +- wasm/CMakeLists.txt | 1 - 13 files changed, 22 insertions(+), 27 deletions(-) delete mode 160000 robin-map diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index b6c030cb..a328166d 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -66,7 +66,7 @@ jobs: - name: Build locally run: python -m pip install . - name: Test with PyTest - run: pytest python/scripts/test.py + run: pytest python/scripts/ test_python_37: @@ -95,7 +95,7 @@ jobs: run: python -m pip install . 
- name: Test with PyTest - run: pytest python/scripts/test.py + run: pytest python/scripts/ test_javascript: diff --git a/Package.swift b/Package.swift index 37158ecc..f0aec20c 100644 --- a/Package.swift +++ b/Package.swift @@ -19,7 +19,6 @@ let package = Package( cxxSettings: [ .headerSearchPath("../include/"), .headerSearchPath("../fp16/include/"), - .headerSearchPath("../robin-map/include/"), .headerSearchPath("../simismd/include/") ] ), diff --git a/binding.gyp b/binding.gyp index f62e49e1..4c7fc952 100644 --- a/binding.gyp +++ b/binding.gyp @@ -2,32 +2,36 @@ "targets": [ { "target_name": "usearch", - "sources": [ - "javascript/lib.cpp" - ], + "sources": ["javascript/lib.cpp"], "include_dirs": [ " #include -#include - namespace unum { namespace usearch { diff --git a/pyproject.toml b/pyproject.toml index 8327e1c3..0ea33932 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ filterwarnings = ["error"] [tool.cibuildwheel] test-requires = ["pytest", "numpy"] -test-command = "pytest {project}/python/scripts/test.py" +test-command = "pytest {project}/python/scripts" build-verbosity = 0 skip = ["*musllinux*", "*i686*", "pp*", "cp36-*"] diff --git a/robin-map b/robin-map deleted file mode 160000 index 851a59e0..00000000 --- a/robin-map +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 851a59e0e3063ee0e23089062090a73fd3de482d diff --git a/setup.py b/setup.py index a6fa3182..5b160321 100644 --- a/setup.py +++ b/setup.py @@ -90,7 +90,7 @@ "Topic :: Database :: Database Engines/Servers", "Topic :: Scientific/Engineering :: Artificial Intelligence", ], - include_dirs=["include", "fp16/include", "robin-map/include", "simsimd/include"], + include_dirs=["include", "fp16/include", "simsimd/include"], ext_modules=ext_modules, install_requires=[ "numpy", diff --git a/wasm/CMakeLists.txt b/wasm/CMakeLists.txt index ed6f6c2c..ec3c5311 100644 --- a/wasm/CMakeLists.txt +++ b/wasm/CMakeLists.txt @@ -7,7 +7,6 @@ set(USEARCH_PUNNED_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../c" "${CMAKE_CURRENT_SOURCE_DIR}/../include" "${CMAKE_CURRENT_SOURCE_DIR}/../fp16/include" - "${CMAKE_CURRENT_SOURCE_DIR}/../robin-map/include" ) add_executable(index lib.cpp) From 9b724daa58bc28e9c4849c324f34b0fa8376bedb Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Mon, 14 Aug 2023 22:23:28 +0400 Subject: [PATCH 28/70] Refactor: Consistent `multi` in bindings --- cpp/test.cpp | 4 ++-- include/usearch/index_dense.hpp | 8 ++++---- python/lib.cpp | 2 +- python/usearch/index.py | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/test.cpp b/cpp/test.cpp index 3d896138..9f68df88 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -192,13 +192,13 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) { } // Type-punned: - for (bool ban_collisions : {false, true}) { + for (bool multi : {false, true}) { for (std::size_t connectivity : {3, 13, 50}) { std::printf("- punned with connectivity %zu \n", connectivity); using index_t = index_dense_gt; metric_punned_t metric(dimensions, metric_kind_t::cos_k, scalar_kind()); index_dense_config_t config(connectivity); - config.ban_collisions = ban_collisions; + config.multi = multi; index_t index = index_t::make(metric, config); test_cosine(index, matrix); } diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 4052491a..94b39534 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -95,7 +95,7 @@ struct index_dense_config_t : public 
index_config_t { std::size_t expansion_add = default_expansion_add(); std::size_t expansion_search = default_expansion_search(); bool exclude_vectors = false; - bool ban_collisions = false; + bool multi = false; index_dense_config_t(index_config_t base) noexcept : index_config_t(base) {} @@ -471,7 +471,7 @@ class index_dense_gt { std::size_t max_level() const noexcept { return typed_->max_level(); } index_dense_config_t const& config() const { return config_; } index_limits_t const& limits() const { return typed_->limits(); } - bool multi() const { return !config_.ban_collisions; } + bool multi() const { return config_.multi; } // The metric and its properties metric_t const& metric() const { return metric_; } @@ -1215,7 +1215,7 @@ class index_dense_gt { key_t key, scalar_at const* vector, // std::size_t thread, bool force_vector_copy, cast_t const& cast) { - if (config_.ban_collisions && contains(key)) + if (!multi() && contains(key)) return add_result_t{}.failed("Duplicate keys not allowed in high-level wrappers"); // Cast the vector, if needed for compatibility with `metric_` @@ -1340,7 +1340,7 @@ class index_dense_gt { template std::size_t get_(key_t key, scalar_at* reconstructed, std::size_t vectors_limit, cast_t const& cast) const { - if (config_.ban_collisions) { + if (!multi()) { compressed_slot_t slot; // Find the matching ID { diff --git a/python/lib.cpp b/python/lib.cpp index 3d96d281..b8d2b18e 100644 --- a/python/lib.cpp +++ b/python/lib.cpp @@ -165,7 +165,7 @@ static dense_index_py_t make_index( // bool multi) { index_dense_config_t config(connectivity, expansion_add, expansion_search); - config.ban_collisions = !multi; + config.multi = multi; metric_t metric = // metric_uintptr // diff --git a/python/usearch/index.py b/python/usearch/index.py index e1ce1115..771895a9 100644 --- a/python/usearch/index.py +++ b/python/usearch/index.py @@ -407,7 +407,7 @@ def __init__( connectivity: Optional[int] = None, expansion_add: Optional[int] = None, expansion_search: Optional[int] = None, - multi: bool = True, + multi: bool = False, path: Optional[os.PathLike] = None, view: bool = False, ) -> None: From e6ed1a71052fb7a646aa8d1b7864369da4c83159 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Tue, 15 Aug 2023 19:51:08 +0400 Subject: [PATCH 29/70] Fix: Type-casting in Python retrieval --- python/scripts/test_index.py | 4 ++-- python/usearch/index.py | 27 +++++++++++---------------- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/python/scripts/test_index.py b/python/scripts/test_index.py index c60cde45..f5a8274d 100644 --- a/python/scripts/test_index.py +++ b/python/scripts/test_index.py @@ -66,8 +66,8 @@ def test_index_retrieval(ndim, metric, quantization, dtype, batch_size): keys = np.arange(batch_size) vectors = random_vectors(count=batch_size, ndim=ndim, dtype=dtype) index.add(keys, vectors, threads=threads) - vectors_retrived = np.vstack(index.get(keys)) - assert np.allclose(vectors_retrived.astype(dtype), vectors, atol=0.1) + vectors_retrived = np.vstack(index.get(keys, dtype)) + assert np.allclose(vectors_retrived, vectors, atol=0.1) @pytest.mark.parametrize("ndim", [3, 97, 256]) diff --git a/python/usearch/index.py b/python/usearch/index.py index 771895a9..b298cc05 100644 --- a/python/usearch/index.py +++ b/python/usearch/index.py @@ -82,19 +82,11 @@ def _normalize_dtype(dtype, metric: MetricKind = MetricKind.Cos) -> ScalarKind: "float32": ScalarKind.F32, "float16": ScalarKind.F16, "int8": ScalarKind.I8, - } - if 
isinstance(dtype, np.dtype): - dtype = dtype.name - return _normalize[dtype] - - -def _to_numpy_compatible_dtype(dtype: ScalarKind) -> ScalarKind: - _normalize = { - ScalarKind.F64: ScalarKind.F64, - ScalarKind.F32: ScalarKind.F32, - ScalarKind.F16: ScalarKind.F16, - ScalarKind.I8: ScalarKind.F16, - ScalarKind.B1: ScalarKind.B1, + np.float64: ScalarKind.F64, + np.float32: ScalarKind.F32, + np.float16: ScalarKind.F16, + np.int8: ScalarKind.I8, + np.uint8: ScalarKind.B1, } return _normalize[dtype] @@ -104,9 +96,11 @@ def _to_numpy_dtype(dtype: ScalarKind): ScalarKind.F64: np.float64, ScalarKind.F32: np.float32, ScalarKind.F16: np.float16, - ScalarKind.I8: np.float16, + ScalarKind.I8: np.int8, ScalarKind.B1: np.uint8, } + if dtype in _normalize.values(): + return dtype return _normalize[dtype] @@ -660,8 +654,9 @@ def get( """ if not dtype: dtype = self.dtype + else: + dtype = _normalize_dtype(dtype) - get_dtype = _to_numpy_compatible_dtype(dtype) view_dtype = _to_numpy_dtype(dtype) def cast(result): @@ -675,7 +670,7 @@ def cast(result): if not isinstance(keys, np.ndarray): keys = np.array(keys, dtype=Key) - results = self._compiled.get_many(keys, get_dtype) + results = self._compiled.get_many(keys, dtype) results = [cast(result) for result in results] return results[0] if is_one else results From 5e50f6b38fced8e91ff359e13c97dea3625e6f71 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Tue, 15 Aug 2023 20:11:09 +0400 Subject: [PATCH 30/70] Add: Clustering limited to number of clusters --- include/usearch/index_dense.hpp | 11 +++-- python/lib.cpp | 86 ++++++++++++++++++++++++++++++--- python/usearch/index.py | 5 +- 3 files changed, 91 insertions(+), 11 deletions(-) diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 94b39534..d8af410e 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -327,8 +327,6 @@ class index_dense_gt { static key_and_slot_t any_slot(key_t key) { return {key, default_free_value()}; } }; - key_and_slot_t key_and_any_slot() {} - struct lookup_key_hash_t { using is_transparent = void; std::size_t operator()(key_and_slot_t const& k) const noexcept { return std::hash{}(k.key); } @@ -340,7 +338,7 @@ class index_dense_gt { bool operator()(key_and_slot_t const& a, key_t const& b) const noexcept { return a.key == b; } bool operator()(key_t const& a, key_and_slot_t const& b) const noexcept { return a == b.key; } bool operator()(key_and_slot_t const& a, key_and_slot_t const& b) const noexcept { - return (!a.any_slot() & !b.any_slot()) ? a.key == b.key && a.slot == b.slot : a.key == b.key; + return (!a.any_slot() && !b.any_slot()) ? a.key == b.key && a.slot == b.slot : a.key == b.key; } }; @@ -540,6 +538,13 @@ class index_dense_gt { bool get(key_t key, f64_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to_f64); } // clang-format on + /** + * @brief Computes the distance between two managed entities. + */ + distance_punned_t distance_between(key_t a, key_t b) const { + return metric_proxy_t{*this}(typed_->at(a), typed_->at(b)); + } + /** * @brief Reserves memory for the index and the labeled lookup. * @return `true` if the memory reservation was successful, `false` otherwise. diff --git a/python/lib.cpp b/python/lib.cpp index b8d2b18e..f815cc81 100644 --- a/python/lib.cpp +++ b/python/lib.cpp @@ -636,11 +636,17 @@ static void cluster_typed( // * 4. number of computed pairwise distances. 
*/ template -static py::tuple cluster_many_in_index( // - index_at& index, py::buffer vectors, std::size_t level, std::size_t threads) { - - if (level == 0) - return py::tuple(5); +static py::tuple cluster_many_in_index( // + index_at& index, py::buffer vectors, // + std::size_t level, std::size_t count, std::size_t threads) { + + // Determine the level, which would contain enough clusters for it + if (level == 0) { + for (; level <= index.max_level(); ++level) { + if (index.stats(level).nodes < count) + break; + } + } if (index.limits().threads_search < threads) throw std::invalid_argument("Can't use that many threads!"); @@ -660,10 +666,10 @@ static py::tuple cluster_many_in_index( // std::atomic stats_visited_members(0); std::atomic stats_computed_distances(0); - // Those would be set for one for al entries, in case of success + // Those would be set for one for all entries, in case of success auto counts_py1d = counts_py.template mutable_unchecked<1>(); for (Py_ssize_t vector_idx = 0; vector_idx != vectors_count; ++vector_idx) - counts_py1d(vector_idx) = 0; + counts_py1d(vector_idx) = 1; // clang-format off switch (numpy_string_to_kind(vectors_info.format)) { @@ -676,6 +682,71 @@ static py::tuple cluster_many_in_index( // } // clang-format on + struct cluster_t { + key_t key = 0; + union { + std::size_t popularity = 0; + key_t replacement; + }; + }; + + // Now once we have identified the closest clusters, + // we can try reducing their quantity, refining + std::vector clusters(vectors_count); + auto keys_py2d = keys_py.template mutable_unchecked<2>(); + for (Py_ssize_t vector_idx = 0; vector_idx != vectors_count; ++vector_idx) + clusters[vector_idx].key = keys_py2d(vector_idx, 0), clusters[vector_idx].popularity = 1; + + // Sort by cluster key + std::sort(clusters.begin(), clusters.end(), [](cluster_t& a, cluster_t& b) { return a.key < b.key; }); + + // Transform into run-length encoding + std::size_t last_idx = 0; + for (std::size_t current_idx = 1; current_idx != clusters.size(); ++current_idx) { + if (clusters[last_idx].key == clusters[current_idx].key) { + clusters[last_idx].popularity++; + } else { + last_idx++; + clusters[last_idx] = clusters[current_idx]; + } + } + clusters.resize(last_idx + 1); + + // Drop smaller clusters iteratively merging those into the closest ones. + std::sort(clusters.begin(), clusters.end(), [](cluster_t& a, cluster_t& b) { return a.popularity > b.popularity; }); + + // Instead of doing it at once, use the `cluster_t::replacement` property to plan future re-mapping. 
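    // Concretely: clusters were just ranked by popularity, and everything past the
    // requested `count` is treated as evicted. For each evicted cluster the loop below
    // scans the surviving clusters, picks the one with the smallest pairwise distance
    // reported by `index.distance_between`, and stores it as that cluster's `replacement`.
    // Queries are remapped only afterwards, via a binary search over the evicted range,
    // keeping this planning pass at O(evicted x survivors) distance computations.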
+ for (std::size_t cluster_idx = count; cluster_idx < clusters.size(); ++cluster_idx) { + key_t dropped_cluster_key = clusters[cluster_idx].key; + key_t target_key = dropped_cluster_key; + distance_t target_distance = std::numeric_limits::max(); + for (std::size_t candidate_idx = 0; candidate_idx != count; ++candidate_idx) { + key_t cluster_key = clusters[candidate_idx].key; + distance_t cluster_distance = index.distance_between(dropped_cluster_key, cluster_key); + if (cluster_distance <= target_distance) + target_key = cluster_key, target_distance = cluster_distance; + } + clusters[cluster_idx].replacement = target_key; + } + + // Sort dropped clusters by name to accelerate future lookups + std::sort(clusters.begin() + count, clusters.end(), [](cluster_t& a, cluster_t& b) { return a.key < b.key; }); + + // Replace evicted clusters + for (Py_ssize_t vector_idx = 0; vector_idx != vectors_count; ++vector_idx) { + key_t& cluster_key = keys_py2d(vector_idx, 0); + + // To avoid implementing heterogeneous comparisons, lets wrap the `cluster_key` + cluster_t cluster_key_wrapped; + cluster_key_wrapped.key = cluster_key; + auto displaced_range = std::equal_range(clusters.begin() + count, clusters.end(), cluster_key_wrapped, + [](cluster_t const& a, cluster_t const& b) { return a.key < b.key; }); + if (displaced_range.first == displaced_range.second) + continue; + + cluster_key = displaced_range.first->replacement; + } + py::tuple results(5); results[0] = keys_py; results[1] = distances_py; @@ -904,6 +975,7 @@ PYBIND11_MODULE(compiled, m) { "cluster_many", &cluster_many_in_index, // py::arg("query"), // py::arg("level") = 1, // + py::arg("count") = 0, // py::arg("threads") = 0 // ); diff --git a/python/usearch/index.py b/python/usearch/index.py index b298cc05..27e353a6 100644 --- a/python/usearch/index.py +++ b/python/usearch/index.py @@ -934,8 +934,10 @@ def cluster( :return: Matches for one or more queries :rtype: Union[Matches, BatchMatches] """ - if level is None and count is None: + if level is None: level = 1 + if count is None: + count = 0 return _search_in_compiled( self._compiled.cluster_many, @@ -945,6 +947,7 @@ def cluster( batch_size=batch_size, # Search constraints: level=level, + count=count, threads=threads, ) From b935fe37ec256788c770e6e2ea359def91871ca4 Mon Sep 17 00:00:00 2001 From: Davit Vardanyan <78792753+davvard@users.noreply.github.com> Date: Thu, 17 Aug 2023 13:57:03 +0400 Subject: [PATCH 31/70] Docs: Fix JavaScript documentation --- docs/conf.py | 2 +- docs/javascript/reference.rst | 3 + javascript/docs.js | 111 ++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+), 1 deletion(-) create mode 100644 javascript/docs.js diff --git a/docs/conf.py b/docs/conf.py index e4645776..6bcbb732 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -47,4 +47,4 @@ breathe_projects = {'USearch': '../build/xml'} breathe_default_project = 'USearch' -js_source_path = '../javascript/usearch.js' +js_source_path = '../javascript/docs.js' diff --git a/docs/javascript/reference.rst b/docs/javascript/reference.rst index c6ee91ee..e700fb6e 100644 --- a/docs/javascript/reference.rst +++ b/docs/javascript/reference.rst @@ -7,3 +7,6 @@ usearch.Index .. js:autoclass:: Index :members: + +.. js:autoclass:: Matches + :members: diff --git a/javascript/docs.js b/javascript/docs.js new file mode 100644 index 00000000..2f533cea --- /dev/null +++ b/javascript/docs.js @@ -0,0 +1,111 @@ +/** Search result object. 
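 * Wraps the three parallel fields produced by `Index.search`: the keys of the
 * nearest neighbors, their distances to the query, and the number of matches found.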
*/ +class Matches { + /** + * @param {BigUint64Array} keys - The keys of the nearest neighbors found. + * @param {Float32Array} distances - The distances of the nearest neighbors found. + * @param {bigint} count - The count of nearest neighbors found. + */ + constructor(keys, distances, count) { + this.keys = keys; + this.distances = distances; + this.count = count; + } +} + +/** K-Approximate Nearest Neighbors search index. */ +class Index { + /** + * Constructs a new index. + * + * @param {bigint} dimensions + * @param {string} metric + * @param {string} quantization + * @param {bigint} capacity + * @param {bigint} connectivity + * @param {bigint} expansion_add + * @param {bigint} expansion_search + */ + constructor( + dimensions, + metric, + quantization, + capacity, + connectivity, + expansion_add, + expansion_search + ) {} + + /** + * Returns the dimensionality of vectors. + * @return {bigint} The dimensionality of vectors. + */ + dimensions() {} + + /** + * Returns the bigint of vectors currently indexed. + * @return {bigint} The bigint of vectors currently indexed. + */ + size() {} + + /** + * Returns index capacity. + * @return {bigint} The capacity of index. + */ + capacity() {} + + /** + * Returns connectivity. + * @return {bigint} The connectivity of index. + */ + connectivity() {} + + /** + * Write index to a file. + * @param {string} path File path to write. + */ + save(path) {} + + /** + * Load index from a file. + * @param {string} path File path to read. + */ + load(path) {} + + /** + * View index from a file, without loading into RAM. + * @param {string} path File path to read. + */ + view(path) {} + + /** + * Add n vectors of dimension d to the index. + * + * @param {bigint | bigint[]} keys Input identifiers for every vector. + * @param {Float32Array | Float32Array[]} mat Input matrix, matrix of size n * d. + */ + add(keys, mat) {} + + /** + * Query n vectors of dimension d to the index. Return at most k vectors for each. + * If there are not enough results for a query, the result array is padded with -1s. + * + * @param {Float32Array} mat Input vectors to search, matrix of size n * d. + * @param {bigint} k The bigint of nearest neighbors to search for. + * @return {Matches} Output of the search result. + */ + search(mat, k) {} + + /** + * Check if an entry is contained in the index. + * + * @param {bigint} key Identifier to look up. + */ + contains(key) {} + + /** + * Remove a vector from the index. + * + * @param {bigint} key Input identifier for every vector to be removed. 
+ */ + remove(key) {} +} From 9fd27676318f6913f93e007e7c2c656eb94bee69 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 19 Aug 2023 13:27:51 +0400 Subject: [PATCH 32/70] Fix: Persisting the flag for multi-indexes --- include/usearch/index_dense.hpp | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index d8af410e..f9cadf9a 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -63,19 +63,21 @@ struct index_dense_head_t { misaligned_ref_gt count_present; misaligned_ref_gt count_deleted; misaligned_ref_gt dimensions; + misaligned_ref_gt multi; index_dense_head_t(byte_t* ptr) noexcept - : magic((char const*)exchange(ptr, ptr + sizeof(magic_t))), // - version_major(exchange(ptr, ptr + sizeof(version_t))), // - version_minor(exchange(ptr, ptr + sizeof(version_t))), // - version_patch(exchange(ptr, ptr + sizeof(version_t))), // - kind_metric(exchange(ptr, ptr + sizeof(metric_kind_t))), // - kind_scalar(exchange(ptr, ptr + sizeof(scalar_kind_t))), // - kind_key(exchange(ptr, ptr + sizeof(scalar_kind_t))), // - kind_compressed_slot(exchange(ptr, ptr + sizeof(scalar_kind_t))), - count_present(exchange(ptr, ptr + sizeof(std::uint64_t))), - count_deleted(exchange(ptr, ptr + sizeof(std::uint64_t))), - dimensions(exchange(ptr, ptr + sizeof(std::uint64_t))) {} + : magic((char const*)exchange(ptr, ptr + sizeof(magic_t))), // + version_major(exchange(ptr, ptr + sizeof(version_t))), // + version_minor(exchange(ptr, ptr + sizeof(version_t))), // + version_patch(exchange(ptr, ptr + sizeof(version_t))), // + kind_metric(exchange(ptr, ptr + sizeof(metric_kind_t))), // + kind_scalar(exchange(ptr, ptr + sizeof(scalar_kind_t))), // + kind_key(exchange(ptr, ptr + sizeof(scalar_kind_t))), // + kind_compressed_slot(exchange(ptr, ptr + sizeof(scalar_kind_t))), // + count_present(exchange(ptr, ptr + sizeof(std::uint64_t))), // + count_deleted(exchange(ptr, ptr + sizeof(std::uint64_t))), // + dimensions(exchange(ptr, ptr + sizeof(std::uint64_t))), // + multi(exchange(ptr, ptr + sizeof(bool))) {} }; struct index_dense_head_result_t { @@ -674,6 +676,7 @@ class index_dense_gt { head.count_present = size(); head.count_deleted = typed_->size() - size(); head.dimensions = dimensions(); + head.multi = multi(); if (!callback(&buffer, sizeof(buffer))) return result.failed("Failed to serialize into stream"); @@ -762,6 +765,7 @@ class index_dense_gt { return result.failed("Slot type doesn't match, consider rebuilding"); metric_ = metric_t(head.dimensions, head.kind_metric, head.kind_scalar); + config_.multi = head.multi; } // Pull the actual proximity graph @@ -838,6 +842,7 @@ class index_dense_gt { return result.failed("Slot type doesn't match, consider rebuilding"); metric_ = metric_t(head.dimensions, head.kind_metric, head.kind_scalar); + config_.multi = head.multi; offset += sizeof(buffer); } From 7ec46991bd118dc20f0bf6e08ab2b4afb3706743 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sun, 20 Aug 2023 19:07:39 +0400 Subject: [PATCH 33/70] Fix: `match_t` constructors and type names --- include/usearch/index.hpp | 55 +++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 975533c5..5db0e878 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1123,13 
+1123,13 @@ struct dummy_executor_t { }; /** - * @brief An example of what a USearch-compatible label-to-label mapping should look like. + * @brief An example of what a USearch-compatible key-to-key mapping should look like. * * This is particularly helpful for "Semantic Joins", where we map entries of one collection * to entries of another. In assymetric setups, where A -> B is needed, but B -> A is not, * this can be passed to minimize memory usage. */ -struct dummy_label_to_label_mapping_t { +struct dummy_key_to_key_mapping_t { struct member_ref_t { template member_ref_t& operator=(key_at&&) noexcept { return *this; } }; @@ -1147,7 +1147,7 @@ template static constexpr bool is_dummy() { std::is_same::value || // std::is_same::value || // std::is_same::value || // - std::is_same::value; + std::is_same::value; } template struct has_reset_gt { @@ -1987,6 +1987,28 @@ class index_gt { struct match_t { member_cref_t member; distance_t distance; + + inline match_t(member_cref_t member, distance_t distance) noexcept : member(member), distance(distance) {} + + inline match_t(match_t&& other) noexcept + : member({other.member.key.ptr(), other.member.slot}), distance(other.distance) {} + + inline match_t(match_t const& other) noexcept + : member({other.member.key.ptr(), other.member.slot}), distance(other.distance) {} + + inline match_t& operator=(match_t const& other) noexcept { + member.key.reset(other.member.key.ptr()); + member.slot = other.member.slot; + distance = other.distance; + return *this; + } + + inline match_t& operator=(match_t&& other) noexcept { + member.key.reset(other.member.key.ptr()); + member.slot = other.member.slot; + distance = other.distance; + return *this; + } }; class search_result_t { @@ -2076,7 +2098,7 @@ class index_gt { error_t error{}; std::size_t visited_members{}; std::size_t computed_distances{}; - match_t cluster{{nullptr}, 0}; + match_t cluster{{nullptr}, std::numeric_limits::max()}; explicit operator bool() const noexcept { return !error; } cluster_result_t failed(error_t message) noexcept { @@ -2670,10 +2692,10 @@ class index_gt { * @param[in] executor Thread-pool to execute the job in parallel. * @param[in] progress Callback to report the execution progress. */ - template void compact( // values_at&& values, // @@ -3301,10 +3323,10 @@ template < // typename men_metric_at, // typename women_metric_at, // - typename man_to_woman_at = dummy_label_to_label_mapping_t, // - typename woman_to_man_at = dummy_label_to_label_mapping_t, // - typename executor_at = dummy_executor_t, // - typename progress_at = dummy_progress_t // + typename man_to_woman_at = dummy_key_to_key_mapping_t, // + typename woman_to_man_at = dummy_key_to_key_mapping_t, // + typename executor_at = dummy_executor_t, // + typename progress_at = dummy_progress_t // > static join_result_t join( // men_at const& men, // @@ -3384,6 +3406,7 @@ static join_result_t join( // std::atomic engagements{0}; std::atomic computed_distances{0}; std::atomic visited_members{0}; + std::atomic atomic_error{nullptr}; // Concurrently process all the men executor.parallel([&](std::size_t thread_idx) { @@ -3394,7 +3417,7 @@ static join_result_t join( // compressed_slot_t free_man_slot; // While there exist a free man who still has a woman to propose to. 
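        // The body of this loop pops a free man, searches the women's index for his next
        // candidate, and records the engagement. The changed lines are the error path:
        // previously a failed search was silently ignored (the `TODO`), so a worker could
        // spin forever; now the first failure releases its message into `atomic_error`,
        // every worker re-checks that flag in its loop condition and stops, and the join
        // as a whole returns `result.failed(...)` with the captured message.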
- while (true) { + while (!atomic_error.load(std::memory_order_relaxed)) { std::size_t passed_rounds = 0; std::size_t total_rounds = 0; { @@ -3419,7 +3442,8 @@ static join_result_t join( // visited_members += candidates.visited_members; computed_distances += candidates.computed_distances; if (!candidates) { - // TODO: + atomic_error = candidates.error.release(); + break; } auto match = candidates.back(); @@ -3462,6 +3486,9 @@ static join_result_t join( // } }); + if (atomic_error) + return result.failed(atomic_error.load()); + // Export the "slots" into keys: std::size_t intersection_size = 0; for (std::size_t man_slot = 0; man_slot != men.size(); ++man_slot) { From 492b181b7ba0512af2d9008af92d10344676cbd1 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sun, 20 Aug 2023 19:10:24 +0400 Subject: [PATCH 34/70] Add: `distance_between` and `cluster` APIs --- docs/compilation.md | 4 +- include/usearch/index_dense.hpp | 389 +++++++++++++++++++++++++++++--- python/lib.cpp | 255 +++++++++------------ python/scripts/test_index.py | 24 +- python/usearch/index.py | 41 ++-- setup.py | 6 +- 6 files changed, 503 insertions(+), 216 deletions(-) diff --git a/docs/compilation.md b/docs/compilation.md index 890fbf83..5922eda0 100644 --- a/docs/compilation.md +++ b/docs/compilation.md @@ -53,7 +53,7 @@ cppcheck --enable=all --force --suppress=cstyleCast --suppress=unusedFunction \ Testing: ```sh -cmake -DCMAKE_CXX_COMPILER=gcc-12 -DCMAKE_CXX_COMPILER=g++-12 -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && make -C ./build_debug && ./build_debug/test +cmake -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && make -C ./build_debug && ./build_debug/test ``` ## Python 3 @@ -63,7 +63,7 @@ The `-s` option will disable capturing the logs. The `-x` option will exit after first failure to simplify debugging. ```sh -pip install -e . && pytest python/scripts/test_index.py -s -x +pip install -e . && pytest python/scripts/ -s -x ``` Linting: diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index f9cadf9a..f4a15e92 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -249,13 +249,16 @@ class index_dense_gt { using serialization_config_t = index_dense_serialization_config_t; + using dynamic_allocator_t = aligned_allocator_gt; + using tape_allocator_t = memory_mapping_allocator_gt<64>; + private: /// @brief Schema: input buffer, bytes in input buffer, output buffer. using cast_t = std::function; /// @brief Punned index. using index_t = index_gt< // distance_t, key_t, compressed_slot_t, // - aligned_allocator_gt, memory_mapping_allocator_gt<64>>; + dynamic_allocator_t, tape_allocator_t>; using index_allocator_t = aligned_allocator_gt; using member_iterator_t = typename index_t::member_iterator_t; @@ -420,13 +423,13 @@ class index_dense_gt { * @brief Constructs an instance of ::index_dense_gt. * @param[in] metric One of the provided or an @b ad-hoc metric, type-punned. * @param[in] config The index configuration (optional). - * @param[in] free_label The key used for freed vectors (optional). + * @param[in] free_key The key used for freed vectors (optional). * @return An instance of ::index_dense_gt. 
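     * A minimal usage sketch (the `index_dense_t` alias and the enumerators are
     * assumed from the public headers, not introduced by this patch):
     * @code{.cpp}
     *   metric_punned_t metric(256, metric_kind_t::cos_k, scalar_kind_t::f32_k);
     *   index_dense_t index = index_dense_t::make(metric);
     *   index.reserve(1024);
     *   std::vector<float> vector(256, 0.5f);
     *   index.add(42, vector.data());
     * @endcode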
*/ static index_dense_gt make( // metric_t metric, // index_dense_config_t config = {}, // - key_t free_label = default_free_value()) { + key_t free_key = default_free_value()) { scalar_kind_t scalar_kind = metric.scalar_kind(); std::size_t hardware_threads = std::thread::hardware_concurrency(); @@ -436,7 +439,7 @@ class index_dense_gt { result.cast_buffer_.resize(hardware_threads * metric.bytes_per_vector()); result.casts_ = make_casts_(scalar_kind); result.metric_ = metric; - result.free_key_ = free_label; + result.free_key_ = free_key; // Fill the thread IDs. result.available_threads_.resize(hardware_threads); @@ -498,6 +501,8 @@ class index_dense_gt { stats_t stats() const { return typed_->stats(); } stats_t stats(std::size_t level) const { return typed_->stats(level); } + dynamic_allocator_t const& allocator() const { return typed_->dynamic_allocator(); } + /** * @brief A relatively accurate lower bound on the amount of memory consumed by the system. * In practice it's error will be below 10%. @@ -513,6 +518,14 @@ class index_dense_gt { } static constexpr std::size_t any_thread() { return std::numeric_limits::max(); } + static constexpr distance_t infinite_distance() { return std::numeric_limits::max(); } + + struct aggregated_distances_t { + std::size_t count = 0; + distance_t mean = infinite_distance(); + distance_t min = infinite_distance(); + distance_t max = infinite_distance(); + }; // clang-format off add_result_t add(key_t key, b1x8_t const* vector, std::size_t thread = any_thread(), bool force_vector_copy = true) { return add_(key, vector, thread, force_vector_copy, casts_.from_b1x8); } @@ -527,28 +540,104 @@ class index_dense_gt { search_result_t search(f32_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, thread, exact, casts_.from_f32); } search_result_t search(f64_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, thread, exact, casts_.from_f64); } + bool get(key_t key, b1x8_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to_b1x8); } + bool get(key_t key, i8_bits_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to_i8); } + bool get(key_t key, f16_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to_f16); } + bool get(key_t key, f32_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to_f32); } + bool get(key_t key, f64_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to_f64); } + cluster_result_t cluster(b1x8_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from_b1x8); } cluster_result_t cluster(i8_bits_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from_i8); } cluster_result_t cluster(f16_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from_f16); } cluster_result_t cluster(f32_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from_f32); } cluster_result_t cluster(f64_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, 
thread, casts_.from_f64); } - bool get(key_t key, b1x8_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to_b1x8); } - bool get(key_t key, i8_bits_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to_i8); } - bool get(key_t key, f16_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to_f16); } - bool get(key_t key, f32_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to_f32); } - bool get(key_t key, f64_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to_f64); } + aggregated_distances_t distance_between(key_t key, b1x8_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to_b1x8); } + aggregated_distances_t distance_between(key_t key, i8_bits_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to_i8); } + aggregated_distances_t distance_between(key_t key, f16_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to_f16); } + aggregated_distances_t distance_between(key_t key, f32_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to_f32); } + aggregated_distances_t distance_between(key_t key, f64_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to_f64); } // clang-format on /** - * @brief Computes the distance between two managed entities. + * @brief Computes the distance between two managed entities. + * If either key maps into more than one vector, will aggregate results + * exporting the mean, maximum, and minimum values. + */ + aggregated_distances_t distance_between(key_t a, key_t b, std::size_t = any_thread()) const { + shared_lock_t lock(slot_lookup_mutex_); + auto a_range = slot_lookup_.equal_range(key_and_slot_t::any_slot(a)); + auto b_range = slot_lookup_.equal_range(key_and_slot_t::any_slot(b)); + bool a_missing = a_range.first == a_range.second; + bool b_missing = b_range.first == b_range.second; + aggregated_distances_t result; + if (!a_missing || !b_missing) + return result; + + result.min = std::numeric_limits::max(); + result.max = std::numeric_limits::min(); + result.mean = 0; + result.count = 0; + + while (a_range.first != a_range.second) { + key_and_slot_t a_key_and_slot = *a_range.first; + byte_t const* a_vector = vectors_lookup_[a_key_and_slot.slot]; + while (b_range.first != b_range.second) { + key_and_slot_t b_key_and_slot = *b_range.first; + byte_t const* b_vector = vectors_lookup_[b_key_and_slot.slot]; + distance_t a_b_distance = metric_(a_vector, b_vector); + + result.mean += a_b_distance; + result.min = (std::min)(result.min, a_b_distance); + result.max = (std::max)(result.max, a_b_distance); + result.count++; + + // + ++b_range.first; + } + ++a_range.first; + } + + result.mean /= result.count; + return result; + } + + /** + * @brief Identifies a node in a given `level`, that is the closest to the `key`. */ - distance_punned_t distance_between(key_t a, key_t b) const { - return metric_proxy_t{*this}(typed_->at(a), typed_->at(b)); + cluster_result_t cluster(key_t key, std::size_t level, std::size_t thread = any_thread()) const { + + // Check if such `key` is even present. 
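        // A single key may map to several vectors once `multi()` is enabled, so the code
        // below resolves every slot stored under the key, runs the typed clustering
        // search for each underlying vector at the requested level, and keeps whichever
        // candidate cluster comes out closest.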
+ shared_lock_t slots_lock(slot_lookup_mutex_); + auto key_range = slot_lookup_.equal_range(key_and_slot_t::any_slot(key)); + cluster_result_t result; + if (key_range.first == key_range.second) + return result.failed("Key missing!"); + + index_cluster_config_t cluster_config; + thread_lock_t lock = thread_lock_(thread); + cluster_config.thread = lock.thread_id; + cluster_config.expansion = config_.expansion_search; + metric_proxy_t metric{*this}; + auto allow = [=](member_cref_t const& member) noexcept { return member.key != free_key_; }; + + // Find the closest cluster for any vector under that key. + while (key_range.first != key_range.second) { + key_and_slot_t key_and_slot = *key_range.first; + byte_t const* vector_data = vectors_lookup_[key_and_slot.slot]; + cluster_result_t new_result = typed_->cluster(vector_data, level, metric, cluster_config, allow); + if (!new_result) + return new_result; + if (new_result.cluster.distance < result.cluster.distance) + result = std::move(new_result); + + ++key_range.first; + } + return result; } /** - * @brief Reserves memory for the index and the labeled lookup. + * @brief Reserves memory for the index and the keyed lookup. * @return `true` if the memory reservation was successful, `false` otherwise. */ bool reserve(index_limits_t limits) { @@ -594,7 +683,10 @@ class index_dense_gt { vectors_lookup_.clear(); free_keys_.clear(); vectors_tape_allocator_.reset(); - available_threads_.clear(); + + // Reset the thread IDs. + available_threads_.resize(std::thread::hardware_concurrency()); + std::iota(available_threads_.begin(), available_threads_.end(), 0ul); } /** @@ -775,7 +867,7 @@ class index_dense_gt { if (typed_->size() != static_cast(matrix_rows)) return result.failed("Index size and the number of vectors doesn't match"); - reindex_labels_(); + reindex_keys_(); return result; } @@ -859,7 +951,7 @@ class index_dense_gt { for (std::uint64_t slot = 0; slot != matrix_rows; ++slot) vectors_lookup_[slot] = (byte_t*)vectors_buffer.data() + matrix_cols * slot; - reindex_labels_(); + reindex_keys_(); return result; } @@ -937,8 +1029,8 @@ class index_dense_gt { * `result.completed` will contain the number of keys that were successfully removed. * `result.error` will contain an error message if an error occurred during the removal operation. 
*/ - template - labeling_result_t remove(labels_iterator_at keys_begin, labels_iterator_at keys_end) { + template + labeling_result_t remove(keys_iterator_at keys_begin, keys_iterator_at keys_end) { labeling_result_t result; unique_lock_t lookup_lock(slot_lookup_mutex_); @@ -1165,11 +1257,11 @@ class index_dense_gt { return result; } - template < // - typename man_to_woman_at = dummy_label_to_label_mapping_t, // - typename woman_to_man_at = dummy_label_to_label_mapping_t, // - typename executor_at = dummy_executor_t, // - typename progress_at = dummy_progress_t // + template < // + typename man_to_woman_at = dummy_key_to_key_mapping_t, // + typename woman_to_man_at = dummy_key_to_key_mapping_t, // + typename executor_at = dummy_executor_t, // + typename progress_at = dummy_progress_t // > join_result_t join( // index_dense_gt const& women, // @@ -1316,12 +1408,58 @@ class index_dense_gt { return typed_->cluster(vector_data, level, metric_proxy_t{*this}, cluster_config, allow); } + template + aggregated_distances_t distance_between_( // + key_t key, scalar_at const* vector, // + std::size_t thread, cast_t const& cast) const { + + // Cast the vector, if needed for compatibility with `metric_` + thread_lock_t lock = thread_lock_(thread); + byte_t const* vector_data = reinterpret_cast(vector); + { + byte_t* casted_data = cast_buffer_.data() + metric_.bytes_per_vector() * lock.thread_id; + bool casted = cast(vector_data, dimensions(), casted_data); + if (casted) + vector_data = casted_data; + } + + // Check if such `key` is even present. + shared_lock_t slots_lock(slot_lookup_mutex_); + auto key_range = slot_lookup_.equal_range(key_and_slot_t::any_slot(key)); + aggregated_distances_t result; + if (key_range.first == key_range.second) + return result; + + result.min = std::numeric_limits::max(); + result.max = std::numeric_limits::min(); + result.mean = 0; + result.count = 0; + + while (key_range.first != key_range.second) { + key_and_slot_t key_and_slot = *key_range.first; + byte_t const* a_vector = vectors_lookup_[key_and_slot.slot]; + byte_t const* b_vector = vector_data; + distance_t a_b_distance = metric_(a_vector, b_vector); + + result.mean += a_b_distance; + result.min = (std::min)(result.min, a_b_distance); + result.max = (std::max)(result.max, a_b_distance); + result.count++; + + // + ++key_range.first; + } + + result.mean /= result.count; + return result; + } + compressed_slot_t lookup_id_(key_t key) const { shared_lock_t lock(slot_lookup_mutex_); return slot_lookup_.at(key); } - void reindex_labels_() { + void reindex_keys_() { // Estimate number of entries first std::size_t count_total = typed_->size(); @@ -1429,19 +1567,19 @@ using index_dense_big_t = index_dense_gt; */ template < // - typename men_label_at, // - typename women_label_at, // - typename men_slot_at, // - typename women_slot_at, // + typename men_key_at, // + typename women_key_at, // + typename men_slot_at, // + typename women_slot_at, // - typename man_to_woman_at = dummy_label_to_label_mapping_t, // - typename woman_to_man_at = dummy_label_to_label_mapping_t, // - typename executor_at = dummy_executor_t, // - typename progress_at = dummy_progress_t // + typename man_to_woman_at = dummy_key_to_key_mapping_t, // + typename woman_to_man_at = dummy_key_to_key_mapping_t, // + typename executor_at = dummy_executor_t, // + typename progress_at = dummy_progress_t // > -static join_result_t join( // - index_dense_gt const& men, // - index_dense_gt const& women, // +static join_result_t join( // + index_dense_gt const& men, 
// + index_dense_gt const& women, // index_join_config_t config = {}, // man_to_woman_at&& man_to_woman = man_to_woman_at{}, // @@ -1457,5 +1595,186 @@ static join_result_t join( // std::forward(progress)); } +struct clustering_result_t { + error_t error{}; + std::size_t clusters{}; + std::size_t visited_members{}; + std::size_t computed_distances{}; + + explicit operator bool() const noexcept { return !error; } + clustering_result_t failed(error_t message) noexcept { + error = std::move(message); + return std::move(*this); + } +}; + +struct clustering_config_t { + std::size_t target_clusters = 0; +}; + +/** + * @brief Implements clustering, classifying the given objects (vectors of member keys) + * into a given number of clusters. + * + * @param[in] queries_begin Iterator targeting the fiest query. + * @param[in] queries_end + * @param[in] executor Thread-pool to execute the job in parallel. + * @param[in] progress Callback to report the execution progress. + */ +template < // + typename key_at, // + typename slot_at, // + typename queries_iterator_at, // + typename executor_at = dummy_executor_t, // + typename progress_at = dummy_progress_t // + > +static clustering_result_t cluster( // + index_dense_gt const& index, // + queries_iterator_at queries_begin, // + queries_iterator_at queries_end, // + // + std::size_t max_clusters, // + key_at* cluster_keys, // + distance_punned_t* cluster_distances, // + executor_at&& executor = executor_at{}, // + progress_at&& progress = progress_at{}) { + + using index_t = index_dense_gt; + using key_t = typename index_t::key_t; + using distance_t = typename index_t::distance_t; + + std::size_t const queries_count = queries_end - queries_begin; + + // Skip the first few top level, assuming they can't even potentially have enough clusters + std::size_t level = index.max_level(); + if (max_clusters) + for (; level > 1; --level) { + if (index.stats(level).nodes < max_clusters) + break; + } + else + max_clusters = index.stats(1).nodes, level = 1; + + clustering_result_t result; + if (index.max_level() < 1) + return result.failed("Index too small to cluster!"); + + // A structure used to track the popularity of a specific cluster + struct cluster_t { + key_t key = 0; + union { + std::size_t popularity = 0; + key_t replacement; + }; + }; + + auto smaller_key = [](cluster_t const& a, cluster_t const& b) { return a.key < b.key; }; + auto higher_popularity = [](cluster_t const& a, cluster_t const& b) { return a.popularity > b.popularity; }; + + std::atomic visited_members(0); + std::atomic computed_distances(0); + std::atomic atomic_error{nullptr}; + +repeat_clustering: + // Concurrently perform search until a certain depth + executor.dynamic(queries_count, [&](std::size_t thread_idx, std::size_t query_idx) { + auto result = index.cluster(queries_begin[query_idx], level, thread_idx); + if (!result) { + atomic_error = result.error.release(); + return false; + } + + cluster_keys[query_idx] = result.cluster.member.key; + cluster_distances[query_idx] = result.cluster.distance; + + visited_members += result.visited_members; + computed_distances += result.computed_distances; + return true; + }); + + if (atomic_error) + return result.failed(atomic_error.load()); + + // Now once we have identified the closest clusters, + // we can try reducing their quantity, refining + using dynamic_allocator_t = typename index_t::dynamic_allocator_t; + using dynamic_allocator_traits_t = std::allocator_traits; + using clusters_allocator_t = typename dynamic_allocator_traits_t::template 
rebind_alloc; + buffer_gt clusters(queries_count); + if (!clusters) + return result.failed("Out of memory!"); + + for (std::size_t query_idx = 0; query_idx != queries_count; ++query_idx) + clusters[query_idx].key = cluster_keys[query_idx], clusters[query_idx].popularity = 1; + + // Sort by cluster key + std::sort(clusters.begin(), clusters.end(), smaller_key); + + // Transform into run-length encoding, cmoputing the number of unique clusters + std::size_t unique_clusters = 0; + { + std::size_t last_idx = 0; + for (std::size_t current_idx = 1; current_idx != clusters.size(); ++current_idx) { + if (clusters[last_idx].key == clusters[current_idx].key) { + clusters[last_idx].popularity++; + } else { + last_idx++; + clusters[last_idx] = clusters[current_idx]; + } + } + unique_clusters = last_idx + 1; + } + + // In some cases the queries may be co-located, all mapping into the same cluster on that + // level. In that case we refine the granularity and dive deeper into clusters: + if (unique_clusters < max_clusters && level > 1) { + level--; + goto repeat_clustering; + } + + // Drop smaller clusters iteratively merging those into the closest ones + if (max_clusters < unique_clusters) { + std::sort(clusters.data(), clusters.data() + unique_clusters, higher_popularity); + + // Instead of doing it at once, use the `cluster_t::replacement` property to plan future re-mapping + for (std::size_t cluster_idx = max_clusters; cluster_idx < unique_clusters; ++cluster_idx) { + key_t dropped_cluster_key = clusters[cluster_idx].key; + key_t target_key = dropped_cluster_key; + distance_t target_distance = std::numeric_limits::max(); + for (std::size_t candidate_idx = 0; candidate_idx != max_clusters; ++candidate_idx) { + key_t cluster_key = clusters[candidate_idx].key; + distance_t cluster_distance = index.distance_between(dropped_cluster_key, cluster_key).mean; + if (cluster_distance <= target_distance) + target_key = cluster_key, target_distance = cluster_distance; + } + clusters[cluster_idx].replacement = target_key; + } + + // Sort dropped clusters by name to accelerate future lookups + std::sort(clusters.data() + max_clusters, clusters.data() + unique_clusters, smaller_key); + + // Replace evicted clusters + for (std::size_t query_idx = 0; query_idx != queries_count; ++query_idx) { + key_t& cluster_key = cluster_keys[query_idx]; + distance_t& cluster_distance = cluster_distances[query_idx]; + + // To avoid implementing heterogeneous comparisons, lets wrap the `cluster_key` + cluster_t cluster_key_wrapped; + cluster_key_wrapped.key = cluster_key; + auto displaced_range = std::equal_range(clusters.data() + max_clusters, clusters.data() + unique_clusters, + cluster_key_wrapped, smaller_key); + if (displaced_range.first == displaced_range.second) + continue; + + cluster_key = displaced_range.first->replacement; + cluster_distance = index.distance_between(cluster_key, queries_begin[query_idx], 0).mean; + } + } + + result.computed_distances = computed_distances; + result.visited_members = visited_members; + return result; +} + } // namespace usearch } // namespace unum diff --git a/python/lib.cpp b/python/lib.cpp index f815cc81..73715f0d 100644 --- a/python/lib.cpp +++ b/python/lib.cpp @@ -576,57 +576,88 @@ static py::tuple search_many_brute_force( // return results; } -template -static void cluster_typed( // - dense_index_py_t& index, py::buffer_info& vectors_info, // - std::size_t level, std::size_t threads, // - py::array_t& keys_py, py::array_t& distances_py, // - std::atomic& stats_visited_members, 
std::atomic& stats_computed_distances) { +template struct rows_lookup_gt { + byte_t* data_; + std::size_t stride_; + + rows_lookup_gt(void* data, std::size_t stride) noexcept : data_((byte_t*)data), stride_(stride) {} + scalar_at* operator[](std::size_t i) const noexcept { return reinterpret_cast(data_ + i * stride_); } + std::ptrdiff_t operator-(rows_lookup_gt const& other) const noexcept { return (data_ - other.data_) / stride_; } + rows_lookup_gt operator+(std::size_t n) const noexcept { return {data_ + stride_ * n, stride_}; } + template rows_lookup_gt as() const noexcept { return {data_, stride_}; } +}; - auto keys_py2d = keys_py.template mutable_unchecked<2>(); - auto distances_py2d = distances_py.template mutable_unchecked<2>(); +/** + * @param queries Matrix of vectors to search for. + * @param count Number of clusters to produce. + * + * @return Tuple with: + * 1. vector of cluster IDs, + * 2. vector of distances to those clusters, + * 3. array with match counts, set to all ones, + * 4. number of visited nodes, + * 4. number of computed pairwise distances. + */ +template +static py::tuple cluster_vectors(index_at& index, py::buffer queries, std::size_t count, std::size_t threads) { - Py_ssize_t vectors_count = vectors_info.shape[0]; - byte_t const* vectors_data = reinterpret_cast(vectors_info.ptr); + if (index.limits().threads_search < threads) + throw std::invalid_argument("Can't use that many threads!"); - if (!threads) - threads = std::thread::hardware_concurrency(); - if (!index.reserve(index_limits_t(index.size(), threads))) - throw std::invalid_argument("Out of memory!"); + py::buffer_info queries_info = queries.request(); + if (queries_info.ndim != 2) + throw std::invalid_argument("Expects a matrix of queries to add!"); - atomic_error_t atomic_error{nullptr}; - executor_default_t{threads}.dynamic(vectors_count, [&](std::size_t thread_idx, std::size_t task_idx) { - scalar_at const* vector = (scalar_at const*)(vectors_data + task_idx * vectors_info.strides[0]); - dense_cluster_result_t result = index.cluster(vector, level, thread_idx); - if (!result) { - atomic_error = result.error.release(); - return false; - } + std::size_t queries_count = static_cast(queries_info.shape[0]); + std::size_t queries_stride = static_cast(queries_info.strides[0]); + std::size_t queries_dimensions = static_cast(queries_info.shape[1]); + if (queries_dimensions != index.scalar_words()) + throw std::invalid_argument("The number of vector dimensions doesn't match!"); - keys_py2d(task_idx, 0) = result.cluster.member.key; - distances_py2d(task_idx, 0) = result.cluster.distance; + py::array_t keys_py({Py_ssize_t(queries_count), Py_ssize_t(1)}); + py::array_t distances_py({Py_ssize_t(queries_count), Py_ssize_t(1)}); + clustering_result_t cluster_result; + executor_default_t executor{threads}; - stats_visited_members += result.visited_members; - stats_computed_distances += result.computed_distances; + auto keys_py2d = keys_py.template mutable_unchecked<2>(); + auto distances_py2d = distances_py.template mutable_unchecked<2>(); + key_t* keys_ptr = reinterpret_cast(&keys_py2d(0, 0)); + distance_t* distances_ptr = reinterpret_cast(&distances_py2d(0, 0)); - // We don't want to check for signals from multiple threads - if (thread_idx == 0) - if (PyErr_CheckSignals() != 0) - return false; - return true; - }); + rows_lookup_gt queries_begin(queries_info.ptr, queries_stride); + rows_lookup_gt queries_end = queries_begin + queries_count; - // Raise the error from a single thread - auto error = atomic_error.load(); 
- if (error) { - PyErr_SetString(PyExc_RuntimeError, error); - throw py::error_already_set(); + // clang-format off + switch (numpy_string_to_kind(queries_info.format)) { + case scalar_kind_t::b1x8_k: cluster_result = cluster(index, queries_begin.as(), queries_end.as(), count, keys_ptr, distances_ptr, executor); break; + case scalar_kind_t::i8_k: cluster_result = cluster(index, queries_begin.as(), queries_end.as(), count, keys_ptr, distances_ptr, executor); break; + case scalar_kind_t::f16_k: cluster_result = cluster(index, queries_begin.as(), queries_end.as(), count, keys_ptr, distances_ptr, executor); break; + case scalar_kind_t::f32_k: cluster_result = cluster(index, queries_begin.as(), queries_end.as(), count, keys_ptr, distances_ptr, executor); break; + case scalar_kind_t::f64_k: cluster_result = cluster(index, queries_begin.as(), queries_end.as(), count, keys_ptr, distances_ptr, executor); break; + default: throw std::invalid_argument("Incompatible scalars in the query matrix: " + queries_info.format); } + // clang-format on + + cluster_result.error.raise(); + + // Those would be set to 1 for all entries, in case of success + py::array_t counts_py(queries_count); + auto counts_py1d = counts_py.template mutable_unchecked<1>(); + for (std::size_t query_idx = 0; query_idx != queries_count; ++query_idx) + counts_py1d(static_cast(query_idx)) = 1; + + py::tuple results(5); + results[0] = keys_py; + results[1] = distances_py; + results[2] = counts_py; + results[3] = cluster_result.visited_members; + results[4] = cluster_result.computed_distances; + return results; } /** - * @param vectors Matrix of vectors to search for. - * @param level Graph level to query. + * @param queries Array of keys to cluster. + * @param count Number of clusters to produce. * * @return Tuple with: * 1. vector of cluster IDs, @@ -636,123 +667,41 @@ static void cluster_typed( // * 4. number of computed pairwise distances. 
*/ template -static py::tuple cluster_many_in_index( // - index_at& index, py::buffer vectors, // - std::size_t level, std::size_t count, std::size_t threads) { - - // Determine the level, which would contain enough clusters for it - if (level == 0) { - for (; level <= index.max_level(); ++level) { - if (index.stats(level).nodes < count) - break; - } - } +static py::tuple cluster_keys(index_at& index, py::array_t queries_py, std::size_t count, std::size_t threads) { if (index.limits().threads_search < threads) throw std::invalid_argument("Can't use that many threads!"); - py::buffer_info vectors_info = vectors.request(); - if (vectors_info.ndim != 2) - throw std::invalid_argument("Expects a matrix of vectors to add!"); - - Py_ssize_t vectors_count = vectors_info.shape[0]; - Py_ssize_t vectors_dimensions = vectors_info.shape[1]; - if (vectors_dimensions != static_cast(index.scalar_words())) - throw std::invalid_argument("The number of vector dimensions doesn't match!"); - - py::array_t keys_py({vectors_count, Py_ssize_t(1)}); - py::array_t distances_py({vectors_count, Py_ssize_t(1)}); - py::array_t counts_py(vectors_count); - std::atomic stats_visited_members(0); - std::atomic stats_computed_distances(0); - - // Those would be set for one for all entries, in case of success - auto counts_py1d = counts_py.template mutable_unchecked<1>(); - for (Py_ssize_t vector_idx = 0; vector_idx != vectors_count; ++vector_idx) - counts_py1d(vector_idx) = 1; + std::size_t queries_count = static_cast(queries_py.size()); + auto queries_py1d = queries_py.template unchecked<1>(); + key_t const* queries_begin = &queries_py1d(0); + key_t const* queries_end = queries_begin + queries_count; - // clang-format off - switch (numpy_string_to_kind(vectors_info.format)) { - case scalar_kind_t::b1x8_k: cluster_typed(index, vectors_info, level, threads, keys_py, distances_py, stats_visited_members, stats_computed_distances); break; - case scalar_kind_t::i8_k: cluster_typed(index, vectors_info, level, threads, keys_py, distances_py, stats_visited_members, stats_computed_distances); break; - case scalar_kind_t::f16_k: cluster_typed(index, vectors_info, level, threads, keys_py, distances_py, stats_visited_members, stats_computed_distances); break; - case scalar_kind_t::f32_k: cluster_typed(index, vectors_info, level, threads, keys_py, distances_py, stats_visited_members, stats_computed_distances); break; - case scalar_kind_t::f64_k: cluster_typed(index, vectors_info, level, threads, keys_py, distances_py, stats_visited_members, stats_computed_distances); break; - default: throw std::invalid_argument("Incompatible scalars in the query matrix: " + vectors_info.format); - } - // clang-format on - - struct cluster_t { - key_t key = 0; - union { - std::size_t popularity = 0; - key_t replacement; - }; - }; + py::array_t keys_py({Py_ssize_t(queries_count), Py_ssize_t(1)}); + py::array_t distances_py({Py_ssize_t(queries_count), Py_ssize_t(1)}); + executor_default_t executor{threads}; - // Now once we have identified the closest clusters, - // we can try reducing their quantity, refining - std::vector clusters(vectors_count); auto keys_py2d = keys_py.template mutable_unchecked<2>(); - for (Py_ssize_t vector_idx = 0; vector_idx != vectors_count; ++vector_idx) - clusters[vector_idx].key = keys_py2d(vector_idx, 0), clusters[vector_idx].popularity = 1; - - // Sort by cluster key - std::sort(clusters.begin(), clusters.end(), [](cluster_t& a, cluster_t& b) { return a.key < b.key; }); - - // Transform into run-length encoding - std::size_t 
last_idx = 0; - for (std::size_t current_idx = 1; current_idx != clusters.size(); ++current_idx) { - if (clusters[last_idx].key == clusters[current_idx].key) { - clusters[last_idx].popularity++; - } else { - last_idx++; - clusters[last_idx] = clusters[current_idx]; - } - } - clusters.resize(last_idx + 1); - - // Drop smaller clusters iteratively merging those into the closest ones. - std::sort(clusters.begin(), clusters.end(), [](cluster_t& a, cluster_t& b) { return a.popularity > b.popularity; }); - - // Instead of doing it at once, use the `cluster_t::replacement` property to plan future re-mapping. - for (std::size_t cluster_idx = count; cluster_idx < clusters.size(); ++cluster_idx) { - key_t dropped_cluster_key = clusters[cluster_idx].key; - key_t target_key = dropped_cluster_key; - distance_t target_distance = std::numeric_limits::max(); - for (std::size_t candidate_idx = 0; candidate_idx != count; ++candidate_idx) { - key_t cluster_key = clusters[candidate_idx].key; - distance_t cluster_distance = index.distance_between(dropped_cluster_key, cluster_key); - if (cluster_distance <= target_distance) - target_key = cluster_key, target_distance = cluster_distance; - } - clusters[cluster_idx].replacement = target_key; - } - - // Sort dropped clusters by name to accelerate future lookups - std::sort(clusters.begin() + count, clusters.end(), [](cluster_t& a, cluster_t& b) { return a.key < b.key; }); - - // Replace evicted clusters - for (Py_ssize_t vector_idx = 0; vector_idx != vectors_count; ++vector_idx) { - key_t& cluster_key = keys_py2d(vector_idx, 0); + auto distances_py2d = distances_py.template mutable_unchecked<2>(); + key_t* keys_ptr = reinterpret_cast(&keys_py2d(0, 0)); + distance_t* distances_ptr = reinterpret_cast(&distances_py2d(0, 0)); - // To avoid implementing heterogeneous comparisons, lets wrap the `cluster_key` - cluster_t cluster_key_wrapped; - cluster_key_wrapped.key = cluster_key; - auto displaced_range = std::equal_range(clusters.begin() + count, clusters.end(), cluster_key_wrapped, - [](cluster_t const& a, cluster_t const& b) { return a.key < b.key; }); - if (displaced_range.first == displaced_range.second) - continue; + clustering_result_t cluster_result = + cluster(index, queries_begin, queries_end, count, keys_ptr, distances_ptr, executor); + cluster_result.error.raise(); - cluster_key = displaced_range.first->replacement; - } + // Those would be set to 1 for all entries, in case of success + py::array_t counts_py(queries_count); + auto counts_py1d = counts_py.template mutable_unchecked<1>(); + for (std::size_t query_idx = 0; query_idx != queries_count; ++query_idx) + counts_py1d(static_cast(query_idx)) = 1; py::tuple results(5); results[0] = keys_py; results[1] = distances_py; results[2] = counts_py; - results[3] = stats_visited_members.load(); - results[4] = stats_computed_distances.load(); + results[3] = cluster_result.visited_members; + results[4] = cluster_result.computed_distances; return results; } @@ -761,7 +710,7 @@ static std::unordered_map join_index( // std::size_t max_proposals, bool exact) { std::unordered_map a_to_b; - dummy_label_to_label_mapping_t b_to_a; + dummy_key_to_key_mapping_t b_to_a; a_to_b.reserve((std::min)(a.size(), b.size())); index_join_config_t config; @@ -965,18 +914,24 @@ PYBIND11_MODULE(compiled, m) { i.def( // "search_many", &search_many_in_index, // - py::arg("query"), // + py::arg("queries"), // py::arg("count") = 10, // py::arg("exact") = false, // py::arg("threads") = 0 // ); - i.def( // - "cluster_many", 
&cluster_many_in_index, // - py::arg("query"), // - py::arg("level") = 1, // - py::arg("count") = 0, // - py::arg("threads") = 0 // + i.def( // + "cluster_vectors", &cluster_vectors, // + py::arg("queries"), // + py::arg("count") = 0, // + py::arg("threads") = 0 // + ); + + i.def( // + "cluster_keys", &cluster_keys, // + py::arg("queries"), // + py::arg("count") = 0, // + py::arg("threads") = 0 // ); i.def( diff --git a/python/scripts/test_index.py b/python/scripts/test_index.py index f5a8274d..0eed64fb 100644 --- a/python/scripts/test_index.py +++ b/python/scripts/test_index.py @@ -192,16 +192,30 @@ def test_index_contains_remove_rename(batch_size): @pytest.mark.parametrize("ndim", [3, 97, 256]) @pytest.mark.parametrize("metric", [MetricKind.Cos, MetricKind.L2sq]) -@pytest.mark.parametrize("batch_size", [10, 1024]) +@pytest.mark.parametrize("batch_size", [100, 1024]) @pytest.mark.parametrize("quantization", [ScalarKind.F32, ScalarKind.I8]) @pytest.mark.parametrize("dtype", [np.float32, np.float64, np.float16]) -def test_index_clustering(ndim, metric, quantization, dtype, batch_size): - if batch_size <= 1: - return +def test_index_vectors_clustering(ndim, metric, quantization, dtype, batch_size): + index = Index(ndim=ndim, metric=metric, dtype=quantization, multi=False) + keys = np.arange(batch_size) + vectors = random_vectors(count=batch_size, ndim=ndim, dtype=dtype) + index.add(keys, vectors, threads=threads) + + clusters: BatchMatches = index.cluster(vectors=vectors, threads=threads) + assert len(clusters.keys) == batch_size + +@pytest.mark.parametrize("ndim", [3, 97, 256]) +@pytest.mark.parametrize("metric", [MetricKind.Cos, MetricKind.L2sq]) +@pytest.mark.parametrize("batch_size", [100, 1024]) +@pytest.mark.parametrize("quantization", [ScalarKind.F32, ScalarKind.I8]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64, np.float16]) +def test_index_members_clustering(ndim, metric, quantization, dtype, batch_size): index = Index(ndim=ndim, metric=metric, dtype=quantization, multi=False) keys = np.arange(batch_size) vectors = random_vectors(count=batch_size, ndim=ndim, dtype=dtype) index.add(keys, vectors, threads=threads) - clusters: BatchMatches = index.cluster(vectors, threads=threads) + + # If no argument is provided, we cluster the present entries + clusters: BatchMatches = index.cluster(threads=threads) assert len(clusters.keys) == batch_size diff --git a/python/usearch/index.py b/python/usearch/index.py index 27e353a6..800b527d 100644 --- a/python/usearch/index.py +++ b/python/usearch/index.py @@ -907,23 +907,21 @@ def join( def cluster( self, - vectors: np.ndarray, *, + vectors: Optional[np.ndarray] = None, + keys: Optional[np.ndarray] = None, count: Optional[int] = None, - level: Optional[int] = None, threads: int = 0, log: Union[str, bool] = False, batch_size: int = 0, ) -> Union[Matches, BatchMatches]: """ - Performs approximate nearest neighbors search for one or more queries. + Clusters already indexed or provided `vectors`, mapping them to various centroids. - :param vectors: Query vector or vectors. - :type vectors: VectorOrVectorsLike - :param count: Number of clusters to produce, can be inferred from `level` + :param vectors: . 
+ :type vectors: Optional[VectorOrVectorsLike] + :param count: Upper bound on the number of clusters to produce :type count: Optional[int], defaults to None - :param level: Graph level to target - higher means coarse, can be inferred from `count` - :type level: Optional[int], defaults to None :param threads: Optimal number of cores to use, :type threads: int, defaults to 0 @@ -934,22 +932,23 @@ def cluster( :return: Matches for one or more queries :rtype: Union[Matches, BatchMatches] """ - if level is None: - level = 1 if count is None: count = 0 - return _search_in_compiled( - self._compiled.cluster_many, - vectors, - # Batch scheduling: - log=log, - batch_size=batch_size, - # Search constraints: - level=level, - count=count, - threads=threads, - ) + if vectors is not None: + assert keys is None, "You can either cluster vectors or member keys" + results = self._compiled.cluster_vectors( + vectors, count=count, threads=threads + ) + else: + if keys is None: + keys = self._compiled.get_keys_in_slice() + if not isinstance(keys, np.ndarray): + keys = np.array(keys) + keys = keys.astype(Key) + results = self._compiled.cluster_keys(keys, count=count, threads=threads) + + return BatchMatches(*results) @property def keys(self) -> IndexedKeys: diff --git a/setup.py b/setup.py index 5b160321..2a966f94 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ ] if sys.platform == "linux": - compile_args.append("-std=c++11") + compile_args.append("-std=c++17") compile_args.append("-O3") # Maximize performance compile_args.append("-g") # Simplify debugging compile_args.append("-Wno-unknown-pragmas") @@ -26,7 +26,7 @@ # MacOS 10.15 or higher is needed for `aligned_alloc` support. # https://github.com/unum-cloud/usearch/actions/runs/4975434891/jobs/8902603392 compile_args.append("-mmacosx-version-min=10.15") - compile_args.append("-std=c++11") + compile_args.append("-std=c++17") compile_args.append("-O3") # Maximize performance compile_args.append("-g") # Simplify debugging compile_args.append("-Wno-unknown-pragmas") @@ -37,7 +37,7 @@ # link_args.append("-Xpreprocessor -lomp") if sys.platform == "win32": - compile_args.append("/std:c++14") + compile_args.append("/std:c++17") compile_args.append("/O2") ext_modules = [ From c428f54a61200780bd43577ad8645815779cdda7 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sun, 20 Aug 2023 19:11:22 +0400 Subject: [PATCH 35/70] Make: Freeze Sphinx version --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 56fa16c0..75b39adf 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -319,7 +319,7 @@ jobs: run: | sudo apt update && sudo apt install -y doxygen graphviz dia git && - pip install sphinx sphinx-js breathe furo m2r2 sphinxcontrib-googleanalytics==0.2.dev20220708 sphinxcontrib-jquery && + pip install sphinx==7.1.2 sphinx-js breathe furo m2r2 sphinxcontrib-googleanalytics==0.2.dev20220708 sphinxcontrib-jquery && npm install -g jsdoc - name: Install USearch from PyPi run: pip install usearch From 9ede10fed93d3d1b572b675c2a6f4a696d032bb7 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sun, 20 Aug 2023 21:18:19 +0400 Subject: [PATCH 36/70] Refactor: `multi` support in C 99 bindings --- c/lib.cpp | 38 +++++++++++++++++++--------------- c/usearch.h | 40 ++++++++++++++++++++++++------------ docs/compilation.md | 2 +- 
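For orientation, the reworked Python entry point can be exercised roughly the way the updated tests in this series do, either clustering externally supplied vectors or the members already stored in the index. The dimensions, key ranges, and vector values below are placeholders, and the snippet assumes a local `usearch` build with these patches applied.

```python
import numpy as np
from usearch.index import Index, MetricKind

index = Index(ndim=96, metric=MetricKind.Cos, multi=False)
keys = np.arange(1_000)
vectors = np.random.rand(1_000, 96).astype(np.float32)
index.add(keys, vectors)

# Cluster externally supplied vectors against the index...
clusters = index.cluster(vectors=vectors)

# ...or cluster a subset of the members already present in the index,
# addressed by their keys (calling with no argument clusters every member).
member_clusters = index.cluster(keys=keys[:50])

# Each result is a batch of single matches: one centroid key and one
# distance per query.
assert len(clusters.keys) == 1_000
assert len(member_clusters.keys) == 50
```
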
python/scripts/test_index.py | 18 +++++----------- 4 files changed, 55 insertions(+), 43 deletions(-) diff --git a/c/lib.cpp b/c/lib.cpp index 9570fd87..2053a506 100644 --- a/c/lib.cpp +++ b/c/lib.cpp @@ -58,13 +58,13 @@ add_result_t add_(index_dense_t* index, usearch_key_t key, void const* vector, s } } -bool get_(index_dense_t* index, usearch_key_t key, void* vector, scalar_kind_t kind) { +bool get_(index_dense_t* index, usearch_key_t key, size_t count, void* vector, scalar_kind_t kind) { switch (kind) { - case scalar_kind_t::f32_k: return index->get(key, (f32_t*)vector); - case scalar_kind_t::f64_k: return index->get(key, (f64_t*)vector); - case scalar_kind_t::f16_k: return index->get(key, (f16_t*)vector); - case scalar_kind_t::i8_k: return index->get(key, (i8_bits_t*)vector); - case scalar_kind_t::b1x8_k: return index->get(key, (b1x8_t*)vector); + case scalar_kind_t::f32_k: return index->get(key, (f32_t*)vector, count); + case scalar_kind_t::f64_k: return index->get(key, (f64_t*)vector, count); + case scalar_kind_t::f16_k: return index->get(key, (f16_t*)vector, count); + case scalar_kind_t::i8_k: return index->get(key, (i8_bits_t*)vector, count); + case scalar_kind_t::b1x8_k: return index->get(key, (b1x8_t*)vector, count); default: return search_result_t().failed("Unknown scalar kind!"); } } @@ -87,6 +87,7 @@ USEARCH_EXPORT usearch_index_t usearch_init(usearch_init_options_t* options, use assert(options && error); index_dense_config_t config(options->connectivity, options->expansion_add, options->expansion_search); + config.multi = options->multi; metric_kind_t metric_kind = to_native_metric(options->metric_kind); scalar_kind_t scalar_kind = to_native_scalar(options->quantization); @@ -167,9 +168,14 @@ USEARCH_EXPORT bool usearch_contains(usearch_index_t index, usearch_key_t key, u return reinterpret_cast(index)->contains(key); } +USEARCH_EXPORT size_t usearch_count(usearch_index_t index, usearch_key_t key, usearch_error_t*) { + assert(index); + return reinterpret_cast(index)->count(key); +} + USEARCH_EXPORT size_t usearch_search( // usearch_index_t index, void const* vector, usearch_scalar_kind_t kind, size_t results_limit, // - usearch_key_t* found_labels, usearch_distance_t* found_distances, usearch_error_t* error) { + usearch_key_t* found_keys, usearch_distance_t* found_distances, usearch_error_t* error) { assert(index && vector && error); search_result_t result = @@ -179,18 +185,18 @@ USEARCH_EXPORT size_t usearch_search( return 0; } - return result.dump_to(found_labels, found_distances); + return result.dump_to(found_keys, found_distances); } -USEARCH_EXPORT bool usearch_get( // - usearch_index_t index, usearch_key_t key, // - void* vector, usearch_scalar_kind_t kind, usearch_error_t*) { +USEARCH_EXPORT size_t usearch_get( // + usearch_index_t index, usearch_key_t key, size_t count, // + void* vectors, usearch_scalar_kind_t kind, usearch_error_t*) { - assert(index && vector); - return get_(reinterpret_cast(index), key, vector, to_native_scalar(kind)); + assert(index && vectors); + return get_(reinterpret_cast(index), key, count, vectors, to_native_scalar(kind)); } -USEARCH_EXPORT bool usearch_remove(usearch_index_t index, usearch_key_t key, usearch_error_t* error) { +USEARCH_EXPORT size_t usearch_remove(usearch_index_t index, usearch_key_t key, usearch_error_t* error) { assert(index && error); labeling_result_t result = reinterpret_cast(index)->remove(key); @@ -199,8 +205,8 @@ USEARCH_EXPORT bool usearch_remove(usearch_index_t index, usearch_key_t key, use return 
result.completed; } -USEARCH_EXPORT bool usearch_rename(usearch_index_t index, usearch_key_t from, usearch_key_t to, - usearch_error_t* error) { +USEARCH_EXPORT size_t usearch_rename(usearch_index_t index, usearch_key_t from, usearch_key_t to, + usearch_error_t* error) { assert(index && error); labeling_result_t result = reinterpret_cast(index)->rename(from, to); diff --git a/c/usearch.h b/c/usearch.h index 1498a9ac..f43bcf23 100644 --- a/c/usearch.h +++ b/c/usearch.h @@ -83,6 +83,10 @@ USEARCH_EXPORT typedef struct usearch_init_options_t { * @brief The @b optional expansion factor used for index construction during search operations. */ size_t expansion_search; + /** + * @brief When set allows multiple vectors to map to the same key. + */ + bool multi; } usearch_init_options_t; /** @@ -151,48 +155,58 @@ USEARCH_EXPORT void usearch_add( // */ USEARCH_EXPORT bool usearch_contains(usearch_index_t, usearch_key_t, usearch_error_t* error); +/** + * @brief Counts the number of entries in the index under a specific key. + * @param[in] key The key to be checked. + * @param[out] error Pointer to a string where the error message will be stored, if an error occurs. + * @return Number of vectors found under that key. + */ +USEARCH_EXPORT size_t usearch_count(usearch_index_t, usearch_key_t, usearch_error_t* error); + /** * @brief Performs k-Approximate Nearest Neighbors (kANN) Search for closest vectors to query. * @param[in] query_vector Pointer to the query vector data. * @param[in] query_kind The scalar type used in the query vector data. - * @param[in] results_limit Upper bound on the number of neighbors to search, the "k" in "kANN". - * @param[out] found_keys Output buffer for up to `results_limit` nearest neighbors keys. - * @param[out] found_distances Output buffer for up to `results_limit` distances to nearest neighbors. + * @param[in] count Upper bound on the number of neighbors to search, the "k" in "kANN". + * @param[out] keys Output buffer for up to `count` nearest neighbors keys. + * @param[out] distances Output buffer for up to `count` distances to nearest neighbors. * @param[out] error Pointer to a string where the error message will be stored, if an error occurs. * @return Number of found matches. */ -USEARCH_EXPORT size_t usearch_search( // - usearch_index_t, void const* query_vector, usearch_scalar_kind_t query_kind, size_t results_limit, // - usearch_key_t* found_keys, usearch_distance_t* found_distances, usearch_error_t* error); +USEARCH_EXPORT size_t usearch_search( // + usearch_index_t, // + void const* query_vector, usearch_scalar_kind_t query_kind, // + size_t count, usearch_key_t* keys, usearch_distance_t* distances, usearch_error_t* error); /** * @brief Retrieves the vector associated with the given key from the index. * @param[in] key The key of the vector to retrieve. * @param[out] vector Pointer to the memory where the vector data will be copied. + * @param[in] count Number of vectors that can be fitted into `vector` for multi-vector entries. * @param[in] vector_kind The scalar type used in the vector data. * @param[out] error Pointer to a string where the error message will be stored, if an error occurs. - * @return `true` if the vector is successfully retrieved, `false` if the vector is not found. + * @return Number of vectors found under that name and exported to `vector`. 
*/ -USEARCH_EXPORT bool usearch_get( // - usearch_index_t, usearch_key_t key, // +USEARCH_EXPORT size_t usearch_get( // + usearch_index_t, usearch_key_t key, size_t count, // void* vector, usearch_scalar_kind_t vector_kind, usearch_error_t* error); /** * @brief Removes the vector associated with the given key from the index. * @param[in] key The key of the vector to be removed. * @param[out] error Pointer to a string where the error message will be stored, if an error occurs. - * @return `true` if the vector is successfully removed, `false` if the vector is not found. + * @return Number of vectors found under that name and dropped from the index. */ -USEARCH_EXPORT bool usearch_remove(usearch_index_t, usearch_key_t key, usearch_error_t* error); +USEARCH_EXPORT size_t usearch_remove(usearch_index_t, usearch_key_t key, usearch_error_t* error); /** * @brief Renames the vector to map to a different key. * @param[in] from The key of the vector to be renamed. * @param[in] to New key for found entry. * @param[out] error Pointer to a string where the error message will be stored, if an error occurs. - * @return `true` if the vector is successfully removed, `false` if the vector is not found. + * @return Number of vectors found under that name and renamed. */ -USEARCH_EXPORT bool usearch_rename(usearch_index_t, usearch_key_t from, usearch_key_t to, usearch_error_t* error); +USEARCH_EXPORT size_t usearch_rename(usearch_index_t, usearch_key_t from, usearch_key_t to, usearch_error_t* error); #ifdef __cplusplus } diff --git a/docs/compilation.md b/docs/compilation.md index 5922eda0..9bef42be 100644 --- a/docs/compilation.md +++ b/docs/compilation.md @@ -155,7 +155,7 @@ There are a few ways to compile the C 99 USearch SDK. Using the Makefile: ```sh -make -C ./c make -C ./c libusearch_c.so +make -C ./c libusearch_c.so -C ./c libusearch_c.so ``` Using CMake: diff --git a/python/scripts/test_index.py b/python/scripts/test_index.py index 0eed64fb..a7909bb5 100644 --- a/python/scripts/test_index.py +++ b/python/scripts/test_index.py @@ -195,7 +195,7 @@ def test_index_contains_remove_rename(batch_size): @pytest.mark.parametrize("batch_size", [100, 1024]) @pytest.mark.parametrize("quantization", [ScalarKind.F32, ScalarKind.I8]) @pytest.mark.parametrize("dtype", [np.float32, np.float64, np.float16]) -def test_index_vectors_clustering(ndim, metric, quantization, dtype, batch_size): +def test_index_clustering(ndim, metric, quantization, dtype, batch_size): index = Index(ndim=ndim, metric=metric, dtype=quantization, multi=False) keys = np.arange(batch_size) vectors = random_vectors(count=batch_size, ndim=ndim, dtype=dtype) @@ -204,18 +204,10 @@ def test_index_vectors_clustering(ndim, metric, quantization, dtype, batch_size) clusters: BatchMatches = index.cluster(vectors=vectors, threads=threads) assert len(clusters.keys) == batch_size - -@pytest.mark.parametrize("ndim", [3, 97, 256]) -@pytest.mark.parametrize("metric", [MetricKind.Cos, MetricKind.L2sq]) -@pytest.mark.parametrize("batch_size", [100, 1024]) -@pytest.mark.parametrize("quantization", [ScalarKind.F32, ScalarKind.I8]) -@pytest.mark.parametrize("dtype", [np.float32, np.float64, np.float16]) -def test_index_members_clustering(ndim, metric, quantization, dtype, batch_size): - index = Index(ndim=ndim, metric=metric, dtype=quantization, multi=False) - keys = np.arange(batch_size) - vectors = random_vectors(count=batch_size, ndim=ndim, dtype=dtype) - index.add(keys, vectors, threads=threads) - # If no argument is provided, we cluster the present entries 
clusters: BatchMatches = index.cluster(threads=threads) assert len(clusters.keys) == batch_size + + # If no argument is provided, we cluster the present entries + clusters: BatchMatches = index.cluster(keys=keys[:50], threads=threads) + assert len(clusters.keys) == 50 From d1fd90a062025d018dbfc1a8fd95b70d863d666a Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sun, 20 Aug 2023 21:18:32 +0400 Subject: [PATCH 37/70] Refactor: Placeholder for #206 --- include/usearch/index.hpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 5db0e878..b93053d0 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1044,7 +1044,7 @@ using return_type_gt = * @brief An example of what a USearch-compatible ad-hoc filter would look like. * * A similar function object can be passed to search queries to further filter entries - * on their auxiliary properties, such as some categorical labels stored in an external DBMS. + * on their auxiliary properties, such as some categorical keys stored in an external DBMS. */ struct dummy_predicate_t { template constexpr bool operator()(member_at&&) const noexcept { return true; } @@ -1061,6 +1061,12 @@ struct dummy_callback_t { template void operator()(member_at&&) const noexcept {} }; +struct progress_t { + std::size_t thread; + std::size_t task; + std::size_t total; +}; + /** * @brief An example of what a USearch-compatible progress-bar should look like. * @@ -1069,6 +1075,7 @@ struct dummy_callback_t { */ struct dummy_progress_t { inline void operator()(std::size_t /*progress*/, std::size_t /*total*/) const noexcept {} + inline bool stop(progress_t) const noexcept { return false; } }; /** From 4d2fccd65f134333dbae90e020da34a0192812ee Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sun, 20 Aug 2023 21:19:03 +0400 Subject: [PATCH 38/70] Refactor: Black formatting --- docs/conf.py | 53 ++++++++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index e4645776..277827f8 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -6,45 +6,50 @@ # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information -project = 'Unum · USearch' -copyright = '2023, Unum' -author = 'Unum' -release = open('../VERSION', 'r').read().strip() -with open('_static/custom.js', 'r+') as js: +project = "Unum · USearch" +copyright = "2023, Unum" +author = "Unum" +release = open("../VERSION", "r").read().strip() +with open("_static/custom.js", "r+") as js: content = js.read() js.seek(0) js.truncate() - js.write(content.replace('$(VERSION)', release)) + js.write(content.replace("$(VERSION)", release)) # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration extensions = [ - 'breathe', 'm2r2', - 'sphinx.ext.autodoc', 'sphinx_js', - 'sphinx.ext.autosummary', - 'sphinx.ext.intersphinx', - 'sphinx.ext.napoleon', - 'sphinxcontrib.jquery', - 'sphinxcontrib.googleanalytics'] + "breathe", + "m2r2", + "sphinx.ext.autodoc", + "sphinx_js", + "sphinx.ext.autosummary", + "sphinx.ext.intersphinx", + "sphinx.ext.napoleon", + "sphinxcontrib.jquery", + "sphinxcontrib.googleanalytics", +] -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 
'*.md'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "*.md"] -googleanalytics_id = '341385789' +googleanalytics_id = "341385789" googleanalytics_enabled = True # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output -html_logo = '../assets/unum.png' -html_theme = 'furo' -html_static_path = ['_static'] -html_css_files = ['custom.css'] -html_js_files = ['custom.js'] -html_baseurl = '/docs/usearch/' +html_logo = "../assets/unum.png" +html_theme = "furo" +html_static_path = ["_static"] +html_css_files = ["custom.css"] +html_js_files = ["custom.js"] +html_baseurl = "/docs/usearch/" -breathe_projects = {'USearch': '../build/xml'} -breathe_default_project = 'USearch' +breathe_projects = {"USearch": "../build/xml"} +breathe_default_project = "USearch" -js_source_path = '../javascript/usearch.js' +js_language = "typescript" +jsdoc_config_path = "../tsconfig.json" +js_source_path = "../javascript/usearch.js" From f6f12fe9b5f816f05086406e3848b790b5c4eab3 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Mon, 21 Aug 2023 01:36:26 +0400 Subject: [PATCH 39/70] Add: `pairwise_distance` and clustering fixes --- include/usearch/index.hpp | 3 +- include/usearch/index_dense.hpp | 122 +++++++++++++++++++------------- python/lib.cpp | 44 +++++++++--- python/scripts/test_index.py | 7 +- python/usearch/index.py | 49 +++++++++++-- 5 files changed, 160 insertions(+), 65 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index b93053d0..a96bdbe1 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -2400,7 +2400,8 @@ class index_gt { if (!next.reserve(expansion)) return result.failed("Out of memory!"); - result.cluster.member = at(search_for_one_(query, metric, prefetch, entry_slot_, max_level_, level, context)); + result.cluster.member = + at(search_for_one_(query, metric, prefetch, entry_slot_, max_level_, level - 1, context)); result.cluster.distance = context.measure(query, result.cluster.member, metric); // Normalize stats diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index f4a15e92..1b778e38 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -502,6 +502,7 @@ class index_dense_gt { stats_t stats(std::size_t level) const { return typed_->stats(level); } dynamic_allocator_t const& allocator() const { return typed_->dynamic_allocator(); } + key_t const& free_key() const { return free_key_; } /** * @brief A relatively accurate lower bound on the amount of memory consumed by the system. 
@@ -571,7 +572,7 @@ class index_dense_gt { bool a_missing = a_range.first == a_range.second; bool b_missing = b_range.first == b_range.second; aggregated_distances_t result; - if (!a_missing || !b_missing) + if (a_missing || b_missing) return result; result.min = std::numeric_limits::max(); @@ -1633,6 +1634,7 @@ static clustering_result_t cluster( // queries_iterator_at queries_begin, // queries_iterator_at queries_end, // // + std::size_t min_clusters, // std::size_t max_clusters, // key_at* cluster_keys, // distance_punned_t* cluster_distances, // @@ -1647,35 +1649,40 @@ static clustering_result_t cluster( // // Skip the first few top level, assuming they can't even potentially have enough clusters std::size_t level = index.max_level(); - if (max_clusters) + if (min_clusters) for (; level > 1; --level) { - if (index.stats(level).nodes < max_clusters) + if (index.stats(level).nodes < min_clusters) break; } else - max_clusters = index.stats(1).nodes, level = 1; + level = 1, max_clusters = index.stats(1).nodes, min_clusters = 2; clustering_result_t result; - if (index.max_level() < 1) + if (index.max_level() < 2) return result.failed("Index too small to cluster!"); // A structure used to track the popularity of a specific cluster struct cluster_t { - key_t key = 0; - union { - std::size_t popularity = 0; - key_t replacement; - }; + key_t centroid; + key_t merged_into; + std::size_t popularity; }; - auto smaller_key = [](cluster_t const& a, cluster_t const& b) { return a.key < b.key; }; + auto smaller_key = [](cluster_t const& a, cluster_t const& b) { return a.centroid < b.centroid; }; auto higher_popularity = [](cluster_t const& a, cluster_t const& b) { return a.popularity > b.popularity; }; std::atomic visited_members(0); std::atomic computed_distances(0); std::atomic atomic_error{nullptr}; -repeat_clustering: + using dynamic_allocator_t = typename index_t::dynamic_allocator_t; + using dynamic_allocator_traits_t = std::allocator_traits; + using clusters_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; + buffer_gt clusters(queries_count); + if (!clusters) + return result.failed("Out of memory!"); + +map_to_clusters: // Concurrently perform search until a certain depth executor.dynamic(queries_count, [&](std::size_t thread_idx, std::size_t query_idx) { auto result = index.cluster(queries_begin[query_idx], level, thread_idx); @@ -1697,15 +1704,10 @@ static clustering_result_t cluster( // // Now once we have identified the closest clusters, // we can try reducing their quantity, refining - using dynamic_allocator_t = typename index_t::dynamic_allocator_t; - using dynamic_allocator_traits_t = std::allocator_traits; - using clusters_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; - buffer_gt clusters(queries_count); - if (!clusters) - return result.failed("Out of memory!"); - for (std::size_t query_idx = 0; query_idx != queries_count; ++query_idx) - clusters[query_idx].key = cluster_keys[query_idx], clusters[query_idx].popularity = 1; + clusters[query_idx].centroid = cluster_keys[query_idx], // + clusters[query_idx].merged_into = index.free_key(), // + clusters[query_idx].popularity = 1; // Sort by cluster key std::sort(clusters.begin(), clusters.end(), smaller_key); @@ -1715,7 +1717,7 @@ static clustering_result_t cluster( // { std::size_t last_idx = 0; for (std::size_t current_idx = 1; current_idx != clusters.size(); ++current_idx) { - if (clusters[last_idx].key == clusters[current_idx].key) { + if (clusters[last_idx].centroid == 
clusters[current_idx].centroid) { clusters[last_idx].popularity++; } else { last_idx++; @@ -1727,46 +1729,70 @@ static clustering_result_t cluster( // // In some cases the queries may be co-located, all mapping into the same cluster on that // level. In that case we refine the granularity and dive deeper into clusters: - if (unique_clusters < max_clusters && level > 1) { + if (unique_clusters < min_clusters && level > 1) { level--; - goto repeat_clustering; + goto map_to_clusters; } - // Drop smaller clusters iteratively merging those into the closest ones - if (max_clusters < unique_clusters) { - std::sort(clusters.data(), clusters.data() + unique_clusters, higher_popularity); - - // Instead of doing it at once, use the `cluster_t::replacement` property to plan future re-mapping - for (std::size_t cluster_idx = max_clusters; cluster_idx < unique_clusters; ++cluster_idx) { - key_t dropped_cluster_key = clusters[cluster_idx].key; - key_t target_key = dropped_cluster_key; - distance_t target_distance = std::numeric_limits::max(); - for (std::size_t candidate_idx = 0; candidate_idx != max_clusters; ++candidate_idx) { - key_t cluster_key = clusters[candidate_idx].key; - distance_t cluster_distance = index.distance_between(dropped_cluster_key, cluster_key).mean; - if (cluster_distance <= target_distance) - target_key = cluster_key, target_distance = cluster_distance; + // If clusters are too numerous, merge the ones that are too close to each other. + std::size_t merge_cycles = 0; +merge_nearby_clusters: + if (unique_clusters > max_clusters) { + + struct cluster_merge_t { + std::size_t from_idx = 0; + std::size_t to_idx = 0; + }; + + cluster_merge_t merge; + distance_t merge_distance = std::numeric_limits::max(); + for (std::size_t first_idx = 0; first_idx != unique_clusters; ++first_idx) { + key_t first_key = clusters[first_idx].centroid; + for (std::size_t second_idx = 0; second_idx != first_idx; ++second_idx) { + key_t second_key = clusters[second_idx].centroid; + distance_t distance = index.distance_between(first_key, second_key, 0).mean; + if (distance < merge_distance) + merge = {first_idx, second_idx}, merge_distance = distance; } - clusters[cluster_idx].replacement = target_key; } + if (clusters[merge.from_idx].popularity > clusters[merge.to_idx].popularity) + std::swap(merge.from_idx, merge.to_idx); + + clusters[merge.from_idx].merged_into = clusters[merge.to_idx].centroid; + clusters[merge.to_idx].popularity += exchange(clusters[merge.from_idx].popularity, 0); + + // Move the merged entry to the end + // std::partition(clusters.data(), clusters.data() + unique_clusters, + // [&](cluster_t const& c) { return c.merged_into == index.free_key(); }); + if (merge.from_idx != (unique_clusters - 1)) + std::swap(clusters[merge.from_idx], clusters[unique_clusters - 1]); + unique_clusters--; + merge_cycles++; + goto merge_nearby_clusters; + } + + // Replace evicted clusters + if (merge_cycles) { // Sort dropped clusters by name to accelerate future lookups - std::sort(clusters.data() + max_clusters, clusters.data() + unique_clusters, smaller_key); + auto clusters_end = clusters.data() + max_clusters + merge_cycles; + std::sort(clusters.data(), clusters_end, smaller_key); - // Replace evicted clusters for (std::size_t query_idx = 0; query_idx != queries_count; ++query_idx) { key_t& cluster_key = cluster_keys[query_idx]; distance_t& cluster_distance = cluster_distances[query_idx]; - // To avoid implementing heterogeneous comparisons, lets wrap the `cluster_key` - cluster_t cluster_key_wrapped; - 
cluster_key_wrapped.key = cluster_key; - auto displaced_range = std::equal_range(clusters.data() + max_clusters, clusters.data() + unique_clusters, - cluster_key_wrapped, smaller_key); - if (displaced_range.first == displaced_range.second) - continue; + // Recursively trace replacements of that cluster + while (true) { + // To avoid implementing heterogeneous comparisons, lets wrap the `cluster_key` + cluster_t updated_cluster; + updated_cluster.centroid = cluster_key; + updated_cluster = *std::lower_bound(clusters.data(), clusters_end, updated_cluster, smaller_key); + if (updated_cluster.merged_into == index.free_key()) + break; + cluster_key = updated_cluster.merged_into; + } - cluster_key = displaced_range.first->replacement; cluster_distance = index.distance_between(cluster_key, queries_begin[query_idx], 0).mean; } } diff --git a/python/lib.cpp b/python/lib.cpp index 73715f0d..a0723ebf 100644 --- a/python/lib.cpp +++ b/python/lib.cpp @@ -599,7 +599,9 @@ template struct rows_lookup_gt { * 4. number of computed pairwise distances. */ template -static py::tuple cluster_vectors(index_at& index, py::buffer queries, std::size_t count, std::size_t threads) { +static py::tuple cluster_vectors( // + index_at& index, py::buffer queries, // + std::size_t min_count, std::size_t max_count, std::size_t threads) { if (index.limits().threads_search < threads) throw std::invalid_argument("Can't use that many threads!"); @@ -629,11 +631,11 @@ static py::tuple cluster_vectors(index_at& index, py::buffer queries, std::size_ // clang-format off switch (numpy_string_to_kind(queries_info.format)) { - case scalar_kind_t::b1x8_k: cluster_result = cluster(index, queries_begin.as(), queries_end.as(), count, keys_ptr, distances_ptr, executor); break; - case scalar_kind_t::i8_k: cluster_result = cluster(index, queries_begin.as(), queries_end.as(), count, keys_ptr, distances_ptr, executor); break; - case scalar_kind_t::f16_k: cluster_result = cluster(index, queries_begin.as(), queries_end.as(), count, keys_ptr, distances_ptr, executor); break; - case scalar_kind_t::f32_k: cluster_result = cluster(index, queries_begin.as(), queries_end.as(), count, keys_ptr, distances_ptr, executor); break; - case scalar_kind_t::f64_k: cluster_result = cluster(index, queries_begin.as(), queries_end.as(), count, keys_ptr, distances_ptr, executor); break; + case scalar_kind_t::b1x8_k: cluster_result = cluster(index, queries_begin.as(), queries_end.as(), min_count, max_count, keys_ptr, distances_ptr, executor); break; + case scalar_kind_t::i8_k: cluster_result = cluster(index, queries_begin.as(), queries_end.as(), min_count, max_count, keys_ptr, distances_ptr, executor); break; + case scalar_kind_t::f16_k: cluster_result = cluster(index, queries_begin.as(), queries_end.as(), min_count, max_count, keys_ptr, distances_ptr, executor); break; + case scalar_kind_t::f32_k: cluster_result = cluster(index, queries_begin.as(), queries_end.as(), min_count, max_count, keys_ptr, distances_ptr, executor); break; + case scalar_kind_t::f64_k: cluster_result = cluster(index, queries_begin.as(), queries_end.as(), min_count, max_count, keys_ptr, distances_ptr, executor); break; default: throw std::invalid_argument("Incompatible scalars in the query matrix: " + queries_info.format); } // clang-format on @@ -667,7 +669,9 @@ static py::tuple cluster_vectors(index_at& index, py::buffer queries, std::size_ * 4. number of computed pairwise distances. 
*/ template -static py::tuple cluster_keys(index_at& index, py::array_t queries_py, std::size_t count, std::size_t threads) { +static py::tuple cluster_keys( // + index_at& index, py::array_t queries_py, // + std::size_t min_count, std::size_t max_count, std::size_t threads) { if (index.limits().threads_search < threads) throw std::invalid_argument("Can't use that many threads!"); @@ -687,7 +691,7 @@ static py::tuple cluster_keys(index_at& index, py::array_t queries_py, st distance_t* distances_ptr = reinterpret_cast(&distances_py2d(0, 0)); clustering_result_t cluster_result = - cluster(index, queries_begin, queries_end, count, keys_ptr, distances_ptr, executor); + cluster(index, queries_begin, queries_end, min_count, max_count, keys_ptr, distances_ptr, executor); cluster_result.error.raise(); // Those would be set to 1 for all entries, in case of success @@ -923,14 +927,16 @@ PYBIND11_MODULE(compiled, m) { i.def( // "cluster_vectors", &cluster_vectors, // py::arg("queries"), // - py::arg("count") = 0, // + py::arg("min_count") = 0, // + py::arg("max_count") = 0, // py::arg("threads") = 0 // ); i.def( // "cluster_keys", &cluster_keys, // py::arg("queries"), // - py::arg("count") = 0, // + py::arg("min_count") = 0, // + py::arg("max_count") = 0, // py::arg("threads") = 0 // ); @@ -1066,6 +1072,24 @@ PYBIND11_MODULE(compiled, m) { return results_py; }); + i.def( // + "pairwise_distances", + [](dense_index_py_t const& index, py::array_t const& left_py, + py::array_t const& right_py) -> py::array_t { + py::array_t results_py(left_py.size()); + auto results_py1d = results_py.template mutable_unchecked<1>(); + auto left_py1d = left_py.template unchecked<1>(); + auto right_py1d = right_py.template unchecked<1>(); + for (Py_ssize_t task_idx = 0; task_idx != left_py.size(); ++task_idx) + results_py1d(task_idx) = index.distance_between(left_py1d(task_idx), right_py1d(task_idx)).min; + return results_py; + }); + + i.def( // + "pairwise_distance", [](dense_index_py_t const& index, key_t left, key_t right) -> distance_t { + return index.distance_between(left, right).min; + }); + i.def("get_many", &get_many, py::arg("keys"), py::arg("dtype") = scalar_kind_t::f32_k); i.def( diff --git a/python/scripts/test_index.py b/python/scripts/test_index.py index a7909bb5..ce8da992 100644 --- a/python/scripts/test_index.py +++ b/python/scripts/test_index.py @@ -192,7 +192,7 @@ def test_index_contains_remove_rename(batch_size): @pytest.mark.parametrize("ndim", [3, 97, 256]) @pytest.mark.parametrize("metric", [MetricKind.Cos, MetricKind.L2sq]) -@pytest.mark.parametrize("batch_size", [100, 1024]) +@pytest.mark.parametrize("batch_size", [500, 1024]) @pytest.mark.parametrize("quantization", [ScalarKind.F32, ScalarKind.I8]) @pytest.mark.parametrize("dtype", [np.float32, np.float64, np.float16]) def test_index_clustering(ndim, metric, quantization, dtype, batch_size): @@ -211,3 +211,8 @@ def test_index_clustering(ndim, metric, quantization, dtype, batch_size): # If no argument is provided, we cluster the present entries clusters: BatchMatches = index.cluster(keys=keys[:50], threads=threads) assert len(clusters.keys) == 50 + + # If no argument is provided, we cluster the present entries + clusters: BatchMatches = index.cluster(min_count=3, max_count=10, threads=threads) + unique_clusters = set(clusters.keys.flatten().tolist()) + assert len(unique_clusters) >= 3 and len(unique_clusters) <= 10 diff --git a/python/usearch/index.py b/python/usearch/index.py index 800b527d..75748790 100644 --- a/python/usearch/index.py +++ 
b/python/usearch/index.py @@ -351,6 +351,22 @@ def __repr__(self) -> str: return f"usearch.BatchMatches({np.sum(self.counts)} across {len(self)} queries)" +@dataclass +class Clustering: + def __init__( + self, + index: Index, + queries: np.ndarray, + matches: BatchMatches, + ) -> None: + self.index = index + self.queries = queries + self.matches = matches + + def __repr__(self) -> str: + return f"usearch.Clustering(for {len(self.queries)} queries)" + + class IndexedKeys: """Smart-reference for the range of keys present in a specific `Index`""" @@ -910,7 +926,8 @@ def cluster( *, vectors: Optional[np.ndarray] = None, keys: Optional[np.ndarray] = None, - count: Optional[int] = None, + min_count: Optional[int] = None, + max_count: Optional[int] = None, threads: int = 0, log: Union[str, bool] = False, batch_size: int = 0, @@ -932,13 +949,18 @@ def cluster( :return: Matches for one or more queries :rtype: Union[Matches, BatchMatches] """ - if count is None: - count = 0 + if min_count is None: + min_count = 0 + if max_count is None: + max_count = 0 if vectors is not None: assert keys is None, "You can either cluster vectors or member keys" results = self._compiled.cluster_vectors( - vectors, count=count, threads=threads + vectors, + min_count=min_count, + max_count=max_count, + threads=threads, ) else: if keys is None: @@ -946,10 +968,27 @@ def cluster( if not isinstance(keys, np.ndarray): keys = np.array(keys) keys = keys.astype(Key) - results = self._compiled.cluster_keys(keys, count=count, threads=threads) + results = self._compiled.cluster_keys( + keys, + min_count=min_count, + max_count=max_count, + threads=threads, + ) return BatchMatches(*results) + def pairwise_distance( + self, left: KeyOrKeysLike, right: KeyOrKeysLike + ) -> Union[np.ndarray, float]: + assert isinstance(left, Iterable) == isinstance(right, Iterable) + + if not isinstance(left, Iterable): + return self._compiled.pairwise_distance(int(left), int(right)) + else: + left = np.array(left).astype(Key) + right = np.array(right).astype(Key) + return self._compiled.pairwise_distances(left, right) + @property def keys(self) -> IndexedKeys: return IndexedKeys(self) From a41597138d11bdae2686663c5df67dadac45fe6b Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Mon, 21 Aug 2023 14:34:24 +0400 Subject: [PATCH 40/70] Improve: Parallel cluster refinement --- include/usearch/index_dense.hpp | 36 ++++++++++++++++++------------- include/usearch/index_plugins.hpp | 12 +++++------ 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 1b778e38..861bfb01 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -1677,10 +1677,6 @@ static clustering_result_t cluster( // using dynamic_allocator_t = typename index_t::dynamic_allocator_t; using dynamic_allocator_traits_t = std::allocator_traits; - using clusters_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; - buffer_gt clusters(queries_count); - if (!clusters) - return result.failed("Out of memory!"); map_to_clusters: // Concurrently perform search until a certain depth @@ -1702,6 +1698,11 @@ static clustering_result_t cluster( // if (atomic_error) return result.failed(atomic_error.load()); + using clusters_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; + buffer_gt clusters(queries_count); + if (!clusters) + return result.failed("Out of memory!"); + // Now once we have identified the 
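The new distance helpers from this patch are easiest to see from the Python side. Below is a minimal sketch, assuming a small index populated with random vectors; the keys and dimensions are illustrative.

```python
import numpy as np
from usearch.index import Index

index = Index(ndim=3)
index.add(np.arange(4), np.random.rand(4, 3).astype(np.float32))

# Distance between two stored members, resolved by their keys:
single = index.pairwise_distance(0, 1)

# Vectorized form: element-wise distances between two arrays of keys.
lefts = np.array([0, 1, 2])
rights = np.array([3, 3, 3])
many = index.pairwise_distance(lefts, rights)
assert many.shape == (3,)
```

The same patch also replaces the single `count` bound on `Index.cluster` with `min_count` and `max_count`, matching the updated test that expects between 3 and 10 unique clusters.
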
closest clusters, // we can try reducing their quantity, refining for (std::size_t query_idx = 0; query_idx != queries_count; ++query_idx) @@ -1712,7 +1713,7 @@ static clustering_result_t cluster( // // Sort by cluster key std::sort(clusters.begin(), clusters.end(), smaller_key); - // Transform into run-length encoding, cmoputing the number of unique clusters + // Transform into run-length encoding, computing the number of unique clusters std::size_t unique_clusters = 0; { std::size_t last_idx = 0; @@ -1744,18 +1745,23 @@ static clustering_result_t cluster( // std::size_t to_idx = 0; }; - cluster_merge_t merge; - distance_t merge_distance = std::numeric_limits::max(); - for (std::size_t first_idx = 0; first_idx != unique_clusters; ++first_idx) { + std::atomic atomic_merge; + std::atomic atomic_merge_distance = std::numeric_limits::max(); + + executor.dynamic(unique_clusters * unique_clusters, [&](std::size_t thread_idx, std::size_t task_idx) { + std::size_t first_idx = task_idx / unique_clusters; + std::size_t second_idx = task_idx % unique_clusters; + if (first_idx == second_idx) + return true; key_t first_key = clusters[first_idx].centroid; - for (std::size_t second_idx = 0; second_idx != first_idx; ++second_idx) { - key_t second_key = clusters[second_idx].centroid; - distance_t distance = index.distance_between(first_key, second_key, 0).mean; - if (distance < merge_distance) - merge = {first_idx, second_idx}, merge_distance = distance; - } - } + key_t second_key = clusters[second_idx].centroid; + distance_t distance = index.distance_between(first_key, second_key, thread_idx).mean; + if (distance < atomic_merge_distance) + atomic_merge_distance = distance, atomic_merge = {first_idx, second_idx}; + return true; + }); + cluster_merge_t merge = atomic_merge.load(); if (clusters[merge.from_idx].popularity > clusters[merge.to_idx].popularity) std::swap(merge.from_idx, merge.to_idx); diff --git a/include/usearch/index_plugins.hpp b/include/usearch/index_plugins.hpp index 89c0317b..0cecafd3 100644 --- a/include/usearch/index_plugins.hpp +++ b/include/usearch/index_plugins.hpp @@ -597,13 +597,13 @@ class executor_openmp_t { #pragma omp cancel for } } - } else { - std::atomic_bool stop{false}; + return; + } + std::atomic_bool stop{false}; #pragma omp parallel for schedule(dynamic, 1) shared(stop) - for (std::size_t i = 0; i != tasks; ++i) { - if (!stop.load(std::memory_order_relaxed) && !thread_aware_function(omp_get_thread_num(), i)) - stop.store(true, std::memory_order_relaxed); - } + for (std::size_t i = 0; i != tasks; ++i) { + if (!stop.load(std::memory_order_relaxed) && !thread_aware_function(omp_get_thread_num(), i)) + stop.store(true, std::memory_order_relaxed); } } From f3d56fa67ced3a7b3a6d250b06eab3c1ba0ff2e6 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Mon, 21 Aug 2023 17:24:52 +0000 Subject: [PATCH 41/70] Fix: Support platforms without 16-byte atomic store --- include/usearch/index_dense.hpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 861bfb01..47ceea17 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -1745,7 +1745,8 @@ static clustering_result_t cluster( // std::size_t to_idx = 0; }; - std::atomic atomic_merge; + std::mutex merge_mutex; + cluster_merge_t merge; std::atomic atomic_merge_distance = std::numeric_limits::max(); executor.dynamic(unique_clusters * unique_clusters, [&](std::size_t 
thread_idx, std::size_t task_idx) { @@ -1756,12 +1757,14 @@ static clustering_result_t cluster( // key_t first_key = clusters[first_idx].centroid; key_t second_key = clusters[second_idx].centroid; distance_t distance = index.distance_between(first_key, second_key, thread_idx).mean; - if (distance < atomic_merge_distance) - atomic_merge_distance = distance, atomic_merge = {first_idx, second_idx}; + if (distance < atomic_merge_distance) { + std::unique_lock lock(merge_mutex); + atomic_merge_distance = distance; + merge = {first_idx, second_idx}; + } return true; }); - cluster_merge_t merge = atomic_merge.load(); if (clusters[merge.from_idx].popularity > clusters[merge.to_idx].popularity) std::swap(merge.from_idx, merge.to_idx); From d17a8e0f3ae5dd7f7e41ef1ca73ff44bba7a7fb3 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Mon, 21 Aug 2023 17:25:06 +0000 Subject: [PATCH 42/70] Docs: Add Arxiv dataset for benchmarks --- docs/benchmarks.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/benchmarks.md b/docs/benchmarks.md index 63d5880d..1b9f46ae 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -191,6 +191,14 @@ mkdir -p datasets/deep_1B/ && \ wget -nc https://storage.yandexcloud.net/yandex-research/ann-datasets/DEEP/groundtruth.public.10K.ibin -P datasets/deep_1B/ ``` +### Arxiv with E5 + +```sh +mkdir -p datasets/arxiv_2M/ && \ + wget -nc https://huggingface.co/datasets/unum-cloud/ann-arxiv-2m/resolve/main/abstract.e5-base-v2.fbin -P datasets/arxiv_2M/ && + wget -nc https://huggingface.co/datasets/unum-cloud/ann-arxiv-2m/resolve/main/title.e5-base-v2.fbin -P datasets/arxiv_2M/ +``` + ## Profiling With `perf`: From da9f3a9558dea684b48ed6990cb539842fc59cec Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Mon, 21 Aug 2023 17:26:03 +0000 Subject: [PATCH 43/70] Fix: Multi-vector keys --- include/usearch/index_dense.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 47ceea17..8c9a0556 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -342,9 +342,7 @@ class index_dense_gt { using is_transparent = void; bool operator()(key_and_slot_t const& a, key_t const& b) const noexcept { return a.key == b; } bool operator()(key_t const& a, key_and_slot_t const& b) const noexcept { return a == b.key; } - bool operator()(key_and_slot_t const& a, key_and_slot_t const& b) const noexcept { - return (!a.any_slot() && !b.any_slot()) ? a.key == b.key && a.slot == b.slot : a.key == b.key; - } + bool operator()(key_and_slot_t const& a, key_and_slot_t const& b) const noexcept { return a.key == b.key; } }; /// @brief Multi-Map from keys to IDs, and allocated vectors. 
@@ -1519,6 +1517,7 @@ class index_dense_gt { if (!casted) std::memcpy(reconstructed_vector, punned_vector, metric_.bytes_per_vector()); } + std::printf("exported %zu of %zu limit \n", count_exported, vectors_limit); return count_exported; } } From 7741f561acc2cac9a90cdc14b8cdb1446a697fbc Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Mon, 21 Aug 2023 17:26:54 +0000 Subject: [PATCH 44/70] Fix: Default initialization --- include/usearch/index.hpp | 4 +++- include/usearch/index_dense.hpp | 1 - python/scripts/test_index.py | 1 - 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index a96bdbe1..d33e2c48 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1995,6 +1995,8 @@ class index_gt { member_cref_t member; distance_t distance; + inline match_t() noexcept : member({nullptr, 0}), distance(std::numeric_limits::max()) {} + inline match_t(member_cref_t member, distance_t distance) noexcept : member(member), distance(distance) {} inline match_t(match_t&& other) noexcept @@ -2105,7 +2107,7 @@ class index_gt { error_t error{}; std::size_t visited_members{}; std::size_t computed_distances{}; - match_t cluster{{nullptr}, std::numeric_limits::max()}; + match_t cluster{}; explicit operator bool() const noexcept { return !error; } cluster_result_t failed(error_t message) noexcept { diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 8c9a0556..1392c356 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -1517,7 +1517,6 @@ class index_dense_gt { if (!casted) std::memcpy(reconstructed_vector, punned_vector, metric_.bytes_per_vector()); } - std::printf("exported %zu of %zu limit \n", count_exported, vectors_limit); return count_exported; } } diff --git a/python/scripts/test_index.py b/python/scripts/test_index.py index ce8da992..4942afd1 100644 --- a/python/scripts/test_index.py +++ b/python/scripts/test_index.py @@ -9,7 +9,6 @@ from usearch.index import ( Index, - Indexes, MetricKind, ScalarKind, Match, From d678810333d52329e6d8da1c49e82a9abb2759f0 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Tue, 22 Aug 2023 07:50:42 +0000 Subject: [PATCH 45/70] Improve: `clustering` API --- include/usearch/index_dense.hpp | 440 +++++++++++++++--------------- include/usearch/index_plugins.hpp | 25 +- python/lib.cpp | 25 +- 3 files changed, 253 insertions(+), 237 deletions(-) diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 1392c356..86dde0c4 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -110,6 +110,15 @@ struct index_dense_config_t : public index_config_t { expansion_search(expansion_search ? 
expansion_search : default_expansion_search()) {} }; +struct index_dense_clustering_config_t { + std::size_t min_clusters = 0; + std::size_t max_clusters = 0; + enum mode_t { + merge_smallest_k, + merge_closest_k, + } mode = merge_smallest_k; +}; + struct index_dense_serialization_config_t { bool exclude_vectors = false; bool use_64_bit_dimensions = false; @@ -565,11 +574,30 @@ class index_dense_gt { */ aggregated_distances_t distance_between(key_t a, key_t b, std::size_t = any_thread()) const { shared_lock_t lock(slot_lookup_mutex_); + aggregated_distances_t result; + if (!multi()) { + auto a_it = slot_lookup_.find(key_and_slot_t::any_slot(a)); + auto b_it = slot_lookup_.find(key_and_slot_t::any_slot(b)); + bool a_missing = a_it == slot_lookup_.end(); + bool b_missing = b_it == slot_lookup_.end(); + if (a_missing || b_missing) + return result; + + key_and_slot_t a_key_and_slot = *a_it; + byte_t const* a_vector = vectors_lookup_[a_key_and_slot.slot]; + key_and_slot_t b_key_and_slot = *b_it; + byte_t const* b_vector = vectors_lookup_[b_key_and_slot.slot]; + distance_t a_b_distance = metric_(a_vector, b_vector); + + result.mean = result.min = result.max = a_b_distance; + result.count = 1; + return result; + } + auto a_range = slot_lookup_.equal_range(key_and_slot_t::any_slot(a)); auto b_range = slot_lookup_.equal_range(key_and_slot_t::any_slot(b)); bool a_missing = a_range.first == a_range.second; bool b_missing = b_range.first == b_range.second; - aggregated_distances_t result; if (a_missing || b_missing) return result; @@ -1282,6 +1310,201 @@ class index_dense_gt { std::forward(progress)); } + struct clustering_result_t { + error_t error{}; + std::size_t clusters{}; + std::size_t visited_members{}; + std::size_t computed_distances{}; + + explicit operator bool() const noexcept { return !error; } + clustering_result_t failed(error_t message) noexcept { + error = std::move(message); + return std::move(*this); + } + }; + + /** + * @brief Implements clustering, classifying the given objects (vectors of member keys) + * into a given number of clusters. + * + * @param[in] queries_begin Iterator targeting the fiest query. + * @param[in] queries_end + * @param[in] executor Thread-pool to execute the job in parallel. + * @param[in] progress Callback to report the execution progress. 
+ */ + template < // + typename queries_iterator_at, // + typename executor_at = dummy_executor_t, // + typename progress_at = dummy_progress_t // + > + clustering_result_t cluster( // + queries_iterator_at queries_begin, // + queries_iterator_at queries_end, // + index_dense_clustering_config_t config, // + key_t* cluster_keys, // + distance_t* cluster_distances, // + executor_at&& executor = executor_at{}, // + progress_at&& progress = progress_at{}) { + + std::size_t const queries_count = queries_end - queries_begin; + + // Skip the first few top level, assuming they can't even potentially have enough clusters + std::size_t level = max_level(); + if (config.min_clusters) + for (; level > 1; --level) { + if (stats(level).nodes < config.min_clusters) + break; + } + else + level = 1, config.max_clusters = stats(1).nodes, config.min_clusters = 2; + + clustering_result_t result; + if (max_level() < 2) + return result.failed("Index too small to cluster!"); + + // A structure used to track the popularity of a specific cluster + struct cluster_t { + key_t centroid; + key_t merged_into; + std::size_t popularity; + byte_t* vector; + }; + + auto smaller_key = [](cluster_t const& a, cluster_t const& b) { return a.centroid < b.centroid; }; + auto higher_popularity = [](cluster_t const& a, cluster_t const& b) { return a.popularity > b.popularity; }; + + std::atomic visited_members(0); + std::atomic computed_distances(0); + std::atomic atomic_error{nullptr}; + + using dynamic_allocator_traits_t = std::allocator_traits; + using clusters_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; + buffer_gt clusters(queries_count); + if (!clusters) + return result.failed("Out of memory!"); + + map_to_clusters: + // Concurrently perform search until a certain depth + executor.dynamic(queries_count, [&](std::size_t thread_idx, std::size_t query_idx) { + auto result = cluster(queries_begin[query_idx], level, thread_idx); + if (!result) { + atomic_error = result.error.release(); + return false; + } + + cluster_keys[query_idx] = result.cluster.member.key; + cluster_distances[query_idx] = result.cluster.distance; + + // Export in case we need to refine afterwards + clusters[query_idx].centroid = result.cluster.member.key; + clusters[query_idx].vector = vectors_lookup_[result.cluster.member.slot]; + clusters[query_idx].merged_into = free_key(); + clusters[query_idx].popularity = 1; + + visited_members += result.visited_members; + computed_distances += result.computed_distances; + return true; + }); + + if (atomic_error) + return result.failed(atomic_error.load()); + + // Now once we have identified the closest clusters, + // we can try reducing their quantity, refining + std::sort(clusters.begin(), clusters.end(), smaller_key); + + // Transform into run-length encoding, computing the number of unique clusters + std::size_t unique_clusters = 0; + { + std::size_t last_idx = 0; + for (std::size_t current_idx = 1; current_idx != clusters.size(); ++current_idx) { + if (clusters[last_idx].centroid == clusters[current_idx].centroid) { + clusters[last_idx].popularity++; + } else { + last_idx++; + clusters[last_idx] = clusters[current_idx]; + } + } + unique_clusters = last_idx + 1; + } + + // In some cases the queries may be co-located, all mapping into the same cluster on that + // level. 
In that case we refine the granularity and dive deeper into clusters: + if (unique_clusters < config.min_clusters && level > 1) { + level--; + goto map_to_clusters; + } + + // If clusters are too numerous, merge the ones that are too close to each other. + std::size_t merge_cycles = 0; + merge_nearby_clusters: + if (unique_clusters > config.max_clusters) { + + struct cluster_merge_t { + std::size_t from_idx = 0; + std::size_t to_idx = 0; + }; + + cluster_merge_t merge; + distance_t merge_distance = std::numeric_limits::max(); + + for (std::size_t first_idx = 0; first_idx != unique_clusters; ++first_idx) { + for (std::size_t second_idx = 0; second_idx != first_idx; ++second_idx) { + distance_t distance = metric_(clusters[first_idx].vector, clusters[second_idx].vector); + if (distance < merge_distance) { + merge_distance = distance; + merge = {first_idx, second_idx}; + } + } + } + + if (clusters[merge.from_idx].popularity > clusters[merge.to_idx].popularity) + std::swap(merge.from_idx, merge.to_idx); + + clusters[merge.from_idx].merged_into = clusters[merge.to_idx].centroid; + clusters[merge.to_idx].popularity += exchange(clusters[merge.from_idx].popularity, 0); + + // Move the merged entry to the end + // std::partition(clusters.data(), clusters.data() + unique_clusters, + // [&](cluster_t const& c) { return c.merged_into == free_key(); }); + if (merge.from_idx != (unique_clusters - 1)) + std::swap(clusters[merge.from_idx], clusters[unique_clusters - 1]); + unique_clusters--; + merge_cycles++; + goto merge_nearby_clusters; + } + + // Replace evicted clusters + if (merge_cycles) { + // Sort dropped clusters by name to accelerate future lookups + auto clusters_end = clusters.data() + config.max_clusters + merge_cycles; + std::sort(clusters.data(), clusters_end, smaller_key); + + executor.dynamic(queries_count, [&](std::size_t thread_idx, std::size_t query_idx) { + key_t& cluster_key = cluster_keys[query_idx]; + distance_t& cluster_distance = cluster_distances[query_idx]; + + // Recursively trace replacements of that cluster + while (true) { + // To avoid implementing heterogeneous comparisons, lets wrap the `cluster_key` + cluster_t updated_cluster; + updated_cluster.centroid = cluster_key; + updated_cluster = *std::lower_bound(clusters.data(), clusters_end, updated_cluster, smaller_key); + if (updated_cluster.merged_into == free_key()) + break; + cluster_key = updated_cluster.merged_into; + } + + cluster_distance = distance_between(cluster_key, queries_begin[query_idx], thread_idx).mean; + return true; + }); + } + + result.computed_distances = computed_distances; + result.visited_members = visited_members; + return result; + } + private: struct thread_lock_t { index_dense_gt const& parent; @@ -1594,220 +1817,5 @@ static join_result_t join( // std::forward(progress)); } -struct clustering_result_t { - error_t error{}; - std::size_t clusters{}; - std::size_t visited_members{}; - std::size_t computed_distances{}; - - explicit operator bool() const noexcept { return !error; } - clustering_result_t failed(error_t message) noexcept { - error = std::move(message); - return std::move(*this); - } -}; - -struct clustering_config_t { - std::size_t target_clusters = 0; -}; - -/** - * @brief Implements clustering, classifying the given objects (vectors of member keys) - * into a given number of clusters. - * - * @param[in] queries_begin Iterator targeting the fiest query. - * @param[in] queries_end - * @param[in] executor Thread-pool to execute the job in parallel. 
- * @param[in] progress Callback to report the execution progress. - */ -template < // - typename key_at, // - typename slot_at, // - typename queries_iterator_at, // - typename executor_at = dummy_executor_t, // - typename progress_at = dummy_progress_t // - > -static clustering_result_t cluster( // - index_dense_gt const& index, // - queries_iterator_at queries_begin, // - queries_iterator_at queries_end, // - // - std::size_t min_clusters, // - std::size_t max_clusters, // - key_at* cluster_keys, // - distance_punned_t* cluster_distances, // - executor_at&& executor = executor_at{}, // - progress_at&& progress = progress_at{}) { - - using index_t = index_dense_gt; - using key_t = typename index_t::key_t; - using distance_t = typename index_t::distance_t; - - std::size_t const queries_count = queries_end - queries_begin; - - // Skip the first few top level, assuming they can't even potentially have enough clusters - std::size_t level = index.max_level(); - if (min_clusters) - for (; level > 1; --level) { - if (index.stats(level).nodes < min_clusters) - break; - } - else - level = 1, max_clusters = index.stats(1).nodes, min_clusters = 2; - - clustering_result_t result; - if (index.max_level() < 2) - return result.failed("Index too small to cluster!"); - - // A structure used to track the popularity of a specific cluster - struct cluster_t { - key_t centroid; - key_t merged_into; - std::size_t popularity; - }; - - auto smaller_key = [](cluster_t const& a, cluster_t const& b) { return a.centroid < b.centroid; }; - auto higher_popularity = [](cluster_t const& a, cluster_t const& b) { return a.popularity > b.popularity; }; - - std::atomic visited_members(0); - std::atomic computed_distances(0); - std::atomic atomic_error{nullptr}; - - using dynamic_allocator_t = typename index_t::dynamic_allocator_t; - using dynamic_allocator_traits_t = std::allocator_traits; - -map_to_clusters: - // Concurrently perform search until a certain depth - executor.dynamic(queries_count, [&](std::size_t thread_idx, std::size_t query_idx) { - auto result = index.cluster(queries_begin[query_idx], level, thread_idx); - if (!result) { - atomic_error = result.error.release(); - return false; - } - - cluster_keys[query_idx] = result.cluster.member.key; - cluster_distances[query_idx] = result.cluster.distance; - - visited_members += result.visited_members; - computed_distances += result.computed_distances; - return true; - }); - - if (atomic_error) - return result.failed(atomic_error.load()); - - using clusters_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; - buffer_gt clusters(queries_count); - if (!clusters) - return result.failed("Out of memory!"); - - // Now once we have identified the closest clusters, - // we can try reducing their quantity, refining - for (std::size_t query_idx = 0; query_idx != queries_count; ++query_idx) - clusters[query_idx].centroid = cluster_keys[query_idx], // - clusters[query_idx].merged_into = index.free_key(), // - clusters[query_idx].popularity = 1; - - // Sort by cluster key - std::sort(clusters.begin(), clusters.end(), smaller_key); - - // Transform into run-length encoding, computing the number of unique clusters - std::size_t unique_clusters = 0; - { - std::size_t last_idx = 0; - for (std::size_t current_idx = 1; current_idx != clusters.size(); ++current_idx) { - if (clusters[last_idx].centroid == clusters[current_idx].centroid) { - clusters[last_idx].popularity++; - } else { - last_idx++; - clusters[last_idx] = clusters[current_idx]; - } - } - 
unique_clusters = last_idx + 1; - } - - // In some cases the queries may be co-located, all mapping into the same cluster on that - // level. In that case we refine the granularity and dive deeper into clusters: - if (unique_clusters < min_clusters && level > 1) { - level--; - goto map_to_clusters; - } - - // If clusters are too numerous, merge the ones that are too close to each other. - std::size_t merge_cycles = 0; -merge_nearby_clusters: - if (unique_clusters > max_clusters) { - - struct cluster_merge_t { - std::size_t from_idx = 0; - std::size_t to_idx = 0; - }; - - std::mutex merge_mutex; - cluster_merge_t merge; - std::atomic atomic_merge_distance = std::numeric_limits::max(); - - executor.dynamic(unique_clusters * unique_clusters, [&](std::size_t thread_idx, std::size_t task_idx) { - std::size_t first_idx = task_idx / unique_clusters; - std::size_t second_idx = task_idx % unique_clusters; - if (first_idx == second_idx) - return true; - key_t first_key = clusters[first_idx].centroid; - key_t second_key = clusters[second_idx].centroid; - distance_t distance = index.distance_between(first_key, second_key, thread_idx).mean; - if (distance < atomic_merge_distance) { - std::unique_lock lock(merge_mutex); - atomic_merge_distance = distance; - merge = {first_idx, second_idx}; - } - return true; - }); - - if (clusters[merge.from_idx].popularity > clusters[merge.to_idx].popularity) - std::swap(merge.from_idx, merge.to_idx); - - clusters[merge.from_idx].merged_into = clusters[merge.to_idx].centroid; - clusters[merge.to_idx].popularity += exchange(clusters[merge.from_idx].popularity, 0); - - // Move the merged entry to the end - // std::partition(clusters.data(), clusters.data() + unique_clusters, - // [&](cluster_t const& c) { return c.merged_into == index.free_key(); }); - if (merge.from_idx != (unique_clusters - 1)) - std::swap(clusters[merge.from_idx], clusters[unique_clusters - 1]); - unique_clusters--; - merge_cycles++; - goto merge_nearby_clusters; - } - - // Replace evicted clusters - if (merge_cycles) { - // Sort dropped clusters by name to accelerate future lookups - auto clusters_end = clusters.data() + max_clusters + merge_cycles; - std::sort(clusters.data(), clusters_end, smaller_key); - - for (std::size_t query_idx = 0; query_idx != queries_count; ++query_idx) { - key_t& cluster_key = cluster_keys[query_idx]; - distance_t& cluster_distance = cluster_distances[query_idx]; - - // Recursively trace replacements of that cluster - while (true) { - // To avoid implementing heterogeneous comparisons, lets wrap the `cluster_key` - cluster_t updated_cluster; - updated_cluster.centroid = cluster_key; - updated_cluster = *std::lower_bound(clusters.data(), clusters_end, updated_cluster, smaller_key); - if (updated_cluster.merged_into == index.free_key()) - break; - cluster_key = updated_cluster.merged_into; - } - - cluster_distance = index.distance_between(cluster_key, queries_begin[query_idx], 0).mean; - } - } - - result.computed_distances = computed_distances; - result.visited_members = visited_members; - return result; -} - } // namespace usearch } // namespace unum diff --git a/include/usearch/index_plugins.hpp b/include/usearch/index_plugins.hpp index 0cecafd3..725e454d 100644 --- a/include/usearch/index_plugins.hpp +++ b/include/usearch/index_plugins.hpp @@ -583,22 +583,21 @@ class executor_openmp_t { * @param tasks The total number of tasks to be executed. * @param thread_aware_function The thread-aware function to be called for each thread index and task index. 
* @throws If an exception occurs during execution of the thread-aware function. - * - * Uses OpenMP cancellation points, if `OMP_CANCELLATION` environment variable is set. - * http://jakascorner.com/blog/2016/08/omp-cancel.html */ template void dynamic(std::size_t tasks, thread_aware_function_at&& thread_aware_function) noexcept(false) { - if (omp_get_cancellation()) { -#pragma omp parallel for schedule(dynamic, 1) - for (std::size_t i = 0; i != tasks; ++i) { -#pragma omp cancellation point for - if (!thread_aware_function(omp_get_thread_num(), i)) { -#pragma omp cancel for - } - } - return; - } + // OpenMP cancellation points are not yet available on most platforms, and require + // the `OMP_CANCELLATION` environment variable to be set. + // http://jakascorner.com/blog/2016/08/omp-cancel.html + // if (omp_get_cancellation()) { + // #pragma omp parallel for schedule(dynamic, 1) + // for (std::size_t i = 0; i != tasks; ++i) { + // #pragma omp cancellation point for + // if (!thread_aware_function(omp_get_thread_num(), i)) { + // #pragma omp cancel for + // } + // } + // } std::atomic_bool stop{false}; #pragma omp parallel for schedule(dynamic, 1) shared(stop) for (std::size_t i = 0; i != tasks; ++i) { diff --git a/python/lib.cpp b/python/lib.cpp index a0723ebf..1c43a075 100644 --- a/python/lib.cpp +++ b/python/lib.cpp @@ -52,6 +52,7 @@ using dense_add_result_t = typename index_dense_t::add_result_t; using dense_search_result_t = typename index_dense_t::search_result_t; using dense_labeling_result_t = typename index_dense_t::labeling_result_t; using dense_cluster_result_t = typename index_dense_t::cluster_result_t; +using dense_clustering_result_t = typename index_dense_t::clustering_result_t; struct dense_index_py_t : public index_dense_t { using native_t = index_dense_t; @@ -618,7 +619,7 @@ static py::tuple cluster_vectors( // py::array_t keys_py({Py_ssize_t(queries_count), Py_ssize_t(1)}); py::array_t distances_py({Py_ssize_t(queries_count), Py_ssize_t(1)}); - clustering_result_t cluster_result; + dense_clustering_result_t cluster_result; executor_default_t executor{threads}; auto keys_py2d = keys_py.template mutable_unchecked<2>(); @@ -626,16 +627,20 @@ static py::tuple cluster_vectors( // key_t* keys_ptr = reinterpret_cast(&keys_py2d(0, 0)); distance_t* distances_ptr = reinterpret_cast(&distances_py2d(0, 0)); + index_dense_clustering_config_t config; + config.min_clusters = min_count; + config.max_clusters = max_count; + rows_lookup_gt queries_begin(queries_info.ptr, queries_stride); rows_lookup_gt queries_end = queries_begin + queries_count; // clang-format off switch (numpy_string_to_kind(queries_info.format)) { - case scalar_kind_t::b1x8_k: cluster_result = cluster(index, queries_begin.as(), queries_end.as(), min_count, max_count, keys_ptr, distances_ptr, executor); break; - case scalar_kind_t::i8_k: cluster_result = cluster(index, queries_begin.as(), queries_end.as(), min_count, max_count, keys_ptr, distances_ptr, executor); break; - case scalar_kind_t::f16_k: cluster_result = cluster(index, queries_begin.as(), queries_end.as(), min_count, max_count, keys_ptr, distances_ptr, executor); break; - case scalar_kind_t::f32_k: cluster_result = cluster(index, queries_begin.as(), queries_end.as(), min_count, max_count, keys_ptr, distances_ptr, executor); break; - case scalar_kind_t::f64_k: cluster_result = cluster(index, queries_begin.as(), queries_end.as(), min_count, max_count, keys_ptr, distances_ptr, executor); break; + case scalar_kind_t::b1x8_k: cluster_result = 
index.cluster(queries_begin.as(), queries_end.as(), config, keys_ptr, distances_ptr, executor); break; + case scalar_kind_t::i8_k: cluster_result = index.cluster(queries_begin.as(), queries_end.as(), config, keys_ptr, distances_ptr, executor); break; + case scalar_kind_t::f16_k: cluster_result = index.cluster(queries_begin.as(), queries_end.as(), config, keys_ptr, distances_ptr, executor); break; + case scalar_kind_t::f32_k: cluster_result = index.cluster(queries_begin.as(), queries_end.as(), config, keys_ptr, distances_ptr, executor); break; + case scalar_kind_t::f64_k: cluster_result = index.cluster(queries_begin.as(), queries_end.as(), config, keys_ptr, distances_ptr, executor); break; default: throw std::invalid_argument("Incompatible scalars in the query matrix: " + queries_info.format); } // clang-format on @@ -690,8 +695,12 @@ static py::tuple cluster_keys( // key_t* keys_ptr = reinterpret_cast(&keys_py2d(0, 0)); distance_t* distances_ptr = reinterpret_cast(&distances_py2d(0, 0)); - clustering_result_t cluster_result = - cluster(index, queries_begin, queries_end, min_count, max_count, keys_ptr, distances_ptr, executor); + index_dense_clustering_config_t config; + config.min_clusters = min_count; + config.max_clusters = max_count; + + dense_clustering_result_t cluster_result = + index.cluster(queries_begin, queries_end, config, keys_ptr, distances_ptr, executor); cluster_result.error.raise(); // Those would be set to 1 for all entries, in case of success From 40e803c7843a1e57c885b219b2d2a402b05da235 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Tue, 22 Aug 2023 13:34:08 +0000 Subject: [PATCH 46/70] Add: `unfair_shared_mutex_t` for C++ 11 compat. --- include/usearch/index.hpp | 9 +--- include/usearch/index_dense.hpp | 4 +- include/usearch/index_plugins.hpp | 83 +++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 10 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index d33e2c48..80b45ac2 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -1061,12 +1061,6 @@ struct dummy_callback_t { template void operator()(member_at&&) const noexcept {} }; -struct progress_t { - std::size_t thread; - std::size_t task; - std::size_t total; -}; - /** * @brief An example of what a USearch-compatible progress-bar should look like. * @@ -1074,8 +1068,7 @@ struct progress_t { * saving, and loading from disk, or index-level joins. */ struct dummy_progress_t { - inline void operator()(std::size_t /*progress*/, std::size_t /*total*/) const noexcept {} - inline bool stop(progress_t) const noexcept { return false; } + inline void operator()(std::size_t, std::size_t) const noexcept {} }; /** diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 86dde0c4..6f65471d 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -329,8 +329,8 @@ class index_dense_gt { /// @brief Mutex, controlling concurrent access to `available_threads_`. 
     mutable std::mutex available_threads_mutex_;
 
-    using shared_mutex_t = std::mutex; // TODO: Find an OS-compatible solution
-    using shared_lock_t = std::unique_lock<shared_mutex_t>;
+    using shared_mutex_t = unfair_shared_mutex_t;
+    using shared_lock_t = std::shared_lock<shared_mutex_t>;
     using unique_lock_t = std::unique_lock<shared_mutex_t>;
 
     struct key_and_slot_t {
diff --git a/include/usearch/index_plugins.hpp b/include/usearch/index_plugins.hpp
index 725e454d..54c72217 100644
--- a/include/usearch/index_plugins.hpp
+++ b/include/usearch/index_plugins.hpp
@@ -6,6 +6,9 @@
 #include <thread> // `std::thread`
 #include <vector> // `std::vector`
 
+#include <atomic> // `std::atomic`
+#include <thread> // `std::thread`
+
 #include <usearch/index.hpp> // `expected_gt` and macros
 
 #if USEARCH_USE_OPENMP
@@ -822,6 +825,86 @@ template  class memory_mapping_allocator_gt {
 
 using memory_mapping_allocator_t = memory_mapping_allocator_gt<>;
 
+/**
+ *  @brief  C++11 userspace implementation of an oversimplified `std::shared_mutex`,
+ *          that assumes rare interleaving of shared and unique locks. It's not fair,
+ *          but requires only a single 32-bit atomic integer to work.
+ */
+class unfair_shared_mutex_t {
+    /** Any positive integer describes the number of concurrent readers */
+    enum state_t : std::int32_t {
+        idle_k = 0,
+        writing_k = -1,
+    };
+    std::atomic<std::int32_t> state_{idle_k};
+
+  public:
+    inline void lock() noexcept {
+        std::int32_t raw;
+    relock:
+        raw = idle_k;
+        if (!state_.compare_exchange_weak(raw, writing_k, std::memory_order_acquire, std::memory_order_relaxed)) {
+            std::this_thread::yield();
+            goto relock;
+        }
+    }
+
+    inline void unlock() noexcept { state_.store(idle_k, std::memory_order_release); }
+
+    inline void lock_shared() noexcept {
+        std::int32_t raw;
+    relock_shared:
+        raw = state_.load(std::memory_order_acquire);
+        // Spin while it's uniquely locked
+        if (raw == writing_k) {
+            std::this_thread::yield();
+            goto relock_shared;
+        }
+        // Try incrementing the counter
+        if (!state_.compare_exchange_weak(raw, raw + 1, std::memory_order_acquire, std::memory_order_relaxed)) {
+            std::this_thread::yield();
+            goto relock_shared;
+        }
+    }
+
+    inline void unlock_shared() noexcept { state_.fetch_sub(1, std::memory_order_release); }
+
+    /**
+     *  @brief  Tries to upgrade the current `lock_shared()` to a unique `lock()` state.
+     */
+    inline bool try_escalate() noexcept {
+        std::int32_t one_read = 1;
+        return state_.compare_exchange_weak(one_read, writing_k, std::memory_order_acquire, std::memory_order_relaxed);
+    }
+
+    /**
+     *  @brief  Escalates the current lock, potentially losing control in the middle.
+     *          It's a shortcut for the `try_escalate`-`unlock_shared`-`lock` trio.
+     */
+    inline void unsafe_escalate() noexcept {
+        if (!try_escalate()) {
+            unlock_shared();
+            lock();
+        }
+    }
+
+    /**
+     *  @brief  Upgrades the current `lock_shared()` to a unique `lock()` state.
+     */
+    inline void escalate() noexcept {
+        while (!try_escalate())
+            std::this_thread::yield();
+    }
+
+    /**
+     *  @brief  De-escalation of a previously escalated state.
+     */
+    inline void de_escalate() noexcept {
+        std::int32_t one_read = 1;
+        state_.store(one_read, std::memory_order_release);
+    }
+};
+
 /**
  *  @brief  Utility class used to cast arrays of one scalar type to another,
  *          avoiding unnecessary conversions.
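Patch 46 swaps the plain `std::mutex` guarding the slot lookup for the new `unfair_shared_mutex_t`, so read-only paths such as `distance_between` can now hold a shared lock concurrently. The sketch below is editorial, not part of the patch series: it assumes the class is reachable through `<usearch/index_plugins.hpp>` under the `unum::usearch` namespace, as the diff above suggests, and it only exercises the `lock()`/`lock_shared()` pair that `std::unique_lock` and `std::shared_lock` rely on.

```cpp
// Usage sketch only: mirrors the `shared_mutex_t` / `shared_lock_t` aliases
// that `index_dense_gt` switches to in the diff above.
#include <cstddef>
#include <cstdio>
#include <mutex>        // `std::unique_lock`
#include <shared_mutex> // `std::shared_lock`
#include <thread>
#include <vector>

#include <usearch/index_plugins.hpp>

int main() {
    unum::usearch::unfair_shared_mutex_t mutex;
    std::size_t counter = 0;

    auto reader = [&] {
        // Readers may overlap: the constructor calls `lock_shared()`.
        std::shared_lock<unum::usearch::unfair_shared_mutex_t> lock(mutex);
        std::printf("observed: %zu\n", counter);
    };
    auto writer = [&] {
        // Writers are exclusive: the constructor calls `lock()`.
        std::unique_lock<unum::usearch::unfair_shared_mutex_t> lock(mutex);
        ++counter;
    };

    std::vector<std::thread> threads;
    for (std::size_t i = 0; i != 4; ++i) {
        threads.emplace_back(reader);
        threads.emplace_back(writer);
    }
    for (auto& thread : threads)
        thread.join();
    return 0;
}
```

The mutex is "unfair" in that a steady stream of readers can keep a writer spinning in `lock()`; the patch accepts that trade-off to keep the whole state in a single 32-bit atomic instead of requiring C++17's `std::shared_mutex`.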
From 1190f83cb56f1be54923df3c99fbcfdff3fcb600 Mon Sep 17 00:00:00 2001 From: Gurgen Yegoryan <21982202+gurgenyegoryan@users.noreply.github.com> Date: Tue, 22 Aug 2023 19:54:45 +0400 Subject: [PATCH 47/70] Make: Test python macOS builds without docker setup --- .github/workflows/release.yml | 636 +++++++++++++++++----------------- 1 file changed, 318 insertions(+), 318 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 56fa16c0..25d30f84 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -2,7 +2,7 @@ name: Release on: push: - branches: ["main"] + branches: ["main-dev"] env: BUILD_TYPE: Release @@ -17,88 +17,88 @@ permissions: jobs: - versioning: - name: Semantic Release - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - with: - persist-credentials: false - - uses: actions/setup-node@v3 - - run: npm install --save-dev @semantic-release/exec @semantic-release/git conventional-changelog-eslint semantic-release && npx semantic-release - - - rebase: - name: Rebase Development Branch - needs: versioning - runs-on: ubuntu-latest - steps: - - name: Checkout the latest code - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - - name: Perform rebase - run: | - git checkout main-dev - git rebase main + # versioning: + # name: Semantic Release + # runs-on: ubuntu-latest + # steps: + # - uses: actions/checkout@v3 + # with: + # persist-credentials: false + # - uses: actions/setup-node@v3 + # - run: npm install --save-dev @semantic-release/exec @semantic-release/git conventional-changelog-eslint semantic-release && npx semantic-release + + + # rebase: + # name: Rebase Development Branch + # needs: versioning + # runs-on: ubuntu-latest + # steps: + # - name: Checkout the latest code + # uses: actions/checkout@v3 + # with: + # fetch-depth: 0 + + # - name: Perform rebase + # run: | + # git checkout main-dev + # git rebase main - - name: Push changes - uses: CasperWA/push-protected@v2 - with: - token: ${{ secrets.SEMANTIC_REBASE_TOKEN }} - branch: main-dev - unprotect_reviews: True - force: True - - - publish_hashes: - name: Add hashes to release notes - runs-on: ubuntu-latest - needs: versioning - steps: - - name: Get last release ID - id: last_release - run: | - response=$(curl -s -H "Authorization: Bearer ${{ secrets.SEMANTIC_RELEASE_TOKEN }}" \ - "https://api.github.com/repos/${{ github.repository }}/releases/latest") - echo "last_release_id=$(echo "$response" | jq -r '.id')" >> $GITHUB_OUTPUT - - - uses: robinraju/release-downloader@v1.8 - name: Download release assets - with: - latest: true - fileName: "*" - out-file-path: "downloads" - tarBall: true - zipBall: true - - - name: Calculate source hashes - id: hashes - run: | - source_zip_path=$(find $GITHUB_WORKSPACE/downloads -name "*.zip") - zip_hash=$(sha256sum "$source_zip_path" | awk '{print $1}') - source_tar_path=$(find $GITHUB_WORKSPACE/downloads -name "*.tar.gz" ! 
-name "docs.tar.gz") - tar_hash=$(sha256sum "$source_tar_path" | awk '{print $1}') - echo "zip_hash=$zip_hash" >> $GITHUB_OUTPUT - echo "tar_hash=$tar_hash" >> $GITHUB_OUTPUT + # - name: Push changes + # uses: CasperWA/push-protected@v2 + # with: + # token: ${{ secrets.SEMANTIC_REBASE_TOKEN }} + # branch: main-dev + # unprotect_reviews: True + # force: True + + + # publish_hashes: + # name: Add hashes to release notes + # runs-on: ubuntu-latest + # needs: versioning + # steps: + # - name: Get last release ID + # id: last_release + # run: | + # response=$(curl -s -H "Authorization: Bearer ${{ secrets.SEMANTIC_RELEASE_TOKEN }}" \ + # "https://api.github.com/repos/${{ github.repository }}/releases/latest") + # echo "last_release_id=$(echo "$response" | jq -r '.id')" >> $GITHUB_OUTPUT + + # - uses: robinraju/release-downloader@v1.8 + # name: Download release assets + # with: + # latest: true + # fileName: "*" + # out-file-path: "downloads" + # tarBall: true + # zipBall: true + + # - name: Calculate source hashes + # id: hashes + # run: | + # source_zip_path=$(find $GITHUB_WORKSPACE/downloads -name "*.zip") + # zip_hash=$(sha256sum "$source_zip_path" | awk '{print $1}') + # source_tar_path=$(find $GITHUB_WORKSPACE/downloads -name "*.tar.gz" ! -name "docs.tar.gz") + # tar_hash=$(sha256sum "$source_tar_path" | awk '{print $1}') + # echo "zip_hash=$zip_hash" >> $GITHUB_OUTPUT + # echo "tar_hash=$tar_hash" >> $GITHUB_OUTPUT - - name: Upload hashes to release - uses: irongut/EditRelease@v1.2.0 - with: - token: ${{ secrets.SEMANTIC_RELEASE_TOKEN }} - id: ${{ steps.last_release.outputs.last_release_id }} - prerelease: false - replacebody: false - body: | - ## Hashes - * Source code (zip) : `${{ steps.hashes.outputs.zip_hash }}` - * Source code (tar.gz) : `${{ steps.hashes.outputs.tar_hash }}` + # - name: Upload hashes to release + # uses: irongut/EditRelease@v1.2.0 + # with: + # token: ${{ secrets.SEMANTIC_RELEASE_TOKEN }} + # id: ${{ steps.last_release.outputs.last_release_id }} + # prerelease: false + # replacebody: false + # body: | + # ## Hashes + # * Source code (zip) : `${{ steps.hashes.outputs.zip_hash }}` + # * Source code (tar.gz) : `${{ steps.hashes.outputs.tar_hash }}` build_wheels: name: Build Wheels for ${{ matrix.os }} - needs: versioning + # needs: versioning runs-on: ${{ matrix.os }} strategy: matrix: @@ -111,13 +111,13 @@ jobs: - uses: actions/setup-python@v3 - name: Setup Docker - if: matrix.os != 'windows-2022' + if: matrix.os == 'ubuntu-20.04' uses: crazy-max/ghaction-setup-docker@v1.0.0 with: version: 23.0.1 - name: Setup QEMU - if: matrix.os != 'windows-2022' + if: matrix.os == 'ubuntu-20.04' uses: docker/setup-qemu-action@v2.1.0 - name: Install CIBuildWheel @@ -131,245 +131,245 @@ jobs: path: ./wheelhouse/*.whl - publish_python: - name: Publish Python - needs: build_wheels - runs-on: ubuntu-20.04 - environment: - name: pypi - url: https://pypi.org/p/usearch - permissions: - id-token: write - - steps: - - name: Download artifacts - uses: actions/download-artifact@v3.0.2 - with: - path: ./dist/ - - - name: Publish to PyPi - uses: pypa/gh-action-pypi-publish@release/v1 - with: - packages-dir: ./dist/artifact - verbose: true - print-hash: true - - publish_javascript: - name: Publish JavaScript - needs: versioning - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - with: - ref: 'main' - - run: git submodule update --init --recursive - - uses: actions/setup-node@v3 - with: - node-version: 18 - - run: npm install - - run: npm ci - - run: npm test - - uses: 
JS-DevTools/npm-publish@v2 - with: - token: ${{ secrets.NPM_TOKEN }} - - publish_rust: - name: Publish Rust - needs: versioning - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - with: - ref: 'main' - - run: git submodule update --init --recursive - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - - uses: katyo/publish-crates@v2 - with: - registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }} - - publish_java: - name: Publish Java - needs: versioning - runs-on: ubuntu-latest - permissions: - contents: read - packages: write - steps: - - uses: actions/checkout@v3 - with: - ref: 'main' - - run: git submodule update --init --recursive - - uses: actions/setup-java@v3 - with: - java-version: '11' - distribution: 'adopt' - - name: Setup Gradle - uses: gradle/gradle-build-action@v2.4.2 - - name: Execute Gradle build - run: gradle clean build - - name: Publish package - run: gradle publish - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - publish_swift: - name: Publish ObjC & Swift - needs: versioning - runs-on: macos-latest - steps: - - uses: actions/checkout@v3 - with: - ref: 'main' - - run: git submodule update --init --recursive - - name: Build - run: swift build - - name: Run tests - run: swift test - - # Publishes to both Docker Hub and GitHub Container Registry - # https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-docker-hub-and-github-packages - publish_docker: - name: Publish Docker Image - needs: versioning - runs-on: ubuntu-latest - permissions: - contents: read - packages: write - steps: - - uses: actions/checkout@v3 - with: - ref: 'main' - - run: git submodule update --init --recursive - - name: Login to Docker Hub - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Log in to GitHub Registry - uses: docker/login-action@v2 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - name: Extract tags & labels for Docker - id: meta - uses: docker/metadata-action@v4 - with: - images: | - unum/usearch - ghcr.io/${{ github.repository }} - - name: Build and push - uses: docker/build-push-action@v4 - with: - context: . 
- push: true - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} + # publish_python: + # name: Publish Python + # needs: build_wheels + # runs-on: ubuntu-20.04 + # environment: + # name: pypi + # url: https://pypi.org/p/usearch + # permissions: + # id-token: write + + # steps: + # - name: Download artifacts + # uses: actions/download-artifact@v3.0.2 + # with: + # path: ./dist/ + + # - name: Publish to PyPi + # uses: pypa/gh-action-pypi-publish@release/v1 + # with: + # packages-dir: ./dist/artifact + # verbose: true + # print-hash: true + + # publish_javascript: + # name: Publish JavaScript + # needs: versioning + # runs-on: ubuntu-latest + # steps: + # - uses: actions/checkout@v3 + # with: + # ref: 'main' + # - run: git submodule update --init --recursive + # - uses: actions/setup-node@v3 + # with: + # node-version: 18 + # - run: npm install + # - run: npm ci + # - run: npm test + # - uses: JS-DevTools/npm-publish@v2 + # with: + # token: ${{ secrets.NPM_TOKEN }} + + # publish_rust: + # name: Publish Rust + # needs: versioning + # runs-on: ubuntu-latest + # steps: + # - uses: actions/checkout@v3 + # with: + # ref: 'main' + # - run: git submodule update --init --recursive + # - uses: actions-rs/toolchain@v1 + # with: + # toolchain: stable + # override: true + # - uses: katyo/publish-crates@v2 + # with: + # registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }} + + # publish_java: + # name: Publish Java + # needs: versioning + # runs-on: ubuntu-latest + # permissions: + # contents: read + # packages: write + # steps: + # - uses: actions/checkout@v3 + # with: + # ref: 'main' + # - run: git submodule update --init --recursive + # - uses: actions/setup-java@v3 + # with: + # java-version: '11' + # distribution: 'adopt' + # - name: Setup Gradle + # uses: gradle/gradle-build-action@v2.4.2 + # - name: Execute Gradle build + # run: gradle clean build + # - name: Publish package + # run: gradle publish + # env: + # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + # publish_swift: + # name: Publish ObjC & Swift + # needs: versioning + # runs-on: macos-latest + # steps: + # - uses: actions/checkout@v3 + # with: + # ref: 'main' + # - run: git submodule update --init --recursive + # - name: Build + # run: swift build + # - name: Run tests + # run: swift test + + # # Publishes to both Docker Hub and GitHub Container Registry + # # https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-docker-hub-and-github-packages + # publish_docker: + # name: Publish Docker Image + # needs: versioning + # runs-on: ubuntu-latest + # permissions: + # contents: read + # packages: write + # steps: + # - uses: actions/checkout@v3 + # with: + # ref: 'main' + # - run: git submodule update --init --recursive + # - name: Login to Docker Hub + # uses: docker/login-action@v2 + # with: + # username: ${{ secrets.DOCKERHUB_USERNAME }} + # password: ${{ secrets.DOCKERHUB_TOKEN }} + # - name: Log in to GitHub Registry + # uses: docker/login-action@v2 + # with: + # registry: ghcr.io + # username: ${{ github.actor }} + # password: ${{ secrets.GITHUB_TOKEN }} + # - name: Set up Docker Buildx + # uses: docker/setup-buildx-action@v2 + # - name: Extract tags & labels for Docker + # id: meta + # uses: docker/metadata-action@v4 + # with: + # images: | + # unum/usearch + # ghcr.io/${{ github.repository }} + # - name: Build and push + # uses: docker/build-push-action@v4 + # with: + # context: . 
+ # push: true + # tags: ${{ steps.meta.outputs.tags }} + # labels: ${{ steps.meta.outputs.labels }} - publish_wasm: - name: WASM Build - needs: versioning - runs-on: ubuntu-latest - permissions: - contents: read - packages: write - - steps: - - uses: actions/checkout@v3 - with: - ref: 'main' - - run: git submodule update --init --recursive + # publish_wasm: + # name: WASM Build + # needs: versioning + # runs-on: ubuntu-latest + # permissions: + # contents: read + # packages: write + + # steps: + # - uses: actions/checkout@v3 + # with: + # ref: 'main' + # - run: git submodule update --init --recursive - - name: Prepare WASM Environment - run: | - git clone https://github.com/emscripten-core/emsdk.git - ./emsdk/emsdk install latest + # - name: Prepare WASM Environment + # run: | + # git clone https://github.com/emscripten-core/emsdk.git + # ./emsdk/emsdk install latest - - name: Build USearch WASM by Emscripten - run: | - ./emsdk/emsdk activate latest && source ./emsdk/emsdk_env.sh - emcmake cmake -DUSEARCH_BUILD_TEST=0 -DUSEARCH_BUILD_BENCHMARK=0 -DUSEARCH_BUILD_WASM=1 -B ./build && emmake make -C ./build - - - name: Trigger Wasmer.io CLI - run: | - curl https://get.wasmer.io -sSfL | sh - source ~/.wasmer/wasmer.sh - wasmer login "${{ secrets.WASMER_TOKEN }}" - wasmer publish - - - build_docs: - name: Build Docs - runs-on: ubuntu-22.04 - if: ${{ always() }} - needs: [publish_python, publish_javascript, publish_rust, publish_java, publish_swift, publish_docker, publish_wasm] - permissions: - contents: write - steps: - - name: Checkout - uses: actions/checkout@v3 - with: - ref: 'main' - - name: Install dependencies - run: | - sudo apt update && - sudo apt install -y doxygen graphviz dia git && - pip install sphinx sphinx-js breathe furo m2r2 sphinxcontrib-googleanalytics==0.2.dev20220708 sphinxcontrib-jquery && - npm install -g jsdoc - - name: Install USearch from PyPi - run: pip install usearch - - name: Build documentation - run: cd docs && doxygen conf.dox && make html - - name: Copy assets - run: cp -r assets build/docs/html/ - - name: Compress assets - run: tar -czvf docs.tar.gz build/docs/html/ - - name: Upload docs to release - uses: xresloader/upload-to-github-release@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - file: docs.tar.gz - update_latest_release: true - - deploy_docs_pages: - name: Deploy GitHub Pages - environment: - name: github-pages - url: ${{ steps.deployment.outputs.page_url }} - runs-on: ubuntu-22.04 - if: ${{ always() }} - needs: build_docs - steps: - - uses: robinraju/release-downloader@v1.8 - with: - latest: true - fileName: docs.tar.gz - - name: Unpack docs - run: tar -xf ./docs.tar.gz - - name: Setup GitHub Pages - uses: actions/configure-pages@v2 - - name: Upload artifacts - uses: actions/upload-pages-artifact@v1 - with: - path: ./build/docs/html - - name: Deploy to GitHub Pages - id: deployment - uses: actions/deploy-pages@v1 - - deploy_docs_vercel: - name: Deploy Vercel - runs-on: ubuntu-22.04 - if: ${{ always() }} - needs: build_docs - steps: - - name: Notify Vercel - uses: fjogeleit/http-request-action@v1 - with: - url: ${{ secrets.DOCS_VERCEL }} - method: 'POST' + # - name: Build USearch WASM by Emscripten + # run: | + # ./emsdk/emsdk activate latest && source ./emsdk/emsdk_env.sh + # emcmake cmake -DUSEARCH_BUILD_TEST=0 -DUSEARCH_BUILD_BENCHMARK=0 -DUSEARCH_BUILD_WASM=1 -B ./build && emmake make -C ./build + + # - name: Trigger Wasmer.io CLI + # run: | + # curl https://get.wasmer.io -sSfL | sh + # source ~/.wasmer/wasmer.sh + # wasmer 
login "${{ secrets.WASMER_TOKEN }}" + # wasmer publish + + + # build_docs: + # name: Build Docs + # runs-on: ubuntu-22.04 + # if: ${{ always() }} + # needs: [publish_python, publish_javascript, publish_rust, publish_java, publish_swift, publish_docker, publish_wasm] + # permissions: + # contents: write + # steps: + # - name: Checkout + # uses: actions/checkout@v3 + # with: + # ref: 'main' + # - name: Install dependencies + # run: | + # sudo apt update && + # sudo apt install -y doxygen graphviz dia git && + # pip install sphinx sphinx-js breathe furo m2r2 sphinxcontrib-googleanalytics==0.2.dev20220708 sphinxcontrib-jquery && + # npm install -g jsdoc + # - name: Install USearch from PyPi + # run: pip install usearch + # - name: Build documentation + # run: cd docs && doxygen conf.dox && make html + # - name: Copy assets + # run: cp -r assets build/docs/html/ + # - name: Compress assets + # run: tar -czvf docs.tar.gz build/docs/html/ + # - name: Upload docs to release + # uses: xresloader/upload-to-github-release@v1 + # env: + # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # with: + # file: docs.tar.gz + # update_latest_release: true + + # deploy_docs_pages: + # name: Deploy GitHub Pages + # environment: + # name: github-pages + # url: ${{ steps.deployment.outputs.page_url }} + # runs-on: ubuntu-22.04 + # if: ${{ always() }} + # needs: build_docs + # steps: + # - uses: robinraju/release-downloader@v1.8 + # with: + # latest: true + # fileName: docs.tar.gz + # - name: Unpack docs + # run: tar -xf ./docs.tar.gz + # - name: Setup GitHub Pages + # uses: actions/configure-pages@v2 + # - name: Upload artifacts + # uses: actions/upload-pages-artifact@v1 + # with: + # path: ./build/docs/html + # - name: Deploy to GitHub Pages + # id: deployment + # uses: actions/deploy-pages@v1 + + # deploy_docs_vercel: + # name: Deploy Vercel + # runs-on: ubuntu-22.04 + # if: ${{ always() }} + # needs: build_docs + # steps: + # - name: Notify Vercel + # uses: fjogeleit/http-request-action@v1 + # with: + # url: ${{ secrets.DOCS_VERCEL }} + # method: 'POST' From f72e4180c701d69eab4fe5f75f3bd020f6163729 Mon Sep 17 00:00:00 2001 From: Gurgen Yegoryan <21982202+gurgenyegoryan@users.noreply.github.com> Date: Tue, 22 Aug 2023 20:00:54 +0400 Subject: [PATCH 48/70] Refactor: Enable temporary disabled jobs --- .github/workflows/release.yml | 632 +++++++++++++++++----------------- 1 file changed, 316 insertions(+), 316 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 25d30f84..55771dd7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -2,7 +2,7 @@ name: Release on: push: - branches: ["main-dev"] + branches: ["main"] env: BUILD_TYPE: Release @@ -17,88 +17,88 @@ permissions: jobs: - # versioning: - # name: Semantic Release - # runs-on: ubuntu-latest - # steps: - # - uses: actions/checkout@v3 - # with: - # persist-credentials: false - # - uses: actions/setup-node@v3 - # - run: npm install --save-dev @semantic-release/exec @semantic-release/git conventional-changelog-eslint semantic-release && npx semantic-release - - - # rebase: - # name: Rebase Development Branch - # needs: versioning - # runs-on: ubuntu-latest - # steps: - # - name: Checkout the latest code - # uses: actions/checkout@v3 - # with: - # fetch-depth: 0 - - # - name: Perform rebase - # run: | - # git checkout main-dev - # git rebase main + versioning: + name: Semantic Release + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + persist-credentials: false + - uses: 
actions/setup-node@v3 + - run: npm install --save-dev @semantic-release/exec @semantic-release/git conventional-changelog-eslint semantic-release && npx semantic-release + + + rebase: + name: Rebase Development Branch + needs: versioning + runs-on: ubuntu-latest + steps: + - name: Checkout the latest code + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Perform rebase + run: | + git checkout main-dev + git rebase main - # - name: Push changes - # uses: CasperWA/push-protected@v2 - # with: - # token: ${{ secrets.SEMANTIC_REBASE_TOKEN }} - # branch: main-dev - # unprotect_reviews: True - # force: True - - - # publish_hashes: - # name: Add hashes to release notes - # runs-on: ubuntu-latest - # needs: versioning - # steps: - # - name: Get last release ID - # id: last_release - # run: | - # response=$(curl -s -H "Authorization: Bearer ${{ secrets.SEMANTIC_RELEASE_TOKEN }}" \ - # "https://api.github.com/repos/${{ github.repository }}/releases/latest") - # echo "last_release_id=$(echo "$response" | jq -r '.id')" >> $GITHUB_OUTPUT - - # - uses: robinraju/release-downloader@v1.8 - # name: Download release assets - # with: - # latest: true - # fileName: "*" - # out-file-path: "downloads" - # tarBall: true - # zipBall: true - - # - name: Calculate source hashes - # id: hashes - # run: | - # source_zip_path=$(find $GITHUB_WORKSPACE/downloads -name "*.zip") - # zip_hash=$(sha256sum "$source_zip_path" | awk '{print $1}') - # source_tar_path=$(find $GITHUB_WORKSPACE/downloads -name "*.tar.gz" ! -name "docs.tar.gz") - # tar_hash=$(sha256sum "$source_tar_path" | awk '{print $1}') - # echo "zip_hash=$zip_hash" >> $GITHUB_OUTPUT - # echo "tar_hash=$tar_hash" >> $GITHUB_OUTPUT + - name: Push changes + uses: CasperWA/push-protected@v2 + with: + token: ${{ secrets.SEMANTIC_REBASE_TOKEN }} + branch: main-dev + unprotect_reviews: True + force: True + + + publish_hashes: + name: Add hashes to release notes + runs-on: ubuntu-latest + needs: versioning + steps: + - name: Get last release ID + id: last_release + run: | + response=$(curl -s -H "Authorization: Bearer ${{ secrets.SEMANTIC_RELEASE_TOKEN }}" \ + "https://api.github.com/repos/${{ github.repository }}/releases/latest") + echo "last_release_id=$(echo "$response" | jq -r '.id')" >> $GITHUB_OUTPUT + + - uses: robinraju/release-downloader@v1.8 + name: Download release assets + with: + latest: true + fileName: "*" + out-file-path: "downloads" + tarBall: true + zipBall: true + + - name: Calculate source hashes + id: hashes + run: | + source_zip_path=$(find $GITHUB_WORKSPACE/downloads -name "*.zip") + zip_hash=$(sha256sum "$source_zip_path" | awk '{print $1}') + source_tar_path=$(find $GITHUB_WORKSPACE/downloads -name "*.tar.gz" ! 
-name "docs.tar.gz") + tar_hash=$(sha256sum "$source_tar_path" | awk '{print $1}') + echo "zip_hash=$zip_hash" >> $GITHUB_OUTPUT + echo "tar_hash=$tar_hash" >> $GITHUB_OUTPUT - # - name: Upload hashes to release - # uses: irongut/EditRelease@v1.2.0 - # with: - # token: ${{ secrets.SEMANTIC_RELEASE_TOKEN }} - # id: ${{ steps.last_release.outputs.last_release_id }} - # prerelease: false - # replacebody: false - # body: | - # ## Hashes - # * Source code (zip) : `${{ steps.hashes.outputs.zip_hash }}` - # * Source code (tar.gz) : `${{ steps.hashes.outputs.tar_hash }}` + - name: Upload hashes to release + uses: irongut/EditRelease@v1.2.0 + with: + token: ${{ secrets.SEMANTIC_RELEASE_TOKEN }} + id: ${{ steps.last_release.outputs.last_release_id }} + prerelease: false + replacebody: false + body: | + ## Hashes + * Source code (zip) : `${{ steps.hashes.outputs.zip_hash }}` + * Source code (tar.gz) : `${{ steps.hashes.outputs.tar_hash }}` build_wheels: name: Build Wheels for ${{ matrix.os }} - # needs: versioning + needs: versioning runs-on: ${{ matrix.os }} strategy: matrix: @@ -131,245 +131,245 @@ jobs: path: ./wheelhouse/*.whl - # publish_python: - # name: Publish Python - # needs: build_wheels - # runs-on: ubuntu-20.04 - # environment: - # name: pypi - # url: https://pypi.org/p/usearch - # permissions: - # id-token: write - - # steps: - # - name: Download artifacts - # uses: actions/download-artifact@v3.0.2 - # with: - # path: ./dist/ - - # - name: Publish to PyPi - # uses: pypa/gh-action-pypi-publish@release/v1 - # with: - # packages-dir: ./dist/artifact - # verbose: true - # print-hash: true - - # publish_javascript: - # name: Publish JavaScript - # needs: versioning - # runs-on: ubuntu-latest - # steps: - # - uses: actions/checkout@v3 - # with: - # ref: 'main' - # - run: git submodule update --init --recursive - # - uses: actions/setup-node@v3 - # with: - # node-version: 18 - # - run: npm install - # - run: npm ci - # - run: npm test - # - uses: JS-DevTools/npm-publish@v2 - # with: - # token: ${{ secrets.NPM_TOKEN }} - - # publish_rust: - # name: Publish Rust - # needs: versioning - # runs-on: ubuntu-latest - # steps: - # - uses: actions/checkout@v3 - # with: - # ref: 'main' - # - run: git submodule update --init --recursive - # - uses: actions-rs/toolchain@v1 - # with: - # toolchain: stable - # override: true - # - uses: katyo/publish-crates@v2 - # with: - # registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }} - - # publish_java: - # name: Publish Java - # needs: versioning - # runs-on: ubuntu-latest - # permissions: - # contents: read - # packages: write - # steps: - # - uses: actions/checkout@v3 - # with: - # ref: 'main' - # - run: git submodule update --init --recursive - # - uses: actions/setup-java@v3 - # with: - # java-version: '11' - # distribution: 'adopt' - # - name: Setup Gradle - # uses: gradle/gradle-build-action@v2.4.2 - # - name: Execute Gradle build - # run: gradle clean build - # - name: Publish package - # run: gradle publish - # env: - # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - # publish_swift: - # name: Publish ObjC & Swift - # needs: versioning - # runs-on: macos-latest - # steps: - # - uses: actions/checkout@v3 - # with: - # ref: 'main' - # - run: git submodule update --init --recursive - # - name: Build - # run: swift build - # - name: Run tests - # run: swift test - - # # Publishes to both Docker Hub and GitHub Container Registry - # # https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-docker-hub-and-github-packages 
- # publish_docker: - # name: Publish Docker Image - # needs: versioning - # runs-on: ubuntu-latest - # permissions: - # contents: read - # packages: write - # steps: - # - uses: actions/checkout@v3 - # with: - # ref: 'main' - # - run: git submodule update --init --recursive - # - name: Login to Docker Hub - # uses: docker/login-action@v2 - # with: - # username: ${{ secrets.DOCKERHUB_USERNAME }} - # password: ${{ secrets.DOCKERHUB_TOKEN }} - # - name: Log in to GitHub Registry - # uses: docker/login-action@v2 - # with: - # registry: ghcr.io - # username: ${{ github.actor }} - # password: ${{ secrets.GITHUB_TOKEN }} - # - name: Set up Docker Buildx - # uses: docker/setup-buildx-action@v2 - # - name: Extract tags & labels for Docker - # id: meta - # uses: docker/metadata-action@v4 - # with: - # images: | - # unum/usearch - # ghcr.io/${{ github.repository }} - # - name: Build and push - # uses: docker/build-push-action@v4 - # with: - # context: . - # push: true - # tags: ${{ steps.meta.outputs.tags }} - # labels: ${{ steps.meta.outputs.labels }} + publish_python: + name: Publish Python + needs: build_wheels + runs-on: ubuntu-20.04 + environment: + name: pypi + url: https://pypi.org/p/usearch + permissions: + id-token: write + + steps: + - name: Download artifacts + uses: actions/download-artifact@v3.0.2 + with: + path: ./dist/ + + - name: Publish to PyPi + uses: pypa/gh-action-pypi-publish@release/v1 + with: + packages-dir: ./dist/artifact + verbose: true + print-hash: true + + publish_javascript: + name: Publish JavaScript + needs: versioning + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + ref: 'main' + - run: git submodule update --init --recursive + - uses: actions/setup-node@v3 + with: + node-version: 18 + - run: npm install + - run: npm ci + - run: npm test + - uses: JS-DevTools/npm-publish@v2 + with: + token: ${{ secrets.NPM_TOKEN }} + + publish_rust: + name: Publish Rust + needs: versioning + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + ref: 'main' + - run: git submodule update --init --recursive + - uses: actions-rs/toolchain@v1 + with: + toolchain: stable + override: true + - uses: katyo/publish-crates@v2 + with: + registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }} + + publish_java: + name: Publish Java + needs: versioning + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: + - uses: actions/checkout@v3 + with: + ref: 'main' + - run: git submodule update --init --recursive + - uses: actions/setup-java@v3 + with: + java-version: '11' + distribution: 'adopt' + - name: Setup Gradle + uses: gradle/gradle-build-action@v2.4.2 + - name: Execute Gradle build + run: gradle clean build + - name: Publish package + run: gradle publish + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + publish_swift: + name: Publish ObjC & Swift + needs: versioning + runs-on: macos-latest + steps: + - uses: actions/checkout@v3 + with: + ref: 'main' + - run: git submodule update --init --recursive + - name: Build + run: swift build + - name: Run tests + run: swift test + + # Publishes to both Docker Hub and GitHub Container Registry + # https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-docker-hub-and-github-packages + publish_docker: + name: Publish Docker Image + needs: versioning + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: + - uses: actions/checkout@v3 + with: + ref: 'main' + - run: git submodule update --init --recursive + - name: 
Login to Docker Hub + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: Log in to GitHub Registry + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + - name: Extract tags & labels for Docker + id: meta + uses: docker/metadata-action@v4 + with: + images: | + unum/usearch + ghcr.io/${{ github.repository }} + - name: Build and push + uses: docker/build-push-action@v4 + with: + context: . + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} - # publish_wasm: - # name: WASM Build - # needs: versioning - # runs-on: ubuntu-latest - # permissions: - # contents: read - # packages: write - - # steps: - # - uses: actions/checkout@v3 - # with: - # ref: 'main' - # - run: git submodule update --init --recursive + publish_wasm: + name: WASM Build + needs: versioning + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - uses: actions/checkout@v3 + with: + ref: 'main' + - run: git submodule update --init --recursive - # - name: Prepare WASM Environment - # run: | - # git clone https://github.com/emscripten-core/emsdk.git - # ./emsdk/emsdk install latest + - name: Prepare WASM Environment + run: | + git clone https://github.com/emscripten-core/emsdk.git + ./emsdk/emsdk install latest - # - name: Build USearch WASM by Emscripten - # run: | - # ./emsdk/emsdk activate latest && source ./emsdk/emsdk_env.sh - # emcmake cmake -DUSEARCH_BUILD_TEST=0 -DUSEARCH_BUILD_BENCHMARK=0 -DUSEARCH_BUILD_WASM=1 -B ./build && emmake make -C ./build - - # - name: Trigger Wasmer.io CLI - # run: | - # curl https://get.wasmer.io -sSfL | sh - # source ~/.wasmer/wasmer.sh - # wasmer login "${{ secrets.WASMER_TOKEN }}" - # wasmer publish - - - # build_docs: - # name: Build Docs - # runs-on: ubuntu-22.04 - # if: ${{ always() }} - # needs: [publish_python, publish_javascript, publish_rust, publish_java, publish_swift, publish_docker, publish_wasm] - # permissions: - # contents: write - # steps: - # - name: Checkout - # uses: actions/checkout@v3 - # with: - # ref: 'main' - # - name: Install dependencies - # run: | - # sudo apt update && - # sudo apt install -y doxygen graphviz dia git && - # pip install sphinx sphinx-js breathe furo m2r2 sphinxcontrib-googleanalytics==0.2.dev20220708 sphinxcontrib-jquery && - # npm install -g jsdoc - # - name: Install USearch from PyPi - # run: pip install usearch - # - name: Build documentation - # run: cd docs && doxygen conf.dox && make html - # - name: Copy assets - # run: cp -r assets build/docs/html/ - # - name: Compress assets - # run: tar -czvf docs.tar.gz build/docs/html/ - # - name: Upload docs to release - # uses: xresloader/upload-to-github-release@v1 - # env: - # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # with: - # file: docs.tar.gz - # update_latest_release: true - - # deploy_docs_pages: - # name: Deploy GitHub Pages - # environment: - # name: github-pages - # url: ${{ steps.deployment.outputs.page_url }} - # runs-on: ubuntu-22.04 - # if: ${{ always() }} - # needs: build_docs - # steps: - # - uses: robinraju/release-downloader@v1.8 - # with: - # latest: true - # fileName: docs.tar.gz - # - name: Unpack docs - # run: tar -xf ./docs.tar.gz - # - name: Setup GitHub Pages - # uses: actions/configure-pages@v2 - # - name: Upload artifacts - # uses: actions/upload-pages-artifact@v1 - # 
with: - # path: ./build/docs/html - # - name: Deploy to GitHub Pages - # id: deployment - # uses: actions/deploy-pages@v1 - - # deploy_docs_vercel: - # name: Deploy Vercel - # runs-on: ubuntu-22.04 - # if: ${{ always() }} - # needs: build_docs - # steps: - # - name: Notify Vercel - # uses: fjogeleit/http-request-action@v1 - # with: - # url: ${{ secrets.DOCS_VERCEL }} - # method: 'POST' + - name: Build USearch WASM by Emscripten + run: | + ./emsdk/emsdk activate latest && source ./emsdk/emsdk_env.sh + emcmake cmake -DUSEARCH_BUILD_TEST=0 -DUSEARCH_BUILD_BENCHMARK=0 -DUSEARCH_BUILD_WASM=1 -B ./build && emmake make -C ./build + + - name: Trigger Wasmer.io CLI + run: | + curl https://get.wasmer.io -sSfL | sh + source ~/.wasmer/wasmer.sh + wasmer login "${{ secrets.WASMER_TOKEN }}" + wasmer publish + + + build_docs: + name: Build Docs + runs-on: ubuntu-22.04 + if: ${{ always() }} + needs: [publish_python, publish_javascript, publish_rust, publish_java, publish_swift, publish_docker, publish_wasm] + permissions: + contents: write + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + ref: 'main' + - name: Install dependencies + run: | + sudo apt update && + sudo apt install -y doxygen graphviz dia git && + pip install sphinx sphinx-js breathe furo m2r2 sphinxcontrib-googleanalytics==0.2.dev20220708 sphinxcontrib-jquery && + npm install -g jsdoc + - name: Install USearch from PyPi + run: pip install usearch + - name: Build documentation + run: cd docs && doxygen conf.dox && make html + - name: Copy assets + run: cp -r assets build/docs/html/ + - name: Compress assets + run: tar -czvf docs.tar.gz build/docs/html/ + - name: Upload docs to release + uses: xresloader/upload-to-github-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + file: docs.tar.gz + update_latest_release: true + + deploy_docs_pages: + name: Deploy GitHub Pages + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-22.04 + if: ${{ always() }} + needs: build_docs + steps: + - uses: robinraju/release-downloader@v1.8 + with: + latest: true + fileName: docs.tar.gz + - name: Unpack docs + run: tar -xf ./docs.tar.gz + - name: Setup GitHub Pages + uses: actions/configure-pages@v2 + - name: Upload artifacts + uses: actions/upload-pages-artifact@v1 + with: + path: ./build/docs/html + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v1 + + deploy_docs_vercel: + name: Deploy Vercel + runs-on: ubuntu-22.04 + if: ${{ always() }} + needs: build_docs + steps: + - name: Notify Vercel + uses: fjogeleit/http-request-action@v1 + with: + url: ${{ secrets.DOCS_VERCEL }} + method: 'POST' From a01fc6d3a8f8bb2df72cef68a72a7a0168da58a0 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Tue, 22 Aug 2023 20:27:57 +0000 Subject: [PATCH 49/70] Fix: Printing top layer of graph --- python/usearch/index.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/python/usearch/index.py b/python/usearch/index.py index 75748790..3985998f 100644 --- a/python/usearch/index.py +++ b/python/usearch/index.py @@ -1001,6 +1001,10 @@ def vectors(self) -> np.ndarray: def max_level(self) -> int: return self._compiled.max_level + @property + def nlevels(self) -> int: + return self._compiled.max_level + 1 + @property def levels_stats(self) -> _CompiledIndexStats: """Get the accumulated statistics for the entire multi-level graph. 
@@ -1056,13 +1060,12 @@ def __repr__(self) -> str: self.expansion_add, self.expansion_search, len(self), - self.max_level + 1, + self.nlevels, ) def _repr_pretty_(self, printer, cycle) -> str: level_stats = [ - f"--- {i}. {self.level_stats(i).nodes:,} nodes" - for i in range(self.max_level) + f"--- {i}. {self.level_stats(i).nodes:,} nodes" for i in range(self.nlevels) ] lines = "\n".join( [ From 593f6882e206a05c91c1e74ac18dc9b607b3ed9b Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Tue, 22 Aug 2023 20:28:34 +0000 Subject: [PATCH 50/70] Improve: Lower asymptotics for clustering --- include/usearch/index_dense.hpp | 53 ++++++++++++++------------------- 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 6f65471d..33819dde 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -1348,14 +1348,14 @@ class index_dense_gt { std::size_t const queries_count = queries_end - queries_begin; - // Skip the first few top level, assuming they can't even potentially have enough clusters + // Find the first level (top -> down) that has enough nodes to exceed `config.min_clusters`. std::size_t level = max_level(); - if (config.min_clusters) + if (config.min_clusters) { for (; level > 1; --level) { - if (stats(level).nodes < config.min_clusters) + if (stats(level).nodes > config.min_clusters) break; } - else + } else level = 1, config.max_clusters = stats(1).nodes, config.min_clusters = 2; clustering_result_t result; @@ -1370,7 +1370,7 @@ class index_dense_gt { byte_t* vector; }; - auto smaller_key = [](cluster_t const& a, cluster_t const& b) { return a.centroid < b.centroid; }; + auto centroid_id = [](cluster_t const& a, cluster_t const& b) { return a.centroid < b.centroid; }; auto higher_popularity = [](cluster_t const& a, cluster_t const& b) { return a.popularity > b.popularity; }; std::atomic visited_members(0); @@ -1411,7 +1411,7 @@ class index_dense_gt { // Now once we have identified the closest clusters, // we can try reducing their quantity, refining - std::sort(clusters.begin(), clusters.end(), smaller_key); + std::sort(clusters.begin(), clusters.end(), centroid_id); // Transform into run-length encoding, computing the number of unique clusters std::size_t unique_clusters = 0; @@ -1435,40 +1435,33 @@ class index_dense_gt { goto map_to_clusters; } + std::sort(clusters.data(), clusters.data() + unique_clusters, higher_popularity); + // If clusters are too numerous, merge the ones that are too close to each other. 
std::size_t merge_cycles = 0; merge_nearby_clusters: if (unique_clusters > config.max_clusters) { - struct cluster_merge_t { - std::size_t from_idx = 0; - std::size_t to_idx = 0; - }; - - cluster_merge_t merge; + cluster_t& merge_source = clusters[unique_clusters - 1]; + std::size_t merge_target_idx = 0; distance_t merge_distance = std::numeric_limits::max(); - for (std::size_t first_idx = 0; first_idx != unique_clusters; ++first_idx) { - for (std::size_t second_idx = 0; second_idx != first_idx; ++second_idx) { - distance_t distance = metric_(clusters[first_idx].vector, clusters[second_idx].vector); - if (distance < merge_distance) { - merge_distance = distance; - merge = {first_idx, second_idx}; - } + for (std::size_t candidate_idx = 0; candidate_idx + 1 < unique_clusters; ++candidate_idx) { + distance_t distance = metric_(merge_source.vector, clusters[candidate_idx].vector); + if (distance < merge_distance) { + merge_distance = distance; + merge_target_idx = candidate_idx; } } - if (clusters[merge.from_idx].popularity > clusters[merge.to_idx].popularity) - std::swap(merge.from_idx, merge.to_idx); + merge_source.merged_into = clusters[merge_target_idx].centroid; + clusters[merge_target_idx].popularity += exchange(merge_source.popularity, 0); - clusters[merge.from_idx].merged_into = clusters[merge.to_idx].centroid; - clusters[merge.to_idx].popularity += exchange(clusters[merge.from_idx].popularity, 0); + // The target object may have to be swapped a few times to get to optimal position. + while (merge_target_idx && + clusters[merge_target_idx - 1].popularity < clusters[merge_target_idx].popularity) + std::swap(clusters[merge_target_idx - 1], clusters[merge_target_idx]), --merge_target_idx; - // Move the merged entry to the end - // std::partition(clusters.data(), clusters.data() + unique_clusters, - // [&](cluster_t const& c) { return c.merged_into == free_key(); }); - if (merge.from_idx != (unique_clusters - 1)) - std::swap(clusters[merge.from_idx], clusters[unique_clusters - 1]); unique_clusters--; merge_cycles++; goto merge_nearby_clusters; @@ -1478,7 +1471,7 @@ class index_dense_gt { if (merge_cycles) { // Sort dropped clusters by name to accelerate future lookups auto clusters_end = clusters.data() + config.max_clusters + merge_cycles; - std::sort(clusters.data(), clusters_end, smaller_key); + std::sort(clusters.data(), clusters_end, centroid_id); executor.dynamic(queries_count, [&](std::size_t thread_idx, std::size_t query_idx) { key_t& cluster_key = cluster_keys[query_idx]; @@ -1489,7 +1482,7 @@ class index_dense_gt { // To avoid implementing heterogeneous comparisons, lets wrap the `cluster_key` cluster_t updated_cluster; updated_cluster.centroid = cluster_key; - updated_cluster = *std::lower_bound(clusters.data(), clusters_end, updated_cluster, smaller_key); + updated_cluster = *std::lower_bound(clusters.data(), clusters_end, updated_cluster, centroid_id); if (updated_cluster.merged_into == free_key()) break; cluster_key = updated_cluster.merged_into; From 4570ee3cd7fd4d490d058e5b0f5bc96056d1215f Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Tue, 22 Aug 2023 20:29:41 +0000 Subject: [PATCH 51/70] Add: `Clustering` class for recursive exploration --- cluster.ipynb | 167 ++++++++++++++++++++++++++++++++++++++++ python/usearch/index.py | 45 ++++++++++- 2 files changed, 209 insertions(+), 3 deletions(-) create mode 100644 cluster.ipynb diff --git a/cluster.ipynb b/cluster.ipynb new file mode 100644 index 00000000..13ecf748 --- 
/dev/null +++ b/cluster.ipynb @@ -0,0 +1,167 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from usearch.io import load_matrix\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vectors = load_matrix(\"datasets/arxiv_2M/abstract.e5-base-v2.fbin\", view=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vectors.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from usearch.index import Index, USES_SIMSIMD, USES_NATIVE_F16\n", + "\n", + "index = Index(ndim=vectors.shape[1], metric=\"cos\", dtype=\"i8\")\n", + "index.hardware_acceleration, USES_SIMSIMD, USES_NATIVE_F16" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import os\n", + "\n", + "if os.path.exists(\"cluster.i8.usearch\"):\n", + " index.load(\"cluster.i8.usearch\")\n", + "\n", + "if len(index) == 0:\n", + " index.add(None, vectors, log=True)\n", + " index.save(\"cluster.i8.usearch\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "clustering = index.cluster(min_count=10, max_count=15, threads=60)\n", + "clustering" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"{len(clustering.centroids_popularity[0])} unique clusters for {len(index)} members\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "clustering.plot_centroids_popularity()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "nx.draw_networkx(g)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import networkx as nx\n", + "\n", + "G = g\n", + "\n", + "node_sizes = nx.get_node_attributes(G, \"size\").values()\n", + "max_node_size = max(node_sizes)\n", + "node_sizes = [size * 10 / max_node_size for size in node_sizes]\n", + "\n", + "edge_labels = nx.get_edge_attributes(G, \"distance\")\n", + "edge_labels = {edge: f\"{label:.2}\" for edge, label in edge_labels.items()}\n", + "\n", + "pos = nx.spring_layout(G, seed=7)\n", + "nx.draw_networkx_nodes(G, pos, node_size=node_sizes)\n", + "nx.draw_networkx_edges(G, pos, edgelist=G.edges(data=False))\n", + "nx.draw_networkx_labels(G, pos, font_size=10, font_family=\"sans-serif\")\n", + "nx.draw_networkx_edge_labels(G, pos, edge_labels, font_size=5)\n", + "\n", + "ax = plt.gca()\n", + "ax.margins(0.08)\n", + "plt.axis(\"off\")\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + }, + 
"orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/python/usearch/index.py b/python/usearch/index.py index 3985998f..f8ed3cc2 100644 --- a/python/usearch/index.py +++ b/python/usearch/index.py @@ -356,9 +356,11 @@ class Clustering: def __init__( self, index: Index, - queries: np.ndarray, matches: BatchMatches, + queries: Optional[np.ndarray] = None, ) -> None: + if queries is None: + queries = index._compiled.get_keys_in_slice() self.index = index self.queries = queries self.matches = matches @@ -366,6 +368,42 @@ def __init__( def __repr__(self) -> str: return f"usearch.Clustering(for {len(self.queries)} queries)" + @property + def centroids_popularity(self) -> Tuple[np.ndarray, np.ndarray]: + return np.unique(self.matches.keys, return_counts=True) + + def members_of(self, centroid: Key) -> np.ndarray: + return self.queries[self.matches.keys.flatten() == centroid] + + def subcluster(self, centroid: Key, **clustering_kwards) -> Clustering: + sub_keys = self.members_of(centroid) + return self.index.cluster(keys=sub_keys, **clustering_kwards) + + def plot_centroids_popularity(self): + from matplotlib import pyplot as plt + + _, sizes = self.centroids_popularity + plt.yscale("log") + plt.plot(sorted(sizes), np.arange(len(sizes))) + plt.show() + + @property + def network(self): + import networkx as nx + + keys, sizes = self.centroids_popularity + + g = nx.Graph() + for key, size in zip(keys, sizes): + g.add_node(key, size=size) + + for i, i_key in enumerate(keys): + for j_key in keys[:i]: + d = self.index.pairwise_distance(i_key, j_key) + g.add_edge(i_key, j_key, distance=d) + + return g + class IndexedKeys: """Smart-reference for the range of keys present in a specific `Index`""" @@ -931,7 +969,7 @@ def cluster( threads: int = 0, log: Union[str, bool] = False, batch_size: int = 0, - ) -> Union[Matches, BatchMatches]: + ) -> Clustering: """ Clusters already indexed or provided `vectors`, mapping them to various centroids. 
@@ -975,7 +1013,8 @@ def cluster( threads=threads, ) - return BatchMatches(*results) + batch_matches = BatchMatches(*results) + return Clustering(self, batch_matches, keys) def pairwise_distance( self, left: KeyOrKeysLike, right: KeyOrKeysLike From f988fc320772acc0b8f9b88b3b7d449ef9cc07f6 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Tue, 22 Aug 2023 20:34:33 +0000 Subject: [PATCH 52/70] Fix: `shared_lock_gt` for C++11 --- include/usearch/index_dense.hpp | 2 +- include/usearch/index_plugins.hpp | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 33819dde..86fbc31c 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -330,7 +330,7 @@ class index_dense_gt { mutable std::mutex available_threads_mutex_; using shared_mutex_t = unfair_shared_mutex_t; - using shared_lock_t = std::shared_lock; + using shared_lock_t = shared_lock_gt; using unique_lock_t = std::unique_lock; struct key_and_slot_t { diff --git a/include/usearch/index_plugins.hpp b/include/usearch/index_plugins.hpp index 54c72217..7b026e4e 100644 --- a/include/usearch/index_plugins.hpp +++ b/include/usearch/index_plugins.hpp @@ -905,6 +905,14 @@ class unfair_shared_mutex_t { } }; +template class shared_lock_gt { + mutex_at& mutex_; + + public: + inline explicit shared_lock_gt(mutex_at& m) noexcept : mutex_(m) { mutex_.lock_shared(); } + inline ~shared_lock_gt() noexcept { mutex_.unlock_shared(); } +}; + /** * @brief Utility class used to cast arrays of one scalar type to another, * avoiding unnecessary conversions. From b7a59adebeefa6bd26c01237709d2e60cb058c94 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Tue, 22 Aug 2023 20:40:03 +0000 Subject: [PATCH 53/70] Fix: Clustering tests --- python/scripts/test_index.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/python/scripts/test_index.py b/python/scripts/test_index.py index 4942afd1..e35ad0cf 100644 --- a/python/scripts/test_index.py +++ b/python/scripts/test_index.py @@ -14,6 +14,7 @@ Match, Matches, BatchMatches, + Clustering, ) from usearch.index import ( DEFAULT_CONNECTIVITY, @@ -200,18 +201,18 @@ def test_index_clustering(ndim, metric, quantization, dtype, batch_size): vectors = random_vectors(count=batch_size, ndim=ndim, dtype=dtype) index.add(keys, vectors, threads=threads) - clusters: BatchMatches = index.cluster(vectors=vectors, threads=threads) - assert len(clusters.keys) == batch_size + clusters: Clustering = index.cluster(vectors=vectors, threads=threads) + assert len(clusters.matches.keys) == batch_size # If no argument is provided, we cluster the present entries - clusters: BatchMatches = index.cluster(threads=threads) - assert len(clusters.keys) == batch_size + clusters: Clustering = index.cluster(threads=threads) + assert len(clusters.matches.keys) == batch_size # If no argument is provided, we cluster the present entries - clusters: BatchMatches = index.cluster(keys=keys[:50], threads=threads) - assert len(clusters.keys) == 50 + clusters: Clustering = index.cluster(keys=keys[:50], threads=threads) + assert len(clusters.matches.keys) == 50 # If no argument is provided, we cluster the present entries - clusters: BatchMatches = index.cluster(min_count=3, max_count=10, threads=threads) - unique_clusters = set(clusters.keys.flatten().tolist()) + clusters: Clustering = index.cluster(min_count=3, max_count=10, 
threads=threads) + unique_clusters = set(clusters.matches.keys.flatten().tolist()) assert len(unique_clusters) >= 3 and len(unique_clusters) <= 10 From a142c9d840841880672bcad5a4188d44f1ea6291 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 23 Aug 2023 13:21:15 +0400 Subject: [PATCH 54/70] Fix: Revert DLL exports for MSVC/MinGW interop --- c/lib.cpp | 4 ---- c/usearch.h | 5 +++++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/c/lib.cpp b/c/lib.cpp index 2053a506..4170b893 100644 --- a/c/lib.cpp +++ b/c/lib.cpp @@ -2,10 +2,6 @@ #include -#ifndef USEARCH_EXPORT -#define USEARCH_EXPORT -#endif - extern "C" { #include "usearch.h" } diff --git a/c/usearch.h b/c/usearch.h index f43bcf23..04ff326f 100644 --- a/c/usearch.h +++ b/c/usearch.h @@ -4,9 +4,14 @@ #ifdef __cplusplus extern "C" { #endif + #ifndef USEARCH_EXPORT +#if defined(_WIN32) && !defined(__MINGW32__) +#define USEARCH_EXPORT __declspec(dllexport) +#else #define USEARCH_EXPORT #endif + #include // `bool` #include // `size_t` From 623e279fd62bc8fe006927238097f62df33525c9 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 23 Aug 2023 14:36:30 +0400 Subject: [PATCH 55/70] Docs: Change sections order --- README.md | 84 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 47 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 8c394bf9..8497aa5a 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@

USearch

-Smaller & Faster Single-File
-Vector Search Engine
+Faster & Smaller Single-File
+Search Engine for Vectors & Texts


@@ -18,7 +18,7 @@ Vector Search Engine

-Euclidean • Angular • Jaccard • Hamming • Haversine • User-Defined Metrics
+Euclidean • Angular • Bitwise • Haversine • User-Defined Metrics
C++ 11Python 3 • @@ -31,9 +31,16 @@ Euclidean • Angular • Jaccard • Hamming • Haversine • User-Defined Met GoLangWolfram
-Linux • MacOS • Windows • Docker • WebAssembly
+Linux • MacOS • Windows • iOS • Docker • WebAssembly

+
+PyPI - Downloads +npm +Crates.io +GitHub code size in bytes +
+ --- - ✅ Benchmark-topping performance. @@ -42,7 +49,7 @@ Linux • MacOS • Windows • Docker • WebAssembly - ✅ Variable dimensionality vectors for unique applications, including search over compressed data. - ✅ Bitwise Tanimoto and Sorensen coefficients for [Genomics and Chemistry applications](#usearch--rdkit--molecular-search). - ✅ Hardware-agnostic `f16` & `i8` - [half-precision & quarter-precision support](#memory-efficiency-downcasting-and-quantization). -- ✅ [View large indexes from disk](#disk-based-indexes) without loading into RAM. +- ✅ [View large indexes from disk](#serving-index-from-disk) without loading into RAM. - ✅ Space-efficient point-clouds with `uint40_t`, accommodating 4B+ size. - ✅ Compatible with OpenMP and custom "executors", for fine-grained control over CPU utilization. - ✅ Heterogeneous lookups, renaming/relabeling, and on-the-fly deletions. @@ -57,14 +64,15 @@ FAISS is a widely recognized standard for high-performance vector search engines USearch and FAISS both employ the same HNSW algorithm, but they differ significantly in their design principles. USearch is compact and broadly compatible without sacrificing performance, with a primary focus on user-defined metrics and fewer dependencies. -| | FAISS | USearch | -| :----------------- | :---------------------------- | :--------------------------------- | -| Implementation | 84 K [SLOC][sloc] in `faiss/` | 3 K [SLOC][sloc] in `usearch/` | -| Supported metrics | 9 fixed metrics | Any User-Defined metrics | -| Supported ID types | `uint32_t`, `uint64_t` | `uint32_t`, `uint40_t`, `uint64_t` | -| Dependencies | BLAS, OpenMP | None | -| Bindings | SWIG | Native | -| Acceleration | Learned Quantization | Downcasting | +| | FAISS | USearch | +| :------------------ | :---------------------------- | :--------------------------------- | +| Implementation | 84 K [SLOC][sloc] in `faiss/` | 3 K [SLOC][sloc] in `usearch/` | +| Supported metrics | 9 fixed metrics | Any User-Defined metrics | +| Supported languages | C++, Python | 10 languages | +| Supported ID types | `uint32_t`, `uint64_t` | `uint32_t`, `uint40_t`, `uint64_t` | +| Dependencies | BLAS, OpenMP | None | +| Bindings | SWIG | Native | +| Acceleration | Learned Quantization | Downcasting | [sloc]: https://en.wikipedia.org/wiki/Source_lines_of_code @@ -96,6 +104,26 @@ assert matches[0].distance <= 0.001 assert np.allclose(index[42], vector) ``` +Comparing the performance of FAISS against USearch on 1 Million 96-dimensional vectors from the famous Deep1B dataset, once can expect the following numbers on modern AWS `c7g.metal` instances. + +| | FAISS, `f32` | USearch, `f32` | USearch, `f16` | USearch, `i8` | +| :----------- | -----------: | -------------: | -------------: | ----------------: | +| Batch Insert | 16 K/s | 73 K/s | 100 K/s | 104 K/s **+550%** | +| Batch Search | 82 K/s | 103 K/s | 113 K/s | 134 K/s **+63%** | +| Bulk Insert | 76 K/s | 105 K/s | 115 K/s | 202 K/s **+165%** | +| Bulk Search | 118 K/s | 174 K/s | 173 K/s | 304 K/s **+157%** | +| Recall @ 10 | 99% | 99.2% | 99.1% | 99.2% | + +> HNSW was configured with identical hyper-parameters: +> connectivity `M=16`, +> expansion @ construction `efConstruction=128`, +> and expansion @ search `ef=64`. +> Batch size is 256. +> Jump to the [Performance Tuning][benchmarking] section to read about the effects of those hyper-parameters. 
+ +[benchmarking]: https://github.com/unum-cloud/usearch/blob/main/docs/benchmarks.md + + ## User-Defined Functions While most vector search packages concentrate on just a couple of metrics - "Inner Product distance" and "Euclidean distance," USearch extends this list to include any user-defined metrics. @@ -124,27 +152,7 @@ Instead, we have focused on high-precision arithmetic over low-precision downcas The same index, and `add` and `search` operations will automatically down-cast or up-cast between `f32_t`, `f16_t`, `f64_t`, and `i8_t` representations, even if the hardware doesn't natively support it. Continuing the topic of memory efficiency, we provide a `uint40_t` to allow collection with over 4B+ vectors without allocating 8 bytes for every neighbor reference in the proximity graph. -| | FAISS, `f32` | USearch, `f32` | USearch, `f16` | USearch, `i8` | -| :----------- | -----------: | -------------: | -------------: | ----------------: | -| Batch Insert | 16 K/s | 73 K/s | 100 K/s | 104 K/s **+550%** | -| Batch Search | 82 K/s | 103 K/s | 113 K/s | 134 K/s **+63%** | -| Bulk Insert | 76 K/s | 105 K/s | 115 K/s | 202 K/s **+165%** | -| Bulk Search | 118 K/s | 174 K/s | 173 K/s | 304 K/s **+157%** | -| Recall @ 10 | 99% | 99.2% | 99.1% | 99.2% | - -> Dataset: 1M vectors sample of the Deep1B dataset. -> Hardware: `c7g.metal` AWS instance with 64 cores and DDR5 memory. -> HNSW was configured with identical hyper-parameters: -> connectivity `M=16`, -> expansion @ construction `efConstruction=128`, -> and expansion @ search `ef=64`. -> Batch size is 256. -> Both libraries were compiled for the target architecture. -> Jump to the [Performance Tuning][benchmarking] section to read about the effects of those hyper-parameters. - -[benchmarking]: https://github.com/unum-cloud/usearch/blob/main/docs/benchmarks.md - -## Disk-based Indexes +## Serving `Index` from Disk With USearch, you can serve indexes from external memory, enabling you to optimize your server choices for indexing speed and serving costs. This can result in **20x cost reduction** on AWS and other public clouds. @@ -159,7 +167,7 @@ other_view = Index(ndim=..., metric=CompiledMetric(...)) other_view.view("index.usearch") ``` -## Exact, Approximate, and Multi-Index Lookups +## Exact vs. Approximate Search Approximate search methods, such as HNSW, are predominantly used when an exact brute-force search becomes too resource-intensive. This typically occurs when you have millions of entries in a collection. @@ -183,6 +191,8 @@ When compared to FAISS's `IndexFlatL2` in Google Colab, **[USearch may offer up - `faiss.IndexFlatL2`: **55.3 ms**. - `usearch.index.search`: **2.54 ms**. +## `Indexes` for Multi-Index Lookups + For larger workloads targeting billions or even trillions of vectors, parallel multi-index lookups become invaluable. These lookups prevent the need to construct a single, massive index, allowing users to query multiple smaller ones instead. @@ -229,7 +239,7 @@ Broader functionality is ported per request. | Add, search | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Save, load, view | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | User-defined metrics | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| Joins | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| Joins | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | Variable-length vectors | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | 4B+ capacities | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | @@ -321,8 +331,8 @@ matches = index.search(fingerprints, 10) - [x] GPTCache: [Python](https://github.com/zilliztech/GPTCache/releases/tag/0.1.29). 
- [x] LangChain: [Python](https://github.com/langchain-ai/langchain/releases/tag/v0.0.257) and [JavaScipt](https://github.com/hwchase17/langchainjs/releases/tag/0.0.125). +- [x] ClickHouse: [C++](https://github.com/ClickHouse/ClickHouse/pull/53447). - [ ] Microsoft Semantic Kernel: [Python](https://github.com/microsoft/semantic-kernel/pull/2358) and C#. -- [ ] ClickHouse: C++. ## Citations From e463a9466ffeff7f477a5f748658b4d6e9b847e3 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 23 Aug 2023 14:48:36 +0400 Subject: [PATCH 56/70] Docs: Add links to packages --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8497aa5a..7d7fdd2d 100644 --- a/README.md +++ b/README.md @@ -35,9 +35,9 @@ Linux • MacOS • Windows • iOS • Docker • WebAssembly

-PyPI - Downloads
-npm
-Crates.io
+ PyPI - Downloads
+ npm
+ Crates.io
 GitHub code size in bytes
From 6415283e0a0c3f96a94bbe021a5da70576b5d640 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 23 Aug 2023 15:47:36 +0400 Subject: [PATCH 57/70] Add: `growing_hash_set_gt` for faster construction --- include/usearch/index.hpp | 215 +++++++++++++++++++++++++++----- include/usearch/index_dense.hpp | 6 - 2 files changed, 182 insertions(+), 39 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 80b45ac2..5fb26178 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -386,10 +386,10 @@ template struct expected_gt { }; /** - * @brief Light-weight bitset implementation to track visited nodes during graph traversal. + * @brief Light-weight bitset implementation to sync nodes updates during graph mutations. * Extends basic functionality with @b atomic operations. */ -template > class visits_bitset_gt { +template > class bitset_gt { using allocator_t = allocator_at; using byte_t = typename allocator_t::value_type; static_assert(sizeof(byte_t) == 1, "Allocator must allocate separate addressable bytes"); @@ -405,8 +405,8 @@ template > class visits_bitset_gt std::size_t count_{}; public: - visits_bitset_gt() noexcept {} - ~visits_bitset_gt() noexcept { reset(); } + bitset_gt() noexcept {} + ~bitset_gt() noexcept { reset(); } explicit operator bool() const noexcept { return slots_; } void clear() noexcept { std::memset(slots_, 0, count_ * sizeof(compressed_slot_t)); } @@ -418,25 +418,25 @@ template > class visits_bitset_gt count_ = 0; } - visits_bitset_gt(std::size_t capacity) noexcept + bitset_gt(std::size_t capacity) noexcept : slots_((compressed_slot_t*)allocator_t{}.allocate(slots(capacity) * sizeof(compressed_slot_t))), count_(slots_ ? slots(capacity) : 0u) { clear(); } - visits_bitset_gt(visits_bitset_gt&& other) noexcept { + bitset_gt(bitset_gt&& other) noexcept { slots_ = exchange(other.slots_, nullptr); count_ = exchange(other.count_, 0); } - visits_bitset_gt& operator=(visits_bitset_gt&& other) noexcept { + bitset_gt& operator=(bitset_gt&& other) noexcept { std::swap(slots_, other.slots_); std::swap(count_, other.count_); return *this; } - visits_bitset_gt(visits_bitset_gt const&) = delete; - visits_bitset_gt& operator=(visits_bitset_gt const&) = delete; + bitset_gt(bitset_gt const&) = delete; + bitset_gt& operator=(bitset_gt const&) = delete; inline bool test(std::size_t i) const noexcept { return slots_[i / bits_per_slot()] & (1ul << (i & bits_mask())); } inline bool set(std::size_t i) noexcept { @@ -474,13 +474,12 @@ template > class visits_bitset_gt #endif class lock_t { - visits_bitset_gt& bitset_; + bitset_gt& bitset_; std::size_t bit_offset_; public: inline ~lock_t() noexcept { bitset_.atomic_reset(bit_offset_); } - inline lock_t(visits_bitset_gt& bitset, std::size_t bit_offset) noexcept - : bitset_(bitset), bit_offset_(bit_offset) { + inline lock_t(bitset_gt& bitset, std::size_t bit_offset) noexcept : bitset_(bitset), bit_offset_(bit_offset) { while (bitset_.atomic_set(bit_offset_)) ; } @@ -489,8 +488,6 @@ template > class visits_bitset_gt inline lock_t lock(std::size_t i) noexcept { return {*this, i}; } }; -using visits_bitset_t = visits_bitset_gt<>; - /** * @brief Similar to `std::priority_queue`, but allows raw access to underlying * memory, in case you want to shuffle it or sort. 
Good for collections @@ -810,6 +807,149 @@ class usearch_pack_m uint40_t { static_assert(sizeof(uint40_t) == 5, "uint40_t must be exactly 5 bytes"); +// clang-format off +template ::value>::type* = nullptr> key_at default_free_value() { return std::numeric_limits::max(); } +template ::value>::type* = nullptr> uint40_t default_free_value() { return uint40_t::max(); } +template ::value && !std::is_same::value>::type* = nullptr> key_at default_free_value() { return key_at(); } +// clang-format on + +template struct hash_gt { + std::size_t operator()(element_at const& element) const noexcept { return std::hash{}(element); } +}; + +template <> struct hash_gt { + std::size_t operator()(uint40_t const& element) const noexcept { return std::hash{}(element); } +}; + +/** + * @brief Minimalistic hash-set implementation to track visited nodes during graph traversal. + * + * It doesn't support deletion of separate objects, but supports `clear`-ing all at once. + * It expects `reserve` to be called ahead of all insertions, so no resizes are needed. + * It also assumes `0xFF...FF` slots to be unused, to simplify the design. + * It uses linear probing, the number of slots is always a power of two, and it uses linear-probing + * in case of bucket collisions. + */ +template , typename allocator_at = std::allocator> +class growing_hash_set_gt { + + using element_t = element_at; + using hasher_t = hasher_at; + + using allocator_t = allocator_at; + using byte_t = typename allocator_t::value_type; + static_assert(sizeof(byte_t) == 1, "Allocator must allocate separate addressable bytes"); + + element_t* slots_{}; + /// @brief Number of slots. + std::size_t capacity_{}; + /// @brief Number of populated. + std::size_t count_{}; + hasher_t hasher_{}; + + public: + growing_hash_set_gt() noexcept {} + ~growing_hash_set_gt() noexcept { reset(); } + + explicit operator bool() const noexcept { return slots_; } + std::size_t size() const noexcept { return count_; } + + void clear() noexcept { + std::memset(slots_, 0xFF, capacity_ * sizeof(element_t)); + count_ = 0; + } + + void reset() noexcept { + if (slots_) + allocator_t{}.deallocate((byte_t*)slots_, capacity_ * sizeof(element_t)); + slots_ = nullptr; + capacity_ = 0; + count_ = 0; + } + + growing_hash_set_gt(std::size_t capacity) noexcept + : slots_((element_t*)allocator_t{}.allocate(ceil2(capacity) * sizeof(element_t))), + capacity_(slots_ ? ceil2(capacity) : 0u), count_(0u) { + clear(); + } + + growing_hash_set_gt(growing_hash_set_gt&& other) noexcept { + slots_ = exchange(other.slots_, nullptr); + capacity_ = exchange(other.capacity_, 0); + count_ = exchange(other.count_, 0); + } + + growing_hash_set_gt& operator=(growing_hash_set_gt&& other) noexcept { + std::swap(slots_, other.slots_); + std::swap(capacity_, other.capacity_); + std::swap(count_, other.count_); + return *this; + } + + growing_hash_set_gt(growing_hash_set_gt const&) = delete; + growing_hash_set_gt& operator=(growing_hash_set_gt const&) = delete; + + inline bool test(element_t const& elem) const noexcept { + std::size_t index = hasher_(elem) & (capacity_ - 1); + while (slots_[index] != default_free_value()) { + if (slots_[index] == elem) + return true; + + index = (index + 1) & (capacity_ - 1); + } + return false; + } + + /** + * + * @return Similar to `bitset_gt`, returns the previous value. 
+ */ + inline bool set(element_t const& elem) noexcept { + std::size_t index = hasher_(elem) & (capacity_ - 1); + while (slots_[index] != default_free_value()) { + // Already exists + if (slots_[index] == elem) + return true; + + index = (index + 1) & (capacity_ - 1); + } + slots_[index] = elem; + ++count_; + return false; + } + + bool reserve(std::size_t new_capacity) noexcept { + new_capacity = new_capacity * 5 / 3; + if (new_capacity <= capacity_) + return true; + + new_capacity = ceil2(new_capacity); + element_t* new_slots = (element_t*)allocator_t{}.allocate(new_capacity * sizeof(element_t)); + if (!new_slots) + return false; + + std::memset(new_slots, 0xFF, new_capacity * sizeof(element_t)); + std::size_t new_count = count_; + if (new_count) { + for (std::size_t old_index = 0; old_index < capacity_; ++old_index) { + if (slots_[old_index] == default_free_value()) + continue; + + std::size_t new_index = hasher_(slots_[old_index]) & (new_capacity - 1); + while (new_slots[new_index] != default_free_value()) + new_index = (new_index + 1) & (new_capacity - 1); + new_slots[new_index] = slots_[old_index]; + } + } + + reset(); + slots_ = new_slots; + capacity_ = new_capacity; + count_ = new_count; + return true; + } +}; + /** * @brief Basic single-threaded @b ring class, used for all kinds of task queues. */ @@ -1626,7 +1766,9 @@ class index_gt { */ static constexpr std::size_t node_head_bytes_() { return sizeof(key_t) + sizeof(level_t); } - using visits_bitset_t = visits_bitset_gt; + using nodes_mutexes_t = bitset_gt; + + using visits_hash_set_t = growing_hash_set_gt, dynamic_allocator_t>; struct precomputed_constants_t { double inverse_log_connectivity{}; @@ -1720,7 +1862,7 @@ class index_gt { struct usearch_align_m context_t { top_candidates_t top_candidates{}; next_candidates_t next_candidates{}; - visits_bitset_t visits{}; + visits_hash_set_t visits{}; std::default_random_engine level_generator{}; std::size_t iteration_cycles{}; std::size_t computed_distances_count{}; @@ -1777,7 +1919,7 @@ class index_gt { buffer_gt nodes_{}; /// @brief Mutex, that limits concurrent access to `nodes_`. - mutable visits_bitset_t nodes_mutexes_{}; + mutable nodes_mutexes_t nodes_mutexes_{}; using contexts_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; @@ -1940,18 +2082,12 @@ class index_gt { && limits.members <= limits_.members) return true; - visits_bitset_t new_mutexes(limits.members); + nodes_mutexes_t new_mutexes(limits.members); buffer_gt new_nodes(limits.members); buffer_gt new_contexts(limits.threads()); if (!new_nodes || !new_contexts || !new_mutexes) return false; - for (context_t& context : new_contexts) { - context.visits = visits_bitset_t(limits.members); - if (!context.visits) - return false; - } - // Move the nodes info, and deallocate previous buffers. 
if (nodes_) std::memcpy(new_nodes.data(), nodes_.data(), sizeof(node_t) * size()); @@ -2885,9 +3021,9 @@ class index_gt { } struct node_lock_t { - visits_bitset_t& bitset; + nodes_mutexes_t& mutexes; std::size_t slot; - inline ~node_lock_t() noexcept { bitset.atomic_reset(slot); } + inline ~node_lock_t() noexcept { mutexes.atomic_reset(slot); } }; inline node_lock_t node_lock_(std::size_t slot) const noexcept { @@ -2997,7 +3133,7 @@ class index_gt { index_gt const& index_; neighbors_ref_t neighbors_; - visits_bitset_t& visits_; + visits_hash_set_t& visits_; std::size_t current_; candidates_iterator_t& skip_missing() noexcept { @@ -3020,7 +3156,7 @@ class index_gt { using reference = misaligned_ref_gt; reference operator*() const noexcept { return slot(); } - candidates_iterator_t(index_gt const& index, neighbors_ref_t neighbors, visits_bitset_t& visits, + candidates_iterator_t(index_gt const& index, neighbors_ref_t neighbors, visits_hash_set_t& visits, std::size_t progress) noexcept : index_(index), neighbors_(neighbors), visits_(visits), current_(progress) {} candidates_iterator_t operator++(int) noexcept { @@ -3043,7 +3179,7 @@ class index_gt { struct candidates_range_t { index_gt const& index; neighbors_ref_t neighbors; - visits_bitset_t& visits; + visits_hash_set_t& visits; candidates_iterator_t begin() const noexcept { return candidates_iterator_t{index, neighbors, visits, 0}.skip_missing(); @@ -3056,7 +3192,7 @@ class index_gt { value_at&& query, metric_at&& metric, prefetch_at&& prefetch, // std::size_t closest_slot, level_t begin_level, level_t end_level, context_t& context) const noexcept { - visits_bitset_t& visits = context.visits; + visits_hash_set_t& visits = context.visits; visits.clear(); // Optional prefetching @@ -3103,13 +3239,15 @@ class index_gt { std::size_t start_slot, std::size_t new_slot, level_t level, std::size_t top_limit, context_t& context) noexcept { - visits_bitset_t& visits = context.visits; + visits_hash_set_t& visits = context.visits; next_candidates_t& next = context.next_candidates; // pop min, push top_candidates_t& top = context.top_candidates; // pop max, push visits.clear(); next.clear(); top.clear(); + if (!visits.reserve(config_.connectivity_base + 1u)) + return false; // Optional prefetching if (!std::is_same::value) @@ -3142,6 +3280,10 @@ class index_gt { prefetch(missing_candidates.begin(), missing_candidates.end()); } + // Assume the worst-case when reserving memory + if (!visits.reserve(visits.size() + candidate_neighbors.size())) + return false; + for (compressed_slot_t successor_slot : candidate_neighbors) { if (visits.set(successor_slot)) continue; @@ -3170,7 +3312,7 @@ class index_gt { value_at&& query, metric_at&& metric, predicate_at&& predicate, prefetch_at&& prefetch, // std::size_t start_slot, std::size_t expansion, context_t& context) const noexcept { - visits_bitset_t& visits = context.visits; + visits_hash_set_t& visits = context.visits; next_candidates_t& next = context.next_candidates; // pop min, push top_candidates_t& top = context.top_candidates; // pop max, push std::size_t const top_limit = expansion; @@ -3178,6 +3320,8 @@ class index_gt { visits.clear(); next.clear(); top.clear(); + if (!visits.reserve(config_.connectivity_base + 1u)) + return false; // Optional prefetching if (!std::is_same::value) @@ -3205,6 +3349,10 @@ class index_gt { prefetch(missing_candidates.begin(), missing_candidates.end()); } + // Assume the worst-case when reserving memory + if (!visits.reserve(visits.size() + candidate_neighbors.size())) + 
return false; + for (compressed_slot_t successor_slot : candidate_neighbors) { if (visits.set(successor_slot)) continue; @@ -3370,6 +3518,7 @@ static join_result_t join( // config.max_proposals = (std::min)(men.size(), config.max_proposals); using distance_t = typename men_at::distance_t; + using dynamic_allocator_t = typename men_at::dynamic_allocator_t; using dynamic_allocator_traits_t = typename men_at::dynamic_allocator_traits_t; using man_key_t = typename men_at::key_t; using woman_key_t = typename women_at::key_t; @@ -3401,7 +3550,7 @@ static join_result_t join( // std::memset(proposal_counts.data(), 0, sizeof(proposals_count_t) * men.size()); // Define locks, to limit concurrent accesses to `man_to_woman_slots` and `woman_to_man_slots`. - visits_bitset_t men_locks(men.size()), women_locks(women.size()); + bitset_gt men_locks(men.size()), women_locks(women.size()); if (!men_locks || !women_locks) return result.failed("Can't allocate locks"); diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 86fbc31c..4ba2c045 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -16,12 +16,6 @@ namespace usearch { template class index_dense_gt; -// clang-format off -template ::value>::type* = nullptr> key_at default_free_value() { return std::numeric_limits::max(); } -template ::value>::type* = nullptr> uint40_t default_free_value() { return uint40_t::max(); } -template ::value && !std::is_same::value>::type* = nullptr> key_at default_free_value() { return key_at(); } -// clang-format on - /** * @brief The "magic" sequence helps infer the type of the file. * USearch indexes start with the "usearch" string. From f18bd56bfbb6bc6ca347963f4cfc3cd79dbfa968 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 23 Aug 2023 17:22:28 +0400 Subject: [PATCH 58/70] Refactor: Avoid typename collision on Windows --- python/lib.cpp | 83 ++++++++++++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 39 deletions(-) diff --git a/python/lib.cpp b/python/lib.cpp index 1c43a075..7c77b605 100644 --- a/python/lib.cpp +++ b/python/lib.cpp @@ -44,10 +44,11 @@ enum class metric_signature_t { namespace py = pybind11; using py_shape_t = py::array::ShapeContainer; -#define key_t typename index_dense_t::key_t + using metric_t = metric_punned_t; using distance_t = distance_punned_t; +using dense_key_t = typename index_dense_t::key_t; using dense_add_result_t = typename index_dense_t::add_result_t; using dense_search_result_t = typename index_dense_t::search_result_t; using dense_labeling_result_t = typename index_dense_t::labeling_result_t; @@ -215,7 +216,7 @@ static void add_typed_to_index( // atomic_error_t atomic_error{nullptr}; executor_default_t{threads}.dynamic(vectors_count, [&](std::size_t thread_idx, std::size_t task_idx) { - key_t key = *reinterpret_cast(keys_data + task_idx * keys_info.strides[0]); + dense_key_t key = *reinterpret_cast(keys_data + task_idx * keys_info.strides[0]); scalar_at const* vector = reinterpret_cast(vectors_data + task_idx * vectors_info.strides[0]); dense_add_result_t result = index.add(key, vector, thread_idx, force_copy); if (!result) { @@ -246,7 +247,7 @@ static void add_many_to_index( // py::buffer_info keys_info = keys.request(); py::buffer_info vectors_info = vectors.request(); - if (keys_info.itemsize != sizeof(key_t)) + if (keys_info.itemsize != sizeof(dense_key_t)) throw std::invalid_argument("Incompatible key type!"); if (keys_info.ndim != 1) @@ 
-285,7 +286,7 @@ template static void search_typed( // dense_index_py_t& index, py::buffer_info& vectors_info, // std::size_t wanted, bool exact, std::size_t threads, // - py::array_t& keys_py, py::array_t& distances_py, py::array_t& counts_py, + py::array_t& keys_py, py::array_t& distances_py, py::array_t& counts_py, std::atomic& stats_visited_members, std::atomic& stats_computed_distances) { auto keys_py2d = keys_py.template mutable_unchecked<2>(); @@ -334,7 +335,7 @@ template static void search_typed( // dense_indexes_py_t& indexes, py::buffer_info& vectors_info, // std::size_t wanted, bool exact, std::size_t threads, // - py::array_t& keys_py, py::array_t& distances_py, py::array_t& counts_py, + py::array_t& keys_py, py::array_t& distances_py, py::array_t& counts_py, std::atomic& stats_visited_members, std::atomic& stats_computed_distances) { auto keys_py2d = keys_py.template mutable_unchecked<2>(); @@ -432,7 +433,7 @@ static py::tuple search_many_in_index( // if (vectors_dimensions != static_cast(index.scalar_words())) throw std::invalid_argument("The number of vector dimensions doesn't match!"); - py::array_t keys_py({vectors_count, static_cast(wanted)}); + py::array_t keys_py({vectors_count, static_cast(wanted)}); py::array_t distances_py({vectors_count, static_cast(wanted)}); py::array_t counts_py(vectors_count); std::atomic stats_visited_members(0); @@ -462,7 +463,7 @@ template static void search_typed_brute_force( // py::buffer_info& dataset_info, py::buffer_info& queries_info, // std::size_t wanted, std::size_t threads, metric_t const& metric, // - py::array_t& keys_py, py::array_t& distances_py, py::array_t& counts_py) { + py::array_t& keys_py, py::array_t& distances_py, py::array_t& counts_py) { auto keys_py2d = keys_py.template mutable_unchecked<2>(); auto distances_py2d = distances_py.template mutable_unchecked<2>(); @@ -495,7 +496,7 @@ static void search_typed_brute_force( // { auto lock = query_mutexes.lock(query_idx); - key_t* keys = &keys_py2d(query_idx, 0); + dense_key_t* keys = &keys_py2d(query_idx, 0); distance_t* distances = &distances_py2d(query_idx, 0); std::size_t& matches = reinterpret_cast(counts_py1d(query_idx)); if (matches == wanted) @@ -505,9 +506,9 @@ static void search_typed_brute_force( // std::size_t offset = std::lower_bound(distances, distances + matches, distance) - distances; std::size_t count_worse = matches - offset - (wanted == matches); - std::memmove(keys + offset + 1, keys + offset, count_worse * sizeof(key_t)); + std::memmove(keys + offset + 1, keys + offset, count_worse * sizeof(dense_key_t)); std::memmove(distances + offset + 1, distances + offset, count_worse * sizeof(distance_t)); - keys[offset] = static_cast(dataset_idx); + keys[offset] = static_cast(dataset_idx); distances[offset] = distance; matches += matches != wanted; } @@ -547,7 +548,7 @@ static py::tuple search_many_brute_force( // if (dataset_kind != queries_kind) throw std::invalid_argument("The types of vectors don't match!"); - py::array_t keys_py({dataset_count, static_cast(wanted)}); + py::array_t keys_py({dataset_count, static_cast(wanted)}); py::array_t distances_py({dataset_count, static_cast(wanted)}); py::array_t counts_py(dataset_count); @@ -617,14 +618,14 @@ static py::tuple cluster_vectors( // if (queries_dimensions != index.scalar_words()) throw std::invalid_argument("The number of vector dimensions doesn't match!"); - py::array_t keys_py({Py_ssize_t(queries_count), Py_ssize_t(1)}); + py::array_t keys_py({Py_ssize_t(queries_count), Py_ssize_t(1)}); py::array_t 
distances_py({Py_ssize_t(queries_count), Py_ssize_t(1)}); dense_clustering_result_t cluster_result; executor_default_t executor{threads}; auto keys_py2d = keys_py.template mutable_unchecked<2>(); auto distances_py2d = distances_py.template mutable_unchecked<2>(); - key_t* keys_ptr = reinterpret_cast(&keys_py2d(0, 0)); + dense_key_t* keys_ptr = reinterpret_cast(&keys_py2d(0, 0)); distance_t* distances_ptr = reinterpret_cast(&distances_py2d(0, 0)); index_dense_clustering_config_t config; @@ -674,8 +675,8 @@ static py::tuple cluster_vectors( // * 4. number of computed pairwise distances. */ template -static py::tuple cluster_keys( // - index_at& index, py::array_t queries_py, // +static py::tuple cluster_keys( // + index_at& index, py::array_t queries_py, // std::size_t min_count, std::size_t max_count, std::size_t threads) { if (index.limits().threads_search < threads) @@ -683,16 +684,16 @@ static py::tuple cluster_keys( // std::size_t queries_count = static_cast(queries_py.size()); auto queries_py1d = queries_py.template unchecked<1>(); - key_t const* queries_begin = &queries_py1d(0); - key_t const* queries_end = queries_begin + queries_count; + dense_key_t const* queries_begin = &queries_py1d(0); + dense_key_t const* queries_end = queries_begin + queries_count; - py::array_t keys_py({Py_ssize_t(queries_count), Py_ssize_t(1)}); + py::array_t keys_py({Py_ssize_t(queries_count), Py_ssize_t(1)}); py::array_t distances_py({Py_ssize_t(queries_count), Py_ssize_t(1)}); executor_default_t executor{threads}; auto keys_py2d = keys_py.template mutable_unchecked<2>(); auto distances_py2d = distances_py.template mutable_unchecked<2>(); - key_t* keys_ptr = reinterpret_cast(&keys_py2d(0, 0)); + dense_key_t* keys_ptr = reinterpret_cast(&keys_py2d(0, 0)); distance_t* distances_ptr = reinterpret_cast(&distances_py2d(0, 0)); index_dense_clustering_config_t config; @@ -718,11 +719,11 @@ static py::tuple cluster_keys( // return results; } -static std::unordered_map join_index( // - dense_index_py_t const& a, dense_index_py_t const& b, // +static std::unordered_map join_index( // + dense_index_py_t const& a, dense_index_py_t const& b, // std::size_t max_proposals, bool exact) { - std::unordered_map a_to_b; + std::unordered_map a_to_b; dummy_key_to_key_mapping_t b_to_a; a_to_b.reserve((std::min)(a.size(), b.size())); @@ -781,7 +782,7 @@ static py::tuple get_typed_vectors_for_keys(index_at const& index, py::buffer ke py::tuple results(keys_count); for (Py_ssize_t task_idx = 0; task_idx != keys_count; ++task_idx) { - key_t key = *reinterpret_cast(keys_data + task_idx * keys_info.strides[0]); + dense_key_t key = *reinterpret_cast(keys_data + task_idx * keys_info.strides[0]); std::size_t vectors_count = index.count(key); if (!vectors_count) { results[task_idx] = py::none(); @@ -951,7 +952,7 @@ PYBIND11_MODULE(compiled, m) { i.def( "rename_one_to_one", - [](dense_index_py_t& index, key_t from, key_t to) -> bool { + [](dense_index_py_t& index, dense_key_t from, dense_key_t to) -> bool { dense_labeling_result_t result = index.rename(from, to); forward_error(result); return result.completed; @@ -960,7 +961,8 @@ PYBIND11_MODULE(compiled, m) { i.def( "rename_many_to_many", - [](dense_index_py_t& index, std::vector const& from, std::vector const& to) -> std::vector { + [](dense_index_py_t& index, std::vector const& from, + std::vector const& to) -> std::vector { if (from.size() != to.size()) throw std::invalid_argument("Sizes of `from` and `to` arrays don't match!"); @@ -976,7 +978,7 @@ PYBIND11_MODULE(compiled, m) { i.def( 
"rename_many_to_one", - [](dense_index_py_t& index, std::vector const& from, key_t to) -> std::vector { + [](dense_index_py_t& index, std::vector const& from, dense_key_t to) -> std::vector { std::vector results(from.size(), false); for (std::size_t i = 0; i != from.size(); ++i) { dense_labeling_result_t result = index.rename(from[i], to); @@ -989,7 +991,7 @@ PYBIND11_MODULE(compiled, m) { i.def( "remove_one", - [](dense_index_py_t& index, key_t key, bool compact, std::size_t threads) -> bool { + [](dense_index_py_t& index, dense_key_t key, bool compact, std::size_t threads) -> bool { dense_labeling_result_t result = index.remove(key); forward_error(result); if (!compact) @@ -1007,7 +1009,8 @@ PYBIND11_MODULE(compiled, m) { i.def( "remove_many", - [](dense_index_py_t& index, std::vector const& keys, bool compact, std::size_t threads) -> std::size_t { + [](dense_index_py_t& index, std::vector const& keys, bool compact, + std::size_t threads) -> std::size_t { dense_labeling_result_t result = index.remove(keys.begin(), keys.end()); forward_error(result); if (!compact) @@ -1062,7 +1065,8 @@ PYBIND11_MODULE(compiled, m) { i.def("count_one", &dense_index_py_t::count); i.def( // - "contains_many", [](dense_index_py_t const& index, py::array_t const& keys_py) -> py::array_t { + "contains_many", + [](dense_index_py_t const& index, py::array_t const& keys_py) -> py::array_t { py::array_t results_py(keys_py.size()); auto results_py1d = results_py.template mutable_unchecked<1>(); auto keys_py1d = keys_py.template unchecked<1>(); @@ -1072,7 +1076,8 @@ PYBIND11_MODULE(compiled, m) { }); i.def( // - "count_many", [](dense_index_py_t const& index, py::array_t const& keys_py) -> py::array_t { + "count_many", + [](dense_index_py_t const& index, py::array_t const& keys_py) -> py::array_t { py::array_t results_py(keys_py.size()); auto results_py1d = results_py.template mutable_unchecked<1>(); auto keys_py1d = keys_py.template unchecked<1>(); @@ -1083,8 +1088,8 @@ PYBIND11_MODULE(compiled, m) { i.def( // "pairwise_distances", - [](dense_index_py_t const& index, py::array_t const& left_py, - py::array_t const& right_py) -> py::array_t { + [](dense_index_py_t const& index, py::array_t const& left_py, + py::array_t const& right_py) -> py::array_t { py::array_t results_py(left_py.size()); auto results_py1d = results_py.template mutable_unchecked<1>(); auto left_py1d = left_py.template unchecked<1>(); @@ -1095,7 +1100,7 @@ PYBIND11_MODULE(compiled, m) { }); i.def( // - "pairwise_distance", [](dense_index_py_t const& index, key_t left, key_t right) -> distance_t { + "pairwise_distance", [](dense_index_py_t const& index, dense_key_t left, dense_key_t right) -> distance_t { return index.distance_between(left, right).min; }); @@ -1103,9 +1108,9 @@ PYBIND11_MODULE(compiled, m) { i.def( "get_keys_in_slice", - [](dense_index_py_t const& index, std::size_t offset, std::size_t limit) -> py::array_t { + [](dense_index_py_t const& index, std::size_t offset, std::size_t limit) -> py::array_t { limit = std::min(index.size(), limit); - py::array_t result_py(static_cast(limit)); + py::array_t result_py(static_cast(limit)); auto result_py1d = result_py.template mutable_unchecked<1>(); index.export_keys(&result_py1d(0), offset, limit); return result_py; @@ -1114,8 +1119,8 @@ PYBIND11_MODULE(compiled, m) { i.def( "get_keys_at_offsets", - [](dense_index_py_t const& index, py::array_t const& offsets_py) -> py::array_t { - py::array_t result_py(offsets_py.size()); + [](dense_index_py_t const& index, py::array_t const& offsets_py) -> 
py::array_t { + py::array_t result_py(offsets_py.size()); auto result_py1d = result_py.template mutable_unchecked<1>(); auto offsets_py1d = offsets_py.template unchecked<1>(); for (Py_ssize_t task_idx = 0; task_idx != offsets_py.size(); ++task_idx) @@ -1126,8 +1131,8 @@ PYBIND11_MODULE(compiled, m) { i.def( "get_key_at_offset", - [](dense_index_py_t const& index, std::size_t offset) -> key_t { - key_t result; + [](dense_index_py_t const& index, std::size_t offset) -> dense_key_t { + dense_key_t result; index.export_keys(&result, offset, 1); return result; }, From 21f144e0f98cecf4619781dc9844666ebb21e7d4 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 23 Aug 2023 17:38:38 +0400 Subject: [PATCH 59/70] Fix: Argument name in `self_recall` --- python/usearch/eval.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/usearch/eval.py b/python/usearch/eval.py index 412a7f90..100efc48 100644 --- a/python/usearch/eval.py +++ b/python/usearch/eval.py @@ -103,8 +103,8 @@ def self_recall(index: Index, sample: float = 1, **kwargs) -> SearchStats: """ if len(index) == 0: return 0 - if "k" not in kwargs: - kwargs["k"] = 1 + if "count" not in kwargs: + kwargs["count"] = 1 keys = index.keys if sample != 1: From 052b837c290913dedfe1380aa0841cc0352976cc Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 23 Aug 2023 18:28:28 +0400 Subject: [PATCH 60/70] Fix: Spinning through empty set --- include/usearch/index.hpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 5fb26178..ed3d2bf3 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -919,7 +919,7 @@ class growing_hash_set_gt { } bool reserve(std::size_t new_capacity) noexcept { - new_capacity = new_capacity * 5 / 3; + new_capacity = (new_capacity * 5u) / 3u; if (new_capacity <= capacity_) return true; @@ -930,8 +930,8 @@ class growing_hash_set_gt { std::memset(new_slots, 0xFF, new_capacity * sizeof(element_t)); std::size_t new_count = count_; - if (new_count) { - for (std::size_t old_index = 0; old_index < capacity_; ++old_index) { + if (count_) { + for (std::size_t old_index = 0; old_index != capacity_; ++old_index) { if (slots_[old_index] == default_free_value()) continue; @@ -3137,6 +3137,8 @@ class index_gt { std::size_t current_; candidates_iterator_t& skip_missing() noexcept { + if (!visits_.size()) + return *this; while (current_ != neighbors_.size()) { compressed_slot_t neighbor_slot = neighbors_[current_]; if (visits_.test(neighbor_slot)) From 4ce7f5b4fc90f07e8d1a2054c0b6d660c3e2f0ea Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 23 Aug 2023 18:28:57 +0400 Subject: [PATCH 61/70] Improve: Pass specific keys to `self_recall` --- python/usearch/eval.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/usearch/eval.py b/python/usearch/eval.py index 100efc48..340ccbee 100644 --- a/python/usearch/eval.py +++ b/python/usearch/eval.py @@ -106,7 +106,11 @@ def self_recall(index: Index, sample: float = 1, **kwargs) -> SearchStats: if "count" not in kwargs: kwargs["count"] = 1 - keys = index.keys + if "keys" in kwargs: + keys = kwargs["keys"] + else: + keys = np.array(index.keys) + if sample != 1: keys = np.random.choice(keys, int(ceil(len(keys) * sample))) From 9fe5735eedaf7d22098ebe931b007e806dbe54d7 Mon Sep 17 00:00:00 2001 From: Ash 
Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 23 Aug 2023 18:29:47 +0400 Subject: [PATCH 62/70] Fix: `nd.array(index.keys)` type --- python/usearch/index.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/usearch/index.py b/python/usearch/index.py index f8ed3cc2..5a87dd19 100644 --- a/python/usearch/index.py +++ b/python/usearch/index.py @@ -433,6 +433,8 @@ def __getitem__( return self.index._compiled.get_key_at_offset(offset) def __array__(self, dtype=None) -> np.ndarray: + if dtype is None: + dtype = Key return self.index._compiled.get_keys_in_slice().astype(dtype) From eb3f91d11126f53a6ec7a5c234b18ea6b8d63e1f Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 23 Aug 2023 19:15:47 +0400 Subject: [PATCH 63/70] Fix: `bitset_t` type alias for Pyhton --- include/usearch/index.hpp | 4 +++- python/lib.cpp | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index ed3d2bf3..5e0e9bca 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -488,6 +488,8 @@ template > class bitset_gt { inline lock_t lock(std::size_t i) noexcept { return {*this, i}; } }; +using bitset_t = bitset_gt<>; + /** * @brief Similar to `std::priority_queue`, but allows raw access to underlying * memory, in case you want to shuffle it or sort. Good for collections @@ -3552,7 +3554,7 @@ static join_result_t join( // std::memset(proposal_counts.data(), 0, sizeof(proposals_count_t) * men.size()); // Define locks, to limit concurrent accesses to `man_to_woman_slots` and `woman_to_man_slots`. - bitset_gt men_locks(men.size()), women_locks(women.size()); + bitset_t men_locks(men.size()), women_locks(women.size()); if (!men_locks || !women_locks) return result.failed("Can't allocate locks"); diff --git a/python/lib.cpp b/python/lib.cpp index 7c77b605..844db8f0 100644 --- a/python/lib.cpp +++ b/python/lib.cpp @@ -350,7 +350,7 @@ static void search_typed( // if (!threads) threads = std::thread::hardware_concurrency(); - visits_bitset_t query_mutexes(static_cast(vectors_count)); + bitset_t query_mutexes(static_cast(vectors_count)); if (!query_mutexes) throw std::bad_alloc(); @@ -481,7 +481,7 @@ static void search_typed_brute_force( // threads = std::thread::hardware_concurrency(); std::size_t tasks_count = static_cast(dataset_count * queries_count); - visits_bitset_t query_mutexes(static_cast(queries_count)); + bitset_t query_mutexes(static_cast(queries_count)); if (!query_mutexes) throw std::bad_alloc(); From 1e86a319a27eb3c0d545dd2ae492b7a3a71a93c0 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 23 Aug 2023 19:20:51 +0400 Subject: [PATCH 64/70] Fix: Missing `#endif` --- c/usearch.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/c/usearch.h b/c/usearch.h index 04ff326f..91069a65 100644 --- a/c/usearch.h +++ b/c/usearch.h @@ -11,6 +11,7 @@ extern "C" { #else #define USEARCH_EXPORT #endif +#endif #include // `bool` #include // `size_t` @@ -216,4 +217,5 @@ USEARCH_EXPORT size_t usearch_rename(usearch_index_t, usearch_key_t from, usearc #ifdef __cplusplus } #endif -#endif + +#endif // UNUM_USEARCH_H From c9601806bf841e7ff26c474611c02c2128c19736 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 23 Aug 2023 21:51:50 +0400 Subject: [PATCH 65/70] Fix: Cast keys on `Index.get` --- python/usearch/index.py | 2 ++ 1 file changed, 2 
insertions(+) diff --git a/python/usearch/index.py b/python/usearch/index.py index 5a87dd19..23053577 100644 --- a/python/usearch/index.py +++ b/python/usearch/index.py @@ -725,6 +725,8 @@ def cast(result): keys = [keys] if not isinstance(keys, np.ndarray): keys = np.array(keys, dtype=Key) + else: + keys = keys.astype(Key) results = self._compiled.get_many(keys, dtype) results = [cast(result) for result in results] From 361f0fae446cb7a7b685e5a7e140d165ab4149c8 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 23 Aug 2023 21:58:32 +0400 Subject: [PATCH 66/70] Fix: Close file descriptor before removing --- python/scripts/test_index.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/scripts/test_index.py b/python/scripts/test_index.py index e35ad0cf..46d4606e 100644 --- a/python/scripts/test_index.py +++ b/python/scripts/test_index.py @@ -155,6 +155,7 @@ def test_index_save_load_restore_copy(ndim, quantization, batch_size): assert len(copied_index) == len(index) assert np.allclose(np.vstack(copied_index.get(keys)), np.vstack(index.get(keys))) + index.reset() os.remove("tmp.usearch") From 3355e649e829a315222dd0b4d3ea7297403847b0 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 23 Aug 2023 22:13:43 +0400 Subject: [PATCH 67/70] Refactor: Updating eval scripts --- python/scripts/join.py | 12 ++++++------ python/usearch/eval.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/python/scripts/join.py b/python/scripts/join.py index 43ddcac9..537b72a5 100644 --- a/python/scripts/join.py +++ b/python/scripts/join.py @@ -21,7 +21,7 @@ from usearch.io import load_matrix from usearch.eval import measure_seconds -k = 10 +count = 10 exact = False batch_size = 1024 * 4 max_elements = 1000000 @@ -104,7 +104,7 @@ dt = measure_seconds args = dict( - k=k, + count=count, batch_size=batch_size, log=True, exact=exact, @@ -113,28 +113,28 @@ secs, a_self_recall = dt(lambda: a.search(a.vectors, **args).recall(a.keys)) print( "Self-recall @{} of {} index: {:.2f}%, took {:.2f}s".format( - k, a_name, a_self_recall * 100, secs + count, a_name, a_self_recall * 100, secs ) ) secs, b_self_recall = dt(lambda: b.search(b.vectors, **args).recall(b.keys)) print( "Self-recall @{} of {} index: {:.2f}%, took {:.2f}s".format( - k, b_name, b_self_recall * 100, secs + count, b_name, b_self_recall * 100, secs ) ) secs, ab_recall = dt(lambda: b.search(a.vectors, **args).recall(b.keys)) print( "Cross-recall @{} of {} in {}: {:.2f}%, took {:.2f}s".format( - k, a_name, b_name, ab_recall * 100, secs + count, a_name, b_name, ab_recall * 100, secs ) ) secs, ba_recall = dt(lambda: a.search(b.vectors, **args).recall(a.keys)) print( "Cross-recall @{} of {} in {}: {:.2f}%, took {:.2f}s".format( - k, b_name, a_name, ba_recall * 100, secs + count, b_name, a_name, ba_recall * 100, secs ) ) diff --git a/python/usearch/eval.py b/python/usearch/eval.py index 340ccbee..00087f5a 100644 --- a/python/usearch/eval.py +++ b/python/usearch/eval.py @@ -407,7 +407,7 @@ def __call__(self, index: Index) -> TaskResult: return TaskResult( search_per_second=self.queries.shape[0] / dt, - recall_at_one=results.recall_first(self.neighbors[:, 0].flatten()), + recall_at_one=results.mean_recall(self.neighbors[:, 0].flatten()), ) def slices(self, batch_size: int) -> List[SearchTask]: From da771e6d1645d83f05f440338fc9a625ce770fc0 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Thu, 24 Aug 
2023 00:29:22 +0400
Subject: [PATCH 68/70] Fix: FAISS benchmarks compatibility

---
 python/scripts/index_faiss.py | 6 +++---
 python/usearch/index.py       | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/python/scripts/index_faiss.py b/python/scripts/index_faiss.py
index 6a57d7fe..6dd42392 100644
--- a/python/scripts/index_faiss.py
+++ b/python/scripts/index_faiss.py
@@ -4,7 +4,7 @@
 import numpy as np
 from faiss import IndexHNSWFlat, IndexIVFPQ, read_index
 
-from usearch.index import Matches
+from usearch.index import Matches, BatchMatches
 from usearch.index import (
     DEFAULT_CONNECTIVITY,
     DEFAULT_EXPANSION_ADD,
@@ -45,9 +45,9 @@ def add(self, keys, vectors):
         # self._faiss.add_with_ids(vectors, keys)
         self._faiss.add(vectors)
 
-    def search(self, queries, k: int) -> Matches:
+    def search(self, queries, k: int) -> BatchMatches:
         distances, keys = self._faiss.search(queries, k)
-        return Matches(keys, distances, np.array([k] * queries.shape[0]))
+        return BatchMatches(keys, distances, np.array([k] * queries.shape[0]))
 
     def __len__(self) -> int:
         return self._faiss.ntotal
diff --git a/python/usearch/index.py b/python/usearch/index.py
index 23053577..2bbdb9b1 100644
--- a/python/usearch/index.py
+++ b/python/usearch/index.py
@@ -269,8 +269,8 @@ class Matches:
     keys: np.ndarray
     distances: np.ndarray
 
-    visited_members: int
-    computed_distances: int
+    visited_members: int = 0
+    computed_distances: int = 0
 
     def __len__(self) -> int:
         return len(self.keys)
@@ -304,8 +304,8 @@ class BatchMatches:
     distances: np.ndarray
     counts: np.ndarray
 
-    visited_members: int
-    computed_distances: int
+    visited_members: int = 0
+    computed_distances: int = 0
 
     def __len__(self) -> int:
         return len(self.counts)

From c99d528b5e24c67d85ed261903d6b3b4a174f077 Mon Sep 17 00:00:00 2001
From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com>
Date: Thu, 24 Aug 2023 00:29:34 +0400
Subject: [PATCH 69/70] Docs: section on "Clustering"

---
 README.md | 36 +++++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 7d7fdd2d..75010a0f 100644
--- a/README.md
+++ b/README.md
@@ -53,6 +53,7 @@ Linux • MacOS • Windows • iOS • Docker • WebAssembly
 - ✅ Space-efficient point-clouds with `uint40_t`, accommodating 4B+ size.
 - ✅ Compatible with OpenMP and custom "executors", for fine-grained control over CPU utilization.
 - ✅ Heterogeneous lookups, renaming/relabeling, and on-the-fly deletions.
+- ✅ Near-real-time [clustering and sub-clusterings](#clustering) for Tens or Millions of clusters.
 - ✅ [Semantic Search](#usearch--ai--multi-modal-semantic-search) and [Joins](#joins).
 
 [usearch-header]: https://github.com/unum-cloud/usearch/blob/main/include/usearch/index.hpp
@@ -123,7 +124,6 @@ Comparing the performance of FAISS against USearch on 1 Million 96-dimensional v
 
 [benchmarking]: https://github.com/unum-cloud/usearch/blob/main/docs/benchmarks.md
 
-
 ## User-Defined Functions
 
 While most vector search packages concentrate on just a couple of metrics - "Inner Product distance" and "Euclidean distance," USearch extends this list to include any user-defined metrics.
@@ -208,6 +208,36 @@ multi_index = Indexes(
 multi_index.search(...)
 ```
 
+## Clustering
+
+Once the index is constructed, it can be used to cluster entries much faster.
+In essence, the `Index` itself can be seen as a clustering, and it allows iterative deepening.
+
+```py
+clustering = index.cluster(
+    min_count=10, # Optional
+    max_count=15, # Optional
+    threads=..., # Optional
+)
+
+# Get the clusters and their sizes
+centroid_keys, sizes = clustering.centroids_popularity
+
+# Use Matplotlib to draw a histogram
+clustering.plot_centroids_popularity()
+
+# Export a NetworkX graph of the clusters
+g = clustering.network
+
+# Get members of a specific cluster
+first_members = clustering.members_of(centroid_keys[0])
+
+# Deepen into that cluster, splitting it into more parts; the same arguments are supported
+sub_clustering = clustering.subcluster(min_count=..., max_count=...)
+```
+
+With Scikit-Learn, clustering a 1 Million point dataset can take anywhere from minutes to hours, depending on the number of clusters you want. For 50'000 clusters, the performance difference between USearch and conventional clustering methods can easily reach 100x.
+
 ## Joins, One-to-One, One-to-Many, and Many-to-Many Mappings
 
 One of the big questions these days is how will AI change the world of databases and data management.
@@ -326,6 +356,10 @@ matches = index.search(fingerprints, 10)
 
 [smiles]: https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system
 [rdkit-fingerprints]: https://www.rdkit.org/docs/RDKit_Book.html#additional-information-about-the-fingerprints
 
+### USearch + POI Coordinates = GIS Applications... on iOS?
+
+With Objective-C and iOS bindings, USearch can be easily used in mobile applications.
+
 ## Integrations

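The README section added by PATCH 69 documents the clustering API only as a fragment. As a complement, here is a minimal, self-contained sketch of how that API might be exercised end-to-end, using only the calls the new documentation names; the dimensionality, metric, key layout, and random vectors are illustrative assumptions rather than part of the patch:

```py
# A hedged sketch: build a small index, then run the clustering API documented above.
# The dataset shape, metric, and keys are illustrative assumptions, not patch content.
import numpy as np
from usearch.index import Index

index = Index(ndim=256, metric="cos")
keys = np.arange(10_000)
vectors = np.random.rand(10_000, 256).astype(np.float32)
index.add(keys, vectors)

# Cluster the whole index; both bounds are optional, as in the README example
clustering = index.cluster(min_count=10, max_count=15)

# Centroid keys and the size of every cluster
centroid_keys, sizes = clustering.centroids_popularity

# Members of the most popular cluster, then deepen the clustering further
largest_centroid = centroid_keys[np.argmax(sizes)]
members = clustering.members_of(largest_centroid)
sub_clustering = clustering.subcluster(min_count=10, max_count=15)
```
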
From 58b043c1bc8eec8cd949457547a3bff8a9e0bad2 Mon Sep 17 00:00:00 2001
From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com>
Date: Thu, 24 Aug 2023 09:41:34 +0200
Subject: [PATCH 70/70] Improve: Reporting `connectivity` in `repr`

---
 python/usearch/index.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/usearch/index.py b/python/usearch/index.py
index 2bbdb9b1..fca4e4c7 100644
--- a/python/usearch/index.py
+++ b/python/usearch/index.py
@@ -1095,11 +1095,12 @@ def specs(self) -> Dict[str, Union[str, int, bool]]:
         }
 
     def __repr__(self) -> str:
-        f = "usearch.Index({} x {}, {}, expansion: {} & {}, {} vectors in {} levels)"
+        f = "usearch.Index({} x {}, {}, connectivity: {}, expansion: {} & {}, {} vectors in {} levels)"
         return f.format(
             self.dtype,
             self.ndim,
             self.metric,
+            self.connectivity,
             self.expansion_add,
             self.expansion_search,
             len(self),
@@ -1117,6 +1118,7 @@ def _repr_pretty_(self, printer, cycle) -> str:
             f"-- data type: {self.dtype}",
             f"-- dimensions: {self.ndim}",
             f"-- metric: {self.metric}",
+            f"-- connectivity: {self.connectivity}",
             f"-- expansion on addition:{self.expansion_add} candidates",
             f"-- expansion on search: {self.expansion_search} candidates",
             "- binary",
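The practical effect of this final patch is easiest to see in the interpreter: once `connectivity` is reported, the one-line representation of an `Index` includes it alongside the expansion parameters. A rough sketch follows; the printed line in the comment is only illustrative, since the exact values depend on construction defaults and index contents:

```py
# A hedged sketch of the updated representation; the values in the comment below
# are illustrative, not guaranteed defaults.
from usearch.index import Index

index = Index(ndim=96, metric="ip")
print(repr(index))
# e.g. usearch.Index(f32 x 96, ip, connectivity: 16, expansion: 128 & 64, 0 vectors in 1 levels)
```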