From 48c55a623d8cc79f83612b2954d5e7acf4a32e44 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 19 Jul 2023 11:19:36 +0400 Subject: [PATCH] Refactor: Deprecate `search_around` pagination --- cpp/bench.cpp | 28 --------------- include/usearch/index.hpp | 34 ------------------ include/usearch/index_punned_dense.hpp | 48 -------------------------- 3 files changed, 110 deletions(-) diff --git a/cpp/bench.cpp b/cpp/bench.cpp index 23a54a9d..12e94073 100644 --- a/cpp/bench.cpp +++ b/cpp/bench.cpp @@ -311,26 +311,6 @@ void search_many( // } } -template -void paginate_many( // - index_at& native, std::size_t n, real_at const* vectors, std::size_t dims, std::size_t wanted, - vector_id_at const* hints) { - - std::string name = "Paginate " + std::to_string(wanted); - running_stats_printer_t printer{n, name.c_str()}; - -#pragma omp parallel for schedule(static, 32) - for (std::size_t i = 0; i < n; ++i) { - search_config_t config; - config.thread = omp_get_thread_num(); - vector_view_t vector{vectors + dims * i, dims}; - native.search_around(hints[i], vector, wanted, config); - printer.progress++; - if (omp_get_thread_num() == 0) - printer.refresh(); - } -} - template // static void single_shot(dataset_at& dataset, index_at& index, bool construct = true) { using label_t = typename index_at::label_t; @@ -397,14 +377,6 @@ static void single_shot(dataset_at& dataset, index_at& index, bool construct = t std::printf("Unmatched %.2f %% (%zu items)\n", unmatched_count * 100.f / index.size(), unmatched_count); std::printf("Proposals %.2f / man (%zu total)\n", join_attempts * 1.f / index.size(), join_attempts); - // Paginate - std::vector hints(dataset.queries_count()); - for (std::size_t i = 0; i != hints.size(); ++i) - hints[i] = dataset.neighborhood(i)[0]; - paginate_many(index, dataset.queries_count(), dataset.query(0), dataset.dimensions(), 10, hints.data()); - paginate_many(index, dataset.queries_count(), dataset.query(0), dataset.dimensions(), 100, hints.data()); - paginate_many(index, dataset.queries_count(), dataset.query(0), dataset.dimensions(), 1000, hints.data()); - std::printf("------------\n"); std::printf("\n"); } diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index e640c187..243036b1 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -2128,40 +2128,6 @@ class index_gt { return result; } - template - search_result_t search_around( // - id_t hint, vector_view_t query, std::size_t wanted, // - search_config_t config = {}, predicate_at&& predicate = dummy_predicate_t{}) const noexcept { - - context_t& context = contexts_[config.thread]; - top_candidates_t& top = context.top_candidates; - next_candidates_t& next = context.next_candidates; - search_result_t result{*this, top}; - - if (!size_) - return result; - - std::size_t expansion = (std::max)(config.expansion, wanted); - if (!next.reserve(expansion)) - return result.failed("Out of memory!"); - if (!top.reserve(expansion)) - return result.failed("Out of memory!"); - - // Go down the level, tracking only the closest match - result.measurements = context.measurements_count; - result.cycles = context.iteration_cycles; - - search_to_find_in_base_(hint, query, expansion, context, std::forward(predicate)); - top.sort_ascending(); - top.shrink(wanted); - - // Normalize stats - result.measurements = context.measurements_count - result.measurements; - result.cycles = context.iteration_cycles - result.cycles; - result.count = top.size(); - return result; - } - #pragma endregion #pragma region Metadata diff --git a/include/usearch/index_punned_dense.hpp b/include/usearch/index_punned_dense.hpp index 04aa117f..82f09b37 100644 --- a/include/usearch/index_punned_dense.hpp +++ b/include/usearch/index_punned_dense.hpp @@ -281,18 +281,6 @@ class index_punned_dense_gt { search_result_t search(f32_t const* vector, std::size_t wanted, search_config_t config) const { return search_(vector, wanted, config, casts_.from_f32); } search_result_t search(f64_t const* vector, std::size_t wanted, search_config_t config) const { return search_(vector, wanted, config, casts_.from_f64); } - search_result_t search_around(label_t hint, b1x8_t const* vector, std::size_t wanted) const { return search_around_(hint, vector, wanted, casts_.from_b1x8); } - search_result_t search_around(label_t hint, f8_bits_t const* vector, std::size_t wanted) const { return search_around_(hint, vector, wanted, casts_.from_f8); } - search_result_t search_around(label_t hint, f16_t const* vector, std::size_t wanted) const { return search_around_(hint, vector, wanted, casts_.from_f16); } - search_result_t search_around(label_t hint, f32_t const* vector, std::size_t wanted) const { return search_around_(hint, vector, wanted, casts_.from_f32); } - search_result_t search_around(label_t hint, f64_t const* vector, std::size_t wanted) const { return search_around_(hint, vector, wanted, casts_.from_f64); } - - search_result_t search_around(label_t hint, b1x8_t const* vector, std::size_t wanted, search_config_t config) const { return search_around_(hint, vector, wanted, config, casts_.from_b1x8); } - search_result_t search_around(label_t hint, f8_bits_t const* vector, std::size_t wanted, search_config_t config) const { return search_around_(hint, vector, wanted, config, casts_.from_f8); } - search_result_t search_around(label_t hint, f16_t const* vector, std::size_t wanted, search_config_t config) const { return search_around_(hint, vector, wanted, config, casts_.from_f16); } - search_result_t search_around(label_t hint, f32_t const* vector, std::size_t wanted, search_config_t config) const { return search_around_(hint, vector, wanted, config, casts_.from_f32); } - search_result_t search_around(label_t hint, f64_t const* vector, std::size_t wanted, search_config_t config) const { return search_around_(hint, vector, wanted, config, casts_.from_f64); } - search_result_t empty_search_result() const { return search_result_t{*typed_}; } bool get(label_t label, b1x8_t* vector) const { return get_(label, vector, casts_.to_b1x8); } @@ -454,32 +442,6 @@ class index_punned_dense_gt { return typed_->search({vector_data, vector_bytes}, wanted, config); } - template - search_result_t search_around_( // - label_t hint, scalar_at const* vector, std::size_t wanted, // - search_config_t config, cast_t const& cast) const { - - byte_t const* vector_data = reinterpret_cast(vector); - std::size_t vector_bytes = dimensions_ * sizeof(scalar_at); - - byte_t* casted_data = cast_buffer_.data() + casted_vector_bytes_ * config.thread; - bool casted = cast(vector_data, dimensions_, casted_data); - if (casted) - vector_data = casted_data, vector_bytes = casted_vector_bytes_; - - return typed_->search_around(static_cast(hint), {vector_data, vector_bytes}, wanted, config); - } - - void reindex_labels() { - shared_lock_t lock(lookup_table_mutex_); - lookup_table_.clear(); - for (std::size_t i = 0; i != typed_->size(); ++i) { - member_citerator_t iterator = typed_->cbegin() + i; - member_cref_t member = *iterator; - lookup_table_[member.label] = static_cast(i); - } - } - id_t lookup_id_(label_t label) const { shared_lock_t lock(lookup_table_mutex_); return lookup_table_.at(label); @@ -520,16 +482,6 @@ class index_punned_dense_gt { return search_(vector, wanted, search_config, cast); } - template - search_result_t search_around_( // - label_t hint, scalar_at const* vector, std::size_t wanted, // - cast_t const& cast) const { - thread_lock_t lock = thread_lock_(); - search_config_t search_config; - search_config.thread = lock.thread_id; - return search_around_(hint, vector, wanted, search_config, cast); - } - static index_punned_dense_gt make_( // std::size_t dimensions, scalar_kind_t scalar_kind, // index_config_t config, std::size_t expansion_add, std::size_t expansion_search, //