Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
66 commits
Select commit Hold shift + click to select a range
02ce18c
Add FAISS Flat benchmark to benchmarks
FrancescaDelGaudio Feb 13, 2026
99c63b9
Add FAISS Flat benchmark to benchmarks
FrancescaDelGaudio Feb 13, 2026
128b62e
Add FAISS Flat benchmark to benchmarks
FrancescaDelGaudio Feb 13, 2026
9e41460
Add FAISS Flat benchmark to benchmarks
FrancescaDelGaudio Feb 13, 2026
ee9d928
Add FAISS Flat benchmark to benchmarks
FrancescaDelGaudio Feb 13, 2026
55b3e41
Add FAISS Flat benchmark to benchmarks
FrancescaDelGaudio Feb 13, 2026
5a69a70
“bm_faiss”
FrancescaDelGaudio Feb 16, 2026
79b6e7e
“bm_faiss”
FrancescaDelGaudio Feb 16, 2026
69d901f
“bm_faiss”
FrancescaDelGaudio Feb 16, 2026
f8f5cc4
“bm_faiss”
FrancescaDelGaudio Feb 16, 2026
d1256c1
paris_failures
FrancescaDelGaudio Feb 16, 2026
aae8251
paris_failures
FrancescaDelGaudio Feb 16, 2026
528df50
paris_failures
FrancescaDelGaudio Feb 16, 2026
4cb6f11
paris_failures
FrancescaDelGaudio Feb 16, 2026
360ae0f
paris_failures
FrancescaDelGaudio Feb 16, 2026
d5f9174
paris_failures
FrancescaDelGaudio Feb 16, 2026
7b178f5
paris_failures
FrancescaDelGaudio Feb 16, 2026
76b31b1
paris_failures
FrancescaDelGaudio Feb 16, 2026
4762473
paris_failures
FrancescaDelGaudio Feb 16, 2026
4d5d622
paris_failures
FrancescaDelGaudio Feb 16, 2026
0d22f7a
paris_failures
FrancescaDelGaudio Feb 16, 2026
28be938
paris_failures
FrancescaDelGaudio Feb 16, 2026
ad865eb
paris_failures
FrancescaDelGaudio Feb 16, 2026
9b2f0c6
paris_failures
FrancescaDelGaudio Feb 16, 2026
574f55f
paris_failures
FrancescaDelGaudio Feb 16, 2026
4c5076b
paris_failures
FrancescaDelGaudio Feb 16, 2026
1c4f1e1
paris_failures
FrancescaDelGaudio Feb 16, 2026
502901e
paris_failures
FrancescaDelGaudio Feb 16, 2026
c63c24e
paris_failures
FrancescaDelGaudio Feb 16, 2026
4120ae0
paris_failures
FrancescaDelGaudio Feb 17, 2026
0ac6666
paris_failures
FrancescaDelGaudio Feb 17, 2026
9a71b61
paris_failures
FrancescaDelGaudio Feb 17, 2026
04df710
paris_failures
FrancescaDelGaudio Feb 17, 2026
c336cd1
paris_failures
FrancescaDelGaudio Feb 17, 2026
b872bc5
paris_failures
FrancescaDelGaudio Feb 17, 2026
b74a349
paris_failures
FrancescaDelGaudio Feb 17, 2026
04c2c0c
paris_failures
FrancescaDelGaudio Feb 17, 2026
74f231e
paris_failures
FrancescaDelGaudio Feb 17, 2026
3abfecc
paris_failures
FrancescaDelGaudio Feb 17, 2026
cff6e5d
paris_failures
FrancescaDelGaudio Feb 17, 2026
d134a74
paris_failures
FrancescaDelGaudio Feb 17, 2026
8d0e652
paris_failures
FrancescaDelGaudio Feb 17, 2026
2663087
paris_failures
FrancescaDelGaudio Feb 17, 2026
5485066
paris_failures
FrancescaDelGaudio Feb 17, 2026
a8b7472
paris_failures
FrancescaDelGaudio Feb 17, 2026
8e09686
paris_failures
FrancescaDelGaudio Feb 17, 2026
6dfaab7
paris_failures
FrancescaDelGaudio Feb 17, 2026
ec67bb2
paris_failures
FrancescaDelGaudio Feb 17, 2026
e03d0f3
paris_failures
FrancescaDelGaudio Feb 17, 2026
a96d680
paris_failures
FrancescaDelGaudio Feb 17, 2026
9f16500
paris_failures
FrancescaDelGaudio Feb 17, 2026
0651a0e
paris_failures
FrancescaDelGaudio Feb 17, 2026
8cefaa3
paris_failures
FrancescaDelGaudio Feb 17, 2026
10a3580
paris_failures
FrancescaDelGaudio Feb 17, 2026
3800df1
paris_failures
FrancescaDelGaudio Feb 17, 2026
438d189
paris_failures
FrancescaDelGaudio Feb 17, 2026
d0eecc5
DEEP/Seismic benchmarks, query_limit, seismic 1000 queries
FrancescaDelGaudio Feb 17, 2026
febdad8
DEEP/Seismic benchmarks, query_limit, seismic 1000 queries
FrancescaDelGaudio Feb 17, 2026
23eeaf1
DEEP/Seismic benchmarks, query_limit, seismic 1000 queries
FrancescaDelGaudio Feb 18, 2026
2106f25
DEEP/Seismic benchmarks, query_limit, seismic 1000 queries
FrancescaDelGaudio Feb 18, 2026
4dca66c
DEEP/Seismic benchmarks, query_limit, seismic 1000 queries
FrancescaDelGaudio Feb 18, 2026
e5a6cdc
DEEP/Seismic benchmarks, query_limit, seismic 1000 queries
FrancescaDelGaudio Feb 18, 2026
b30a6cf
DEEP/Seismic benchmarks, query_limit, seismic 1000 queries
FrancescaDelGaudio Feb 18, 2026
06d1ebd
DEEP/Seismic benchmarks, query_limit, seismic 1000 queries
FrancescaDelGaudio Feb 18, 2026
296dbf0
DEEP/Seismic benchmarks, query_limit, seismic 1000 queries
FrancescaDelGaudio Feb 19, 2026
71efff6
DEEP/Seismic benchmarks, query_limit, seismic 1000 queries
FrancescaDelGaudio Feb 19, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[submodule "extern/pybind11"]
path = extern/pybind11
url = https://github.com/pybind/pybind11.git
[submodule "benchmark/faiss"]
path = benchmark/faiss
url = https://github.com/facebookresearch/faiss
36 changes: 36 additions & 0 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,14 @@ if(DEBUG_MSG)
message(STATUS "FetchContent_MakeAvailable for 'benchmark' completed. Google Benchmark is now available.")
endif()

# ====== FAISS (CPU-only) for benchmarks ======
# Configure the FAISS subproject before it is added: GPU support, the Python
# bindings, the extras and the C API are all switched off, and BUILD_TESTING is
# turned off as well.
# NOTE(review): these are forced cache entries, so BUILD_TESTING=OFF is not
# scoped to FAISS; confirm no other target in this build relies on
# BUILD_TESTING being ON.
set(FAISS_ENABLE_GPU OFF CACHE BOOL "" FORCE)
set(FAISS_ENABLE_PYTHON OFF CACHE BOOL "" FORCE)
set(BUILD_TESTING OFF CACHE BOOL "" FORCE)
set(FAISS_ENABLE_EXTRAS OFF CACHE BOOL "" FORCE)
set(FAISS_ENABLE_C_API OFF CACHE BOOL "" FORCE)
# The FAISS sources are taken from the faiss/ directory next to this file and
# built into a faiss/ directory under the current binary dir.
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/faiss ${CMAKE_CURRENT_BINARY_DIR}/faiss)

# ////// BRUTEFORCE //////
if(DEBUG_MSG)
message(STATUS "---")
Expand Down Expand Up @@ -165,6 +173,34 @@ if(DEBUG_MSG)
message(STATUS "Include directories added for bm_Messi_L2Square.")
endif()

# ////// FAISS FLAT //////
# Benchmark executable that runs a FAISS flat index as a comparison point.
if(DEBUG_MSG)
message(STATUS "---")
message(STATUS "## Benchmark: FAISS Flat L2Square")
endif()

# The benchmark source is compiled together with the benchmark utilities and
# the parameter/data-loading helpers from ../commons.
add_executable(bm_FaissFlat_L2Square
bm_FaissFlat_L2Square.cpp
bm_utils.cpp
../commons/paramSetup.cpp
../commons/test_bm_utils.cpp
../commons/dataloaders.cpp
)

# Links Google Benchmark, the project library (dino_lib) and the faiss target
# provided by the FAISS subdirectory.
# NOTE(review): benchmark::benchmark_main is linked here; confirm it is still
# wanted if the source file already provides its own main via BENCHMARK_MAIN().
target_link_libraries(bm_FaissFlat_L2Square
PRIVATE
benchmark::benchmark
benchmark::benchmark_main
dino_lib
faiss
)

# Headers are resolved relative to ../lib and ../commons.
target_include_directories(bm_FaissFlat_L2Square
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/../lib
${CMAKE_CURRENT_SOURCE_DIR}/../commons
)

# ////// PARIS //////
if(DEBUG_MSG)
message(STATUS "---")
Expand Down
139 changes: 139 additions & 0 deletions benchmark/bm_FaissFlat_L2Square.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#include <benchmark/benchmark.h>
#include <cstdio>
#include <string>
#include "bm_utils.hpp"
#include "../commons/dataloaders.hpp"
#include "../commons/VectorDataLoader.h"
#include "../commons/test_bm_utils.hpp"
#include <faiss/IndexFlat.h>

#ifdef _OPENMP
#include <omp.h>
#endif

struct FaissFlatSearchOnlyFixture : public benchmark::Fixture {
faiss::IndexFlatL2* index = nullptr;
float* query = nullptr;
faiss::idx_t* I = nullptr;
float* D = nullptr;
faiss::idx_t n_query = 0;
size_t k = 0;
std::string dataset_name;
size_t n_database = 0;
int thread_count = 0;

static bool endsWith(const std::string& s, const std::string& suffix) {
return s.size() >= suffix.size() &&
s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
}

void SetUp(const benchmark::State& state) override {
int config_idx = static_cast<int>(state.range(0));
const SSTestConfig& config = test_configs_deep_seismic_astro270m[config_idx];

const bool use_fvecs = endsWith(config.dataset_path, ".fvecs") || endsWith(config.query_path, ".fvecs");
size_t dim_u = 0, n_database_u = 0, n_q_u = 0;
float* database = nullptr;

if (use_fvecs) {
database = fvecs_read(config.dataset_path.c_str(), &dim_u, &n_database_u, 0);
if (!database) {
std::cerr << "Failed to load dataset (fvecs)" << std::endl;
return;
}
const size_t query_limit = (config.query_limit > 0) ? static_cast<size_t>(config.query_limit) : 0;
query = fvecs_read(config.query_path.c_str(), &dim_u, &n_q_u, query_limit);
if (!query) {
std::cerr << "Failed to load queries (fvecs)" << std::endl;
delete[] database;
return;
}
} else {
std::string dataset_filename = pathToFilename(config.dataset_path);
std::string query_filename = pathToFilename(config.query_path);

daisy::idx_t dim, n_database, _, __;
if (!parseFilenameForConfig(dataset_filename, "bruteForce", dim, n_database, _, __)) {
std::cerr << "Failed to parse dataset config from filename: " << dataset_filename << std::endl;
return;
}

daisy::idx_t dim_q, n_q, ___, ____;
if (!parseFilenameForConfig(query_filename, "bruteForce", dim_q, n_q, ___, ____)) {
std::cerr << "Failed to parse query config from filename: " << query_filename << std::endl;
return;
}

if (dim != static_cast<daisy::idx_t>(dim_q)) {
std::cerr << "Dimension mismatch between dataset and queries" << std::endl;
return;
}

dim_u = static_cast<size_t>(dim);
n_database_u = static_cast<size_t>(n_database);
if (config.query_limit > 0 && static_cast<daisy::idx_t>(config.query_limit) < n_q)
n_q = static_cast<daisy::idx_t>(config.query_limit);
n_q_u = static_cast<size_t>(n_q);

database = loadBinData(config.dataset_path.c_str(), n_database, dim, false);
if (!database) {
std::cerr << "Failed to load dataset" << std::endl;
return;
}

query = loadBinData(config.query_path.c_str(), n_q, dim_q, false);
if (!query) {
std::cerr << "Failed to load queries" << std::endl;
delete[] database;
return;
}
}

#ifdef _OPENMP
omp_set_num_threads(config.thread_count);
#endif

index = new faiss::IndexFlatL2(static_cast<int>(dim_u));
index->add(static_cast<faiss::idx_t>(n_database_u), database);
fprintf(stderr, ">>> Finished indexing\n");
delete[] database;

k = static_cast<size_t>(config.k_value);
n_query = static_cast<faiss::idx_t>(n_q_u);
I = new faiss::idx_t[n_query * k];
D = new float[n_query * k];

dataset_name = config.name;
n_database = n_database_u;
thread_count = config.thread_count;

fprintf(stderr, "[FAISS] n_database=%zu n_query=%zu dim=%zu k=%zu threads=%d\n",
n_database_u, n_query, dim_u, k, config.thread_count);
}

void TearDown(const benchmark::State&) override {
delete[] query;
delete[] I;
delete[] D;
delete index;
}
};

/// Runs the k-nearest-neighbour search for all loaded queries against the
/// index built by the fixture; distances go to D and labels to I.
BENCHMARK_DEFINE_F(FaissFlatSearchOnlyFixture, BM_FaissFlat_SearchOnly)(benchmark::State& state) {
    // SetUp() returns early, leaving these pointers null, when the data cannot
    // be loaded. Report that as a benchmark error instead of dereferencing null.
    if (index == nullptr || query == nullptr || I == nullptr || D == nullptr) {
        state.SkipWithError("FAISS fixture setup failed; search skipped");
        return;
    }

    // The progress logging sits inside the timed loop, so its cost is part of
    // the reported time.
    for (auto _ : state) {
        fprintf(stderr, "[FAISS] --- Query phase ---\n");
        fprintf(stderr, "[FAISS] dataset=%s n_database=%zu\n", dataset_name.c_str(), n_database);
        fprintf(stderr, "[FAISS] search_threads=%d n_query=%zu k=%zu\n", thread_count, (size_t)n_query, k);
        fflush(stderr);
        index->search(n_query, query, static_cast<faiss::idx_t>(k), D, I);
        fprintf(stderr, ">>> Finished querying\n");
    }
}

// Registers the search benchmark once per configuration index 0..7; the index
// reaches the fixture as state.range(0). Each registration is pinned to a
// single iteration and reported in milliseconds.
BENCHMARK_REGISTER_F(FaissFlatSearchOnlyFixture, BM_FaissFlat_SearchOnly)
// q=100, k=1,10,100,1000: DEEP (0-3), Seismic (4-7)
->Args({0})->Args({1})->Args({2})->Args({3})->Args({4})->Args({5})->Args({6})->Args({7})
->Iterations(1)
->Unit(benchmark::kMillisecond);

// Google Benchmark macro that provides the program entry point.
BENCHMARK_MAIN();
153 changes: 145 additions & 8 deletions benchmark/bm_Messi_L2Square.cpp
Original file line number Diff line number Diff line change
@@ -1,18 +1,155 @@
#include <benchmark/benchmark.h>
#include <cstdio>
#include <string>
#include "bm_utils.hpp"
#include "../lib/algos/Messi.hpp"
#include "../commons/dataloaders.hpp"
#include "../commons/VectorDataLoader.h"
#include "../commons/test_bm_utils.hpp"
#include "../lib/algos/Messi.hpp"
#include "../lib/algos/DataSource.hpp"

static void BM_Messi(benchmark::State& state) {
int config_idx = static_cast<int>(state.range(0));
const SSTestConfig& config = test_configs[config_idx];
/// Reports whether `s` finishes with `suffix`. A suffix longer than `s` never
/// matches; an empty suffix always does.
static bool endsWith(const std::string& s, const std::string& suffix) {
    const auto suffix_len = suffix.size();
    if (suffix_len > s.size()) {
        return false;
    }
    const auto tail_start = s.size() - suffix_len;
    return s.compare(tail_start, suffix_len, suffix) == 0;
}

struct MessiSearchOnlyFixture : public benchmark::Fixture {
daisy::Messi* search = nullptr;
float* database = nullptr;
float* query = nullptr;
daisy::idx_t* I = nullptr;
float* D = nullptr;
daisy::idx_t n_query = 0;
size_t k = 0;
std::string dataset_name;
size_t n_database = 0;
int thread_count = 0;

void SetUp(const benchmark::State& state) override {
int config_idx = static_cast<int>(state.range(0));
const SSTestConfig& config = test_configs_deep_seismic_astro270m[config_idx];

const bool use_fvecs = endsWith(config.dataset_path, ".fvecs") || endsWith(config.query_path, ".fvecs");
size_t dim_u = 0, n_database_u = 0, n_q_u = 0;
database = nullptr;

if (use_fvecs) {
database = fvecs_read(config.dataset_path.c_str(), &dim_u, &n_database_u, 0);
if (!database) {
std::cerr << "Failed to load dataset (fvecs)" << std::endl;
return;
}
const size_t query_limit = (config.query_limit > 0) ? static_cast<size_t>(config.query_limit) : 0;
query = fvecs_read(config.query_path.c_str(), &dim_u, &n_q_u, query_limit);
if (!query) {
std::cerr << "Failed to load queries (fvecs)" << std::endl;
delete[] database;
return;
}
} else {
std::string dataset_filename = pathToFilename(config.dataset_path);
std::string query_filename = pathToFilename(config.query_path);

daisy::idx_t dim, n_database, _, __;
if (!parseFilenameForConfig(dataset_filename, "bruteForce", dim, n_database, _, __)) {
std::cerr << "Failed to parse dataset config from filename: " << dataset_filename << std::endl;
return;
}

daisy::idx_t dim_q, n_q, ___, ____;
if (!parseFilenameForConfig(query_filename, "bruteForce", dim_q, n_q, ___, ____)) {
std::cerr << "Failed to parse query config from filename: " << query_filename << std::endl;
return;
}

if (dim != static_cast<daisy::idx_t>(dim_q)) {
std::cerr << "Dimension mismatch between dataset and queries" << std::endl;
return;
}

dim_u = static_cast<size_t>(dim);
n_database_u = static_cast<size_t>(n_database);
if (config.query_limit > 0 && static_cast<daisy::idx_t>(config.query_limit) < n_q)
n_q = static_cast<daisy::idx_t>(config.query_limit);
n_q_u = static_cast<size_t>(n_q);

database = loadBinData(config.dataset_path.c_str(), n_database, dim, false);
if (!database) {
std::cerr << "Failed to load dataset" << std::endl;
return;
}

daisy::Messi search(daisy::DistanceType::L2_SQUARED);
query = loadBinData(config.query_path.c_str(), n_q, dim_q, false);
if (!query) {
std::cerr << "Failed to load queries" << std::endl;
delete[] database;
return;
}
}

search = new daisy::Messi(daisy::DistanceType::L2_SQUARED);
search->setNumThreads(config.thread_count);
search->setIndexWorkers(config.thread_count);

fprintf(stderr, "[MESSI] Before buildIndex (n_database=%zu dim=%zu).\n", n_database_u, dim_u);
fflush(stderr);

daisy::InMemoryDataSource data_source(database, static_cast<daisy::idx_t>(n_database_u), static_cast<daisy::idx_t>(dim_u));
fprintf(stderr, "created data source\n");
fflush(stderr);
search->buildIndex(&data_source);

fprintf(stderr, "[MESSI] Indexing finished (n_database=%zu dim=%zu).\n", n_database_u, dim_u);
fflush(stderr);

k = static_cast<size_t>(config.k_value);
n_query = static_cast<daisy::idx_t>(n_q_u);
I = new daisy::idx_t[n_query * k];
D = new float[n_query * k];

dataset_name = config.name;
n_database = n_database_u;
thread_count = config.thread_count;

fprintf(stderr, "[MESSI] n_database=%zu n_query=%zu dim=%zu k=%zu threads=%d\n",
n_database_u, (size_t)n_query, dim_u, k, config.thread_count);
fflush(stderr);
}

void TearDown(const benchmark::State&) override {
fprintf(stderr, "[MESSI] TearDown start.\n");
fflush(stderr);
delete search;
delete[] database;
delete[] query;
delete[] I;
delete[] D;
search = nullptr;
database = nullptr;
query = nullptr;
I = nullptr;
D = nullptr;
fprintf(stderr, "[MESSI] TearDown done.\n");
fflush(stderr);
}
};

/// Runs the k-nearest-neighbour search for all loaded queries against the
/// Messi index built by the fixture; labels go to I and distances to D.
BENCHMARK_DEFINE_F(MessiSearchOnlyFixture, BM_Messi_SearchOnly)(benchmark::State& state) {
    // SetUp() returns early, leaving these pointers null, when the data cannot
    // be loaded. Report that as a benchmark error instead of dereferencing null.
    if (search == nullptr || query == nullptr || I == nullptr || D == nullptr) {
        state.SkipWithError("MESSI fixture setup failed; search skipped");
        return;
    }

    // The progress logging sits inside the timed loop, so its cost is part of
    // the reported time.
    for (auto _ : state) {
        fprintf(stderr, "[MESSI] --- Query phase ---\n");
        fprintf(stderr, "[MESSI] dataset=%s n_database=%zu\n", dataset_name.c_str(), n_database);
        fprintf(stderr, "[MESSI] search_threads=%d n_query=%zu k=%zu\n", thread_count, (size_t)n_query, k);
        fflush(stderr);
        search->searchIndex(query, n_query, static_cast<daisy::idx_t>(k), I, D);
        fprintf(stderr, "[MESSI] Querying finished (n_query=%zu k=%zu).\n", (size_t)n_query, k);
        fflush(stderr);
    }
}

BENCHMARK(BM_Messi)->Arg(0)->MinTime(2.0)->Unit(benchmark::kMillisecond);
// Registers the search benchmark once per configuration index 0..7; the index
// reaches the fixture as state.range(0). Each registration is pinned to a
// single iteration and reported in milliseconds.
BENCHMARK_REGISTER_F(MessiSearchOnlyFixture, BM_Messi_SearchOnly)
// q=100, k=1,10,100,1000: DEEP (0-3), Seismic (4-7)
->Args({0})->Args({1})->Args({2})->Args({3})->Args({4})->Args({5})->Args({6})->Args({7})
->Iterations(1)
->Unit(benchmark::kMillisecond);

BENCHMARK_MAIN();
BENCHMARK_MAIN();
Loading
Loading