Skip to content

Commit

Permalink
Deprecate <?> operator, remove post_parse_analyze_hook and `ExecutorStart_hook` hooks and related code.
Browse files Browse the repository at this point in the history

Update tests to not use <?> operator
  • Loading branch information
var77 committed Nov 1, 2024
1 parent 18f83e1 commit bd85111
Show file tree
Hide file tree
Showing 47 changed files with 157 additions and 1,704 deletions.
14 changes: 2 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,19 +106,9 @@ FROM small_world ORDER BY vector <-> ARRAY[0,0,0] LIMIT 1;

### A note on operators and operator classes

Lantern supports several distance functions in the index and it has 2 modes for operators:
Lantern supports several distance functions in the index.

1. `lantern.pgvector_compat=TRUE` (default)
In this mode there are 3 operators available `<->` (l2sq), `<=>` (cosine), `<+>` (hamming).

Note that in this mode, you need to use the right operator in order to trigger an index scan.

2. `lantern.pgvector_compat=FALSE`
In this mode you only need to specify the distance function used for a column at index creation time. Lantern will automatically infer the distance function to use for search so you always use `<?>` operator in search queries.

Note that in this mode, the operator `<?>` is intended exclusively for use with index lookups. If you expect to not use the index in a query, use the distance function directly (e.g. `l2sq_dist(v1, v2)`)

> To switch between modes set `lantern.pgvector_compat` variable to `TRUE` or `FALSE`.
There are 3 operators available: `<->` (l2sq), `<=>` (cosine), `<+>` (hamming).

There are four defined operator classes that can be employed during index creation:

Expand Down
11 changes: 0 additions & 11 deletions ci/scripts/run-tests-linux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,6 @@ function run_pgvector_tests(){
pushd /tmp/pgvector
# Add lantern to load-extension in pgregress
sed -i '/REGRESS_OPTS \=/ s/$/ --load-extension lantern/' Makefile

# Set pgvector_compat flag in test files
for file in ./test/sql/*; do
echo 'SET lantern.pgvector_compat=TRUE;' | cat - $file > temp && mv temp $file
done

# Set pgvector_compat flag in result files
for file in ./test/expected/*.out; do
echo 'SET lantern.pgvector_compat=TRUE;' | cat - $file > temp && mv temp $file
done

# Run tests
make installcheck
popd
Expand Down
14 changes: 2 additions & 12 deletions lantern_hnsw/scripts/integration_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ def primary():
node.init()
node.append_conf("enable_seqscan = off")
node.append_conf("maintenance_work_mem = '1GB'")
node.append_conf("lantern.pgvector_compat=FALSE")
node.append_conf("checkpoint_timeout = '100min'")
node.append_conf("min_wal_size = '1GB'")
node.append_conf("checkpoint_completion_target = '0.9'")
Expand Down Expand Up @@ -158,7 +157,6 @@ def generic_vector_query(

dist_with_function = f"{distance_metric}_dist(v, ({query_vector}))"
dist_with_concrete_op = f"v {DIST_OPS[distance_metric]} ({query_vector})"
dist_with_generic_op = f"v <?> ({query_vector})"

query_generator = (
lambda order_by: f"""
Expand All @@ -173,8 +171,6 @@ def generic_vector_query(
return query_generator(dist_with_function)
elif kind == "concrete":
return query_generator(dist_with_concrete_op)
elif kind == "generic":
return query_generator(dist_with_generic_op)


@pytest.mark.parametrize("distance_metric", ["l2sq", "cos"], scope="session")
Expand All @@ -197,9 +193,6 @@ def test_selects(db, setup_copy_table_with_index, distance_metric, quant_bits, r
concrete_op_query = generic_vector_query(
table_name, distance_metric, "concrete", query_vector_id=q_vec_id
)
generic_op_query = generic_vector_query(
table_name, distance_metric, "generic", query_vector_id=q_vec_id
)

exact_explain_query = f"EXPLAIN {exact_query}"
exact_plan = primary.execute("testdb", exact_explain_query)
Expand All @@ -214,7 +207,7 @@ def test_selects(db, setup_copy_table_with_index, distance_metric, quant_bits, r
q_vec_id == exact_res[0][0]
), "First result in exact query result should be the query vector"

for query in [generic_op_query, concrete_op_query]:
for query in [concrete_op_query]:
explain_query = f"EXPLAIN {query}"
plan = primary.execute("testdb", explain_query)
assert f"Index Scan using idx_{table_name}" in str(
Expand Down Expand Up @@ -348,9 +341,6 @@ def test_inserts(setup_copy_table_with_index, distance_metric, quant_bits, reque
concrete_op_query = generic_vector_query(
table_name, distance_metric, "concrete", query_vector_id=q_vec_id
)
generic_op_query = generic_vector_query(
table_name, distance_metric, "generic", query_vector_id=q_vec_id
)

exact_explain_query = f"EXPLAIN {exact_query}"
for db in [primary, replica]:
Expand All @@ -367,7 +357,7 @@ def test_inserts(setup_copy_table_with_index, distance_metric, quant_bits, reque
exact_res[0][0] in inserted_vector_orig_ids[q_vec_id]
), "First result in exact query result should be the query vector"

for query in [generic_op_query, concrete_op_query]:
for query in [concrete_op_query]:
explain_query = f"EXPLAIN {query}"
plan = db.execute("testdb", explain_query)
assert f"Index Scan using idx_{table_name}" in str(
Expand Down
4 changes: 1 addition & 3 deletions lantern_hnsw/src/hnsw.c
Original file line number Diff line number Diff line change
Expand Up @@ -347,9 +347,7 @@ static float8 vector_dist(Vector *a, Vector *b, usearch_metric_kind_t metric_kin
PGDLLEXPORT PG_FUNCTION_INFO_V1(ldb_generic_dist);
Datum ldb_generic_dist(PG_FUNCTION_ARGS)
{
if(ldb_pgvector_compat) {
elog(ERROR, "Operator can only be used when lantern.pgvector_compat=FALSE");
}
elog(ERROR, "Operator <?> is deprecated. Please explicitly use the operator that matches your distance function.");
PG_RETURN_NULL();
}

Expand Down
40 changes: 0 additions & 40 deletions lantern_hnsw/src/hnsw/options.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@
#include <utils/rel.h> // RelationData
#include <utils/syscache.h>

#include "../hooks/executor_start.h"
#include "../hooks/post_parse.h"

// We import this header file
// to access the op class support function pointers
#include "../hnsw.h"
Expand Down Expand Up @@ -54,10 +51,6 @@ int ldb_external_index_port;
char *ldb_external_index_host;
bool ldb_external_index_secure;

// if this variable is set to true
// our operator rewriting hooks will be disabled
bool ldb_pgvector_compat;

// this variable is only set during testing and controls whether
// certain elog() calls are made
// see ldb_dlog() definition and callsites for details
Expand Down Expand Up @@ -366,17 +359,6 @@ void _PG_init(void)
NULL,
NULL);

DefineCustomBoolVariable("lantern.pgvector_compat",
"Whether or not the operator <-> should automatically detect the right distance function",
"set this to 1 to disable operator rewriting hooks",
&ldb_pgvector_compat,
true,
PGC_USERSET,
0,
NULL,
NULL,
NULL);

DefineCustomIntVariable("lantern.external_index_port",
"Port for external indexing",
"Change this value if you run lantern daemon on different port",
Expand Down Expand Up @@ -417,30 +399,8 @@ void _PG_init(void)
MarkGUCPrefixReserved("_lantern_internal");
#endif

original_post_parse_analyze_hook = post_parse_analyze_hook;
original_ExecutorStart_hook = ExecutorStart_hook;

post_parse_analyze_hook = post_parse_analyze_hook_with_operator_check;
ExecutorStart_hook = ExecutorStart_hook_with_operator_check;

#ifndef NDEBUG
signal(SIGSEGV, ldb_wait_for_gdb);
signal(SIGABRT, ldb_wait_for_gdb);
#endif
}

// Called with extension unload.
void _PG_fini(void)
{
// Return back the original hook value.
// This check is because there might be case if while we stop the hooks (in pgvector_compat mode)
// Another extension will be loaded and it will overwrite the hooks
// And when lantern extension will be unloaded it will set the hooks to original values
// Overwriting the current changed hooks set by another extension
if(ExecutorStart_hook == ExecutorStart_hook_with_operator_check) {
ExecutorStart_hook = original_ExecutorStart_hook;
}
if(post_parse_analyze_hook == post_parse_analyze_hook_with_operator_check) {
post_parse_analyze_hook = original_post_parse_analyze_hook;
}
}
1 change: 0 additions & 1 deletion lantern_hnsw/src/hnsw/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ bytea* ldb_amoptions(Datum reloptions, bool validate);
extern int ldb_hnsw_init_k;
extern int ldb_hnsw_ef_search;
extern bool ldb_is_test;
extern bool ldb_pgvector_compat;
extern int ldb_external_index_port;
extern char* ldb_external_index_host;
extern bool ldb_external_index_secure;
Expand Down
109 changes: 0 additions & 109 deletions lantern_hnsw/src/hooks/executor_start.c

This file was deleted.

12 changes: 0 additions & 12 deletions lantern_hnsw/src/hooks/executor_start.h

This file was deleted.

Loading

0 comments on commit bd85111

Please sign in to comment.