Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

create index on empty table #37

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ Below is a short recording demonstrating the use of `livedebug.py`:

[![asciicast](https://asciinema.org/a/jTsbWdOcTvUl4iAJlAw3Cszbt.svg)](https://asciinema.org/a/jTsbWdOcTvUl4iAJlAw3Cszbt)

To lint the code, install clang-format version 14 and run `make format`.

## Adding/modifying LanternDB's SQL interface

When modifying the SQL interface, you add relevant SQL logic under `sql/`. In addition, you add an update script
Expand Down
2 changes: 0 additions & 2 deletions src/hnsw.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#ifndef LDB_HNSW_H
#define LDB_HNSW_H

#include "postgres.h"

#if PG_VERSION_NUM < 110000
#error "Requires PostgreSQL 11+"
#endif
Expand Down
1 change: 1 addition & 0 deletions src/hnsw/build.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "postgres.h"

#include <access/genam.h> // IndexUniqueCheck
#include <assert.h>
#include <catalog/index.h>
#include <catalog/namespace.h>
Expand Down
84 changes: 52 additions & 32 deletions src/hnsw/external_index.c
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@

#include "postgres.h"
#include <postgres.h>

#include "external_index.h"

#include <access/genam.h> // IndexUniqueCheck
#include <access/generic_xlog.h> // GenericXLog
#include <access/generic_xlog.h>
#include <assert.h>
#include <common/relpath.h> // ForkNumber
#include <nodes/execnodes.h> // IndexInfo
#include <storage/bufmgr.h> // Buffer
#include <utils/hsearch.h>
#include <utils/relcache.h> // Relation

#include "cache.h"
#include "common/relpath.h"
Expand All @@ -17,6 +22,23 @@

static Cache wal_retriever_block_numbers_cache;

Relation INDEX_RELATION_FOR_RETRIEVER;
HnswIndexHeaderPage HEADER_FOR_EXTERNAL_RETRIEVER;
Buffer *EXTRA_DIRTIED;
Page *EXTRA_DIRTIED_PAGE;
int EXTRA_DIRTIED_SIZE = 0;

#if LANTERNDB_COPYNODES
static char *wal_retriever_area = NULL;
static int wal_retriever_area_size = 0;
static int wal_retriever_area_offset = 0;
#else

#define TAKENBUFFERS_MAX 1000
static Buffer *takenbuffers;
static int takenbuffers_next = 0;
#endif

int UsearchNodeBytes(usearch_metadata_t *metadata, int vector_bytes, int level)
{
const int NODE_HEAD_BYTES = sizeof(usearch_label_t) + 4 /*sizeof dim */ + 4 /*sizeof level*/;
Expand Down Expand Up @@ -71,6 +93,11 @@ int CreateBlockMapGroup(
Buffer buf = ReadBufferExtended(index, forkNum, P_NEW, RBM_NORMAL, NULL);
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

if(blockmap_id == 0) {
hdr->blockmap_page_groups = blockmap_groupno;
hdr->blockmap_page_group_index[ blockmap_groupno ] = BufferGetBlockNumber(buf);
}

Page page = GenericXLogRegisterBuffer(state, buf, GENERIC_XLOG_FULL_IMAGE);
PageInit(page, BufferGetPageSize(buf), sizeof(HnswIndexPageSpecialBlock));

Expand All @@ -91,10 +118,7 @@ int CreateBlockMapGroup(

special->lastId = first_node_index + (blockmap_id + 1) * HNSW_BLOCKMAP_BLOCKS_PER_PAGE - 1;
special->nextblockno = BufferGetBlockNumber(buf) + 1;
if(blockmap_id == 0) {
hdr->blockmap_page_groups = blockmap_groupno;
hdr->blockmap_page_group_index[ blockmap_groupno ] = BufferGetBlockNumber(buf);
}

MarkBufferDirty(buf);
GenericXLogFinish(state);
UnlockReleaseBuffer(buf);
Expand Down Expand Up @@ -233,7 +257,7 @@ void StoreExternalIndexBlockMapGroup(Relation index,
GenericXLogFinish(state);
UnlockReleaseBuffer(buf);
}
CreateHeaderPage(index, data, forkNum, dimension, -1, last_block, last_block, true);
CreateHeaderPage(index, data, forkNum, dimension, -1, last_block, true);

// Update blockmap pages with correct associations
for(int blockmap_id = 0; blockmap_id < number_of_blockmaps_in_group; ++blockmap_id) {
Expand Down Expand Up @@ -273,7 +297,7 @@ void StoreExternalIndex(Relation index,
// header page is created twice. it is always at block=0 so the second time just overrides it
// it is added here to make sure a data block does not get to block=0.
// after some sleep I will probably find a better way to do this
CreateHeaderPage(index, data, forkNum, dimension, num_added_vectors, -1, -1, false);
CreateHeaderPage(index, data, forkNum, dimension, num_added_vectors, -1, false);

uint32 number_of_index_pages = num_added_vectors / HNSW_BLOCKMAP_BLOCKS_PER_PAGE + 1;
int group_node_first_index = 0;
Expand Down Expand Up @@ -311,7 +335,6 @@ void CreateHeaderPage(Relation index,
uint32 vector_dim,
uint32 num_vectors,
BlockNumber last_data_block,
uint32 num_blocks,
bool update)
{
Buffer buf;
Expand Down Expand Up @@ -355,7 +378,6 @@ void CreateHeaderPage(Relation index,
// headerp->blockmap_page_group_index and blockmap_page_groups are
// updated in a separate wal entry
headerp->last_data_block = last_data_block;
headerp->num_blocks = num_blocks;

memcpy(headerp->usearch_header, usearchHeader64, 64);
((PageHeader)page)->pd_lower = ((char *)headerp + sizeof(HnswIndexHeaderPage)) - (char *)page;
Expand All @@ -367,23 +389,6 @@ void CreateHeaderPage(Relation index,
UnlockReleaseBuffer(buf);
}

Relation INDEX_RELATION_FOR_RETRIEVER;
HnswIndexHeaderPage HEADER_FOR_EXTERNAL_RETRIEVER;
Buffer *EXTRA_DIRTIED;
Page *EXTRA_DIRTIED_PAGE;
int EXTRA_DIRTIED_SIZE = 0;

#if LANTERNDB_COPYNODES
static char *wal_retriever_area = NULL;
static int wal_retriever_area_size = 0;
static int wal_retriever_area_offset = 0;
#else

#define TAKENBUFFERS_MAX 1000
static Buffer *takenbuffers;
static int takenbuffers_next = 0;
#endif

void ldb_wal_retriever_area_init(int size)
{
#if LANTERNDB_COPYNODES
Expand All @@ -398,8 +403,8 @@ void ldb_wal_retriever_area_init(int size)
}
#endif

if(HEADER_FOR_EXTERNAL_RETRIEVER.num_vectors <= 0) {
elog(ERROR, "ldb_wal_retriever_area_init called with num_vectors <= 0");
if(HEADER_FOR_EXTERNAL_RETRIEVER.num_vectors < 0) {
elog(ERROR, "ldb_wal_retriever_area_init called with num_vectors < 0");
}
/* fill in a buffer with blockno index information, before spilling it to disk */
wal_retriever_block_numbers_cache = cache_create();
Expand Down Expand Up @@ -487,9 +492,24 @@ HnswIndexTuple *PrepareIndexTuple(Relation index_rel,
* (create new page if necessary) ***/

if(hdr->last_data_block == InvalidBlockNumber) {
elog(ERROR, "inserting into an empty table not supported");
// index is created on the empty table.
// allocate the first page here
CreateBlockMapGroup(hdr, index_rel, MAIN_FORKNUM, 0, 0);
new_dblock = ReadBufferExtended(index_rel, MAIN_FORKNUM, P_NEW, RBM_NORMAL, NULL);
LockBuffer(new_dblock, BUFFER_LOCK_EXCLUSIVE);
new_vector_blockno = BufferGetBlockNumber(new_dblock);

// todo:: add a failure point in here for tests and make sure new_dblock is not leaked
hdr->last_data_block = new_vector_blockno;

// 4.
page = GenericXLogRegisterBuffer(state, new_dblock, LDB_GENERIC_XLOG_DELTA_IMAGE);
PageInit(page, BufferGetPageSize(new_dblock), sizeof(HnswIndexPageSpecialBlock));

(*extra_dirtied)[ (*extra_dirtied_size)++ ] = new_dblock;
(*extra_dirtied_page)[ (*extra_dirtied_size) - 1 ] = page;

new_tup_at = HnswIndexPageAddVector(page, alloced_tuple, alloced_tuple->size);

MarkBufferDirty(new_dblock);
} else {
last_dblock = ReadBufferExtended(index_rel, MAIN_FORKNUM, hdr->last_data_block, RBM_NORMAL, NULL);
for(int i = 0; i < *extra_dirtied_size; i++) {
Expand Down
10 changes: 1 addition & 9 deletions src/hnsw/external_index.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#ifndef LDB_HNSW_EXTERNAL_INDEX_H
#define LDB_HNSW_EXTERNAL_INDEX_H

#include "postgres.h"

#include <access/generic_xlog.h>
#include <common/relpath.h> // ForkNumber
#include <storage/bufmgr.h> // Buffer
Expand All @@ -26,15 +24,10 @@ typedef struct HnswIndexHeaderPage
{
uint32 magicNumber;
uint32 version;

uint32 vector_dim;
uint32 num_vectors;
// todo:: switch these to BlockNumber for documentation
// first data block is needed because, in case of creating an index on an empty table, it is no
// longer headerblockno + 1
uint32 last_data_block;
uint32 blockno_index_start;
// todo:: get rid of this
uint32 num_blocks;
char usearch_header[ 64 ];

uint32 blockmap_page_groups;
Expand Down Expand Up @@ -91,7 +84,6 @@ void CreateHeaderPage(Relation index,
uint32 vector_dim,
uint32 num_vectors,
BlockNumber last_data_block,
uint32 num_blocks,
bool update);

void StoreExternalIndex(Relation index,
Expand Down
3 changes: 3 additions & 0 deletions src/hnsw/insert.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@

#include "insert.h"

#include <access/genam.h> // IndexUniqueCheck
#include <access/generic_xlog.h>
#include <assert.h>
#include <float.h>
#include <nodes/execnodes.h> // IndexInfo
#include <storage/bufmgr.h>
#include <utils/array.h>
#include <utils/memutils.h>
#include <utils/rel.h>
#include <utils/relcache.h>

Expand Down
4 changes: 0 additions & 4 deletions src/hnsw/insert.h
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
#ifndef LDB_HNSW_INSERT_H
#define INSERT_H
#include <access/genam.h> // IndexUniqueCheck
#include <nodes/execnodes.h> // IndexInfo
#include <storage/bufmgr.h>
#include <utils/memutils.h>

// #define LDB_HNSW_INSERT_MAX_EXTRA_DIRTIED_BUFS 5
// this now includes buffers dirtied by the usearch
Expand Down
3 changes: 2 additions & 1 deletion src/hnsw/options.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

#include <access/reloptions.h>
#include <utils/guc.h>
#include <utils/rel.h> // RelationData
#include <utils/rel.h> // RelationData
#include <utils/relcache.h> // Relation

// reloption for lanterndb hnsw index creation parameters in
// CREATE INDEX ... WITH (...)
Expand Down
3 changes: 0 additions & 3 deletions src/hnsw/options.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
#ifndef LDB_HNSW_OPTIONS_H
#define LDB_HNSW_OPTIONS_H
#include <postgres.h>

#include <utils/relcache.h> // Relation

// todo:: add hnsw dynamic vector dimension constraints
// based on vector element size
Expand Down
4 changes: 4 additions & 0 deletions src/hnsw/scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,18 @@

#include "scan.h"

#include <access/reloptions.h>
#include <access/relscan.h>
#include <assert.h>
#include <pgstat.h>
#include <utils/rel.h>

#include "build.h"
#include "external_index.h"
#include "hnsw.h"
#include "lib_interface.h"
#include "options.h"
#include "usearch.h"
#include "vector.h"

PG_MODULE_MAGIC;
Expand Down
7 changes: 0 additions & 7 deletions src/hnsw/scan.h
Original file line number Diff line number Diff line change
@@ -1,12 +1,5 @@
#ifndef LDB_HNSW_SCAN_H
#define LDB_HNSW_SCAN_H
#include <postgres.h>

#include <access/reloptions.h>
#include <assert.h>

#include "usearch.h"
#include "lib_interface.h"

typedef struct HnswScanState
{
Expand Down
2 changes: 2 additions & 0 deletions src/hnsw/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

#include "utils.h"

#include <utils/relcache.h>

#include "hnsw.h"
#include "options.h"
#include "usearch.h"
Expand Down
3 changes: 0 additions & 3 deletions src/hnsw/utils.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
#ifndef LDB_HNSW_UTILS_H
#define LDB_HNSW_UTILS_H
#include <utils/relcache.h>

#include "usearch.h"

void LogUsearchOptions(usearch_init_options_t *opts);
void PopulateUsearchOpts(Relation index, usearch_init_options_t *opts);
Expand Down
23 changes: 23 additions & 0 deletions test/expected/hnsw_insert_empty_table.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
CREATE EXTENSION IF NOT EXISTS lanterndb;
CREATE EXTENSION
CREATE TABLE small_world (
id varchar(3),
vector real[]
);
CREATE TABLE
CREATE INDEX ON small_world USING hnsw (vector);
psql:test/sql/hnsw_insert_empty_table.sql:8: INFO: done init usearch index
psql:test/sql/hnsw_insert_empty_table.sql:8: INFO: inserted 0 elements
psql:test/sql/hnsw_insert_empty_table.sql:8: INFO: done saving 0 vectors
CREATE INDEX
SELECT * FROM small_world WHERE 1=1;
id | vector
----+--------
(0 rows)

INSERT INTO small_world (id, vector) VALUES ('xxx', '{0,0,0}');
INSERT 0 1
INSERT INTO small_world (id, vector) VALUES ('xxx', NULL);
INSERT 0 1
INSERT INTO small_world (id, vector) VALUES ('xxx', '{1,1,1,1}');
psql:test/sql/hnsw_insert_empty_table.sql:17: ERROR: Wrong number of dimensions: 4 instead of 3 expected
18 changes: 18 additions & 0 deletions test/sql/hnsw_insert_empty_table.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
CREATE EXTENSION IF NOT EXISTS lanterndb;

CREATE TABLE small_world (
id varchar(3),
vector real[]
);

CREATE INDEX ON small_world USING hnsw (vector);

SELECT * FROM small_world WHERE 1=1;

INSERT INTO small_world (id, vector) VALUES ('xxx', '{0,0,0}');

-- make sure NULL inserts into the index are handled correctly
INSERT INTO small_world (id, vector) VALUES ('xxx', NULL);
\set ON_ERROR_STOP off
INSERT INTO small_world (id, vector) VALUES ('xxx', '{1,1,1,1}');
\set ON_ERROR_STOP on
Loading