Skip to content

Commit

Permalink
Implement splitting internal nodes; add test case
Browse files Browse the repository at this point in the history
  • Loading branch information
Luke Hawthorne committed May 23, 2023
1 parent 9327810 commit 93f67fa
Show file tree
Hide file tree
Showing 2 changed files with 342 additions and 40 deletions.
223 changes: 184 additions & 39 deletions db.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@ const uint32_t EMAIL_OFFSET = USERNAME_OFFSET + USERNAME_SIZE;
const uint32_t ROW_SIZE = ID_SIZE + USERNAME_SIZE + EMAIL_SIZE;

const uint32_t PAGE_SIZE = 4096;
#define TABLE_MAX_PAGES 100
#define TABLE_MAX_PAGES 400

/* Sentinel page number stored in an internal node's child slot to mark it as unset. */
#define INVALID_PAGE_NUM UINT32_MAX

typedef struct {
int file_descriptor;
Expand Down Expand Up @@ -116,7 +118,7 @@ const uint32_t INTERNAL_NODE_CHILD_SIZE = sizeof(uint32_t);
const uint32_t INTERNAL_NODE_CELL_SIZE =
INTERNAL_NODE_CHILD_SIZE + INTERNAL_NODE_KEY_SIZE;
/* Keep this small for testing */
const uint32_t INTERNAL_NODE_MAX_CELLS = 3;
const uint32_t INTERNAL_NODE_MAX_KEYS = 3;

/*
* Leaf Node Header Layout
Expand Down Expand Up @@ -186,9 +188,19 @@ uint32_t* internal_node_child(void* node, uint32_t child_num) {
printf("Tried to access child_num %d > num_keys %d\n", child_num, num_keys);
exit(EXIT_FAILURE);
} else if (child_num == num_keys) {
return internal_node_right_child(node);
uint32_t* right_child = internal_node_right_child(node);
if (*right_child == INVALID_PAGE_NUM) {
printf("Tried to access right child of node, but was invalid page\n");
exit(EXIT_FAILURE);
}
return right_child;
} else {
return internal_node_cell(node, child_num);
uint32_t* child = internal_node_cell(node, child_num);
if (*child == INVALID_PAGE_NUM) {
printf("Tried to access child %d of node, but was invalid page\n", child_num);
exit(EXIT_FAILURE);
}
return child;
}
}

Expand Down Expand Up @@ -216,24 +228,6 @@ void* leaf_node_value(void* node, uint32_t cell_num) {
return leaf_node_cell(node, cell_num) + LEAF_NODE_KEY_SIZE;
}

/*
 * Return the key stored at the last index of `node`.
 *
 * For an internal node this is the key at index num_keys - 1; for a leaf node
 * it is the key of the cell at index num_cells - 1.
 *
 * A node whose type is neither NODE_INTERNAL nor NODE_LEAF is reported and the
 * process exits, instead of falling off the end of a non-void function.
 */
uint32_t get_node_max_key(void* node) {
  switch (get_node_type(node)) {
    case NODE_INTERNAL:
      return *internal_node_key(node, *internal_node_num_keys(node) - 1);
    case NODE_LEAF:
      return *leaf_node_key(node, *leaf_node_num_cells(node) - 1);
  }
  /* Only reachable when the stored node type matches no known case. */
  printf("Unknown node type while looking up max key\n");
  exit(EXIT_FAILURE);
}

/*
 * Print the row and leaf-node layout constants to standard output,
 * one "NAME: value" pair per line.
 */
void print_constants() {
  /* One call; the adjacent literals concatenate into the same six lines. */
  printf(
      "ROW_SIZE: %d\n"
      "COMMON_NODE_HEADER_SIZE: %d\n"
      "LEAF_NODE_HEADER_SIZE: %d\n"
      "LEAF_NODE_CELL_SIZE: %d\n"
      "LEAF_NODE_SPACE_FOR_CELLS: %d\n"
      "LEAF_NODE_MAX_CELLS: %d\n",
      ROW_SIZE,
      COMMON_NODE_HEADER_SIZE,
      LEAF_NODE_HEADER_SIZE,
      LEAF_NODE_CELL_SIZE,
      LEAF_NODE_SPACE_FOR_CELLS,
      LEAF_NODE_MAX_CELLS);
}

void* get_page(Pager* pager, uint32_t page_num) {
if (page_num > TABLE_MAX_PAGES) {
printf("Tried to fetch page number out of bounds. %d > %d\n", page_num,
Expand Down Expand Up @@ -270,6 +264,23 @@ void* get_page(Pager* pager, uint32_t page_num) {
return pager->pages[page_num];
}

/*
 * Return the key of the last cell in the leaf reached by following
 * right-child pointers down from `node`.
 *
 * pager - pager used to load each right child page
 * node  - node to start from; if it is already a leaf, no page is loaded
 */
uint32_t get_node_max_key(Pager* pager, void* node) {
  void* current = node;

  /* Walk down the right-child chain until a leaf is reached. */
  while (get_node_type(current) != NODE_LEAF) {
    uint32_t right_page_num = *internal_node_right_child(current);
    current = get_page(pager, right_page_num);
  }

  uint32_t last_cell = *leaf_node_num_cells(current) - 1;
  return *leaf_node_key(current, last_cell);
}

/*
 * Print the row and leaf-node layout constants to standard output,
 * one "NAME: value" pair per line.
 */
void print_constants() {
  /* One call; the adjacent literals concatenate into the same six lines. */
  printf(
      "ROW_SIZE: %d\n"
      "COMMON_NODE_HEADER_SIZE: %d\n"
      "LEAF_NODE_HEADER_SIZE: %d\n"
      "LEAF_NODE_CELL_SIZE: %d\n"
      "LEAF_NODE_SPACE_FOR_CELLS: %d\n"
      "LEAF_NODE_MAX_CELLS: %d\n",
      ROW_SIZE,
      COMMON_NODE_HEADER_SIZE,
      LEAF_NODE_HEADER_SIZE,
      LEAF_NODE_CELL_SIZE,
      LEAF_NODE_SPACE_FOR_CELLS,
      LEAF_NODE_MAX_CELLS);
}

void indent(uint32_t level) {
for (uint32_t i = 0; i < level; i++) {
printf(" ");
Expand All @@ -294,15 +305,17 @@ void print_tree(Pager* pager, uint32_t page_num, uint32_t indentation_level) {
num_keys = *internal_node_num_keys(node);
indent(indentation_level);
printf("- internal (size %d)\n", num_keys);
for (uint32_t i = 0; i < num_keys; i++) {
child = *internal_node_child(node, i);
if (num_keys > 0) {
for (uint32_t i = 0; i < num_keys; i++) {
child = *internal_node_child(node, i);
print_tree(pager, child, indentation_level + 1);

indent(indentation_level + 1);
printf("- key %d\n", *internal_node_key(node, i));
}
child = *internal_node_right_child(node);
print_tree(pager, child, indentation_level + 1);

indent(indentation_level + 1);
printf("- key %d\n", *internal_node_key(node, i));
}
child = *internal_node_right_child(node);
print_tree(pager, child, indentation_level + 1);
break;
}
}
Expand Down Expand Up @@ -330,6 +343,12 @@ void initialize_internal_node(void* node) {
set_node_type(node, NODE_INTERNAL);
set_node_root(node, false);
*internal_node_num_keys(node) = 0;
/*
Necessary because the root page number is 0; by not initializing an internal
node's right child to an invalid page number when initializing the node, we may
end up with 0 as the node's right child, which makes the node a parent of the root
*/
*internal_node_right_child(node) = INVALID_PAGE_NUM;
}

Cursor* leaf_node_find(Table* table, uint32_t page_num, uint32_t key) {
Expand Down Expand Up @@ -661,22 +680,40 @@ void create_new_root(Table* table, uint32_t right_child_page_num) {
uint32_t left_child_page_num = get_unused_page_num(table->pager);
void* left_child = get_page(table->pager, left_child_page_num);

if (get_node_type(root) == NODE_INTERNAL) {
initialize_internal_node(right_child);
initialize_internal_node(left_child);
}

/* Left child has data copied from old root */
memcpy(left_child, root, PAGE_SIZE);
set_node_root(left_child, false);

if (get_node_type(left_child) == NODE_INTERNAL) {
void* child;
for (int i = 0; i < *internal_node_num_keys(left_child); i++) {
child = get_page(table->pager, *internal_node_child(left_child,i));
*node_parent(child) = left_child_page_num;
}
child = get_page(table->pager, *internal_node_right_child(left_child));
*node_parent(child) = left_child_page_num;
}

/* Root node is a new internal node with one key and two children */
initialize_internal_node(root);
set_node_root(root, true);
*internal_node_num_keys(root) = 1;
*internal_node_child(root, 0) = left_child_page_num;
uint32_t left_child_max_key = get_node_max_key(left_child);
uint32_t left_child_max_key = get_node_max_key(table->pager, left_child);
*internal_node_key(root, 0) = left_child_max_key;
*internal_node_right_child(root) = right_child_page_num;
*node_parent(left_child) = table->root_page_num;
*node_parent(right_child) = table->root_page_num;
}

/*
 * Forward declaration: internal_node_insert() hands off to this function when
 * the parent already holds the maximum number of keys, and this function in
 * turn calls internal_node_insert(), so one of the two must be declared first.
 */
void internal_node_split_and_insert(Table* table, uint32_t parent_page_num,
uint32_t child_page_num);

void internal_node_insert(Table* table, uint32_t parent_page_num,
uint32_t child_page_num) {
/*
Expand All @@ -685,25 +722,39 @@ void internal_node_insert(Table* table, uint32_t parent_page_num,

void* parent = get_page(table->pager, parent_page_num);
void* child = get_page(table->pager, child_page_num);
uint32_t child_max_key = get_node_max_key(child);
uint32_t child_max_key = get_node_max_key(table->pager, child);
uint32_t index = internal_node_find_child(parent, child_max_key);

uint32_t original_num_keys = *internal_node_num_keys(parent);
*internal_node_num_keys(parent) = original_num_keys + 1;

if (original_num_keys >= INTERNAL_NODE_MAX_CELLS) {
printf("Need to implement splitting internal node\n");
exit(EXIT_FAILURE);
if (original_num_keys >= INTERNAL_NODE_MAX_KEYS) {
internal_node_split_and_insert(table, parent_page_num, child_page_num);
return;
}

uint32_t right_child_page_num = *internal_node_right_child(parent);
/*
An internal node with a right child of INVALID_PAGE_NUM is empty
*/
if (right_child_page_num == INVALID_PAGE_NUM) {
*internal_node_right_child(parent) = child_page_num;
return;
}

void* right_child = get_page(table->pager, right_child_page_num);
/*
If we are already at the max number of cells for a node, we cannot increment
before splitting. Incrementing without inserting a new key/child pair
and immediately calling internal_node_split_and_insert has the effect
of creating a new key at (max_cells + 1) with an uninitialized value
*/
*internal_node_num_keys(parent) = original_num_keys + 1;

if (child_max_key > get_node_max_key(right_child)) {
if (child_max_key > get_node_max_key(table->pager, right_child)) {
/* Replace right child */
*internal_node_child(parent, original_num_keys) = right_child_page_num;
*internal_node_key(parent, original_num_keys) =
get_node_max_key(right_child);
get_node_max_key(table->pager, right_child);
*internal_node_right_child(parent) = child_page_num;
} else {
/* Make room for the new cell */
Expand All @@ -722,6 +773,100 @@ void update_internal_node_key(void* node, uint32_t old_key, uint32_t new_key) {
*internal_node_key(node, old_child_index) = new_key;
}

/*
 * Split a full internal node in two and insert a child into one of the halves.
 *
 * table           - table whose pager owns every page touched here
 * parent_page_num - page number of the internal node to split
 * child_page_num  - page number of the child that is being inserted
 *
 * The right child and the upper keys/children of the old node are moved into
 * a node on a freshly allocated page; the child is then inserted into
 * whichever half should contain its max key. When the old node is the root, a
 * new root is created first and the sibling is attached during that step;
 * otherwise the sibling is inserted into the old node's parent at the end
 * (which may recursively split that parent).
 */
void internal_node_split_and_insert(Table* table, uint32_t parent_page_num,
                                    uint32_t child_page_num) {
  uint32_t old_page_num = parent_page_num;
  void* old_node = get_page(table->pager, parent_page_num);
  uint32_t old_max = get_node_max_key(table->pager, old_node);

  void* child = get_page(table->pager, child_page_num);
  uint32_t child_max = get_node_max_key(table->pager, child);

  uint32_t new_page_num = get_unused_page_num(table->pager);

  /*
   * Record whether the root is being split before any pointers change.
   * If it is, the new sibling gets attached while the new root is created.
   * If it is not, the sibling must be inserted into the existing parent only
   * after it has received its keys: that parent may hold other children, and
   * the right index for the sibling cannot be found while it is still empty.
   */
  bool splitting_root = is_node_root(old_node);

  void* parent;
  void* new_node = NULL; /* only used on the non-root path */
  if (splitting_root) {
    create_new_root(table, new_page_num);
    parent = get_page(table->pager, table->root_page_num);
    /*
     * The old root's contents now live in the new root's left child, so
     * retarget old_node there. new_page_num is already the new root's
     * right child.
     */
    old_page_num = *internal_node_child(parent, 0);
    old_node = get_page(table->pager, old_page_num);
  } else {
    parent = get_page(table->pager, *node_parent(old_node));
    new_node = get_page(table->pager, new_page_num);
    initialize_internal_node(new_node);
  }

  uint32_t* old_num_keys = internal_node_num_keys(old_node);

  uint32_t cur_page_num = *internal_node_right_child(old_node);
  void* cur = get_page(table->pager, cur_page_num);

  /*
   * First move the old node's right child into the new node and mark the old
   * node's right child as unset.
   */
  internal_node_insert(table, new_page_num, cur_page_num);
  *node_parent(cur) = new_page_num;
  *internal_node_right_child(old_node) = INVALID_PAGE_NUM;

  /*
   * Move every key/child above the middle key into the new node, highest
   * index first.
   */
  for (uint32_t i = INTERNAL_NODE_MAX_KEYS - 1; i > INTERNAL_NODE_MAX_KEYS / 2;
       i--) {
    cur_page_num = *internal_node_child(old_node, i);
    cur = get_page(table->pager, cur_page_num);

    internal_node_insert(table, new_page_num, cur_page_num);
    *node_parent(cur) = new_page_num;

    (*old_num_keys)--;
  }

  /*
   * The child before the middle key is now the highest one left: make it the
   * old node's right child and drop its key from the count.
   */
  *internal_node_right_child(old_node) =
      *internal_node_child(old_node, *old_num_keys - 1);
  (*old_num_keys)--;

  /*
   * Decide which half should contain the child being inserted, and insert it.
   */
  uint32_t max_after_split = get_node_max_key(table->pager, old_node);
  uint32_t destination_page_num =
      child_max < max_after_split ? old_page_num : new_page_num;

  internal_node_insert(table, destination_page_num, child_page_num);
  *node_parent(child) = destination_page_num;

  update_internal_node_key(parent, old_max,
                           get_node_max_key(table->pager, old_node));

  if (!splitting_root) {
    /*
     * Set the sibling's parent BEFORE inserting it. If the parent is itself
     * full, the insert below recurses into this function, which assigns the
     * sibling's parent to whichever half it lands in. Writing the parent
     * afterwards would overwrite that with old_node's parent, which can be a
     * different page.
     */
    uint32_t old_parent_page_num = *node_parent(old_node);
    *node_parent(new_node) = old_parent_page_num;
    internal_node_insert(table, old_parent_page_num, new_page_num);
  }
}

void leaf_node_split_and_insert(Cursor* cursor, uint32_t key, Row* value) {
/*
Create a new node and move half the cells over.
Expand All @@ -730,7 +875,7 @@ void leaf_node_split_and_insert(Cursor* cursor, uint32_t key, Row* value) {
*/

void* old_node = get_page(cursor->table->pager, cursor->page_num);
uint32_t old_max = get_node_max_key(old_node);
uint32_t old_max = get_node_max_key(cursor->table->pager, old_node);
uint32_t new_page_num = get_unused_page_num(cursor->table->pager);
void* new_node = get_page(cursor->table->pager, new_page_num);
initialize_leaf_node(new_node);
Expand Down Expand Up @@ -772,7 +917,7 @@ void leaf_node_split_and_insert(Cursor* cursor, uint32_t key, Row* value) {
return create_new_root(cursor->table, new_page_num);
} else {
uint32_t parent_page_num = *node_parent(old_node);
uint32_t new_max = get_node_max_key(old_node);
uint32_t new_max = get_node_max_key(cursor->table->pager, old_node);
void* parent = get_page(cursor->table->pager, parent_page_num);

update_internal_node_key(parent, old_max, new_max);
Expand Down
Loading

0 comments on commit 93f67fa

Please sign in to comment.