@@ -107,6 +107,26 @@ const uint32_t PARENT_POINTER_OFFSET = IS_ROOT_OFFSET + IS_ROOT_SIZE;
107
107
const uint8_t COMMON_NODE_HEADER_SIZE =
108
108
NODE_TYPE_SIZE + IS_ROOT_SIZE + PARENT_POINTER_SIZE ;
109
109
110
+ /*
111
+ * Internal Node Header Layout
112
+ */
113
+ const uint32_t INTERNAL_NODE_NUM_KEYS_SIZE = sizeof (uint32_t );
114
+ const uint32_t INTERNAL_NODE_NUM_KEYS_OFFSET = COMMON_NODE_HEADER_SIZE ;
115
+ const uint32_t INTERNAL_NODE_RIGHT_CHILD_SIZE = sizeof (uint32_t );
116
+ const uint32_t INTERNAL_NODE_RIGHT_CHILD_OFFSET =
117
+ INTERNAL_NODE_NUM_KEYS_OFFSET + INTERNAL_NODE_NUM_KEYS_SIZE ;
118
+ const uint32_t INTERNAL_NODE_HEADER_SIZE = COMMON_NODE_HEADER_SIZE +
119
+ INTERNAL_NODE_NUM_KEYS_SIZE +
120
+ INTERNAL_NODE_RIGHT_CHILD_SIZE ;
121
+
122
+ /*
123
+ * Internal Node Body Layout
124
+ */
125
+ const uint32_t INTERNAL_NODE_KEY_SIZE = sizeof (uint32_t );
126
+ const uint32_t INTERNAL_NODE_CHILD_SIZE = sizeof (uint32_t );
127
+ const uint32_t INTERNAL_NODE_CELL_SIZE =
128
+ INTERNAL_NODE_CHILD_SIZE + INTERNAL_NODE_KEY_SIZE ;
129
+
110
130
/*
111
131
* Leaf Node Header Layout
112
132
*/
@@ -127,6 +147,9 @@ const uint32_t LEAF_NODE_CELL_SIZE = LEAF_NODE_KEY_SIZE + LEAF_NODE_VALUE_SIZE;
127
147
const uint32_t LEAF_NODE_SPACE_FOR_CELLS = PAGE_SIZE - LEAF_NODE_HEADER_SIZE ;
128
148
const uint32_t LEAF_NODE_MAX_CELLS =
129
149
LEAF_NODE_SPACE_FOR_CELLS / LEAF_NODE_CELL_SIZE ;
150
+ const uint32_t LEAF_NODE_RIGHT_SPLIT_COUNT = (LEAF_NODE_MAX_CELLS + 1 ) / 2 ;
151
+ const uint32_t LEAF_NODE_LEFT_SPLIT_COUNT =
152
+ (LEAF_NODE_MAX_CELLS + 1 ) - LEAF_NODE_RIGHT_SPLIT_COUNT ;
130
153
131
154
NodeType get_node_type (void * node ) {
132
155
uint8_t value = * ((uint8_t * )(node + NODE_TYPE_OFFSET ));
@@ -138,6 +161,44 @@ void set_node_type(void* node, NodeType type) {
138
161
* ((uint8_t * )(node + NODE_TYPE_OFFSET )) = value ;
139
162
}
140
163
164
+ bool is_node_root (void * node ) {
165
+ uint8_t value = * ((uint8_t * )(node + IS_ROOT_OFFSET ));
166
+ return (bool )value ;
167
+ }
168
+
169
+ void set_node_root (void * node , bool is_root ) {
170
+ uint8_t value = is_root ;
171
+ * ((uint8_t * )(node + IS_ROOT_OFFSET )) = value ;
172
+ }
173
+
174
+ uint32_t * internal_node_num_keys (void * node ) {
175
+ return node + INTERNAL_NODE_NUM_KEYS_OFFSET ;
176
+ }
177
+
178
+ uint32_t * internal_node_right_child (void * node ) {
179
+ return node + INTERNAL_NODE_RIGHT_CHILD_OFFSET ;
180
+ }
181
+
182
+ uint32_t * internal_node_cell (void * node , uint32_t cell_num ) {
183
+ return node + INTERNAL_NODE_HEADER_SIZE + cell_num * INTERNAL_NODE_CELL_SIZE ;
184
+ }
185
+
186
+ uint32_t * internal_node_child (void * node , uint32_t child_num ) {
187
+ uint32_t num_keys = * internal_node_num_keys (node );
188
+ if (child_num > num_keys ) {
189
+ printf ("Tried to access child_num %d > num_keys %d\n" , child_num , num_keys );
190
+ exit (EXIT_FAILURE );
191
+ } else if (child_num == num_keys ) {
192
+ return internal_node_right_child (node );
193
+ } else {
194
+ return internal_node_cell (node , child_num );
195
+ }
196
+ }
197
+
198
+ uint32_t * internal_node_key (void * node , uint32_t key_num ) {
199
+ return internal_node_cell (node , key_num ) + INTERNAL_NODE_CHILD_SIZE ;
200
+ }
201
+
141
202
uint32_t * leaf_node_num_cells (void * node ) {
142
203
return node + LEAF_NODE_NUM_CELLS_OFFSET ;
143
204
}
@@ -154,6 +215,15 @@ void* leaf_node_value(void* node, uint32_t cell_num) {
154
215
return leaf_node_cell (node , cell_num ) + LEAF_NODE_KEY_SIZE ;
155
216
}
156
217
218
+ uint32_t get_node_max_key (void * node ) {
219
+ switch (get_node_type (node )) {
220
+ case NODE_INTERNAL :
221
+ return * internal_node_key (node , * internal_node_num_keys (node ) - 1 );
222
+ case NODE_LEAF :
223
+ return * leaf_node_key (node , * leaf_node_num_cells (node ) - 1 );
224
+ }
225
+ }
226
+
157
227
void print_constants () {
158
228
printf ("ROW_SIZE: %d\n" , ROW_SIZE );
159
229
printf ("COMMON_NODE_HEADER_SIZE: %d\n" , COMMON_NODE_HEADER_SIZE );
@@ -163,32 +233,6 @@ void print_constants() {
163
233
printf ("LEAF_NODE_MAX_CELLS: %d\n" , LEAF_NODE_MAX_CELLS );
164
234
}
165
235
166
- void print_leaf_node (void * node ) {
167
- uint32_t num_cells = * leaf_node_num_cells (node );
168
- printf ("leaf (size %d)\n" , num_cells );
169
- for (uint32_t i = 0 ; i < num_cells ; i ++ ) {
170
- uint32_t key = * leaf_node_key (node , i );
171
- printf (" - %d : %d\n" , i , key );
172
- }
173
- }
174
-
175
- void serialize_row (Row * source , void * destination ) {
176
- memcpy (destination + ID_OFFSET , & (source -> id ), ID_SIZE );
177
- memcpy (destination + USERNAME_OFFSET , & (source -> username ), USERNAME_SIZE );
178
- memcpy (destination + EMAIL_OFFSET , & (source -> email ), EMAIL_SIZE );
179
- }
180
-
181
- void deserialize_row (void * source , Row * destination ) {
182
- memcpy (& (destination -> id ), source + ID_OFFSET , ID_SIZE );
183
- memcpy (& (destination -> username ), source + USERNAME_OFFSET , USERNAME_SIZE );
184
- memcpy (& (destination -> email ), source + EMAIL_OFFSET , EMAIL_SIZE );
185
- }
186
-
187
- void initialize_leaf_node (void * node ) {
188
- set_node_type (node , NODE_LEAF );
189
- * leaf_node_num_cells (node ) = 0 ;
190
- }
191
-
192
236
void * get_page (Pager * pager , uint32_t page_num ) {
193
237
if (page_num > TABLE_MAX_PAGES ) {
194
238
printf ("Tried to fetch page number out of bounds. %d > %d\n" , page_num ,
@@ -225,6 +269,67 @@ void* get_page(Pager* pager, uint32_t page_num) {
225
269
return pager -> pages [page_num ];
226
270
}
227
271
272
/* Prints the indentation string `level` times to stdout. */
void indent(uint32_t level) {
  uint32_t remaining = level;
  while (remaining > 0) {
    printf(" ");
    remaining--;
  }
}
277
+
278
+ void print_tree (Pager * pager , uint32_t page_num , uint32_t indentation_level ) {
279
+ void * node = get_page (pager , page_num );
280
+ uint32_t num_keys , child ;
281
+
282
+ switch (get_node_type (node )) {
283
+ case (NODE_LEAF ):
284
+ num_keys = * leaf_node_num_cells (node );
285
+ indent (indentation_level );
286
+ printf ("- leaf (size %d)\n" , num_keys );
287
+ for (uint32_t i = 0 ; i < num_keys ; i ++ ) {
288
+ indent (indentation_level + 1 );
289
+ printf ("- %d\n" , * leaf_node_key (node , i ));
290
+ }
291
+ break ;
292
+ case (NODE_INTERNAL ):
293
+ num_keys = * internal_node_num_keys (node );
294
+ indent (indentation_level );
295
+ printf ("- internal (size %d)\n" , num_keys );
296
+ for (uint32_t i = 0 ; i < num_keys ; i ++ ) {
297
+ child = * internal_node_child (node , i );
298
+ print_tree (pager , child , indentation_level + 1 );
299
+
300
+ indent (indentation_level + 1 );
301
+ printf ("- key %d\n" , * internal_node_key (node , i ));
302
+ }
303
+ child = * internal_node_right_child (node );
304
+ print_tree (pager , child , indentation_level + 1 );
305
+ break ;
306
+ }
307
+ }
308
+
309
+ void serialize_row (Row * source , void * destination ) {
310
+ memcpy (destination + ID_OFFSET , & (source -> id ), ID_SIZE );
311
+ memcpy (destination + USERNAME_OFFSET , & (source -> username ), USERNAME_SIZE );
312
+ memcpy (destination + EMAIL_OFFSET , & (source -> email ), EMAIL_SIZE );
313
+ }
314
+
315
+ void deserialize_row (void * source , Row * destination ) {
316
+ memcpy (& (destination -> id ), source + ID_OFFSET , ID_SIZE );
317
+ memcpy (& (destination -> username ), source + USERNAME_OFFSET , USERNAME_SIZE );
318
+ memcpy (& (destination -> email ), source + EMAIL_OFFSET , EMAIL_SIZE );
319
+ }
320
+
321
+ void initialize_leaf_node (void * node ) {
322
+ set_node_type (node , NODE_LEAF );
323
+ set_node_root (node , false);
324
+ * leaf_node_num_cells (node ) = 0 ;
325
+ }
326
+
327
+ void initialize_internal_node (void * node ) {
328
+ set_node_type (node , NODE_INTERNAL );
329
+ set_node_root (node , false);
330
+ * internal_node_num_keys (node ) = 0 ;
331
+ }
332
+
228
333
Cursor * table_start (Table * table ) {
229
334
Cursor * cursor = malloc (sizeof (Cursor ));
230
335
cursor -> table = table ;
@@ -342,6 +447,7 @@ Table* db_open(const char* filename) {
342
447
// New database file. Initialize page 0 as leaf node.
343
448
void * root_node = get_page (pager , 0 );
344
449
initialize_leaf_node (root_node );
450
+ set_node_root (root_node , true);
345
451
}
346
452
347
453
return table ;
@@ -427,7 +533,7 @@ MetaCommandResult do_meta_command(InputBuffer* input_buffer, Table* table) {
427
533
exit (EXIT_SUCCESS );
428
534
} else if (strcmp (input_buffer -> buffer , ".btree" ) == 0 ) {
429
535
printf ("Tree:\n" );
430
- print_leaf_node ( get_page ( table -> pager , 0 ) );
536
+ print_tree ( table -> pager , 0 , 0 );
431
537
return META_COMMAND_SUCCESS ;
432
538
} else if (strcmp (input_buffer -> buffer , ".constants" ) == 0 ) {
433
539
printf ("Constants:\n" );
@@ -481,14 +587,96 @@ PrepareResult prepare_statement(InputBuffer* input_buffer,
481
587
return PREPARE_UNRECOGNIZED_STATEMENT ;
482
588
}
483
589
590
+ /*
591
+ Until we start recycling free pages, new pages will always
592
+ go onto the end of the database file
593
+ */
594
+ uint32_t get_unused_page_num (Pager * pager ) { return pager -> num_pages ; }
595
+
596
+ void create_new_root (Table * table , uint32_t right_child_page_num ) {
597
+ /*
598
+ Handle splitting the root.
599
+ Old root copied to new page, becomes left child.
600
+ Address of right child passed in.
601
+ Re-initialize root page to contain the new root node.
602
+ New root node points to two children.
603
+ */
604
+
605
+ void * root = get_page (table -> pager , table -> root_page_num );
606
+ void * right_child = get_page (table -> pager , right_child_page_num );
607
+ uint32_t left_child_page_num = get_unused_page_num (table -> pager );
608
+ void * left_child = get_page (table -> pager , left_child_page_num );
609
+
610
+ /* Left child has data copied from old root */
611
+ memcpy (left_child , root , PAGE_SIZE );
612
+ set_node_root (left_child , false);
613
+
614
+ /* Root node is a new internal node with one key and two children */
615
+ initialize_internal_node (root );
616
+ set_node_root (root , true);
617
+ * internal_node_num_keys (root ) = 1 ;
618
+ * internal_node_child (root , 0 ) = left_child_page_num ;
619
+ uint32_t left_child_max_key = get_node_max_key (left_child );
620
+ * internal_node_key (root , 0 ) = left_child_max_key ;
621
+ * internal_node_right_child (root ) = right_child_page_num ;
622
+ }
623
+
624
+ void leaf_node_split_and_insert (Cursor * cursor , uint32_t key , Row * value ) {
625
+ /*
626
+ Create a new node and move half the cells over.
627
+ Insert the new value in one of the two nodes.
628
+ Update parent or create a new parent.
629
+ */
630
+
631
+ void * old_node = get_page (cursor -> table -> pager , cursor -> page_num );
632
+ uint32_t new_page_num = get_unused_page_num (cursor -> table -> pager );
633
+ void * new_node = get_page (cursor -> table -> pager , new_page_num );
634
+ initialize_leaf_node (new_node );
635
+
636
+ /*
637
+ All existing keys plus new key should should be divided
638
+ evenly between old (left) and new (right) nodes.
639
+ Starting from the right, move each key to correct position.
640
+ */
641
+ for (int32_t i = LEAF_NODE_MAX_CELLS ; i >= 0 ; i -- ) {
642
+ void * destination_node ;
643
+ if (i >= LEAF_NODE_LEFT_SPLIT_COUNT ) {
644
+ destination_node = new_node ;
645
+ } else {
646
+ destination_node = old_node ;
647
+ }
648
+ uint32_t index_within_node = i % LEAF_NODE_LEFT_SPLIT_COUNT ;
649
+ void * destination = leaf_node_cell (destination_node , index_within_node );
650
+
651
+ if (i == cursor -> cell_num ) {
652
+ serialize_row (value , destination );
653
+ } else if (i > cursor -> cell_num ) {
654
+ memcpy (destination , leaf_node_cell (old_node , i - 1 ), LEAF_NODE_CELL_SIZE );
655
+ } else {
656
+ memcpy (destination , leaf_node_cell (old_node , i ), LEAF_NODE_CELL_SIZE );
657
+ }
658
+ }
659
+
660
+ /* Update cell count on both leaf nodes */
661
+ * (leaf_node_num_cells (old_node )) = LEAF_NODE_LEFT_SPLIT_COUNT ;
662
+ * (leaf_node_num_cells (new_node )) = LEAF_NODE_RIGHT_SPLIT_COUNT ;
663
+
664
+ if (is_node_root (old_node )) {
665
+ return create_new_root (cursor -> table , new_page_num );
666
+ } else {
667
+ printf ("Need to implement updating parent after split\n" );
668
+ exit (EXIT_FAILURE );
669
+ }
670
+ }
671
+
484
672
void leaf_node_insert (Cursor * cursor , uint32_t key , Row * value ) {
485
673
void * node = get_page (cursor -> table -> pager , cursor -> page_num );
486
674
487
675
uint32_t num_cells = * leaf_node_num_cells (node );
488
676
if (num_cells >= LEAF_NODE_MAX_CELLS ) {
489
677
// Node full
490
- printf ( "Need to implement splitting a leaf node.\n" );
491
- exit ( EXIT_FAILURE ) ;
678
+ leaf_node_split_and_insert ( cursor , key , value );
679
+ return ;
492
680
}
493
681
494
682
if (cursor -> cell_num < num_cells ) {
@@ -507,9 +695,6 @@ void leaf_node_insert(Cursor* cursor, uint32_t key, Row* value) {
507
695
ExecuteResult execute_insert (Statement * statement , Table * table ) {
508
696
void * node = get_page (table -> pager , table -> root_page_num );
509
697
uint32_t num_cells = (* leaf_node_num_cells (node ));
510
- if (num_cells >= LEAF_NODE_MAX_CELLS ) {
511
- return EXECUTE_TABLE_FULL ;
512
- }
513
698
514
699
Row * row_to_insert = & (statement -> row_to_insert );
515
700
uint32_t key_to_insert = row_to_insert -> id ;
0 commit comments