From 22f68b1aa053f9d1f56599500faff87f2a12a731 Mon Sep 17 00:00:00 2001 From: Seph Gentle Date: Sat, 20 Jul 2024 09:20:55 +1000 Subject: [PATCH] Changed DT-CRDT to use faster btree impl. Changed run-on-old to merge delete operations. Added stats tracking. Perf increased. Disabled positional updates & OT code in DT-CRDT - see dt-crdt-fully-working branch for older version of the code. --- crates/diamond-types-crdt/Cargo.toml | 1 + crates/diamond-types-crdt/src/crdtspan.rs | 4 +- crates/diamond-types-crdt/src/lib.rs | 4 + crates/diamond-types-crdt/src/list/check.rs | 54 +- crates/diamond-types-crdt/src/list/doc.rs | 409 ++-- .../src/list/encoding/mod.rs | 2 +- .../src/list/encoding/patch_encoding.rs | 53 +- crates/diamond-types-crdt/src/list/eq.rs | 4 +- .../src/list/external_txn.rs | 32 +- crates/diamond-types-crdt/src/list/markers.rs | 240 +- .../src/list/merge_positional.rs | 528 ++--- crates/diamond-types-crdt/src/list/mod.rs | 77 +- .../src/list/ot/positionmap.rs | 268 +-- crates/diamond-types-crdt/src/list/span.rs | 62 +- crates/diamond-types-crdt/src/list/stats.rs | 60 + .../src/list/time/docpatchiter.rs | 508 ++-- .../src/list/time/external_patches.rs | 14 +- .../src/list/time/history.rs | 2 +- .../src/list/time/positionmap.rs | 1400 ++++++----- .../src/ost/content_tree.rs | 2097 +++++++++++++++++ crates/diamond-types-crdt/src/ost/ct_old.rs | 419 ++++ .../diamond-types-crdt/src/ost/index_tree.rs | 1902 +++++++++++++++ crates/diamond-types-crdt/src/ost/mod.rs | 221 ++ .../src/ost/recording_index_tree.rs | 106 + crates/diamond-types-crdt/tests/fuzzer.rs | 32 +- crates/diamond-types-crdt/tests/realworld.rs | 18 +- .../tests/realworld_positional.rs | 162 +- crates/run_on_old/Cargo.toml | 3 +- crates/run_on_old/src/main.rs | 33 +- src/ost/content_tree.rs | 31 + 30 files changed, 6903 insertions(+), 1843 deletions(-) create mode 100644 crates/diamond-types-crdt/src/list/stats.rs create mode 100644 crates/diamond-types-crdt/src/ost/content_tree.rs create mode 100644 
crates/diamond-types-crdt/src/ost/ct_old.rs create mode 100644 crates/diamond-types-crdt/src/ost/index_tree.rs create mode 100644 crates/diamond-types-crdt/src/ost/mod.rs create mode 100644 crates/diamond-types-crdt/src/ost/recording_index_tree.rs diff --git a/crates/diamond-types-crdt/Cargo.toml b/crates/diamond-types-crdt/Cargo.toml index 3b2c451..80995da 100644 --- a/crates/diamond-types-crdt/Cargo.toml +++ b/crates/diamond-types-crdt/Cargo.toml @@ -40,6 +40,7 @@ json_minimal = "0.1.3" memusage = [] inlinerope = [] serde = ["dep:serde", "smallvec/serde", "smartstring/serde"] +stats = [] [lib] bench = false diff --git a/crates/diamond-types-crdt/src/crdtspan.rs b/crates/diamond-types-crdt/src/crdtspan.rs index 32ac977..6daa04b 100644 --- a/crates/diamond-types-crdt/src/crdtspan.rs +++ b/crates/diamond-types-crdt/src/crdtspan.rs @@ -1,7 +1,7 @@ +use content_tree::ContentLength; use diamond_core_old::CRDTId; use rle::{HasLength, MergableSpan, SplitableSpanHelpers}; -use content_tree::ContentLength; use rle::Searchable; #[derive(Debug, Copy, Clone, Default, Eq, PartialEq)] @@ -20,7 +20,7 @@ impl Searchable for CRDTSpan { if self.loc.agent == loc.agent && loc.seq >= self.loc.seq && loc.seq < self.loc.seq + self.len { - Some((loc.seq - self.loc.seq) as usize) + Some(loc.seq - self.loc.seq) } else { None } } diff --git a/crates/diamond-types-crdt/src/lib.rs b/crates/diamond-types-crdt/src/lib.rs index 286fbcc..8fd05d4 100644 --- a/crates/diamond-types-crdt/src/lib.rs +++ b/crates/diamond-types-crdt/src/lib.rs @@ -12,6 +12,10 @@ mod unicount; mod crdtspan; mod rangeextra; mod dtrange; +mod ost; + +#[cfg(feature = "stats")] +pub use list::stats::take_stats; // TODO: Move this somewhere else. 
pub fn root_id() -> RemoteId { diff --git a/crates/diamond-types-crdt/src/list/check.rs b/crates/diamond-types-crdt/src/list/check.rs index 0bc7a23..7a2df7b 100644 --- a/crates/diamond-types-crdt/src/list/check.rs +++ b/crates/diamond-types-crdt/src/list/check.rs @@ -2,6 +2,7 @@ use jumprope::JumpRope; use crate::list::{ListCRDT, ROOT_LV}; use rle::HasLength; use smallvec::{SmallVec, smallvec}; +use crate::ost::content_tree::Content; /// This file contains debugging assertions to validate the document's internal state. /// @@ -21,8 +22,10 @@ impl ListCRDT { pub fn check(&self, deep: bool) { // self.index.check(); + self.range_tree.dbg_check(); + if let Some(text) = self.text_content.as_ref() { - assert_eq!(self.range_tree.content_len() as usize, text.len_chars()); + assert_eq!(self.range_tree.total_len() as usize, text.len_chars()); let num_deleted_items = self.deletes.iter().fold(0, |x, y| x + y.len()); if let Some(del_content) = self.deleted_content.as_ref() { @@ -30,27 +33,24 @@ impl ListCRDT { } } - let mut cursor = self.range_tree.cursor_at_start(); - loop { - // The call to cursor.next() places the cursor at the next item before returning. - let this_cursor = cursor.clone(); - - if let Some(e) = cursor.next() { // Iterating manually for the borrow checker. - // Each item's ID should come after its origin left and right - assert!(e.origin_left == ROOT_LV || e.lv > e.origin_left); - assert!(e.origin_right == ROOT_LV || e.lv > e.origin_right); - assert_ne!(e.len, 0); - - if deep { - // Also check that the origin left appears before this entry, and origin right - // appears after it. 
- let left = self.get_cursor_after(e.origin_left, true); - assert!(left <= this_cursor); - - let right = self.get_cursor_before(e.origin_right); - assert!(this_cursor < right); - } - } else { break; } + for e in self.range_tree.iter() { + // Each item's ID should come after its origin left and right + assert!(e.origin_left == ROOT_LV || e.lv > e.origin_left); + if e.origin_left != ROOT_LV || e.origin_right != ROOT_LV { + assert_ne!(e.origin_left, e.origin_right); + } + assert!(e.origin_right == ROOT_LV || e.lv > e.origin_right); + assert_ne!(e.len, 0); + + // if deep { + // // Also check that the origin left appears before this entry, and origin right + // // appears after it. + // let left = self.get_cursor_after(e.origin_left, true); + // assert!(left <= this_cursor); + // + // let right = self.get_cursor_before(e.origin_right); + // assert!(this_cursor < right); + // } } if deep { @@ -61,9 +61,13 @@ impl ListCRDT { fn check_index(&self) { // Go through each entry in the range tree and make sure we can find it using the index. 
- for entry in self.range_tree.raw_iter() { - let marker = self.marker_at(entry.lv); - unsafe { marker.as_ref() }.find(entry.lv).unwrap(); + for (leaf_idx, items) in self.range_tree.iter_leaves() { + for e in items { + if !e.exists() { break; } + + let marker = self.marker_at(e.lv); + assert_eq!(marker.0, leaf_idx); + } } } diff --git a/crates/diamond-types-crdt/src/list/doc.rs b/crates/diamond-types-crdt/src/list/doc.rs index 7612d20..ce58c6d 100644 --- a/crates/diamond-types-crdt/src/list/doc.rs +++ b/crates/diamond-types-crdt/src/list/doc.rs @@ -1,24 +1,33 @@ -use crate::list::*; -// use crate::content_tree::*; -use smallvec::smallvec; -use std::ptr::NonNull; -use rle::{MergeableIterator, HasLength}; use std::cmp::Ordering; -use crate::rle::RleVec; use std::mem::replace; -use crate::list::external_txn::{RemoteTxn, RemoteCRDTOp}; -use crate::unicount::{split_at_char, count_chars, consume_chars}; -use crate::list::ot::traversal::TraversalOp; -use crate::list::ot::transform; +use std::ops::Range; + +use humansize::{file_size_opts, FileSize}; +// use crate::content_tree::*; +use smallvec::smallvec; + use diamond_core_old::*; -use crate::crdtspan::CRDTSpan; +use rle::{HasLength, MergeableIterator, RleRun}; use rle::Searchable; + +use crate::crdtspan::CRDTSpan; +use crate::list::*; use crate::list::branch::advance_branch_by_known; -use std::ops::Range; -use humansize::{file_size_opts, FileSize}; -use crate::list::positional::InsDelTag::*; +use crate::list::external_txn::{RemoteCRDTOp, RemoteTxn}; +use crate::list::InsDelTag::{Del, Ins}; +use crate::list::ot::transform; +use crate::list::ot::traversal::TraversalOp; +use crate::list::positional::PositionalOpRef; +use crate::list::stats::{marker_a, marker_b, marker_c}; +// use crate::list::ot::transform; +// use crate::list::ot::traversal::TraversalOp; +// use crate::list::positional::{PositionalComponent, PositionalOpRef}; +// use crate::list::positional::InsDelTag::*; +use crate::ost::*; +use 
crate::ost::content_tree::{Content, ContentCursor, DeltaCursor}; use crate::rangeextra::OrderRange; -use crate::list::positional::{PositionalComponent, PositionalOpRef}; +use crate::rle::RleVec; +use crate::unicount::{consume_chars, count_chars, split_at_char}; impl ClientData { pub fn get_next_seq(&self) -> usize { @@ -41,9 +50,9 @@ impl ClientData { } } -pub(super) fn notify_for(index: &mut SpaceIndex) -> impl FnMut(YjsSpan, NonNull>) + '_ { +pub(super) fn notify_for<'a>(index: &'a mut SpaceIndex) -> impl FnMut(YjsSpan, LeafIdx) + 'a { move |entry: YjsSpan, leaf| { - index.set_range_2(entry.lv..entry.lv + entry.len(), Marker(Some(leaf))); + index.set_range_2(entry.lv..entry.lv + entry.len(), Marker(leaf)); // let mut len = entry.len(); // let mut lv = entry.lv; @@ -64,22 +73,14 @@ pub(super) fn notify_for(index: &mut SpaceIndex) -> impl FnMut(YjsSpan, NonNull< impl Clone for ListCRDT { fn clone(&self) -> Self { - // Clone is complex because we need to walk the b-tree. Cloning the b-tree could probably - // be done in a more efficient way, but this is honestly fine. - let mut range_tree = ContentTreeRaw::new(); - let mut index = IndexTree::new(); - - let mut cursor = range_tree.mut_cursor_at_start(); - for e in self.range_tree.iter() { - cursor.insert_notify(e, notify_for(&mut index)); - } - let result = ListCRDT { frontier: self.frontier.clone(), client_with_time: self.client_with_time.clone(), client_data: self.client_data.clone(), - range_tree, - index, + + // This is fine. 
+ range_tree: self.range_tree.clone(), + index: self.index.clone(), deletes: self.deletes.clone(), double_deletes: self.double_deletes.clone(), txns: self.txns.clone(), @@ -99,7 +100,7 @@ impl ListCRDT { frontier: smallvec![ROOT_LV], client_data: vec![], - range_tree: ContentTreeRaw::new(), + range_tree: ContentTree::new(), index: IndexTree::new(), // index: SplitList::new(), @@ -201,60 +202,114 @@ impl ListCRDT { &self.frontier } - pub(super) fn marker_at(&self, time: LV) -> NonNull> { - self.index.get_entry(time).val.0.unwrap() + pub(super) fn marker_at(&self, time: LV) -> Marker { + self.index.get_entry(time).val // let cursor = self.index.cursor_at_offset_pos(time as usize, false); // // Gross. // cursor.get_item().unwrap().unwrap() } - pub(crate) fn get_unsafe_cursor_before(&self, time: LV) -> UnsafeCursor { - if time == ROOT_LV { - // Or maybe we should just abort? - self.range_tree.unsafe_cursor_at_end() - } else { - let marker = self.marker_at(time); - unsafe { - ContentTreeRaw::unsafe_cursor_before_item(time, marker) - } - } - } + // pub(crate) fn get_unsafe_cursor_before(&self, time: LV) -> UnsafeCursor { + // if time == ROOT_LV { + // // Or maybe we should just abort? + // self.range_tree.unsafe_cursor_at_end() + // } else { + // let marker = self.marker_at(time); + // unsafe { + // ContentTreeRaw::unsafe_cursor_before_item(time, marker) + // } + // } + // } #[inline(always)] - pub(crate) fn get_cursor_before(&self, time: LV) -> Cursor { - unsafe { Cursor::unchecked_from_raw(&self.range_tree, self.get_unsafe_cursor_before(time)) } + pub(crate) fn get_cursor_before(&self, lv: LV) -> ContentCursor { + if lv == usize::MAX { + // This case doesn't seem to ever get hit by the fuzzer. It might be equally correct to + // just panic() here. 
+ self.range_tree.cursor_at_end() + // panic!() + } else { + // self.check(true); + let leaf_idx = self.marker_at(lv); + self.range_tree.cursor_before_item(lv, leaf_idx.0) + } + // unsafe { Cursor::unchecked_from_raw(&self.range_tree, self.get_unsafe_cursor_before(time)) } } + #[inline(always)] - pub(crate) fn get_mut_cursor_before(&mut self, time: LV) -> MutCursor { - let unsafe_cursor = self.get_unsafe_cursor_before(time); - unsafe { MutCursor::unchecked_from_raw(&mut self.range_tree, unsafe_cursor) } + pub(crate) fn get_mut_cursor_before(&mut self, lv: LV) -> DeltaCursor { + if lv == usize::MAX { + // This case doesn't seem to ever get hit by the fuzzer. It might be equally correct to + // just panic() here. + self.range_tree.mut_cursor_at_end() + // panic!() + } else { + let leaf_idx = self.marker_at(lv); + // marker_a(); + self.range_tree.mut_cursor_before_item(lv, leaf_idx.0) + } } - // This does not stick_end to the found item. - pub(super) fn get_unsafe_cursor_after(&self, time: LV, stick_end: bool) -> UnsafeCursor { - if time == ROOT_LV { - self.range_tree.unsafe_cursor_at_start() + fn get_cursor_after(&self, lv: LV, stick_end: bool) -> ContentCursor { + if lv == usize::MAX { + self.range_tree.cursor_at_start_nothing_emplaced() } else { - let marker = self.marker_at(time); + let leaf_idx = self.marker_at(lv).0; // let marker: NonNull> = self.markers.at(order as usize).unwrap(); // self.content_tree. - let mut cursor = unsafe { - ContentTreeRaw::unsafe_cursor_before_item(time, marker) - }; + let mut cursor = self.range_tree.cursor_before_item(lv, leaf_idx); // The cursor points to parent. This is safe because of guarantees provided by // cursor_before_item. 
- cursor.offset += 1; - if !stick_end { cursor.roll_to_next_entry(); } + cursor.inc_offset(&self.range_tree); + if !stick_end { cursor.roll_next_item(&self.range_tree); } cursor } } + fn get_mut_cursor_after(&mut self, lv: LV, stick_end: bool) -> DeltaCursor { + if lv == usize::MAX { + self.range_tree.mut_cursor_at_start() + } else { + let leaf_idx = self.marker_at(lv).0; + // let marker: NonNull> = self.markers.at(order as usize).unwrap(); + // self.content_tree. + // marker_b(); - // TODO: Can I remove the stick_end field here? - #[inline(always)] - pub(crate) fn get_cursor_after(&self, time: LV, stick_end: bool) -> Cursor { - unsafe { Cursor::unchecked_from_raw(&self.range_tree, self.get_unsafe_cursor_after(time, stick_end)) } + let mut cursor = self.range_tree.mut_cursor_before_item(lv, leaf_idx); + // The cursor points to parent. This is safe because of guarantees provided by + // cursor_before_item. + cursor.0.inc_offset(&self.range_tree); + if !stick_end { cursor.roll_next_item(&mut self.range_tree); } + cursor + } } + + + // // This does not stick_end to the found item. + // pub(super) fn get_unsafe_cursor_after(&self, time: LV, stick_end: bool) -> UnsafeCursor { + // if time == ROOT_LV { + // self.range_tree.unsafe_cursor_at_start() + // } else { + // let marker = self.marker_at(time); + // // let marker: NonNull> = self.markers.at(order as usize).unwrap(); + // // self.content_tree. + // let mut cursor = unsafe { + // ContentTreeRaw::unsafe_cursor_before_item(time, marker) + // }; + // // The cursor points to parent. This is safe because of guarantees provided by + // // cursor_before_item. + // cursor.offset += 1; + // if !stick_end { cursor.roll_to_next_entry(); } + // cursor + // } + // } + + // // TODO: Can I remove the stick_end field here? 
+ // #[inline(always)] + // pub(crate) fn get_cursor_after(&self, time: LV, stick_end: bool) -> Cursor { + // unsafe { Cursor::unchecked_from_raw(&self.range_tree, self.get_unsafe_cursor_after(time, stick_end)) } + // } + pub(super) fn assign_lv_to_client(&mut self, loc: CRDTId, time: LV, len: usize) { self.client_with_time.push(KVPair(time, CRDTSpan { loc, @@ -272,7 +327,8 @@ impl ListCRDT { span.1.len - span_offset } - pub(super) fn integrate(&mut self, agent: AgentId, item: YjsSpan, ins_content: Option<&str>, cursor_hint: Option>) { + // pub(super) fn integrate(&mut self, agent: AgentId, item: YjsSpan, ins_content: Option<&str>, cursor_hint: Option>) { + pub(super) fn integrate(&mut self, agent: AgentId, item: YjsSpan, ins_content: Option<&str>, mut cursor: DeltaCursor) { // if cfg!(debug_assertions) { // let next_order = self.get_next_order(); // assert_eq!(item.order, next_order); @@ -283,51 +339,65 @@ impl ListCRDT { // self.assign_order_to_client(loc, item.order, item.len as _); // Ok now that's out of the way, lets integrate! - let mut cursor = cursor_hint.map_or_else(|| { - self.get_unsafe_cursor_after(item.origin_left, false) - }, |mut c| { - // Ideally this wouldn't be necessary. - c.roll_to_next_entry(); - c - }); + // let mut cursor = cursor_hint.map_or_else(|| { + // self.get_unsafe_cursor_after(item.origin_left, false) + // }, |mut c| { + // // Ideally this wouldn't be necessary. + // c.roll_to_next_entry(); + // c + // }); + cursor.roll_next_item(&mut self.range_tree); // let mut cursor = cursor_hint.unwrap_or_else(|| { // self.get_unsafe_cursor_after(item.origin_left, false) // }); // These are almost never used. Could avoid the clone here... though its pretty cheap. 
- let left_cursor = cursor.clone(); - let mut scan_start = cursor.clone(); + let left_cursor = cursor.0.clone(); + let mut scan_cursor = cursor.0.clone(); let mut scanning = false; loop { - let other_order = match unsafe { cursor.unsafe_get_item() } { - None => { break; } // End of the document - Some(o) => { o } - }; + if !cursor.roll_next_item(&mut self.range_tree) { // End of the document + break; + } + + // let other_order = match unsafe { cursor.unsafe_get_item() } { + // None => { break; } // End of the document + // Some(o) => { o } + // }; + + let other_entry = *cursor.0.get_item(&self.range_tree).0; + let other_lv = other_entry.lv + cursor.0.offset; // Almost always true. Could move this short circuit earlier? - if other_order == item.origin_right { break; } + if other_lv == item.origin_right { break; } + + // We're now in the rare case there's actually concurrent inserts. To make the logic + // simpler, at this point we'll zero out the delta. + cursor.flush_delta_and_clear(&mut self.range_tree); + + debug_assert_eq!(cursor.1, LenUpdate::default()); // This code could be better optimized, but its already O(n * log n), and its extremely // rare that you actually get concurrent inserts at the same location in the document // anyway. - let other_entry = *cursor.get_raw_entry(); + // let other_entry = *cursor.get_raw_entry(); // let other_order = other_entry.order + cursor.offset as u32; - let other_left_order = other_entry.origin_left_at_offset(cursor.offset); - let other_left_cursor = self.get_unsafe_cursor_after(other_left_order, false); + let other_left_order = other_entry.origin_left_at_offset(cursor.0.offset); + let other_left_cursor = self.get_cursor_after(other_left_order, false); // YjsMod semantics - match unsafe { other_left_cursor.unsafe_cmp(&left_cursor) } { + match other_left_cursor.cmp(&left_cursor, &self.range_tree) { Ordering::Less => { break; } // Top row Ordering::Greater => { } // Bottom row. Continue. 
Ordering::Equal => { if item.origin_right == other_entry.origin_right { // Items are concurrent and "double siblings". Order by agent names. let my_name = self.get_agent_name(agent); - let other_loc = self.client_with_time.get(other_order); + let other_loc = self.client_with_time.get(other_lv); let other_name = self.get_agent_name(other_loc.agent); // Its possible for a user to conflict with themself if they commit to @@ -340,21 +410,18 @@ impl ListCRDT { Ordering::Greater => false, }; - if ins_here { - // Insert here. - break; - } else { - scanning = false; - } + // Insert here. + if ins_here { break; } + else { scanning = false; } } else { // Set scanning based on how the origin_right entries are ordered. let my_right_cursor = self.get_cursor_before(item.origin_right); let other_right_cursor = self.get_cursor_before(other_entry.origin_right); - if other_right_cursor < my_right_cursor { + if other_right_cursor.cmp(&my_right_cursor, &self.range_tree) == Ordering::Less { if !scanning { scanning = true; - scan_start = cursor.clone(); + scan_cursor = cursor.0.clone(); } } else { scanning = false; @@ -372,24 +439,25 @@ impl ListCRDT { // The fuzzer says no, we don't need to do that. I assume its because internal entries // have higher origin_left, and thus they can't be peers with the newly inserted item // (which has a lower origin_left). - if !cursor.next_entry() { + if !cursor.0.next_entry(&self.range_tree).0 { // This is dirty. If the cursor can't move to the next entry, we still need to move // it to the end of the current element or we'll prepend. next_entry() doesn't do // that for some reason. TODO: Clean this up. 
- cursor.offset = other_entry.len(); + cursor.0.offset = other_entry.len(); break; } } - if scanning { cursor = scan_start; } + if scanning { cursor.0 = scan_cursor; } - if cfg!(debug_assertions) { - let pos = unsafe { cursor.unsafe_count_content_pos() as usize }; - let len = self.range_tree.content_len() as usize; - assert!(pos <= len); - } + // if cfg!(debug_assertions) { + // let pos = unsafe { cursor.unsafe_count_content_pos() as usize }; + // let len = self.range_tree.content_len() as usize; + // assert!(pos <= len); + // } + + let mut pos = cursor.0.get_pos(&self.range_tree); if let Some(text) = self.text_content.as_mut() { - let pos = unsafe { cursor.unsafe_count_content_pos() as usize }; if let Some(ins_content) = ins_content { // debug_assert_eq!(count_chars(&ins_content), item.len as usize); text.insert(pos, ins_content); @@ -411,7 +479,10 @@ impl ListCRDT { } // Now insert here. - unsafe { ContentTreeRaw::unsafe_insert_notify(&mut cursor, item, notify_for(&mut self.index)); } + // unsafe { ContentTreeRaw::unsafe_insert_notify(&mut cursor, item, notify_for(&mut self.index)); } + pos += item.content_len(); + self.range_tree.insert(item, &mut cursor, true, &mut notify_for(&mut self.index)); + self.range_tree.emplace_cursor(pos, cursor); // cursor } @@ -516,15 +587,26 @@ impl ListCRDT { pub(super) fn internal_mark_deleted(&mut self, id: LV, target: LV, max_len: usize, update_content: bool) -> LV { // TODO: Make this use mut_cursor instead. The problem is notify_for mutably borrows // self.index, and the cursor is borrowing self (rather than self.range_tree). 
- let mut cursor = self.get_unsafe_cursor_before(target); - self.internal_mark_deleted_at(&mut cursor, id, max_len, update_content) + // let mut cursor = self.get_unsafe_cursor_before(target); + let mut cursor = self.get_mut_cursor_before(target); + let result = self.internal_mark_deleted_at(&mut cursor, id, max_len, update_content); + self.range_tree.emplace_cursor_unknown(cursor); + result } - pub(super) fn internal_mark_deleted_at(&mut self, cursor: &mut <&RangeTree as Cursors>::UnsafeCursor, id: LV, max_len: usize, update_content: bool) -> LV { - let target = unsafe { cursor.unsafe_get_item().unwrap() }; + pub(super) fn internal_mark_deleted_at(&mut self, cursor: &mut DeltaCursor, id: LV, max_len: usize, update_content: bool) -> LV { + // let target = unsafe { cursor.unsafe_get_item().unwrap() }; + let (e, offset) = cursor.0.get_item(&self.range_tree); + let target = e.lv + offset; let (deleted_here, succeeded) = unsafe { - ContentTreeRaw::unsafe_remote_deactivate_notify(cursor, max_len as _, notify_for(&mut self.index)) + self.range_tree.mutate_entry(cursor, max_len, &mut notify_for(&mut self.index), |e| { + if e.len > 0 { + e.len = -e.len; + true + } else { false } + }) + // ContentTreeRaw::unsafe_remote_deactivate_notify(cursor, max_len as _, notify_for(&mut self.index)) }; // let deleted_here = deleted_here as u32; @@ -539,20 +621,23 @@ impl ListCRDT { } else if let (Some(text), true) = (&mut self.text_content, update_content) { // The call to remote_deactivate will have modified the cursor, but the content position // will have stayed the same. 
- let pos = unsafe { cursor.unsafe_count_content_pos() as usize }; - text.remove(pos..pos + deleted_here as usize); + let pos = cursor.0.get_pos(&self.range_tree); + // let pos = unsafe { cursor.unsafe_count_content_pos() as usize }; + text.remove(pos..pos + deleted_here); } deleted_here } pub fn apply_remote_txn(&mut self, txn: &RemoteTxn) { + // self.range_tree.dbg_check(); + let agent = self.get_or_create_agent_id(txn.id.agent.as_str()); let client = &self.client_data[agent as usize]; // let next_seq = client.get_next_seq(); // Check that the txn hasn't already been applied. - assert!(client.item_localtime.find(txn.id.seq).is_none()); + debug_assert!(client.item_localtime.find(txn.id.seq).is_none()); let first_time = self.get_next_lv(); let mut next_time = first_time; @@ -587,6 +672,8 @@ impl ListCRDT { // Apply the changes. for op in txn.ops.iter() { + // self.range_tree.dbg_check(); + match op { RemoteCRDTOp::Ins { origin_left, origin_right, len, content_known } => { // let ins_len = ins_content.chars().count(); @@ -598,11 +685,11 @@ impl ListCRDT { let origin_left = self.remote_id_to_order(origin_left); let origin_right = self.remote_id_to_order(origin_right); - if cfg!(debug_assertions) { - let left = self.get_cursor_after(origin_left, true); - let right = self.get_cursor_before(origin_right); - assert!(left <= right); - } + // if cfg!(debug_assertions) { + // let left = self.get_cursor_after(origin_left, true); + // let right = self.get_cursor_before(origin_right); + // assert!(left <= right); + // } let item = YjsSpan { lv: order, @@ -619,7 +706,10 @@ impl ListCRDT { None }; - self.integrate(agent, item, ins_content, None); + let cursor = self.get_mut_cursor_after(origin_left, false); + // cursor.0.inc_offset(&self.range_tree); + self.integrate(agent, item, ins_content, cursor); + // self.range_tree.dbg_check(); } RemoteCRDTOp::Del { id, len } => { @@ -676,8 +766,11 @@ impl ListCRDT { // len: *len, ptr: last_entry.ptr // }; // self.index.insert(&mut 
cursor, entry, null_notify); + + // self.range_tree.dbg_check(); } } + } assert!(content.is_empty()); @@ -701,8 +794,8 @@ impl ListCRDT { // for LocalOp { pos, ins_content, del_span } in local_ops { for c in op.components { - let pos = c.pos as usize; - let len = c.len as usize; + let pos = c.pos; + let len = c.len; match c.tag { Ins => { @@ -710,20 +803,31 @@ impl ListCRDT { let time = next_time; next_time += c.len; + // self.range_tree.dbg_check(); + // Find the preceding item and successor - let (origin_left, cursor) = if pos == 0 { - (ROOT_LV, self.range_tree.unsafe_cursor_at_start()) + let (origin_left, mut cursor) = if pos == 0 { + (ROOT_LV, self.range_tree.mut_cursor_at_start()) } else { - let mut cursor = self.range_tree.unsafe_cursor_at_content_pos((pos - 1) as usize, false); - let origin_left = unsafe { cursor.unsafe_get_item() }.unwrap(); - assert!(cursor.next_item()); + let mut cursor = self.range_tree.mut_cursor_before_cur_pos(pos - 1); + let (e, offset) = cursor.0.get_item(&self.range_tree); + let origin_left = e.lv + offset; + // if CHECK_TREES { assert_eq!(origin_left, origin_left_2); } + cursor.0.inc_offset(&self.range_tree); + (origin_left, cursor) }; // There's an open question of whether this should skip past deleted items. // It would be *correct* both ways, though you get slightly different merging // & pruning behaviour in each case. 
- let origin_right = unsafe { cursor.unsafe_get_item() }.unwrap_or(ROOT_LV); + let origin_right = if cursor.roll_next_item(&mut self.range_tree) { + cursor.0.try_get_item(&self.range_tree) + .map(|(span, offset)| span.lv + offset) + .unwrap_or(ROOT_LV) + } else { + ROOT_LV + }; let item = YjsSpan { lv: time, @@ -737,24 +841,41 @@ impl ListCRDT { Some(consume_chars(&mut op.content, len)) } else { None }; - self.integrate(agent, item, ins_content, Some(cursor)); + self.integrate(agent, item, ins_content, cursor); } Del => { - let deleted_items = self.range_tree.local_deactivate_at_content_notify(pos as usize, len, notify_for(&mut self.index)); + let mut cursor = self.range_tree.mut_cursor_before_cur_pos(pos); + cursor.roll_next_item(&mut self.range_tree); - // dbg!(&deleted_items); - let mut deleted_length = 0; // To check. - for item in deleted_items { - self.deletes.push(KVPair(next_time, TimeSpan { - start: item.lv, - len: item.len as usize - })); - deleted_length += item.len as usize; - next_time += item.len as usize; + let mut len_remaining = len; + + loop { + let del_here = self.range_tree.mutate_entry(&mut cursor, len_remaining, &mut notify_for(&mut self.index), |item| { + debug_assert!(item.len > 0); + + self.deletes.push(KVPair(next_time, TimeSpan { + start: item.lv, + len: item.len as usize + })); + + item.len = -item.len; + }).0; + next_time += del_here; + + len_remaining -= del_here; + if len_remaining == 0 { break; } + + self.range_tree.slide_cursor_to_next_content(&mut cursor.0, &mut cursor.1); } + + self.range_tree.emplace_cursor(pos, cursor); + // let deleted_items = self.range_tree.local_deactivate_at_content_notify(pos, len, notify_for(&mut self.index)); + + // dbg!(&deleted_items); + // I might be able to relax this, but we'd need to change del_span above. 
- assert_eq!(deleted_length, len); + // assert_eq!(deleted_length, len); if let Some(ref mut text) = self.text_content { if let Some(deleted_content) = self.deleted_content.as_mut() { @@ -827,18 +948,19 @@ impl ListCRDT { } pub fn len(&self) -> usize { - self.range_tree.content_len() + self.range_tree.total_len() } pub fn is_empty(&self) -> bool { - self.range_tree.content_len() != 0 + // self.range_tree.content_len() != 0 + self.range_tree.is_empty() } pub fn print_stats(&self, detailed: bool) { println!("Document of length {}", self.len()); - let ins_del_count = self.range_tree.raw_iter() - .map(|e| RleRun::new(e.is_activated(), e.len())) + let ins_del_count = self.range_tree.iter() + .map(|e| RleRun::new(e.takes_up_space(), e.len())) .merge_spans() .count(); println!("As alternating inserts and deletes: {} items", ins_del_count); @@ -847,7 +969,8 @@ impl ListCRDT { println!("Content memory size: {}", r.borrow().mem_size().file_size(file_size_opts::CONVENTIONAL).unwrap()); } - self.range_tree.print_stats("content", detailed); + // self.range_tree.print_stats("content", detailed); + // self.index.print_stats("index", detailed); // self.markers.print_rle_size(); self.deletes.print_stats("deletes", detailed); @@ -857,7 +980,7 @@ impl ListCRDT { #[allow(unused)] pub fn debug_print_segments(&self) { - for entry in self.range_tree.raw_iter() { + for entry in self.range_tree.iter() { let loc = self.get_crdt_location(entry.lv); println!("order {} len {} ({}) agent {} / {} <-> {}", entry.lv, entry.len(), entry.content_len(), loc.agent, entry.origin_left, entry.origin_right); } @@ -909,10 +1032,12 @@ impl Default for ListCRDT { #[cfg(test)] mod tests { - use crate::list::*; - use crate::list::external_txn::{RemoteTxn, RemoteId, RemoteCRDTOp}; use smallvec::smallvec; + + use crate::list::*; + use crate::list::external_txn::{RemoteCRDTOp, RemoteId, RemoteTxn}; use crate::list::ot::traversal::TraversalOp; + // use crate::list::ot::traversal::TraversalOp; use crate::root_id; 
#[test] diff --git a/crates/diamond-types-crdt/src/list/encoding/mod.rs b/crates/diamond-types-crdt/src/list/encoding/mod.rs index 8b88470..11ceefb 100644 --- a/crates/diamond-types-crdt/src/list/encoding/mod.rs +++ b/crates/diamond-types-crdt/src/list/encoding/mod.rs @@ -544,7 +544,7 @@ impl ListCRDT { ins_del_runs.push(PosNegRun((doc_next_order - next_order) as i32)); } - let mut entries = self.range_tree.raw_iter().collect::>(); + let mut entries = self.range_tree.iter().collect::>(); entries.sort_unstable_by_key(|e| e.lv); let mut fancy_runs = Vec::new(); diff --git a/crates/diamond-types-crdt/src/list/encoding/patch_encoding.rs b/crates/diamond-types-crdt/src/list/encoding/patch_encoding.rs index 22ef9cf..3ed4336 100644 --- a/crates/diamond-types-crdt/src/list/encoding/patch_encoding.rs +++ b/crates/diamond-types-crdt/src/list/encoding/patch_encoding.rs @@ -1,12 +1,12 @@ -use crate::list::{encoding, ListCRDT, LV, ROOT_LV}; -use crate::list::encoding::{Chunk, Parents, Run, SpanWriter}; -use crate::list::positional::InsDelTag; -use crate::rangeextra::OrderRange; -use crate::rle::{KVPair, RleSpanHelpers, RleVec}; -use crate::list::encoding::varint::{num_encode_i64_with_extra_bit, mix_bit_u64, encode_u64, encode_u32}; use smallvec::smallvec; + use rle::{HasLength, MergableSpan}; + use crate::dtrange::DTRange; +use crate::list::{encoding, ListCRDT, LV, ROOT_LV}; +use crate::list::encoding::{Chunk, Parents, Run, SpanWriter}; +use crate::list::encoding::varint::{encode_u32, encode_u64, mix_bit_u64, num_encode_i64_with_extra_bit}; +use crate::rle::{KVPair, RleSpanHelpers, RleVec}; #[derive(Debug, Eq, PartialEq, Clone, Copy)] pub struct EditRun { @@ -127,26 +127,27 @@ impl ListCRDT { let mut next_output_order = 0; let mut last_edit_pos: usize = 0; - for (range, patch) in self.iter_original_patches() { - // dbg!(&range); - w.push(EditRun { - diff: isize::wrapping_sub(patch.pos as isize,last_edit_pos as isize), - len: patch.len, - is_delete: patch.tag == InsDelTag::Del, 
- backspace_mode: false, // Filled in by the appending code (above). - }); - last_edit_pos = patch.pos; - if patch.tag == InsDelTag::Ins { last_edit_pos += patch.len; } - - if range.start != next_output_order { - // To cut down on allocations and copying, these maps are both lazy. They only - // contain entries where the output orders don't match the current document orders. - outer_to_inner_map.push(KVPair(next_output_order, range.clone().into())); - inner_to_outer_map.insert(KVPair(range.start, range.transpose(next_output_order).into())); - } - - next_output_order += range.order_len(); - } + todo!(); + // for (range, patch) in self.iter_original_patches() { + // // dbg!(&range); + // w.push(EditRun { + // diff: isize::wrapping_sub(patch.pos as isize,last_edit_pos as isize), + // len: patch.len, + // is_delete: patch.tag == InsDelTag::Del, + // backspace_mode: false, // Filled in by the appending code (above). + // }); + // last_edit_pos = patch.pos; + // if patch.tag == InsDelTag::Ins { last_edit_pos += patch.len; } + // + // if range.start != next_output_order { + // // To cut down on allocations and copying, these maps are both lazy. They only + // // contain entries where the output orders don't match the current document orders. 
+ // outer_to_inner_map.push(KVPair(next_output_order, range.clone().into())); + // inner_to_outer_map.insert(KVPair(range.start, range.transpose(next_output_order).into())); + // } + // + // next_output_order += range.order_len(); + // } let patch_data = w.flush_into_inner(); // dbg!(&outer_to_inner_map); // dbg!(&inner_to_outer_map); diff --git a/crates/diamond-types-crdt/src/list/eq.rs b/crates/diamond-types-crdt/src/list/eq.rs index 24c7c17..3baa8d3 100644 --- a/crates/diamond-types-crdt/src/list/eq.rs +++ b/crates/diamond-types-crdt/src/list/eq.rs @@ -88,7 +88,7 @@ impl PartialEq for ListCRDT { let mut a_items: RleVec = RleVec::new(); let mut b_items: RleVec = RleVec::new(); - for mut entry in self.range_tree.raw_iter() { + for mut entry in self.range_tree.iter() { // dbg!(entry); // Map the entry to a. The entry could be a mix from multiple user agents. Split it // up if so. @@ -112,7 +112,7 @@ impl PartialEq for ListCRDT { } } } - for entry in other.range_tree.raw_iter() { + for entry in other.range_tree.iter() { b_items.push(entry); } // dbg!(&a_items, &b_items); diff --git a/crates/diamond-types-crdt/src/list/external_txn.rs b/crates/diamond-types-crdt/src/list/external_txn.rs index 83f8dea..a20daec 100644 --- a/crates/diamond-types-crdt/src/list/external_txn.rs +++ b/crates/diamond-types-crdt/src/list/external_txn.rs @@ -7,7 +7,6 @@ use serde::{Deserialize, Serialize}; use smallvec::{SmallVec, smallvec}; use smartstring::alias::String as SmartString; -use content_tree::Toggleable; use diamond_core_old::{AgentId, CRDT_DOC_ROOT, CRDTId}; use rle::{AppendRle, HasLength, MergableSpan}; @@ -63,6 +62,15 @@ pub enum RemoteCRDTOp { } } +impl HasLength for RemoteCRDTOp { + fn len(&self) -> usize { + match self { + Ins { len, .. } => *len, + Del { len, .. 
} => *len, + } + } +} + impl MergableSpan for RemoteIdSpan { fn can_append(&self, other: &Self) -> bool { self.id.agent == other.id.agent && self.id.seq + self.len == other.id.seq @@ -74,6 +82,8 @@ impl MergableSpan for RemoteIdSpan { } +const ALLOW_REORDERED_DELETES: bool = true; + impl MergableSpan for RemoteCRDTOp { fn can_append(&self, other: &Self) -> bool { // We're assuming the IDs are adjacent. @@ -82,9 +92,10 @@ impl MergableSpan for RemoteCRDTOp { // need to check that other.origin_left == other.id_start - 1. // // We'll just merge deletes. - (Del { id, len }, Del { id: other_id, .. }) => { + (Del { id, len }, Del { id: other_id, len: other_len }) => { id.agent == other_id.agent - && id.seq + len == other_id.seq + && (id.seq + len == other_id.seq + || (ALLOW_REORDERED_DELETES && other_id.seq + other_len == id.seq)) } _ => false, } @@ -95,8 +106,11 @@ impl MergableSpan for RemoteCRDTOp { (Ins { len, .. }, Ins { len: other_len, .. }) => { *len += other_len; }, - (Del { len, .. }, Del { len: other_len, .. }) => { + (Del { id, len }, Del { id: other_id, len: other_len }) => { *len += other_len; + if ALLOW_REORDERED_DELETES { + id.seq = id.seq.min(other_id.seq); + } }, _ => panic!("Cannot append mismatched operations") } @@ -443,16 +457,17 @@ impl ListCRDT { (RemoteCRDTOp::Del { id, len }, len) } else { // It must be an insert. Fish information out of the range tree. - let cursor = self.get_unsafe_cursor_before(order); - let entry = cursor.get_raw_entry(); + let cursor = self.get_cursor_before(order); + let (entry, offset) = cursor.get_item(&self.range_tree); // Limit by #4 - let len = usize::min(entry.len() - cursor.offset, len_remaining); + let len = usize::min(entry.len() - offset, len_remaining); // I'm not fishing out the deleted content at the moment, for any reason. 
// This might be simpler if I just make up content for deleted items O_o let content_known = if entry.is_activated() { if let Some(ref text) = self.text_content { - let pos = unsafe { cursor.unsafe_count_content_pos() }; + // let pos = unsafe { cursor.unsafe_count_content_pos() }; + let pos = cursor.get_pos(&self.range_tree); // TODO: Could optimize this. let borrow = text.borrow(); let content = borrow.slice_chars(pos..pos+len as usize); @@ -504,6 +519,7 @@ impl ListCRDT { let time_ranges = self.get_time_spans_since::>(&clock); for txn in self.iter_remote_txns(time_ranges.iter()) { dest.apply_remote_txn(&txn); + // self.range_tree.dbg_check(); } } diff --git a/crates/diamond-types-crdt/src/list/markers.rs b/crates/diamond-types-crdt/src/list/markers.rs index 473cf99..ebd9e77 100644 --- a/crates/diamond-types-crdt/src/list/markers.rs +++ b/crates/diamond-types-crdt/src/list/markers.rs @@ -1,125 +1,121 @@ -use std::fmt::Debug; -use std::ptr::NonNull; - -use content_tree::*; -use content_tree::ContentTraits; -use rle::{HasLength, MergableSpan, SplitableSpanHelpers}; -use rle::Searchable; - -use super::{DOC_IE, DOC_LE}; - -// use crate::common::IndexGet; - -#[derive(Copy, Clone, Eq, PartialEq, Debug)] -pub struct MarkerEntry> { - // This is cleaner as a separate enum and struct, but doing it that way - // bumps it from 16 to 24 bytes per entry because of alignment. 
- pub len: usize, - pub ptr: Option>>, -} - -impl> HasLength for MarkerEntry { - fn len(&self) -> usize { - self.len - } -} -impl> SplitableSpanHelpers for MarkerEntry { - fn truncate_h(&mut self, at: usize) -> Self { - let remainder_len = self.len - at; - self.len = at; - MarkerEntry { - len: remainder_len, - ptr: self.ptr - } - } - - fn truncate_keeping_right_h(&mut self, at: usize) -> Self { - let left = Self { - len: at as _, - ptr: self.ptr - }; - self.len -= at; - left - } -} -impl> MergableSpan for MarkerEntry { - fn can_append(&self, other: &Self) -> bool { - self.ptr == other.ptr - } - - fn append(&mut self, other: Self) { - self.len += other.len; - } - - fn prepend(&mut self, other: Self) { self.len += other.len; } -} - -// impl, const IE: usize, const LE: usize> IndexGet for MarkerEntry { -// type Output = NonNull>; -// -// fn index_get(&self, _index: usize) -> Self::Output { +// use std::fmt::Debug; +// use std::ptr::NonNull; +// +// use rle::{HasLength, MergableSpan, SplitableSpanHelpers}; +// use rle::Searchable; +// +// // use crate::common::IndexGet; +// +// #[derive(Copy, Clone, Eq, PartialEq, Debug)] +// pub struct MarkerEntry> { +// // This is cleaner as a separate enum and struct, but doing it that way +// // bumps it from 16 to 24 bytes per entry because of alignment. 
+// pub len: usize, +// pub ptr: Option>>, +// } +// +// impl> HasLength for MarkerEntry { +// fn len(&self) -> usize { +// self.len +// } +// } +// impl> SplitableSpanHelpers for MarkerEntry { +// fn truncate_h(&mut self, at: usize) -> Self { +// let remainder_len = self.len - at; +// self.len = at; +// MarkerEntry { +// len: remainder_len, +// ptr: self.ptr +// } +// } +// +// fn truncate_keeping_right_h(&mut self, at: usize) -> Self { +// let left = Self { +// len: at as _, +// ptr: self.ptr +// }; +// self.len -= at; +// left +// } +// } +// impl> MergableSpan for MarkerEntry { +// fn can_append(&self, other: &Self) -> bool { +// self.ptr == other.ptr +// } +// +// fn append(&mut self, other: Self) { +// self.len += other.len; +// } +// +// fn prepend(&mut self, other: Self) { self.len += other.len; } +// } +// +// // impl, const IE: usize, const LE: usize> IndexGet for MarkerEntry { +// // type Output = NonNull>; +// // +// // fn index_get(&self, _index: usize) -> Self::Output { +// // self.ptr +// // } +// // } +// +// +// +// impl> Default for MarkerEntry { +// fn default() -> Self { +// MarkerEntry {ptr: None, len: 0} +// } +// } +// +// +// impl> MarkerEntry { +// pub fn unwrap_ptr(&self) -> NonNull> { +// self.ptr.unwrap() +// } +// } +// +// impl> Searchable for MarkerEntry { +// type Item = Option>>; +// +// fn get_offset(&self, _loc: Self::Item) -> Option { +// panic!("Should never be used") +// } +// +// fn at_offset(&self, _offset: usize) -> Self::Item { // self.ptr // } // } - - - -impl> Default for MarkerEntry { - fn default() -> Self { - MarkerEntry {ptr: None, len: 0} - } -} - - -impl> MarkerEntry { - pub fn unwrap_ptr(&self) -> NonNull> { - self.ptr.unwrap() - } -} - -impl> Searchable for MarkerEntry { - type Item = Option>>; - - fn get_offset(&self, _loc: Self::Item) -> Option { - panic!("Should never be used") - } - - fn at_offset(&self, _offset: usize) -> Self::Item { - self.ptr - } -} - -#[cfg(test)] -mod tests { - use std::ptr::NonNull; - - 
use crate::list::LV; - - #[test] - fn test_sizes() { - #[derive(Copy, Clone, Eq, PartialEq, Debug)] - pub enum MarkerOp { - Ins(NonNull), - Del(LV), - } - - #[derive(Copy, Clone, Eq, PartialEq, Debug)] - pub struct MarkerEntry1 { - // The order / seq is implicit from the location in the list. - pub len: usize, - pub op: MarkerOp - } - - dbg!(std::mem::size_of::()); - - #[derive(Copy, Clone, Eq, PartialEq, Debug)] - pub enum MarkerEntry2 { - Ins(usize, NonNull), - Del(usize, LV, bool), - } - dbg!(std::mem::size_of::()); - - pub type MarkerEntry3 = (u64, Option>); - dbg!(std::mem::size_of::()); - } -} \ No newline at end of file +// +// #[cfg(test)] +// mod tests { +// use std::ptr::NonNull; +// +// use crate::list::LV; +// +// #[test] +// fn test_sizes() { +// #[derive(Copy, Clone, Eq, PartialEq, Debug)] +// pub enum MarkerOp { +// Ins(NonNull), +// Del(LV), +// } +// +// #[derive(Copy, Clone, Eq, PartialEq, Debug)] +// pub struct MarkerEntry1 { +// // The order / seq is implicit from the location in the list. 
+// pub len: usize, +// pub op: MarkerOp +// } +// +// dbg!(std::mem::size_of::()); +// +// #[derive(Copy, Clone, Eq, PartialEq, Debug)] +// pub enum MarkerEntry2 { +// Ins(usize, NonNull), +// Del(usize, LV, bool), +// } +// dbg!(std::mem::size_of::()); +// +// pub type MarkerEntry3 = (u64, Option>); +// dbg!(std::mem::size_of::()); +// } +// } \ No newline at end of file diff --git a/crates/diamond-types-crdt/src/list/merge_positional.rs b/crates/diamond-types-crdt/src/list/merge_positional.rs index 271aec2..7d33c38 100644 --- a/crates/diamond-types-crdt/src/list/merge_positional.rs +++ b/crates/diamond-types-crdt/src/list/merge_positional.rs @@ -1,264 +1,264 @@ -use diamond_core_old::{AgentId, CRDTId}; -use crate::list::{InsDelTag, ListCRDT, ROOT_LV, LV}; -use crate::list::branch::branch_eq; -use crate::list::positional::PositionalOpRef; -use crate::list::span::YjsSpan; -use crate::list::time::positionmap::PositionMap; -use crate::unicount::consume_chars; -use InsDelTag::*; -use crate::list::external_txn::RemoteId; - -impl ListCRDT { - pub fn apply_patch_at_version(&mut self, agent: AgentId, op: PositionalOpRef, branch: &[LV]) { - if branch_eq(branch, self.frontier.as_slice()) { - self.apply_local_txn(agent, op); - } else { - let mut map = PositionMap::new_at_version(self, branch); - self.apply_patch_at_map(&mut map, agent, op, branch); - } - } - - pub fn apply_remote_patch_at_version(&mut self, id: &RemoteId, parents: &[RemoteId], op: PositionalOpRef) { - let agent = self.get_or_create_agent_id(id.agent.as_str()); - let client = &self.client_data[agent as usize]; - let next_seq = client.get_next_seq(); - // If the seq does not match we either need to skip or buffer the transaction. 
- assert_eq!(next_seq, id.seq, "Sequence numbers are not linear"); - - let parents = self.remote_ids_to_branch(&parents); - self.apply_patch_at_version(agent, op, parents.as_slice()); - } - - pub(crate) fn apply_patch_at_map(&mut self, map: &mut PositionMap, agent: AgentId, mut op: PositionalOpRef, branch: &[LV]) { - // local_ops: &[PositionalComponent], mut content: &str - // TODO: Merge this with apply_local_txn - let first_time = self.get_next_lv(); - let mut next_time = first_time; - let txn_len = op.components.iter().map(|c| c.len).sum::(); - - self.assign_lv_to_client(CRDTId { - agent, - seq: self.client_data[agent as usize].get_next_seq() - }, first_time, txn_len); - - // for LocalOp { pos, ins_content, del_span } in local_ops { - for c in op.components { - let orig_pos = c.pos; - let len = c.len; - - match c.tag { - Ins => { - // First we need the insert's base order - let order = next_time; - next_time += c.len; - - // Find the preceding item and successor - let (origin_left, cursor) = if orig_pos == 0 { - (ROOT_LV, self.range_tree.cursor_at_start()) - } else { - let mut cursor = map.list_cursor_at_content_pos(self, orig_pos - 1).0; - let origin_left = cursor.get_item().unwrap(); - assert!(cursor.next_item()); - (origin_left, cursor) - }; - - // The origin right is interesting. We need to end up after - // let origin_right = map.order_at_content_pos(self, orig_pos, true); - let origin_right = map.right_origin_at(self, orig_pos); - // dbg!((origin_left, origin_right)); - // let origin_right = if orig_pos == map.content_len() { - // ROOT_TIME - // } else { - // // stick_end: false here matches the current semantics where we still use - // // deleted items as the origin_right. 
- // map.order_at_content_pos(self, orig_pos, true) - // }; - - let item = YjsSpan { - lv: order, - origin_left, - origin_right, - len: len as isize - }; - // dbg!(item); - - let ins_content = if c.content_known { - Some(consume_chars(&mut op.content, len)) - } else { None }; - - // This is dirty. The cursor here implicitly references self. Using cursor.inner - // breaks the borrow checking rules. - let raw_pos = cursor.count_offset_pos(); - - let inner_cursor = cursor.inner; - self.integrate(agent, item, ins_content, Some(inner_cursor)); - // self.integrate(agent, item, ins_content, None); - - // dbg!(&map); - map.update_from_insert(raw_pos, len); - // dbg!(&map); - } - - Del => { - // We need to loop here because the deleted span might have been broken up by - // subsequent inserts. We also need to mark double_deletes when they happen. - - // TODO: remaining_len, len, len_here - Gross. - let mut remaining_len = len; - while remaining_len > 0 { - // self.debug_print_segments(); - let (cursor, mut len) = map.list_cursor_at_content_pos(self, orig_pos); - len = len.min(remaining_len); - debug_assert!(len > 0); - // remaining_len -= len; - - // dbg!(len); - - let mut unsafe_cursor = cursor.inner; - - // unsafe_cursor.roll_to_next_entry(); - // debug_assert!(unsafe_cursor.get_raw_entry().is_activated()); - - // dbg!(unsafe_cursor.get_raw_entry()); - - // let target = unsafe { unsafe_cursor.get_item().unwrap() }; - let len_here = self.internal_mark_deleted_at(&mut unsafe_cursor, next_time, len as _, true); - - // This is wild, but we don't actually care if the delete succeeded. If - // the delete didn't succeed, its because the item was already deleted - // in the main (current) branch. But at this point in time the item - // isn't (can't) have been deleted. So the map will just be modified - // from Inserted -> Upstream. 
- // dbg!(&map, len_here, orig_pos); - map.update_from_delete(orig_pos, len_here as _); - // dbg!(&map); - - // len -= len_here as usize; - next_time += len_here; - // The cursor has been updated already by internal_mark_deleted_at. - - // We don't need to modify orig_pos because the position will be - // unchanged. - - remaining_len -= len_here as usize; - } - } - } - } - - // self.insert_txn_local(first_order..next_order); - self.insert_txn_full(branch, first_time..next_time); - debug_assert_eq!(next_time, self.get_next_lv()); - } -} - -#[cfg(test)] -mod tests { - use crate::list::PositionalComponent; - use super::*; - - #[test] - fn insert_with_patch_1() { - let mut doc = ListCRDT::new(); - doc.get_or_create_agent_id("a"); // 0 - doc.get_or_create_agent_id("b"); // 1 - - doc.local_insert(0, 0, "aaa"); - doc.local_insert(0, 0, "A"); - - doc.apply_patch_at_version(1, PositionalOpRef { - components: &[PositionalComponent { - pos: 1, len: 1, content_known: true, tag: InsDelTag::Ins - }], - content: "b" - }, &[1]); // when the document had "aa" - - // doc.apply_patch_at_version(0, &[PositionalComponent { - // pos: 0, len: 1, content_known: true, tag: InsDelTag::Ins - // }], "a", &[ROOT_ORDER]); - // doc.apply_patch_at_version(1, &[PositionalComponent { - // pos: 0, len: 1, content_known: true, tag: InsDelTag::Ins - // }], "b", &[ROOT_ORDER]); - - if let Some(text) = doc.text_content.as_ref() { - assert_eq!(text, "Aabaa"); - } - doc.check(true); - - // dbg!(&doc); - } - - #[test] - fn del_with_patch_1() { - let mut doc = ListCRDT::new(); - doc.get_or_create_agent_id("a"); // 0 - doc.get_or_create_agent_id("b"); // 1 - - doc.local_insert(0, 0, "abc"); - doc.local_insert(0, 0, "A"); - - doc.apply_patch_at_version(1, PositionalOpRef { - components: &[PositionalComponent { - pos: 1, len: 1, content_known: false, tag: InsDelTag::Del - }], - content: "" - }, &[1]); // when the document had "aa" - - if let Some(text) = doc.text_content.as_ref() { - assert_eq!(text, "Aac"); - 
} - doc.check(true); - - // dbg!(&doc); - } - - #[test] - fn del_with_patch_extended() { - let mut doc = ListCRDT::new(); - doc.get_or_create_agent_id("a"); // 0 - doc.get_or_create_agent_id("b"); // 1 - - doc.local_insert(0, 0, "abc"); - doc.local_insert(0, 2, "x"); // abxc - - doc.apply_patch_at_version(1, PositionalOpRef { - components: &[PositionalComponent { - pos: 1, len: 2, content_known: false, tag: InsDelTag::Del - }], - content: "" - }, &[2]); - - if let Some(text) = doc.text_content.as_ref() { - assert_eq!(text, "ax"); - } - doc.check(true); - - // dbg!(&doc); - } - - #[test] - fn patch_double_delete() { - let mut doc = ListCRDT::new(); - doc.get_or_create_agent_id("a"); // 0 - doc.get_or_create_agent_id("b"); // 1 - - doc.local_insert(0, 0, "abc"); - doc.local_delete(0, 1, 1); // ac - doc.local_insert(0, 0, "X"); // Xac - - doc.apply_patch_at_version(1, PositionalOpRef { - components: &[PositionalComponent { - pos: 1, len: 2, content_known: false, tag: InsDelTag::Del - }], - content: "" - }, &[2]); // Xa - - - if let Some(text) = doc.text_content.as_ref() { - assert_eq!(text, "Xa"); - } - doc.check(true); - - // dbg!(&doc); - } -} \ No newline at end of file +// use diamond_core_old::{AgentId, CRDTId}; +// use crate::list::{InsDelTag, ListCRDT, ROOT_LV, LV}; +// use crate::list::branch::branch_eq; +// use crate::list::positional::PositionalOpRef; +// use crate::list::span::YjsSpan; +// // use crate::list::time::positionmap::PositionMap; +// use crate::unicount::consume_chars; +// use InsDelTag::*; +// use crate::list::external_txn::RemoteId; +// +// impl ListCRDT { +// pub fn apply_patch_at_version(&mut self, agent: AgentId, op: PositionalOpRef, branch: &[LV]) { +// if branch_eq(branch, self.frontier.as_slice()) { +// self.apply_local_txn(agent, op); +// } else { +// let mut map = PositionMap::new_at_version(self, branch); +// self.apply_patch_at_map(&mut map, agent, op, branch); +// } +// } +// +// pub fn apply_remote_patch_at_version(&mut self, id: 
&RemoteId, parents: &[RemoteId], op: PositionalOpRef) { +// let agent = self.get_or_create_agent_id(id.agent.as_str()); +// let client = &self.client_data[agent as usize]; +// let next_seq = client.get_next_seq(); +// // If the seq does not match we either need to skip or buffer the transaction. +// assert_eq!(next_seq, id.seq, "Sequence numbers are not linear"); +// +// let parents = self.remote_ids_to_branch(&parents); +// self.apply_patch_at_version(agent, op, parents.as_slice()); +// } +// +// pub(crate) fn apply_patch_at_map(&mut self, map: &mut PositionMap, agent: AgentId, mut op: PositionalOpRef, branch: &[LV]) { +// // local_ops: &[PositionalComponent], mut content: &str +// // TODO: Merge this with apply_local_txn +// let first_time = self.get_next_lv(); +// let mut next_time = first_time; +// let txn_len = op.components.iter().map(|c| c.len).sum::(); +// +// self.assign_lv_to_client(CRDTId { +// agent, +// seq: self.client_data[agent as usize].get_next_seq() +// }, first_time, txn_len); +// +// // for LocalOp { pos, ins_content, del_span } in local_ops { +// for c in op.components { +// let orig_pos = c.pos; +// let len = c.len; +// +// match c.tag { +// Ins => { +// // First we need the insert's base order +// let order = next_time; +// next_time += c.len; +// +// // Find the preceding item and successor +// let (origin_left, cursor) = if orig_pos == 0 { +// (ROOT_LV, self.range_tree.cursor_at_start()) +// } else { +// let mut cursor = map.list_cursor_at_content_pos(self, orig_pos - 1).0; +// let origin_left = cursor.get_item().unwrap(); +// assert!(cursor.next_item()); +// (origin_left, cursor) +// }; +// +// // The origin right is interesting. 
We need to end up after +// // let origin_right = map.order_at_content_pos(self, orig_pos, true); +// let origin_right = map.right_origin_at(self, orig_pos); +// // dbg!((origin_left, origin_right)); +// // let origin_right = if orig_pos == map.content_len() { +// // ROOT_TIME +// // } else { +// // // stick_end: false here matches the current semantics where we still use +// // // deleted items as the origin_right. +// // map.order_at_content_pos(self, orig_pos, true) +// // }; +// +// let item = YjsSpan { +// lv: order, +// origin_left, +// origin_right, +// len: len as isize +// }; +// // dbg!(item); +// +// let ins_content = if c.content_known { +// Some(consume_chars(&mut op.content, len)) +// } else { None }; +// +// // This is dirty. The cursor here implicitly references self. Using cursor.inner +// // breaks the borrow checking rules. +// let raw_pos = cursor.count_offset_pos(); +// +// let inner_cursor = cursor.inner; +// self.integrate(agent, item, ins_content, Some(inner_cursor)); +// // self.integrate(agent, item, ins_content, None); +// +// // dbg!(&map); +// map.update_from_insert(raw_pos, len); +// // dbg!(&map); +// } +// +// Del => { +// // We need to loop here because the deleted span might have been broken up by +// // subsequent inserts. We also need to mark double_deletes when they happen. +// +// // TODO: remaining_len, len, len_here - Gross. 
+// let mut remaining_len = len; +// while remaining_len > 0 { +// // self.debug_print_segments(); +// let (cursor, mut len) = map.list_cursor_at_content_pos(self, orig_pos); +// len = len.min(remaining_len); +// debug_assert!(len > 0); +// // remaining_len -= len; +// +// // dbg!(len); +// +// let mut unsafe_cursor = cursor.inner; +// +// // unsafe_cursor.roll_to_next_entry(); +// // debug_assert!(unsafe_cursor.get_raw_entry().is_activated()); +// +// // dbg!(unsafe_cursor.get_raw_entry()); +// +// // let target = unsafe { unsafe_cursor.get_item().unwrap() }; +// let len_here = self.internal_mark_deleted_at(&mut unsafe_cursor, next_time, len as _, true); +// +// // This is wild, but we don't actually care if the delete succeeded. If +// // the delete didn't succeed, its because the item was already deleted +// // in the main (current) branch. But at this point in time the item +// // isn't (can't) have been deleted. So the map will just be modified +// // from Inserted -> Upstream. +// // dbg!(&map, len_here, orig_pos); +// map.update_from_delete(orig_pos, len_here as _); +// // dbg!(&map); +// +// // len -= len_here as usize; +// next_time += len_here; +// // The cursor has been updated already by internal_mark_deleted_at. +// +// // We don't need to modify orig_pos because the position will be +// // unchanged. 
+// +// remaining_len -= len_here as usize; +// } +// } +// } +// } +// +// // self.insert_txn_local(first_order..next_order); +// self.insert_txn_full(branch, first_time..next_time); +// debug_assert_eq!(next_time, self.get_next_lv()); +// } +// } +// +// #[cfg(test)] +// mod tests { +// use crate::list::PositionalComponent; +// use super::*; +// +// #[test] +// fn insert_with_patch_1() { +// let mut doc = ListCRDT::new(); +// doc.get_or_create_agent_id("a"); // 0 +// doc.get_or_create_agent_id("b"); // 1 +// +// doc.local_insert(0, 0, "aaa"); +// doc.local_insert(0, 0, "A"); +// +// doc.apply_patch_at_version(1, PositionalOpRef { +// components: &[PositionalComponent { +// pos: 1, len: 1, content_known: true, tag: InsDelTag::Ins +// }], +// content: "b" +// }, &[1]); // when the document had "aa" +// +// // doc.apply_patch_at_version(0, &[PositionalComponent { +// // pos: 0, len: 1, content_known: true, tag: InsDelTag::Ins +// // }], "a", &[ROOT_ORDER]); +// // doc.apply_patch_at_version(1, &[PositionalComponent { +// // pos: 0, len: 1, content_known: true, tag: InsDelTag::Ins +// // }], "b", &[ROOT_ORDER]); +// +// if let Some(text) = doc.text_content.as_ref() { +// assert_eq!(text, "Aabaa"); +// } +// doc.check(true); +// +// // dbg!(&doc); +// } +// +// #[test] +// fn del_with_patch_1() { +// let mut doc = ListCRDT::new(); +// doc.get_or_create_agent_id("a"); // 0 +// doc.get_or_create_agent_id("b"); // 1 +// +// doc.local_insert(0, 0, "abc"); +// doc.local_insert(0, 0, "A"); +// +// doc.apply_patch_at_version(1, PositionalOpRef { +// components: &[PositionalComponent { +// pos: 1, len: 1, content_known: false, tag: InsDelTag::Del +// }], +// content: "" +// }, &[1]); // when the document had "aa" +// +// if let Some(text) = doc.text_content.as_ref() { +// assert_eq!(text, "Aac"); +// } +// doc.check(true); +// +// // dbg!(&doc); +// } +// +// #[test] +// fn del_with_patch_extended() { +// let mut doc = ListCRDT::new(); +// doc.get_or_create_agent_id("a"); // 
0 +// doc.get_or_create_agent_id("b"); // 1 +// +// doc.local_insert(0, 0, "abc"); +// doc.local_insert(0, 2, "x"); // abxc +// +// doc.apply_patch_at_version(1, PositionalOpRef { +// components: &[PositionalComponent { +// pos: 1, len: 2, content_known: false, tag: InsDelTag::Del +// }], +// content: "" +// }, &[2]); +// +// if let Some(text) = doc.text_content.as_ref() { +// assert_eq!(text, "ax"); +// } +// doc.check(true); +// +// // dbg!(&doc); +// } +// +// #[test] +// fn patch_double_delete() { +// let mut doc = ListCRDT::new(); +// doc.get_or_create_agent_id("a"); // 0 +// doc.get_or_create_agent_id("b"); // 1 +// +// doc.local_insert(0, 0, "abc"); +// doc.local_delete(0, 1, 1); // ac +// doc.local_insert(0, 0, "X"); // Xac +// +// doc.apply_patch_at_version(1, PositionalOpRef { +// components: &[PositionalComponent { +// pos: 1, len: 2, content_known: false, tag: InsDelTag::Del +// }], +// content: "" +// }, &[2]); // Xa +// +// +// if let Some(text) = doc.text_content.as_ref() { +// assert_eq!(text, "Xa"); +// } +// doc.check(true); +// +// // dbg!(&doc); +// } +// } \ No newline at end of file diff --git a/crates/diamond-types-crdt/src/list/mod.rs b/crates/diamond-types-crdt/src/list/mod.rs index b30de4e..007e8f4 100644 --- a/crates/diamond-types-crdt/src/list/mod.rs +++ b/crates/diamond-types-crdt/src/list/mod.rs @@ -1,11 +1,7 @@ -use std::pin::Pin; -use std::ptr::NonNull; - use jumprope::JumpRopeBuf; use smallvec::SmallVec; use smartstring::alias::String as SmartString; -use content_tree::*; use diamond_core_old::AgentId; pub use ot::traversal::TraversalComponent; pub use positional::{InsDelTag, PositionalComponent, PositionalOp}; @@ -13,10 +9,11 @@ pub use positional::{InsDelTag, PositionalComponent, PositionalOp}; use crate::common::ClientName; use crate::crdtspan::CRDTSpan; use crate::list::double_delete::DoubleDelete; -use crate::list::index_tree::{IndexContent, IndexTree}; +use crate::ost::{IndexContent, IndexTree, LeafIdx}; use 
crate::list::span::YjsSpan; use crate::list::txn::TxnSpan; use crate::order::TimeSpan; +use crate::ost::content_tree::ContentTree; // use crate::list::delete::DeleteEntry; use crate::rle::{KVPair, RleVec}; @@ -35,9 +32,11 @@ pub mod time; pub mod positional; mod merge_positional; -#[cfg(test)] -mod positional_fuzzer; -mod index_tree; +pub(crate) mod stats; + +// #[cfg(test)] +// mod positional_fuzzer; +// mod index_tree; // #[cfg(inlinerope)] // pub const USE_INNER_ROPE: bool = true; @@ -65,30 +64,43 @@ struct ClientData { item_localtime: RleVec>, } -pub(crate) const INDEX_IE: usize = DEFAULT_IE; -pub(crate) const INDEX_LE: usize = DEFAULT_LE; - -pub(crate) const DOC_IE: usize = DEFAULT_IE; -pub(crate) const DOC_LE: usize = DEFAULT_LE; +// pub(crate) const INDEX_IE: usize = DEFAULT_IE; +// pub(crate) const INDEX_LE: usize = DEFAULT_LE; +// +// pub(crate) const DOC_IE: usize = DEFAULT_IE; +// pub(crate) const DOC_LE: usize = DEFAULT_LE; // const DOC_LE: usize = 32; -// type DocRangeIndex = ContentIndex; -// type DocRangeIndex = FullMetricsU32; -type DocRangeIndex = FullMetricsUsize; - -pub(crate) type RangeTree = Pin>>; -// pub(crate) type RangeTreeLeaf = NodeLeaf; - -// type SpaceIndex = Pin, RawPositionMetricsUsize>>>; - -#[derive(Copy, Clone, Debug)] -struct Marker>(Option>>); - -impl> Default for Marker { - fn default() -> Self { Self(None) } -} -impl> IndexContent for Marker { - fn try_append(&mut self, _offset: usize, other: &Self, _other_len: usize) -> bool { +// type DocRangeIndex = FullMetricsUsize; + +// pub(crate) type OldRangeTree = Pin>>; +pub(crate) type RangeTree = ContentTree; + +// #[derive(Copy, Clone, Debug)] +// struct Marker>(Option>>); +// +// impl> Default for Marker { +// fn default() -> Self { Self(None) } +// } +// impl> IndexContent for Marker { +// fn try_append(&mut self, _offset: usize, other: &Self, _other_len: usize) -> bool { +// self.0 == other.0 +// } +// +// fn at_offset(&self, _offset: usize) -> Self { +// *self +// } +// +// fn 
eq(&self, other: &Self, _upto_len: usize) -> bool { +// self.0 == other.0 +// } +// } + +#[derive(Copy, Clone, Debug, Default)] +struct Marker(LeafIdx); + +impl IndexContent for Marker { + fn try_append(&mut self, offset: usize, other: &Self, other_len: usize) -> bool { self.0 == other.0 } @@ -101,8 +113,7 @@ impl> IndexContent for Marker { } } - -type SpaceIndex = IndexTree>; +type SpaceIndex = IndexTree; pub type DoubleDeleteList = RleVec>; @@ -158,7 +169,7 @@ pub struct ListCRDT { txns: RleVec, /// The document state. - text_content: Option, + pub text_content: Option, /// This is a big ol' string containing everything that's been deleted (self.deletes) in order. deleted_content: Option, } diff --git a/crates/diamond-types-crdt/src/list/ot/positionmap.rs b/crates/diamond-types-crdt/src/list/ot/positionmap.rs index 016feaa..6fa18e6 100644 --- a/crates/diamond-types-crdt/src/list/ot/positionmap.rs +++ b/crates/diamond-types-crdt/src/list/ot/positionmap.rs @@ -2,21 +2,20 @@ // few files! use std::pin::Pin; -use jumprope::JumpRopeBuf; +use jumprope::JumpRopeBuf; use smallvec::SmallVec; use content_tree::*; use diamond_core_old::CRDTId; use rle::AppendRle; -use TraversalComponent::*; use crate::crdtspan::CRDTSpan; use crate::list::{DoubleDeleteList, ListCRDT, LV}; use crate::list::double_delete::DoubleDelete; use crate::list::external_txn::RemoteIdSpan; -use crate::list::positional::{InsDelTag, PositionalComponent, PositionalOp}; use crate::list::ot::traversal::{TraversalComponent, TraversalOp, TraversalOpSequence}; +use crate::list::positional::{PositionalComponent, PositionalOp}; use crate::order::TimeSpan; use crate::rle::{KVPair, RleKey, RleSpanHelpers}; @@ -229,142 +228,144 @@ impl<'a> Iterator for PatchIter<'a> { type Item = (usize, PositionalComponent); fn next(&mut self) -> Option<(usize, PositionalComponent)> { - // We go back through history in reverse order. 
We need to go in reverse order for a few - // reasons: + // // We go back through history in reverse order. We need to go in reverse order for a few + // // reasons: + // // + // // - Because of duplicate deletes. If an item has been deleted multiple times, we only want + // // to visit it the "first" time chronologically based on the OrderSpan passed in here. + // // - We need to generate the position map anyway. I + // // it for deletion the *first* time it was deleted chronologically according to span. + // // Another approach would be to store in double_deletes the order of the first delete for + // // each entry, but at some point we might want to generate this map from a different time + // // order. This approach uses less memory and generalizes better, at the expense of more + // // complex code. + // while self.span.len > 0 { + // // So instead of searching for span.offset, we start with span.offset + span.len - 1. + // let span_last_order = self.span.end() - 1; // - // - Because of duplicate deletes. If an item has been deleted multiple times, we only want - // to visit it the "first" time chronologically based on the OrderSpan passed in here. - // - We need to generate the position map anyway. I - // it for deletion the *first* time it was deleted chronologically according to span. - // Another approach would be to store in double_deletes the order of the first delete for - // each entry, but at some point we might want to generate this map from a different time - // order. This approach uses less memory and generalizes better, at the expense of more - // complex code. - while self.span.len > 0 { - // So instead of searching for span.offset, we start with span.offset + span.len - 1. - let span_last_order = self.span.end() - 1; - - // First check if the change was a delete or an insert. - if let Ok(d) = self.doc.deletes.search_scanning_backwards_sparse(span_last_order, &mut self.deletes_idx) { - // Its a delete. 
We need to try to undelete the item, unless the item was deleted - // multiple times (in which case, it stays deleted for now). - let base = usize::max(self.span.start, d.0); - let del_span_size = span_last_order + 1 - base; // TODO: Clean me up - debug_assert!(del_span_size > 0); - - // d_offset -= span_last_order - base; // equivalent to d_offset -= undelete_here - 1; - - // Ok, undelete here. An earlier version of this code iterated *forwards* amongst - // the deleted span. This worked correctly and was slightly simpler, but it was a - // confusing API to use and test because delete changes in particular were sometimes - // arbitrarily reordered. - let last_del_target = d.1.start + (span_last_order - d.0); - - // I'm also going to limit what we visit each iteration by the size of the visited - // item in the range tree. For performance I could hold off looking this up until - // we've got the go ahead from marked_deletes, but given how rare double deletes - // are, this is fine. - - let rt_cursor = self.doc.get_unsafe_cursor_after(last_del_target, true); - // Cap the number of items to undelete each iteration based on the span in content_tree. - let entry = rt_cursor.get_raw_entry(); - debug_assert!(entry.is_deactivated()); - let first_del_target = usize::max(entry.lv, last_del_target + 1 - del_span_size); - - let (allowed, first_del_target) = self.marked_deletes.mark_range(&self.doc.double_deletes, last_del_target, first_del_target); - let len_here = last_del_target + 1 - first_del_target; - // println!("Delete from {} to {}", first_del_target, last_del_target); - self.span.len -= len_here; - - if allowed { - // let len_here = len_here.min((-entry.len) as u32 - rt_cursor.offset as u32); - let post_pos = unsafe { rt_cursor.unsafe_count_content_pos() }; - let mut map_cursor = positionmap_mut_cursor_at_post(&mut self.map, post_pos as _, true); - // We call insert instead of replace_range here because the delete doesn't - // consume "space". 
- - let pre_pos = count_cursor_pre_len(&map_cursor); - map_cursor.insert(Del(len_here)); - - // The content might have later been deleted. - let entry = PositionalComponent { - pos: pre_pos, - len: len_here, - content_known: false, - tag: InsDelTag::Del, - }; - return Some((post_pos, entry)); - } // else continue. - } else { - // println!("Insert at {:?} (last order: {})", span, span_last_order); - // The operation was an insert operation, not a delete operation. - let mut rt_cursor = self.doc.get_unsafe_cursor_after(span_last_order, true); - - // Check how much we can tag in one go. - let len_here = usize::min(self.span.len, rt_cursor.offset); // usize? u32? blehh - debug_assert_ne!(len_here, 0); - // let base = span_last_order + 1 - len_here; // not needed. - // let base = u32::max(span.order, span_last_order + 1 - cursor.offset); - // dbg!(&cursor, len_here); - rt_cursor.offset -= len_here as usize; - - // Where in the final document are we? - let post_pos = unsafe { rt_cursor.unsafe_count_content_pos() }; - - // So this is also dirty. We need to skip any deletes, which have a size of 0. - let content_known = rt_cursor.get_raw_entry().is_activated(); - - - // There's two cases here. Either we're inserting something fresh, or we're - // cancelling out a delete we found earlier. - let entry = if content_known { - // post_pos + 1 is a hack. cursor_at_offset_pos returns the first cursor - // location which has the right position. 
- let mut map_cursor = positionmap_mut_cursor_at_post(&mut self.map, post_pos + 1, true); - map_cursor.inner.offset -= 1; - let pre_pos = count_cursor_pre_len(&map_cursor); - map_cursor.replace_range(Ins { len: len_here, content_known }); - PositionalComponent { - pos: pre_pos, - len: len_here, - content_known: true, - tag: InsDelTag::Ins - } - } else { - let mut map_cursor = positionmap_mut_cursor_at_post(&mut self.map, post_pos, true); - map_cursor.inner.roll_to_next_entry(); - map_cursor.delete(len_here as usize); - PositionalComponent { - pos: count_cursor_pre_len(&map_cursor), - len: len_here, - content_known: false, - tag: InsDelTag::Ins - } - }; - - // The content might have later been deleted. - - self.span.len -= len_here; - return Some((post_pos, entry)); - } - } - None + // // First check if the change was a delete or an insert. + // if let Ok(d) = self.doc.deletes.search_scanning_backwards_sparse(span_last_order, &mut self.deletes_idx) { + // // Its a delete. We need to try to undelete the item, unless the item was deleted + // // multiple times (in which case, it stays deleted for now). + // let base = usize::max(self.span.start, d.0); + // let del_span_size = span_last_order + 1 - base; // TODO: Clean me up + // debug_assert!(del_span_size > 0); + // + // // d_offset -= span_last_order - base; // equivalent to d_offset -= undelete_here - 1; + // + // // Ok, undelete here. An earlier version of this code iterated *forwards* amongst + // // the deleted span. This worked correctly and was slightly simpler, but it was a + // // confusing API to use and test because delete changes in particular were sometimes + // // arbitrarily reordered. + // let last_del_target = d.1.start + (span_last_order - d.0); + // + // // I'm also going to limit what we visit each iteration by the size of the visited + // // item in the range tree. 
For performance I could hold off looking this up until + // // we've got the go ahead from marked_deletes, but given how rare double deletes + // // are, this is fine. + // + // let rt_cursor = self.doc.get_unsafe_cursor_after(last_del_target, true); + // // Cap the number of items to undelete each iteration based on the span in content_tree. + // let entry = rt_cursor.get_raw_entry(); + // debug_assert!(entry.is_deactivated()); + // let first_del_target = usize::max(entry.lv, last_del_target + 1 - del_span_size); + // + // let (allowed, first_del_target) = self.marked_deletes.mark_range(&self.doc.double_deletes, last_del_target, first_del_target); + // let len_here = last_del_target + 1 - first_del_target; + // // println!("Delete from {} to {}", first_del_target, last_del_target); + // self.span.len -= len_here; + // + // if allowed { + // // let len_here = len_here.min((-entry.len) as u32 - rt_cursor.offset as u32); + // let post_pos = unsafe { rt_cursor.unsafe_count_content_pos() }; + // let mut map_cursor = positionmap_mut_cursor_at_post(&mut self.map, post_pos as _, true); + // // We call insert instead of replace_range here because the delete doesn't + // // consume "space". + // + // let pre_pos = count_cursor_pre_len(&map_cursor); + // map_cursor.insert(Del(len_here)); + // + // // The content might have later been deleted. + // let entry = PositionalComponent { + // pos: pre_pos, + // len: len_here, + // content_known: false, + // tag: InsDelTag::Del, + // }; + // return Some((post_pos, entry)); + // } // else continue. + // } else { + // // println!("Insert at {:?} (last order: {})", span, span_last_order); + // // The operation was an insert operation, not a delete operation. + // let mut rt_cursor = self.doc.get_unsafe_cursor_after(span_last_order, true); + // + // // Check how much we can tag in one go. + // let len_here = usize::min(self.span.len, rt_cursor.offset); // usize? u32? 
blehh + // debug_assert_ne!(len_here, 0); + // // let base = span_last_order + 1 - len_here; // not needed. + // // let base = u32::max(span.order, span_last_order + 1 - cursor.offset); + // // dbg!(&cursor, len_here); + // rt_cursor.offset -= len_here as usize; + // + // // Where in the final document are we? + // let post_pos = unsafe { rt_cursor.unsafe_count_content_pos() }; + // + // // So this is also dirty. We need to skip any deletes, which have a size of 0. + // let content_known = rt_cursor.get_raw_entry().is_activated(); + // + // + // // There's two cases here. Either we're inserting something fresh, or we're + // // cancelling out a delete we found earlier. + // let entry = if content_known { + // // post_pos + 1 is a hack. cursor_at_offset_pos returns the first cursor + // // location which has the right position. + // let mut map_cursor = positionmap_mut_cursor_at_post(&mut self.map, post_pos + 1, true); + // map_cursor.inner.offset -= 1; + // let pre_pos = count_cursor_pre_len(&map_cursor); + // map_cursor.replace_range(Ins { len: len_here, content_known }); + // PositionalComponent { + // pos: pre_pos, + // len: len_here, + // content_known: true, + // tag: InsDelTag::Ins + // } + // } else { + // let mut map_cursor = positionmap_mut_cursor_at_post(&mut self.map, post_pos, true); + // map_cursor.inner.roll_to_next_entry(); + // map_cursor.delete(len_here as usize); + // PositionalComponent { + // pos: count_cursor_pre_len(&map_cursor), + // len: len_here, + // content_known: false, + // tag: InsDelTag::Ins + // } + // }; + // + // // The content might have later been deleted. + // + // self.span.len -= len_here; + // return Some((post_pos, entry)); + // } + // } + // None + todo!() } } impl<'a> PatchIter<'a> { // TODO: Consider swapping these two new() functions around as new_since_order is more useful. 
fn new(doc: &'a ListCRDT, span: TimeSpan) -> Self { - let mut iter = PatchIter { - doc, - span, - map: ContentTreeRaw::new(), - deletes_idx: doc.deletes.len().wrapping_sub(1), - marked_deletes: DoubleDeleteVisitor::new(), - }; - iter.map.insert_at_start(Retain(doc.range_tree.content_len() as _)); - - iter + // let mut iter = PatchIter { + // doc, + // span, + // map: ContentTreeRaw::new(), + // deletes_idx: doc.deletes.len().wrapping_sub(1), + // marked_deletes: DoubleDeleteVisitor::new(), + // }; + // iter.map.insert_at_start(Retain(doc.range_tree.content_len() as _)); + // + // iter + todo!() } fn new_since_order(doc: &'a ListCRDT, base_order: LV) -> Self { @@ -568,9 +569,10 @@ mod test { use rle::AppendRle; use crate::list::{ListCRDT, ROOT_LV}; - use crate::list::positional::*; use crate::list::ot::positionmap::*; use crate::list::ot::traversal::*; + use crate::list::positional::*; + use crate::list::TraversalComponent::*; use crate::test_helpers::make_random_change; // use crate::list::external_txn::{RemoteTxn, RemoteId}; diff --git a/crates/diamond-types-crdt/src/list/span.rs b/crates/diamond-types-crdt/src/list/span.rs index 8d4e698..154c520 100644 --- a/crates/diamond-types-crdt/src/list/span.rs +++ b/crates/diamond-types-crdt/src/list/span.rs @@ -1,12 +1,11 @@ use std::fmt::{Debug, DebugStruct, Formatter}; use rle::{HasLength, MergableSpan, Searchable, SplitableSpanHelpers}; -use content_tree::ContentLength; -use content_tree::Toggleable; use crate::list::{ROOT_LV, LV}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; +use crate::ost::content_tree::Content; /// This is exposed for diamond-wasm's vis output. The internal fields here should not be considered /// part of the public API and are not to be relied on. 
@@ -48,6 +47,10 @@ impl YjsSpan { pub fn contains(&self, time: LV) -> bool { self.lv <= time && time < self.lv + self.len.abs() as LV } + + pub fn is_activated(&self) -> bool { + self.len > 0 + } } impl HasLength for YjsSpan { @@ -115,37 +118,54 @@ impl Searchable for YjsSpan { } } -impl ContentLength for YjsSpan { - #[inline(always)] +// impl ContentLength for YjsSpan { +// #[inline(always)] +// fn content_len(&self) -> usize { +// self.len.max(0) as usize +// } +// +// fn content_len_at_offset(&self, offset: usize) -> usize { +// // let mut e = *self; +// // e.truncate(offset); +// // e.content_len() +// self.len.clamp(0, offset as isize) as usize +// } +// } +// +// impl Toggleable for YjsSpan { +// fn is_activated(&self) -> bool { +// self.len > 0 +// } +// +// fn mark_activated(&mut self) { +// debug_assert!(self.len < 0); +// self.len = -self.len; +// } +// +// fn mark_deactivated(&mut self) { +// debug_assert!(self.len > 0); +// self.len = -self.len +// } +// } + +impl Content for YjsSpan { fn content_len(&self) -> usize { self.len.max(0) as usize } - fn content_len_at_offset(&self, offset: usize) -> usize { - // let mut e = *self; - // e.truncate(offset); - // e.content_len() - self.len.clamp(0, offset as isize) as usize + fn exists(&self) -> bool { + self.len != 0 } -} -impl Toggleable for YjsSpan { - fn is_activated(&self) -> bool { + fn takes_up_space(&self) -> bool { self.len > 0 } - fn mark_activated(&mut self) { - debug_assert!(self.len < 0); - self.len = -self.len; - } - - fn mark_deactivated(&mut self) { - debug_assert!(self.len > 0); - self.len = -self.len + fn none() -> Self { + Self::default() } } - #[derive(Debug)] struct RootTime; diff --git a/crates/diamond-types-crdt/src/list/stats.rs b/crates/diamond-types-crdt/src/list/stats.rs new file mode 100644 index 0000000..f6a925e --- /dev/null +++ b/crates/diamond-types-crdt/src/list/stats.rs @@ -0,0 +1,60 @@ +#[cfg(feature = "stats")] +use std::cell::RefCell; + +#[cfg(feature = "stats")] 
+thread_local! { + static CACHE_HITS: RefCell = RefCell::default(); + static CACHE_MISSES: RefCell = RefCell::default(); + static AS: RefCell = RefCell::default(); + static BS: RefCell = RefCell::default(); + static CS: RefCell = RefCell::default(); +} + +pub(crate) fn cache_hit() { + #[cfg(feature = "stats")] { + let old_val = CACHE_HITS.take(); + CACHE_HITS.set(old_val + 1); + } +} + +pub(crate) fn cache_miss() { + #[cfg(feature = "stats")] { + let old_val = CACHE_MISSES.take(); + CACHE_MISSES.set(old_val + 1); + } +} + +pub(crate) fn marker_a() { + #[cfg(feature = "stats")] { + let old_val = AS.take(); + AS.set(old_val + 1); + } +} +pub(crate) fn marker_b() { + #[cfg(feature = "stats")] { + let old_val = BS.take(); + BS.set(old_val + 1); + } +} +pub(crate) fn marker_c() { + #[cfg(feature = "stats")] { + let old_val = CS.take(); + CS.set(old_val + 1); + } +} + +/// Returns (cache hits, cache misses). +pub fn take_stats() -> (usize, usize) { + #[cfg(feature = "stats")] { + let (a, b, c) = (AS.take(), BS.take(), CS.take()); + if a != 0 || b != 0 || c != 0 { + println!("A: {a} / B: {b} / C: {c}"); + } + + (CACHE_HITS.take(), CACHE_MISSES.take()) + } + + #[cfg(not(feature = "stats"))] { + (0, 0) + } +} \ No newline at end of file diff --git a/crates/diamond-types-crdt/src/list/time/docpatchiter.rs b/crates/diamond-types-crdt/src/list/time/docpatchiter.rs index a8c4c40..a1078ae 100644 --- a/crates/diamond-types-crdt/src/list/time/docpatchiter.rs +++ b/crates/diamond-types-crdt/src/list/time/docpatchiter.rs @@ -7,7 +7,7 @@ use crate::list::{ListCRDT, LV}; use crate::list::positional::PositionalComponent; use crate::list::time::patchiter::{ListPatchItem, ListPatchIter}; use crate::list::time::txn_trace::OptimizedTxnsIter; -use crate::list::time::positionmap::PositionMap; +// use crate::list::time::positionmap::PositionMap; /// This is similar to PositionalOp, but where positional ops can be applied in sequence, when /// applying a walk like this the components need to 
be interpreted from the perspective of the @@ -29,256 +29,256 @@ impl PositionalOpWalk { } } -impl ListCRDT { - pub fn iter_original_patches(&self) -> OrigPatchesIter { - OrigPatchesIter::new(self) - } -} - -/// An iterator over original insert positions - which tells us *where* each insert and delete -/// happened in the document, at the time when that edit happened. This code would all be much -/// cleaner and simpler using coroutines. -#[derive(Debug)] -pub struct OrigPatchesIter<'a> { - txn_iter: OptimizedTxnsIter<'a>, - map: PositionMap, - - // TODO: Consider / try to lower this to a tighter reference. - list: &'a ListCRDT, - /// Inside a txn we iterate over each rle patch with this. - current_item: ListPatchItem, - current_inner: Option >, // extra space to work around intellij-rust bug -} - -impl<'a> OrigPatchesIter<'a> { - fn new(list: &'a ListCRDT) -> Self { - Self { - txn_iter: list.txns.txn_spanning_tree_iter(), - map: PositionMap::new_void(list), - list, - current_item: Default::default(), - current_inner: None, - } - } - - fn next_inner(&mut self) -> Option { - if let Some(current_inner) = &mut self.current_inner { - if let Some(op_item) = current_inner.next() { - return Some(op_item) - } - } - - // current_inner is either empty or None. Iterate to the next txn. - let walk = self.txn_iter.next()?; - - for range in walk.retreat { - for op in self.list.patch_iter_in_range(range) { - self.map.retreat_all_by_range(self.list, op); - } - } - - for range in walk.advance_rev.into_iter().rev() { - for op in self.list.patch_iter_in_range_rev(range) { - self.map.advance_all_by_range(self.list, op); - } - } - - debug_assert!(!walk.consume.is_empty()); - let mut inner = self.list.patch_iter_in_range(walk.consume); - let next = inner.next(); - debug_assert!(next.is_some()); // The walk cannot be empty. 
- - self.current_inner = Some(inner); - return next; - } - - fn fill_current_item(&mut self) -> Option<()> { // Option instead of bool so we can use try - if self.current_item.range.is_empty() { - if let Some(item) = self.next_inner() { - debug_assert!(!item.is_empty()); - self.current_item = item; - } else { return None; } - } - Some(()) - } - - pub(crate) fn next_patch_with_content(&mut self) -> Option<(Range, PositionalComponent, Option)> { - self.fill_current_item()?; - - let consumed_start = self.current_item.range.start; - let (c, str) = self.map.advance_and_consume_with_content(self.list, &mut self.current_item); - Some((consumed_start .. consumed_start + c.len, c, str)) - } - - pub(crate) fn into_patch(mut self) -> PositionalOpWalk { - let mut result = PositionalOpWalk::new(); - while let Some((range, component, str)) = self.next_patch_with_content() { - result.origin_order.push_rle(range); - result.components.push(component); - if let Some(str) = str { - result.content.push_str(&str); - } - } - - result - } -} - -impl<'a> Iterator for OrigPatchesIter<'a> { - type Item = (Range, PositionalComponent); - - fn next(&mut self) -> Option { - self.fill_current_item()?; - - let consumed_start = self.current_item.range.start; - let result = self.map.advance_and_consume(self.list, &mut self.current_item); - Some((consumed_start .. 
consumed_start + result.len, result)) - } -} - -impl<'a> From> for PositionalOpWalk { - fn from(iter: OrigPatchesIter<'a>) -> Self { - iter.into_patch() - } -} - -#[cfg(test)] -mod test { - use std::ops::Range; - use crate::list::{ListCRDT, LV, PositionalComponent}; - use smallvec::{smallvec, SmallVec}; - use rle::{AppendRle, MergeableIterator}; - use crate::list::external_txn::{RemoteCRDTOp, RemoteId, RemoteTxn}; - use crate::list::positional::InsDelTag::*; - use crate::list::time::docpatchiter::PositionalOpWalk; - use crate::list::time::patchiter::ListPatchItem; - - fn assert_patches_matches(doc: &ListCRDT, expected: &PositionalOpWalk) { - let actual: PositionalOpWalk = doc.iter_original_patches().into(); - assert_eq!(expected, &actual); - - // Also check we get the same thing if we don't ask for content. - let expected_c = expected.components.iter().cloned().map(|mut c| { - c.content_known = false; - c - }).merge_spans(); - - let mut from: SmallVec, 1> = smallvec![]; - let actual_c = doc.iter_original_patches().map(|(origin, c)| { - from.push_rle(origin); - c - }).merge_spans(); - - // dbg!(expected_c.collect::>()); - // dbg!(actual_c.collect::>()); - assert!(actual_c.eq(expected_c)); - assert_eq!(from, expected.origin_order); - } - - #[test] - fn patch_smoke() { - let mut doc = ListCRDT::new(); - doc.get_or_create_agent_id("seph"); - doc.local_insert(0, 0, "hi there"); - doc.local_delete(0, 2, 3); // hiere - - assert!(doc.patch_iter().eq([ - ListPatchItem { - range: 0..8, - op_type: Ins, - target_start: 0, - }, - ListPatchItem { - range: 8..11, - op_type: Del, - target_start: 2, - }, - ])); - - let expected = PositionalOpWalk { - components: smallvec![ - PositionalComponent {pos: 0, len: 2, content_known: true, tag: Ins}, - PositionalComponent {pos: 2, len: 3, content_known: false, tag: Ins}, - PositionalComponent {pos: 5, len: 3, content_known: true, tag: Ins}, - - PositionalComponent {pos: 2, len: 3, content_known: false, tag: Del}, - ], - origin_order: 
smallvec![0..11], - content: "hiere".into(), - }; - - assert_patches_matches(&doc, &expected); - } - - #[test] - fn concurrent_deletes() { - let mut doc = ListCRDT::new(); - doc.get_or_create_agent_id("seph"); - doc.local_insert(0, 0, "xxx"); - - // Ok now two users concurrently delete. - doc.apply_remote_txn(&RemoteTxn { - id: RemoteId { agent: "a".into(), seq: 0 }, - parents: smallvec![RemoteId { agent: "seph".into(), seq: 2 }], - ops: smallvec![RemoteCRDTOp::Del { - id: RemoteId { agent: "seph".into(), seq: 0 }, - len: 3 - }], - ins_content: "".into(), - }); - - doc.apply_remote_txn(&RemoteTxn { - id: RemoteId { agent: "b".into(), seq: 0 }, - parents: smallvec![RemoteId { agent: "seph".into(), seq: 2 }], - ops: smallvec![RemoteCRDTOp::Del { - id: RemoteId { agent: "seph".into(), seq: 0 }, - len: 3 - }], - ins_content: "".into(), - }); - - let expected = PositionalOpWalk { - components: smallvec![ - PositionalComponent { pos: 0, len: 3, content_known: false, tag: Ins }, - PositionalComponent { pos: 0, len: 3, content_known: false, tag: Del }, - PositionalComponent { pos: 0, len: 3, content_known: false, tag: Del }, - ], - origin_order: smallvec![0..9], // Disentangling this is the job of the reader. 
- content: "".into(), - }; - - assert_patches_matches(&doc, &expected); - } - - #[test] - fn forwards_backwards() { - let mut doc = ListCRDT::new(); - doc.get_or_create_agent_id("a"); - doc.local_insert(0, 0, "aa"); - doc.local_insert(0, 1, "bb"); // abba - - doc.apply_remote_txn(&RemoteTxn { - id: RemoteId { agent: "b".into(), seq: 0 }, - parents: smallvec![RemoteId { agent: "a".into(), seq: 2 }], - ops: smallvec![RemoteCRDTOp::Del { - id: RemoteId { agent: "a".into(), seq: 1 }, // delete the last a - len: 1 - }], - ins_content: "".into(), - }); // abb - doc.check(true); - - let expected = PositionalOpWalk { - components: smallvec![ - PositionalComponent { pos: 0, len: 1, content_known: true, tag: Ins }, - PositionalComponent { pos: 1, len: 1, content_known: false, tag: Ins }, - PositionalComponent { pos: 1, len: 2, content_known: true, tag: Ins }, - PositionalComponent { pos: 2, len: 1, content_known: false, tag: Del }, - ], - origin_order: smallvec![0..5], - content: "abb".into(), - }; - - assert_patches_matches(&doc, &expected); - } -} \ No newline at end of file +// impl ListCRDT { +// pub fn iter_original_patches(&self) -> OrigPatchesIter { +// OrigPatchesIter::new(self) +// } +// } + +// /// An iterator over original insert positions - which tells us *where* each insert and delete +// /// happened in the document, at the time when that edit happened. This code would all be much +// /// cleaner and simpler using coroutines. +// #[derive(Debug)] +// pub struct OrigPatchesIter<'a> { +// txn_iter: OptimizedTxnsIter<'a>, +// map: PositionMap, +// +// // TODO: Consider / try to lower this to a tighter reference. +// list: &'a ListCRDT, +// /// Inside a txn we iterate over each rle patch with this. 
+// current_item: ListPatchItem, +// current_inner: Option >, // extra space to work around intellij-rust bug +// } +// +// impl<'a> OrigPatchesIter<'a> { +// fn new(list: &'a ListCRDT) -> Self { +// Self { +// txn_iter: list.txns.txn_spanning_tree_iter(), +// map: PositionMap::new_void(list), +// list, +// current_item: Default::default(), +// current_inner: None, +// } +// } +// +// fn next_inner(&mut self) -> Option { +// if let Some(current_inner) = &mut self.current_inner { +// if let Some(op_item) = current_inner.next() { +// return Some(op_item) +// } +// } +// +// // current_inner is either empty or None. Iterate to the next txn. +// let walk = self.txn_iter.next()?; +// +// for range in walk.retreat { +// for op in self.list.patch_iter_in_range(range) { +// self.map.retreat_all_by_range(self.list, op); +// } +// } +// +// for range in walk.advance_rev.into_iter().rev() { +// for op in self.list.patch_iter_in_range_rev(range) { +// self.map.advance_all_by_range(self.list, op); +// } +// } +// +// debug_assert!(!walk.consume.is_empty()); +// let mut inner = self.list.patch_iter_in_range(walk.consume); +// let next = inner.next(); +// debug_assert!(next.is_some()); // The walk cannot be empty. +// +// self.current_inner = Some(inner); +// return next; +// } +// +// fn fill_current_item(&mut self) -> Option<()> { // Option instead of bool so we can use try +// if self.current_item.range.is_empty() { +// if let Some(item) = self.next_inner() { +// debug_assert!(!item.is_empty()); +// self.current_item = item; +// } else { return None; } +// } +// Some(()) +// } +// +// pub(crate) fn next_patch_with_content(&mut self) -> Option<(Range, PositionalComponent, Option)> { +// self.fill_current_item()?; +// +// let consumed_start = self.current_item.range.start; +// let (c, str) = self.map.advance_and_consume_with_content(self.list, &mut self.current_item); +// Some((consumed_start .. 
consumed_start + c.len, c, str)) +// } +// +// pub(crate) fn into_patch(mut self) -> PositionalOpWalk { +// let mut result = PositionalOpWalk::new(); +// while let Some((range, component, str)) = self.next_patch_with_content() { +// result.origin_order.push_rle(range); +// result.components.push(component); +// if let Some(str) = str { +// result.content.push_str(&str); +// } +// } +// +// result +// } +// } +// +// impl<'a> Iterator for OrigPatchesIter<'a> { +// type Item = (Range, PositionalComponent); +// +// fn next(&mut self) -> Option { +// self.fill_current_item()?; +// +// let consumed_start = self.current_item.range.start; +// let result = self.map.advance_and_consume(self.list, &mut self.current_item); +// Some((consumed_start .. consumed_start + result.len, result)) +// } +// } +// +// impl<'a> From> for PositionalOpWalk { +// fn from(iter: OrigPatchesIter<'a>) -> Self { +// iter.into_patch() +// } +// } +// +// #[cfg(test)] +// mod test { +// use std::ops::Range; +// use crate::list::{ListCRDT, LV, PositionalComponent}; +// use smallvec::{smallvec, SmallVec}; +// use rle::{AppendRle, MergeableIterator}; +// use crate::list::external_txn::{RemoteCRDTOp, RemoteId, RemoteTxn}; +// use crate::list::positional::InsDelTag::*; +// use crate::list::time::docpatchiter::PositionalOpWalk; +// use crate::list::time::patchiter::ListPatchItem; +// +// fn assert_patches_matches(doc: &ListCRDT, expected: &PositionalOpWalk) { +// let actual: PositionalOpWalk = doc.iter_original_patches().into(); +// assert_eq!(expected, &actual); +// +// // Also check we get the same thing if we don't ask for content. 
+// let expected_c = expected.components.iter().cloned().map(|mut c| { +// c.content_known = false; +// c +// }).merge_spans(); +// +// let mut from: SmallVec, 1> = smallvec![]; +// let actual_c = doc.iter_original_patches().map(|(origin, c)| { +// from.push_rle(origin); +// c +// }).merge_spans(); +// +// // dbg!(expected_c.collect::>()); +// // dbg!(actual_c.collect::>()); +// assert!(actual_c.eq(expected_c)); +// assert_eq!(from, expected.origin_order); +// } +// +// #[test] +// fn patch_smoke() { +// let mut doc = ListCRDT::new(); +// doc.get_or_create_agent_id("seph"); +// doc.local_insert(0, 0, "hi there"); +// doc.local_delete(0, 2, 3); // hiere +// +// assert!(doc.patch_iter().eq([ +// ListPatchItem { +// range: 0..8, +// op_type: Ins, +// target_start: 0, +// }, +// ListPatchItem { +// range: 8..11, +// op_type: Del, +// target_start: 2, +// }, +// ])); +// +// let expected = PositionalOpWalk { +// components: smallvec![ +// PositionalComponent {pos: 0, len: 2, content_known: true, tag: Ins}, +// PositionalComponent {pos: 2, len: 3, content_known: false, tag: Ins}, +// PositionalComponent {pos: 5, len: 3, content_known: true, tag: Ins}, +// +// PositionalComponent {pos: 2, len: 3, content_known: false, tag: Del}, +// ], +// origin_order: smallvec![0..11], +// content: "hiere".into(), +// }; +// +// assert_patches_matches(&doc, &expected); +// } +// +// #[test] +// fn concurrent_deletes() { +// let mut doc = ListCRDT::new(); +// doc.get_or_create_agent_id("seph"); +// doc.local_insert(0, 0, "xxx"); +// +// // Ok now two users concurrently delete. 
+// doc.apply_remote_txn(&RemoteTxn { +// id: RemoteId { agent: "a".into(), seq: 0 }, +// parents: smallvec![RemoteId { agent: "seph".into(), seq: 2 }], +// ops: smallvec![RemoteCRDTOp::Del { +// id: RemoteId { agent: "seph".into(), seq: 0 }, +// len: 3 +// }], +// ins_content: "".into(), +// }); +// +// doc.apply_remote_txn(&RemoteTxn { +// id: RemoteId { agent: "b".into(), seq: 0 }, +// parents: smallvec![RemoteId { agent: "seph".into(), seq: 2 }], +// ops: smallvec![RemoteCRDTOp::Del { +// id: RemoteId { agent: "seph".into(), seq: 0 }, +// len: 3 +// }], +// ins_content: "".into(), +// }); +// +// let expected = PositionalOpWalk { +// components: smallvec![ +// PositionalComponent { pos: 0, len: 3, content_known: false, tag: Ins }, +// PositionalComponent { pos: 0, len: 3, content_known: false, tag: Del }, +// PositionalComponent { pos: 0, len: 3, content_known: false, tag: Del }, +// ], +// origin_order: smallvec![0..9], // Disentangling this is the job of the reader. +// content: "".into(), +// }; +// +// assert_patches_matches(&doc, &expected); +// } +// +// #[test] +// fn forwards_backwards() { +// let mut doc = ListCRDT::new(); +// doc.get_or_create_agent_id("a"); +// doc.local_insert(0, 0, "aa"); +// doc.local_insert(0, 1, "bb"); // abba +// +// doc.apply_remote_txn(&RemoteTxn { +// id: RemoteId { agent: "b".into(), seq: 0 }, +// parents: smallvec![RemoteId { agent: "a".into(), seq: 2 }], +// ops: smallvec![RemoteCRDTOp::Del { +// id: RemoteId { agent: "a".into(), seq: 1 }, // delete the last a +// len: 1 +// }], +// ins_content: "".into(), +// }); // abb +// doc.check(true); +// +// let expected = PositionalOpWalk { +// components: smallvec![ +// PositionalComponent { pos: 0, len: 1, content_known: true, tag: Ins }, +// PositionalComponent { pos: 1, len: 1, content_known: false, tag: Ins }, +// PositionalComponent { pos: 1, len: 2, content_known: true, tag: Ins }, +// PositionalComponent { pos: 2, len: 1, content_known: false, tag: Del }, +// ], +// 
origin_order: smallvec![0..5], +// content: "abb".into(), +// }; +// +// assert_patches_matches(&doc, &expected); +// } +// } \ No newline at end of file diff --git a/crates/diamond-types-crdt/src/list/time/external_patches.rs b/crates/diamond-types-crdt/src/list/time/external_patches.rs index cf289f3..feabd90 100644 --- a/crates/diamond-types-crdt/src/list/time/external_patches.rs +++ b/crates/diamond-types-crdt/src/list/time/external_patches.rs @@ -3,16 +3,18 @@ /// we're leaning on is correct. use smallvec::{SmallVec, smallvec}; -use crate::list::{ListCRDT, LV, PositionalComponent}; +// use crate::list::{ListCRDT, LV, PositionalComponent}; use smartstring::alias::{String as SmartString}; use rle::{AppendRle, HasLength, MergableSpan, SplitableSpanHelpers}; use crate::list::external_txn::{RemoteId, RemoteIdSpan}; -use crate::list::time::docpatchiter::PositionalOpWalk; +// use crate::list::time::docpatchiter::PositionalOpWalk; use crate::list::txn::TxnSpan; use crate::rangeextra::OrderRange; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; +use crate::list::{ListCRDT, LV, PositionalComponent}; +use crate::list::time::docpatchiter::PositionalOpWalk; #[derive(Debug, Clone, Eq, PartialEq)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -103,8 +105,8 @@ impl RemotePositionalPatches { } impl ListCRDT { - pub fn as_external_patch(&self) -> RemotePositionalPatches { - let int_patch = self.iter_original_patches().into(); - RemotePositionalPatches::from_internal(int_patch, self) - } + // pub fn as_external_patch(&self) -> RemotePositionalPatches { + // let int_patch = self.iter_original_patches().into(); + // RemotePositionalPatches::from_internal(int_patch, self) + // } } \ No newline at end of file diff --git a/crates/diamond-types-crdt/src/list/time/history.rs b/crates/diamond-types-crdt/src/list/time/history.rs index 9ca222c..98d7854 100644 --- a/crates/diamond-types-crdt/src/list/time/history.rs +++ 
b/crates/diamond-types-crdt/src/list/time/history.rs @@ -251,7 +251,7 @@ pub mod test { fn assert_diff_eq(txns: &RleVec, a: &[LV], b: &[LV], expect_a: &[Range], expect_b: &[Range]) { let slow_result = txns.diff_slow(a, b); let fast_result = txns.diff(a, b); - dbg!(&slow_result, &fast_result); + // dbg!(&slow_result, &fast_result); assert_eq!(slow_result, fast_result); assert_eq!(slow_result.0.as_slice(), expect_a); diff --git a/crates/diamond-types-crdt/src/list/time/positionmap.rs b/crates/diamond-types-crdt/src/list/time/positionmap.rs index 85da373..1b7ba2a 100644 --- a/crates/diamond-types-crdt/src/list/time/positionmap.rs +++ b/crates/diamond-types-crdt/src/list/time/positionmap.rs @@ -1,17 +1,7 @@ -use std::iter::FromIterator; -use std::mem::take; use content_tree::*; use rle::{HasLength, MergableSpan, SplitableSpanHelpers}; -use smartstring::alias::{String as SmartString}; use crate::list::time::positionmap::MapTag::*; -use std::pin::Pin; -use crate::list::{DoubleDeleteList, ListCRDT, LV, RangeTree, ROOT_LV}; -use crate::list::positional::{InsDelTag, PositionalComponent}; -use std::ops::Range; -use crate::rangeextra::OrderRange; -use crate::list::time::patchiter::ListPatchItem; -use crate::list::branch::{branch_eq, branch_is_root}; /// There's 3 states a component in the position map can be in: /// - Not inserted (yet), @@ -117,721 +107,721 @@ impl ContentLength for PositionRun { type PositionMapInternal = ContentTreeRaw; -/// A PositionMap is a data structure used internally to track a set of positional changes to the -/// document as a result of inserts and deletes. -/// -/// This is used for a couple functions: -/// -/// - When generating positional patches (eg for saving), each patch names its position with respect -/// to the state of the document when that patch was created. 
To do this, we walk the document in -/// time order and iteratively update a PositionMap as we visit each change -/// - When loading positional patches from disk or over the network, sometimes we need to interpret -/// positional information based on a particular version. For this, we generate a PositionMap -/// at the requested version (branch) and then use that to translate the incoming patch's position -/// information. -/// -/// This data structure *should* also be used to generate and process OT changes, though they work -/// slightly differently in general. -#[derive(Debug, Eq)] -pub(crate) struct PositionMap { - /// Helpers to map from Order -> raw positions -> position at the current point in time - pub(crate) map: Pin>, - // order_to_raw_map: OrderToRawInsertMap<'a>, - - // There's two ways we could handle double deletes: - // 1. Use a double delete list. Have the map simply store whether or not an item was deleted - // at all, and if something is deleted multiple times, mark as such in double_deletes. - // 2. Have map store the number of times each item has been deleted. This would be better if - // double deletes were common, but they're vanishingly rare in practice. - double_deletes: DoubleDeleteList, -} - -// The double delete list will sometimes end up with empty entries. This is fine in practice, but -// it does mean we unfortunately need an explicit PartialEq function. (This is only really called -// from tests anyway). 
-impl PartialEq for PositionMap { - fn eq(&self, other: &Self) -> bool { - self.map == other.map - && self.double_deletes.iter_merged().filter(|e| e.1.excess_deletes > 0) - .eq(other.double_deletes.iter_merged().filter(|e| e.1.excess_deletes > 0)) - } -} - -const PARANOID_CHECKING: bool = false; - -impl PositionMap { - pub(super) fn new_void(list: &ListCRDT) -> Self { - let mut map = PositionMapInternal::new(); - - let total_post_len = list.range_tree.offset_len(); - // let (order_to_raw_map, total_post_len) = OrderToRawInsertMap::new(&list.range_tree); - // TODO: This is something we should cache somewhere. - if total_post_len > 0 { - map.push(PositionRun::new_void(total_post_len)); - } - - Self { map, double_deletes: DoubleDeleteList::new() } - } - - pub(super) fn new_upstream(list: &ListCRDT) -> Self { - let mut map = PositionMapInternal::new(); - - let total_post_len = list.range_tree.offset_len(); - if total_post_len > 0 { - let total_content_len = list.range_tree.content_len(); - // let (order_to_raw_map, total_post_len) = OrderToRawInsertMap::new(&list.range_tree); - // TODO: This is something we should cache somewhere. - map.push(PositionRun::new_upstream(total_post_len, total_content_len)); - } - - Self { - map, - // TODO: Eww gross! Refactor to avoid this allocation. 
- double_deletes: list.double_deletes.clone() - } - } - - fn new_at_version_from_start(list: &ListCRDT, branch: &[LV]) -> Self { - let mut result = Self::new_void(list); - if branch != &[ROOT_LV] { - let changes = list.txns.diff(&[ROOT_LV], branch).1; - - for range in changes.iter().rev() { - let patches = list.patch_iter_in_range(range.clone()); - for patch in patches { - result.advance_all_by_range(list, patch); - } - } - } - - result - } - - fn new_at_version_from_end(list: &ListCRDT, branch: &[LV]) -> Self { - let mut result = Self::new_upstream(list); - - if !branch_eq(branch, list.frontier.as_slice()) { - let (changes, nil) = list.txns.diff(&list.frontier, branch); - debug_assert!(nil.is_empty()); - - for range in changes.iter() { - let patches = list.patch_iter_in_range_rev(range.clone()); - for patch in patches { - result.retreat_all_by_range(list, patch); - } - } - } - - result - } - - pub(crate) fn new_at_version(list: &ListCRDT, branch: &[LV]) -> Self { - // There's two strategies here: We could start at the start of time and walk forward, or we - // could start at the current version and walk backward. Walking backward will be much more - // common in practice, but either approach will generate an identical result. - - if branch_is_root(branch) { return Self::new_void(list); } - - let sum: LV = branch.iter().sum(); - - let start_work = sum; - let end_work = (list.get_next_lv() - 1) * branch.len() - sum; - - if PARANOID_CHECKING { - // We should end up with identical results regardless of whether we start from the start - // or end. 
- let a = Self::new_at_version_from_start(list, branch); - let b = Self::new_at_version_from_end(list, branch); - - // dbg!(list.txns.diff(&branch, &[ROOT_TIME])); - // dbg!(list.txns.diff(&branch, &list.frontier)); - // list.debug_print_segments(); - // list.debug_print_del(); - - if a != b { - list.check(true); - dbg!(&list.txns); - dbg!(&branch, &list.frontier); - dbg!(list.txns.diff(&branch, &[ROOT_LV])); - dbg!(list.txns.diff(&branch, &list.frontier)); - list.debug_print_segments(); - dbg!(&a.map); - dbg!(&b.map); - - dbg!(&a.double_deletes); - dbg!(&b.double_deletes); - } - assert_eq!(a, b); - return a; - } - - if start_work < end_work { Self::new_at_version_from_start(list, branch) } - else { Self::new_at_version_from_end(list, branch) } - } - - pub(super) fn order_to_raw(&self, list: &ListCRDT, order: LV) -> (InsDelTag, Range) { - let cursor = list.get_cursor_before(order); - let base = cursor.count_offset_pos() as LV; - - let e = cursor.get_raw_entry(); - let tag = if e.is_activated() { InsDelTag::Ins } else { InsDelTag::Del }; - (tag, base..(base + e.order_len() - cursor.offset as LV)) - } - - pub(super) fn order_to_raw_and_content_len(&self, list: &ListCRDT, order: LV) -> (InsDelTag, Range, Option) { - // This is a modified version of order_to_raw, above. I'm not just reusing the same code - // because of expected perf, but TODO: Reuse code more! 
:p - let cursor = list.get_cursor_before(order); - let Pair(base, content_pos) = unsafe { cursor.count_pos() }; - debug_assert_eq!(base, cursor.count_offset_pos() as LV); - - let e = cursor.get_raw_entry(); - let range = base..(base + e.order_len() - cursor.offset as LV); - - if e.is_activated() { - (InsDelTag::Ins, range, Some(content_pos)) - } else { - (InsDelTag::Del, range, None) - } - } - - pub(crate) fn content_len(&self) -> usize { - self.map.content_len() - } - - pub(crate) fn list_cursor_at_content_pos<'a>(&self, list: &'a ListCRDT, pos: usize) -> (<&'a RangeTree as Cursors>::Cursor, usize) { - let map_cursor = self.map.cursor_at_content_pos(pos, false); - self.map_to_list_cursor(map_cursor, list, false) - } - - pub(crate) fn right_origin_at(&self, list: &ListCRDT, pos: usize) -> LV { - // The behaviour of the right_origin marker is unfortunately complicated here. We want to - // mark as right origin: - // - The next item after pos - // - Regardless of whether or not its been deleted - // - But skipping anything that hasn't been inserted yet (NotInsertedYet items). - // - // If we reach the end of the document, the right origin is set to ROOT_TIME. - - // Note it would also be valid to also skip all deleted items here. That would result in - // incompatibly different CRDT semantics. - - // dbg!(&self.map); - // println!("{:?}", &self.map); - - // We need stick_end: true here so we don't skip deleted items. - let mut map_cursor = self.map.cursor_at_content_pos(pos, true); - - // .. But we still need to scan past NotInsertedYet stuff. - loop { - // dbg!(&map_cursor); - if let Some(e) = map_cursor.try_get_raw_entry() { - // println!("Scanning {:?}", &e); - - if e.len() == map_cursor.offset || e.tag == NotInsertedYet { - // Go to next. - } else if e.tag == Upstream && e.content_len > 0 && e.content_len == map_cursor.offset { - // This cursor *might* point to the end of this chunk of upstream content. We - // have to look in the range tree in the doc to tell. 
If it does, we'll roll - // next so we can check if the subsequent entry is NotInsertedYet. (So we can - // skip it). - - // This is a little unfortunate, as we're duplicating logic from - // map_to_list_cursor. - - // The starting offset position of this entry. - - // TODO: This is pretty gross. Find a way to clean this up. - map_cursor.offset = 0; - let start_offset_pos = map_cursor.count_offset_pos(); - let doc_cursor = list.range_tree.cursor_at_offset_pos(start_offset_pos, true); - let doc_content_pos = doc_cursor.count_content_pos() + e.content_len; - - let c1 = list.range_tree.cursor_at_offset_pos(start_offset_pos + e.len(), true); - let c2 = list.range_tree.cursor_at_content_pos(doc_content_pos, true); - - if c1 != c2 { - return unsafe { c2.unsafe_get_item() }.unwrap_or(ROOT_LV) - // map_cursor.offset = e.content_len; - // break; - } // Otherwise, if they're equal. Roll next. - } else { break; } - - // This replicates cursor.roll_to_next(). - if map_cursor.next_entry() { - continue; - } else { return ROOT_LV; } - } else { - // The cursor is at the end of the map. Origin right will be ROOT. - return ROOT_LV; - } - } - - let list_cursor = self.map_to_list_cursor(map_cursor, list, true).0; - unsafe { list_cursor.unsafe_get_item() }.unwrap_or(ROOT_LV) - } - - fn map_to_list_cursor<'a>(&self, mut map_cursor: Cursor, list: &'a ListCRDT, stick_end: bool) -> (<&'a RangeTree as Cursors>::Cursor, usize) { - // The max span is used when deleting items. Something thats been inserted can be deleted, - // and also something thats been - let e = map_cursor.get_raw_entry(); - let max_span = e.content_len - map_cursor.offset; - - // If we're in an upstream section the local offset is actually a content offset, and its - // meaningless here. - - // TODO: This could be optimized via a special method in content-tree in one pass, rather - // than traversing down the tree (to make the cursor) and then immediately walking back up - // again. 
- - // TODO: All this logic feels pretty contrived. Once I'm correct, clean me up. - let tag_is_upstream = e.tag == Upstream; - let content_offset = if tag_is_upstream { - take(&mut map_cursor.offset) - } else { 0 }; - - let offset_pos = map_cursor.count_offset_pos(); - let mut doc_cursor = list.range_tree.cursor_at_offset_pos(offset_pos, false); - - // If the item is Upstream, we need to skip any deleted items at this location in the range - // tree. - if content_offset > 0 || (tag_is_upstream && !stick_end) { - let content_pos = doc_cursor.count_content_pos() + content_offset; - // dbg!(offset_pos, content_offset, (content_pos, doc_cursor.count_content_pos(), content_offset)); - doc_cursor = list.range_tree.cursor_at_content_pos(content_pos, stick_end); - } - // dbg!(&doc_cursor); - // dbg!(doc_cursor.get_raw_entry()); - - // doc_cursor.get_raw_entry().at_offset(doc_cursor.offset) - // unsafe { doc_cursor.get_item() }.unwrap() - (doc_cursor, max_span) - } - - // pub(crate) fn order_at_content_pos(&self, list: &ListCRDT, pos: usize, stick_end: bool) -> Time { - // let cursor = self.list_cursor_at_content_pos(list, pos, stick_end).0; - // // cursor.get_raw_entry().at_offset(cursor.offset) - // unsafe { cursor.get_item() }.unwrap() - // // unsafe { cursor.get_item() }.unwrap_or(ROOT_TIME) - // } - - // pub(crate) fn content_pos_to_order(&self, list: &ListCRDT, pos: usize) -> Order { - // // TODO: This could be optimized via a special method in content-tree. 
- // let cursor = self.map.cursor_at_content_pos(pos, true); - // let offset_pos = cursor.count_offset_pos(); - // - // let doc_cursor = list.range_tree.cursor_at_offset_pos(offset_pos, false); - // doc_cursor.get_raw_entry().at_offset(cursor.offset) - // } - - pub(super) fn retreat_all_by_range(&mut self, list: &ListCRDT, patch: ListPatchItem) { - let mut target = patch.target_range(); - while !target.is_empty() { - let len = self.retreat_first_by_range(list, target.clone(), patch.op_type); - target.start += len; - debug_assert!(target.start <= target.end); - } - } - - pub(super) fn retreat_first_by_range(&mut self, list: &ListCRDT, target: Range, op_type: InsDelTag) -> LV { - // dbg!(&target, self.map.iter().collect::>()); - // This variant is only actually used in one place - which makes things easier. - - let (final_tag, raw_range) = self.order_to_raw(list, target.start); - let raw_start = raw_range.start; - let mut len = LV::min(raw_range.order_len(), target.order_len()); - - let mut cursor = self.map.mut_cursor_at_offset_pos(raw_start as usize, false); - if op_type == InsDelTag::Del { - let e = cursor.get_raw_entry(); - len = len.min(e.final_len - cursor.offset); - debug_assert!(len > 0); - - // Usually there's no double-deletes, but we need to check just in case. - let allowed_len = self.double_deletes.find_zero_range(target.start, len); - if allowed_len == 0 { // Unlikely. There's a double delete here. - let len_dd_here = self.double_deletes.decrement_delete_range(target.start, len); - debug_assert!(len_dd_here > 0); - - // What a minefield. O_o - return len_dd_here; - } else { - len = allowed_len; - } - } - - debug_assert!(len >= 1); - // So the challenge here is we need to un-merge upstream position runs into their - // constituent parts. We can't use replace_range for this because that calls truncate(). 
- // let mut len_remaining = len; - // while len_remaining > 0 { - // - // } - if op_type == InsDelTag::Ins && final_tag == InsDelTag::Del { - // The easy case. The entry in PositionRun will be Inserted. - debug_assert_eq!(cursor.get_raw_entry().tag, Inserted); - cursor.replace_range(PositionRun::new_void(len as _)); - } else { - // We have merged everything into Upstream. We need to pull it apart, which is bleh. - debug_assert_eq!(cursor.get_raw_entry().tag, Upstream); - debug_assert_eq!(op_type, final_tag); // Ins/Ins or Del/Del. - // TODO: Is this a safe assumption? Let the fuzzer verify it. - assert!(cursor.get_raw_entry().len() - cursor.offset >= len as usize); - - let (new_entry, eat_content) = match op_type { - InsDelTag::Ins => (PositionRun::new_void(len as _), len as usize), - InsDelTag::Del => (PositionRun::new_ins(len as _), 0), - }; - - let current_entry = cursor.get_raw_entry(); - - // So we want to replace the cursor entry with [start, X, end]. The trick is figuring - // out where we split the content in the current entry. - if cursor.offset == 0 { - // dbg!(&new_entry, current_entry); - // Cursor is at the start of this entry. This variant is easier. - let remainder = PositionRun::new_upstream( - current_entry.final_len - new_entry.final_len, - current_entry.content_len - eat_content - ); - // dbg!(remainder); - if remainder.final_len > 0 { - cursor.replace_entry(&[new_entry, remainder]); - } else { - cursor.replace_entry(&[new_entry]); - } - } else { - // TODO: Accidentally this whole thing. Clean me up buttercup! - - // The cursor isn't at the start. We need to figure out how much to slice off. - // Basically, we need to know how much content is in cursor.offset. - - // TODO(opt): A cursor comparator function would make this much more performant. 
- let entry_start_offset = raw_start as usize - cursor.offset; - let start_cursor = list.range_tree.cursor_at_offset_pos(entry_start_offset, true); - let start_content = start_cursor.count_content_pos(); - - // TODO: Reuse the cursor from order_to_raw(). - let midpoint_cursor = list.range_tree.cursor_at_offset_pos(raw_start as _, true); - let midpoint_content = midpoint_cursor.count_content_pos(); - - let content_chomp = midpoint_content - start_content; - - let start = PositionRun::new_upstream(cursor.offset, content_chomp); - - let remainder = PositionRun::new_upstream( - current_entry.final_len - new_entry.final_len - cursor.offset, - current_entry.content_len - eat_content - content_chomp - ); - - if remainder.final_len > 0 { - cursor.replace_entry(&[start, new_entry, remainder]); - } else { - cursor.replace_entry(&[start, new_entry]); - } - } - } - len - } - - #[inline] - pub(super) fn advance_all_by_range(&mut self, list: &ListCRDT, mut patch: ListPatchItem) { - while !patch.range.is_empty() { - let (final_tag, raw_range) = self.order_to_raw(list, patch.target_start); - self.advance_first_by_range_internal(raw_range, final_tag, &mut patch, true); - debug_assert!(patch.target_start <= patch.range.start); - } - } - - pub(super) fn advance_and_consume(&mut self, list: &ListCRDT, patch: &mut ListPatchItem) -> PositionalComponent { - let (final_tag, raw_range) = self.order_to_raw(list, patch.target_start); - self.advance_first_by_range_internal(raw_range, final_tag, patch, false).unwrap() - } - - // TODO: This method could work taking in a content_builder parameter, but I have no idea how - // that impacts performance. Benchmark me! 
- // pub(super) fn advance_and_consume_with_content(&mut self, list: &ListCRDT, patch: &mut ListPatchItem, content_builder: &mut SmartString) -> PositionalComponent { - pub(super) fn advance_and_consume_with_content(&mut self, list: &ListCRDT, patch: &mut ListPatchItem) -> (PositionalComponent, Option) { - let (final_tag, raw_range, content_pos) = self.order_to_raw_and_content_len(list, patch.target_start); - let mut c = self.advance_first_by_range_internal(raw_range, final_tag, patch, false).unwrap(); - if let (Some(content_pos), Some(rope)) = (content_pos, &list.text_content) { - c.content_known = true; - let borrow = rope.borrow(); - let chars = borrow.slice_chars(content_pos as usize .. (content_pos + c.len) as usize); - (c, Some(SmartString::from_iter(chars))) - // content_builder.extend(chars.take(c.len as usize)); - } else { (c, None) } - } - - fn advance_first_by_range_internal(&mut self, raw_range: Range, final_tag: InsDelTag, patch: &mut ListPatchItem, handle_dd: bool) -> Option { - let target = patch.target_range(); - let op_type = patch.op_type; - - let raw_start = raw_range.start; - let mut len = LV::min(raw_range.order_len(), target.order_len()); - - let mut cursor = self.map.mut_cursor_at_offset_pos(raw_start as usize, false); - - if op_type == InsDelTag::Del { - // So the item will usually be in the Inserted state. If its in the Deleted - // state, we need to mark it as double-deleted. - let e = cursor.get_raw_entry(); - - if handle_dd { - // Handling double-deletes is only an issue while consuming. Never advancing. - len = len.min(e.final_len - cursor.offset); - debug_assert!(len > 0); - if e.tag == Upstream { // This can never happen while consuming. Only while advancing. - self.double_deletes.increment_delete_range(target.start, len); - patch.consume(len); - return None; - } - } else { - // When the insert was created, the content must exist in the document. - // TODO: Actually verify this assumption when integrating remote txns. 
- debug_assert_eq!(e.tag, Inserted); - } - } - - let content_pos = cursor.count_content_pos(); - // Life could be so simple... - // cursor.replace_range(PositionRun::new(op_type.into(), len as _)); - - // So there's kinda 3 different states - if final_tag == op_type { - // Transition into the Upstream state - let content_len: usize = if op_type == InsDelTag::Del { 0 } else { len as usize }; - cursor.replace_range(PositionRun::new_upstream(len as _, content_len)); - // Calling compress_node (in just this branch) improves performance by about 1%. - cursor.inner.compress_node(); - } else { - debug_assert_eq!(op_type, InsDelTag::Ins); - debug_assert_eq!(final_tag, InsDelTag::Del); - cursor.replace_range(PositionRun::new_ins(len as _)); - } - - debug_assert!(len > 0); - patch.consume(len); - Some(PositionalComponent { - pos: content_pos, - len, - content_known: false, - tag: op_type.into(), - }) - } - - /// Note this takes in the position as a raw position, because otherwise we can't distinguish - /// where an insert happened amidst a sea of deletes. - pub(crate) fn update_from_insert(&mut self, raw_pos: usize, len: usize) { - let mut cursor = self.map.mut_cursor_at_offset_pos(raw_pos, true); - let e = cursor.get_raw_entry(); - match e.tag { - NotInsertedYet | Inserted => { - cursor.insert(PositionRun::new_upstream(len, len)); - } - Upstream => { - // Just modify the entry in-place. 
- let new_entry = PositionRun::new_upstream( - e.final_len + len, - e.content_len + len - ); - cursor.replace_entry_simple(new_entry); - } - } - } - - pub(crate) fn update_from_delete(&mut self, content_pos: usize, mut len: usize) { - let mut cursor = self.map.mut_cursor_at_content_pos(content_pos, false); - debug_assert!(len > 0); - loop { - let e = cursor.get_raw_entry(); - let len_here = usize::min(len, e.content_len - cursor.inner.offset); - debug_assert!(len_here > 0); - len -= len_here; - match e.tag { - NotInsertedYet => panic!(), - Inserted => { - cursor.replace_range(PositionRun::new_upstream(len_here, 0)); - } - Upstream => { - let new_entry = PositionRun::new_upstream(e.final_len, e.content_len - len_here); - cursor.replace_entry_simple(new_entry); - } - } - - if len == 0 { break; } - - assert!(cursor.roll_to_next_entry()); - } - } - - pub(crate) fn check(&self) { - self.map.check(); - } - - pub(crate) fn check_void(&self) { - self.map.check(); - for item in self.map.raw_iter() { - assert_eq!(item.tag, MapTag::NotInsertedYet); - } - for d in self.double_deletes.iter() { - assert_eq!(d.1.excess_deletes, 0); - } - } - - pub(crate) fn check_upstream(&self, list: &ListCRDT) { - // dbg!(&self.map); - self.map.check(); - for item in self.map.raw_iter() { - assert_eq!(item.tag, MapTag::Upstream); - } - - // dbg!(self.double_deletes.iter_raw().collect::>()); - // dbg!(list.double_deletes.iter_raw().collect::>()); - assert!(self.double_deletes.iter_merged().eq(list.double_deletes.iter_merged())); - } -} - - -// #[derive(Debug)] -// pub(crate) struct OrderToRawInsertMap<'a>(Vec<(&'a RangeTreeLeaf, u32)>); +// /// A PositionMap is a data structure used internally to track a set of positional changes to the +// /// document as a result of inserts and deletes. 
+// /// +// /// This is used for a couple functions: +// /// +// /// - When generating positional patches (eg for saving), each patch names its position with respect +// /// to the state of the document when that patch was created. To do this, we walk the document in +// /// time order and iteratively update a PositionMap as we visit each change +// /// - When loading positional patches from disk or over the network, sometimes we need to interpret +// /// positional information based on a particular version. For this, we generate a PositionMap +// /// at the requested version (branch) and then use that to translate the incoming patch's position +// /// information. +// /// +// /// This data structure *should* also be used to generate and process OT changes, though they work +// /// slightly differently in general. +// #[derive(Debug, Eq)] +// pub(crate) struct PositionMap { +// /// Helpers to map from Order -> raw positions -> position at the current point in time +// pub(crate) map: Pin>, +// // order_to_raw_map: OrderToRawInsertMap<'a>, +// +// // There's two ways we could handle double deletes: +// // 1. Use a double delete list. Have the map simply store whether or not an item was deleted +// // at all, and if something is deleted multiple times, mark as such in double_deletes. +// // 2. Have map store the number of times each item has been deleted. This would be better if +// // double deletes were common, but they're vanishingly rare in practice. +// double_deletes: DoubleDeleteList, +// } +// +// // The double delete list will sometimes end up with empty entries. This is fine in practice, but +// // it does mean we unfortunately need an explicit PartialEq function. (This is only really called +// // from tests anyway). 
+// impl PartialEq for PositionMap { +// fn eq(&self, other: &Self) -> bool { +// self.map == other.map +// && self.double_deletes.iter_merged().filter(|e| e.1.excess_deletes > 0) +// .eq(other.double_deletes.iter_merged().filter(|e| e.1.excess_deletes > 0)) +// } +// } +// +// const PARANOID_CHECKING: bool = false; +// +// impl PositionMap { +// pub(super) fn new_void(list: &ListCRDT) -> Self { +// let mut map = PositionMapInternal::new(); +// +// let total_post_len = list.range_tree.offset_len(); +// // let (order_to_raw_map, total_post_len) = OrderToRawInsertMap::new(&list.range_tree); +// // TODO: This is something we should cache somewhere. +// if total_post_len > 0 { +// map.push(PositionRun::new_void(total_post_len)); +// } +// +// Self { map, double_deletes: DoubleDeleteList::new() } +// } // -// impl<'a> OrderToRawInsertMap<'a> { -// fn ord_refs(a: &RangeTreeLeaf, b: &RangeTreeLeaf) -> Ordering { -// let a_ptr = a as *const _; -// let b_ptr = b as *const _; +// pub(super) fn new_upstream(list: &ListCRDT) -> Self { +// let mut map = PositionMapInternal::new(); // -// if a_ptr == b_ptr { Ordering::Equal } -// else if a_ptr < b_ptr { Ordering::Less } -// else { Ordering::Greater } +// let total_post_len = list.range_tree.offset_len(); +// if total_post_len > 0 { +// let total_content_len = list.range_tree.content_len(); +// // let (order_to_raw_map, total_post_len) = OrderToRawInsertMap::new(&list.range_tree); +// // TODO: This is something we should cache somewhere. +// map.push(PositionRun::new_upstream(total_post_len, total_content_len)); +// } +// +// Self { +// map, +// // TODO: Eww gross! Refactor to avoid this allocation. 
+// double_deletes: list.double_deletes.clone() +// } // } // -// fn new(range_tree: &'a RangeTree) -> (Self, u32) { -// let mut nodes = Vec::new(); -// let mut insert_position = 0; +// fn new_at_version_from_start(list: &ListCRDT, branch: &[LV]) -> Self { +// let mut result = Self::new_void(list); +// if branch != &[ROOT_LV] { +// let changes = list.txns.diff(&[ROOT_LV], branch).1; // -// for node in range_tree.node_iter() { -// nodes.push((node, insert_position)); -// let len_here: u32 = node.as_slice().iter().map(|e| e.order_len()).sum(); -// insert_position += len_here; +// for range in changes.iter().rev() { +// let patches = list.patch_iter_in_range(range.clone()); +// for patch in patches { +// result.advance_all_by_range(list, patch); +// } +// } // } // -// nodes.sort_unstable_by(|a, b| { -// Self::ord_refs(a.0, b.0) -// }); +// result +// } +// +// fn new_at_version_from_end(list: &ListCRDT, branch: &[LV]) -> Self { +// let mut result = Self::new_upstream(list); // -// // dbg!(nodes.iter().map(|n| n.0 as *const _).collect::>()); +// if !branch_eq(branch, list.frontier.as_slice()) { +// let (changes, nil) = list.txns.diff(&list.frontier, branch); +// debug_assert!(nil.is_empty()); // -// (Self(nodes), insert_position) +// for range in changes.iter() { +// let patches = list.patch_iter_in_range_rev(range.clone()); +// for patch in patches { +// result.retreat_all_by_range(list, patch); +// } +// } +// } +// +// result // } // -// /// Returns the raw insert position (as if no deletes ever happened) of the requested item. The -// /// returned range always starts with the requested order and the end is the maximum range. 
-// fn order_to_raw(&self, doc: &ListCRDT, ins_order: Order) -> (InsDelTag, Range) { -// let marker = doc.marker_at(ins_order); +// pub(crate) fn new_at_version(list: &ListCRDT, branch: &[LV]) -> Self { +// // There's two strategies here: We could start at the start of time and walk forward, or we +// // could start at the current version and walk backward. Walking backward will be much more +// // common in practice, but either approach will generate an identical result. +// +// if branch_is_root(branch) { return Self::new_void(list); } +// +// let sum: LV = branch.iter().sum(); // -// let leaf = unsafe { marker.as_ref() }; -// if cfg!(debug_assertions) { -// // The requested item must be in the returned leaf. -// leaf.find(ins_order).unwrap(); +// let start_work = sum; +// let end_work = (list.get_next_lv() - 1) * branch.len() - sum; +// +// if PARANOID_CHECKING { +// // We should end up with identical results regardless of whether we start from the start +// // or end. +// let a = Self::new_at_version_from_start(list, branch); +// let b = Self::new_at_version_from_end(list, branch); +// +// // dbg!(list.txns.diff(&branch, &[ROOT_TIME])); +// // dbg!(list.txns.diff(&branch, &list.frontier)); +// // list.debug_print_segments(); +// // list.debug_print_del(); +// +// if a != b { +// list.check(true); +// dbg!(&list.txns); +// dbg!(&branch, &list.frontier); +// dbg!(list.txns.diff(&branch, &[ROOT_LV])); +// dbg!(list.txns.diff(&branch, &list.frontier)); +// list.debug_print_segments(); +// dbg!(&a.map); +// dbg!(&b.map); +// +// dbg!(&a.double_deletes); +// dbg!(&b.double_deletes); +// } +// assert_eq!(a, b); +// return a; // } // -// // TODO: Check if this is actually more efficient compared to a linear scan. 
-// let idx = self.0.binary_search_by(|elem| { -// Self::ord_refs(elem.0, leaf) -// }).unwrap(); +// if start_work < end_work { Self::new_at_version_from_start(list, branch) } +// else { Self::new_at_version_from_end(list, branch) } +// } +// +// pub(super) fn order_to_raw(&self, list: &ListCRDT, order: LV) -> (InsDelTag, Range) { +// let cursor = list.get_cursor_before(order); +// let base = cursor.count_offset_pos() as LV; +// +// let e = cursor.get_raw_entry(); +// let tag = if e.is_activated() { InsDelTag::Ins } else { InsDelTag::Del }; +// (tag, base..(base + e.order_len() - cursor.offset as LV)) +// } +// +// pub(super) fn order_to_raw_and_content_len(&self, list: &ListCRDT, order: LV) -> (InsDelTag, Range, Option) { +// // This is a modified version of order_to_raw, above. I'm not just reusing the same code +// // because of expected perf, but TODO: Reuse code more! :p +// let cursor = list.get_cursor_before(order); +// let Pair(base, content_pos) = unsafe { cursor.count_pos() }; +// debug_assert_eq!(base, cursor.count_offset_pos() as LV); // -// let mut start_position = self.0[idx].1; -// for e in leaf.as_slice() { -// if let Some(offset) = e.contains(ins_order) { -// let tag = if e.is_activated() { InsDelTag::Ins } else { InsDelTag::Del }; -// return (tag, (start_position + offset as u32)..(start_position + e.order_len())); +// let e = cursor.get_raw_entry(); +// let range = base..(base + e.order_len() - cursor.offset as LV); +// +// if e.is_activated() { +// (InsDelTag::Ins, range, Some(content_pos)) +// } else { +// (InsDelTag::Del, range, None) +// } +// } +// +// pub(crate) fn content_len(&self) -> usize { +// self.map.content_len() +// } +// +// pub(crate) fn list_cursor_at_content_pos<'a>(&self, list: &'a ListCRDT, pos: usize) -> (<&'a OldRangeTree as Cursors>::Cursor, usize) { +// let map_cursor = self.map.cursor_at_content_pos(pos, false); +// self.map_to_list_cursor(map_cursor, list, false) +// } +// +// pub(crate) fn right_origin_at(&self, list: 
&ListCRDT, pos: usize) -> LV { +// // The behaviour of the right_origin marker is unfortunately complicated here. We want to +// // mark as right origin: +// // - The next item after pos +// // - Regardless of whether or not it's been deleted +// // - But skipping anything that hasn't been inserted yet (NotInsertedYet items). +// // +// // If we reach the end of the document, the right origin is set to ROOT_LV. +// +// // Note it would also be valid to skip all deleted items here. That would result in +// // incompatibly different CRDT semantics. +// +// // dbg!(&self.map); +// // println!("{:?}", &self.map); +// +// // We need stick_end: true here so we don't skip deleted items. +// let mut map_cursor = self.map.cursor_at_content_pos(pos, true); +// +// // .. But we still need to scan past NotInsertedYet stuff. +// loop { +// // dbg!(&map_cursor); +// if let Some(e) = map_cursor.try_get_raw_entry() { +// // println!("Scanning {:?}", &e); +// +// if e.len() == map_cursor.offset || e.tag == NotInsertedYet { +// // Go to next. +// } else if e.tag == Upstream && e.content_len > 0 && e.content_len == map_cursor.offset { +// // This cursor *might* point to the end of this chunk of upstream content. We +// // have to look in the range tree in the doc to tell. If it does, we'll roll +// // next so we can check if the subsequent entry is NotInsertedYet. (So we can +// // skip it). +// +// // This is a little unfortunate, as we're duplicating logic from +// // map_to_list_cursor. +// +// // The starting offset position of this entry. +// +// // TODO: This is pretty gross. Find a way to clean this up. 
+// map_cursor.offset = 0; +// let start_offset_pos = map_cursor.count_offset_pos(); +// let doc_cursor = list.range_tree.cursor_at_offset_pos(start_offset_pos, true); +// let doc_content_pos = doc_cursor.count_content_pos() + e.content_len; +// +// let c1 = list.range_tree.cursor_at_offset_pos(start_offset_pos + e.len(), true); +// let c2 = list.range_tree.cursor_at_content_pos(doc_content_pos, true); +// +// if c1 != c2 { +// return unsafe { c2.unsafe_get_item() }.unwrap_or(ROOT_LV) +// // map_cursor.offset = e.content_len; +// // break; +// } // Otherwise, if they're equal. Roll next. +// } else { break; } +// +// // This replicates cursor.roll_to_next(). +// if map_cursor.next_entry() { +// continue; +// } else { return ROOT_LV; } // } else { -// start_position += e.order_len(); +// // The cursor is at the end of the map. Origin right will be ROOT. +// return ROOT_LV; // } // } // -// unreachable!("Marker tree is invalid"); +// let list_cursor = self.map_to_list_cursor(map_cursor, list, true).0; +// unsafe { list_cursor.unsafe_get_item() }.unwrap_or(ROOT_LV) +// } +// +// fn map_to_list_cursor<'a>(&self, mut map_cursor: Cursor, list: &'a ListCRDT, stick_end: bool) -> (<&'a OldRangeTree as Cursors>::Cursor, usize) { +// // The max span is used when deleting items. Something thats been inserted can be deleted, +// // and also something thats been +// let e = map_cursor.get_raw_entry(); +// let max_span = e.content_len - map_cursor.offset; +// +// // If we're in an upstream section the local offset is actually a content offset, and its +// // meaningless here. +// +// // TODO: This could be optimized via a special method in content-tree in one pass, rather +// // than traversing down the tree (to make the cursor) and then immediately walking back up +// // again. +// +// // TODO: All this logic feels pretty contrived. Once I'm correct, clean me up. 
+// let tag_is_upstream = e.tag == Upstream; +// let content_offset = if tag_is_upstream { +// take(&mut map_cursor.offset) +// } else { 0 }; +// +// let offset_pos = map_cursor.count_offset_pos(); +// let mut doc_cursor = list.range_tree.cursor_at_offset_pos(offset_pos, false); +// +// // If the item is Upstream, we need to skip any deleted items at this location in the range +// // tree. +// if content_offset > 0 || (tag_is_upstream && !stick_end) { +// let content_pos = doc_cursor.count_content_pos() + content_offset; +// // dbg!(offset_pos, content_offset, (content_pos, doc_cursor.count_content_pos(), content_offset)); +// doc_cursor = list.range_tree.cursor_at_content_pos(content_pos, stick_end); +// } +// // dbg!(&doc_cursor); +// // dbg!(doc_cursor.get_raw_entry()); +// +// // doc_cursor.get_raw_entry().at_offset(doc_cursor.offset) +// // unsafe { doc_cursor.get_item() }.unwrap() +// (doc_cursor, max_span) // } // -// // /// Same as raw_insert_order, but constrain the return value based on the length -// // fn raw_insert_order_limited(&self, doc: &ListCRDT, order: Order, max_len: Order) -> Range { -// // let mut result = self.raw_insert_order(list, order); -// // result.end = result.end.min(result.start + max_len); -// // result +// // pub(crate) fn order_at_content_pos(&self, list: &ListCRDT, pos: usize, stick_end: bool) -> Time { +// // let cursor = self.list_cursor_at_content_pos(list, pos, stick_end).0; +// // // cursor.get_raw_entry().at_offset(cursor.offset) +// // unsafe { cursor.get_item() }.unwrap() +// // // unsafe { cursor.get_item() }.unwrap_or(ROOT_TIME) // // } +// +// // pub(crate) fn content_pos_to_order(&self, list: &ListCRDT, pos: usize) -> Order { +// // // TODO: This could be optimized via a special method in content-tree. 
+// // let cursor = self.map.cursor_at_content_pos(pos, true); +// // let offset_pos = cursor.count_offset_pos(); +// // +// // let doc_cursor = list.range_tree.cursor_at_offset_pos(offset_pos, false); +// // doc_cursor.get_raw_entry().at_offset(cursor.offset) +// // } +// +// pub(super) fn retreat_all_by_range(&mut self, list: &ListCRDT, patch: ListPatchItem) { +// let mut target = patch.target_range(); +// while !target.is_empty() { +// let len = self.retreat_first_by_range(list, target.clone(), patch.op_type); +// target.start += len; +// debug_assert!(target.start <= target.end); +// } +// } +// +// pub(super) fn retreat_first_by_range(&mut self, list: &ListCRDT, target: Range, op_type: InsDelTag) -> LV { +// // dbg!(&target, self.map.iter().collect::>()); +// // This variant is only actually used in one place - which makes things easier. +// +// let (final_tag, raw_range) = self.order_to_raw(list, target.start); +// let raw_start = raw_range.start; +// let mut len = LV::min(raw_range.order_len(), target.order_len()); +// +// let mut cursor = self.map.mut_cursor_at_offset_pos(raw_start as usize, false); +// if op_type == InsDelTag::Del { +// let e = cursor.get_raw_entry(); +// len = len.min(e.final_len - cursor.offset); +// debug_assert!(len > 0); +// +// // Usually there's no double-deletes, but we need to check just in case. +// let allowed_len = self.double_deletes.find_zero_range(target.start, len); +// if allowed_len == 0 { // Unlikely. There's a double delete here. +// let len_dd_here = self.double_deletes.decrement_delete_range(target.start, len); +// debug_assert!(len_dd_here > 0); +// +// // What a minefield. O_o +// return len_dd_here; +// } else { +// len = allowed_len; +// } +// } +// +// debug_assert!(len >= 1); +// // So the challenge here is we need to un-merge upstream position runs into their +// // constituent parts. We can't use replace_range for this because that calls truncate(). 
+// // let mut len_remaining = len; +// // while len_remaining > 0 { +// // +// // } +// if op_type == InsDelTag::Ins && final_tag == InsDelTag::Del { +// // The easy case. The entry in PositionRun will be Inserted. +// debug_assert_eq!(cursor.get_raw_entry().tag, Inserted); +// cursor.replace_range(PositionRun::new_void(len as _)); +// } else { +// // We have merged everything into Upstream. We need to pull it apart, which is bleh. +// debug_assert_eq!(cursor.get_raw_entry().tag, Upstream); +// debug_assert_eq!(op_type, final_tag); // Ins/Ins or Del/Del. +// // TODO: Is this a safe assumption? Let the fuzzer verify it. +// assert!(cursor.get_raw_entry().len() - cursor.offset >= len as usize); +// +// let (new_entry, eat_content) = match op_type { +// InsDelTag::Ins => (PositionRun::new_void(len as _), len as usize), +// InsDelTag::Del => (PositionRun::new_ins(len as _), 0), +// }; +// +// let current_entry = cursor.get_raw_entry(); +// +// // So we want to replace the cursor entry with [start, X, end]. The trick is figuring +// // out where we split the content in the current entry. +// if cursor.offset == 0 { +// // dbg!(&new_entry, current_entry); +// // Cursor is at the start of this entry. This variant is easier. +// let remainder = PositionRun::new_upstream( +// current_entry.final_len - new_entry.final_len, +// current_entry.content_len - eat_content +// ); +// // dbg!(remainder); +// if remainder.final_len > 0 { +// cursor.replace_entry(&[new_entry, remainder]); +// } else { +// cursor.replace_entry(&[new_entry]); +// } +// } else { +// // TODO: Accidentally this whole thing. Clean me up buttercup! +// +// // The cursor isn't at the start. We need to figure out how much to slice off. +// // Basically, we need to know how much content is in cursor.offset. +// +// // TODO(opt): A cursor comparator function would make this much more performant. 
+// let entry_start_offset = raw_start as usize - cursor.offset; +// let start_cursor = list.range_tree.cursor_at_offset_pos(entry_start_offset, true); +// let start_content = start_cursor.count_content_pos(); +// +// // TODO: Reuse the cursor from order_to_raw(). +// let midpoint_cursor = list.range_tree.cursor_at_offset_pos(raw_start as _, true); +// let midpoint_content = midpoint_cursor.count_content_pos(); +// +// let content_chomp = midpoint_content - start_content; +// +// let start = PositionRun::new_upstream(cursor.offset, content_chomp); +// +// let remainder = PositionRun::new_upstream( +// current_entry.final_len - new_entry.final_len - cursor.offset, +// current_entry.content_len - eat_content - content_chomp +// ); +// +// if remainder.final_len > 0 { +// cursor.replace_entry(&[start, new_entry, remainder]); +// } else { +// cursor.replace_entry(&[start, new_entry]); +// } +// } +// } +// len +// } +// +// #[inline] +// pub(super) fn advance_all_by_range(&mut self, list: &ListCRDT, mut patch: ListPatchItem) { +// while !patch.range.is_empty() { +// let (final_tag, raw_range) = self.order_to_raw(list, patch.target_start); +// self.advance_first_by_range_internal(raw_range, final_tag, &mut patch, true); +// debug_assert!(patch.target_start <= patch.range.start); +// } +// } +// +// pub(super) fn advance_and_consume(&mut self, list: &ListCRDT, patch: &mut ListPatchItem) -> PositionalComponent { +// let (final_tag, raw_range) = self.order_to_raw(list, patch.target_start); +// self.advance_first_by_range_internal(raw_range, final_tag, patch, false).unwrap() +// } +// +// // TODO: This method could work taking in a content_builder parameter, but I have no idea how +// // that impacts performance. Benchmark me! 
+// // pub(super) fn advance_and_consume_with_content(&mut self, list: &ListCRDT, patch: &mut ListPatchItem, content_builder: &mut SmartString) -> PositionalComponent { +// pub(super) fn advance_and_consume_with_content(&mut self, list: &ListCRDT, patch: &mut ListPatchItem) -> (PositionalComponent, Option) { +// let (final_tag, raw_range, content_pos) = self.order_to_raw_and_content_len(list, patch.target_start); +// let mut c = self.advance_first_by_range_internal(raw_range, final_tag, patch, false).unwrap(); +// if let (Some(content_pos), Some(rope)) = (content_pos, &list.text_content) { +// c.content_known = true; +// let borrow = rope.borrow(); +// let chars = borrow.slice_chars(content_pos as usize .. (content_pos + c.len) as usize); +// (c, Some(SmartString::from_iter(chars))) +// // content_builder.extend(chars.take(c.len as usize)); +// } else { (c, None) } +// } +// +// fn advance_first_by_range_internal(&mut self, raw_range: Range, final_tag: InsDelTag, patch: &mut ListPatchItem, handle_dd: bool) -> Option { +// let target = patch.target_range(); +// let op_type = patch.op_type; +// +// let raw_start = raw_range.start; +// let mut len = LV::min(raw_range.order_len(), target.order_len()); +// +// let mut cursor = self.map.mut_cursor_at_offset_pos(raw_start as usize, false); +// +// if op_type == InsDelTag::Del { +// // So the item will usually be in the Inserted state. If it's in the Deleted +// // state, we need to mark it as double-deleted. +// let e = cursor.get_raw_entry(); +// +// if handle_dd { +// // Handling double-deletes is only an issue while advancing. Never consuming. +// len = len.min(e.final_len - cursor.offset); +// debug_assert!(len > 0); +// if e.tag == Upstream { // This can never happen while consuming. Only while advancing. +// self.double_deletes.increment_delete_range(target.start, len); +// patch.consume(len); +// return None; +// } +// } else { +// // When the insert was created, the content must exist in the document. 
+// // TODO: Actually verify this assumption when integrating remote txns. +// debug_assert_eq!(e.tag, Inserted); +// } +// } +// +// let content_pos = cursor.count_content_pos(); +// // Life could be so simple... +// // cursor.replace_range(PositionRun::new(op_type.into(), len as _)); +// +// // So there's kinda 3 different states +// if final_tag == op_type { +// // Transition into the Upstream state +// let content_len: usize = if op_type == InsDelTag::Del { 0 } else { len as usize }; +// cursor.replace_range(PositionRun::new_upstream(len as _, content_len)); +// // Calling compress_node (in just this branch) improves performance by about 1%. +// cursor.inner.compress_node(); +// } else { +// debug_assert_eq!(op_type, InsDelTag::Ins); +// debug_assert_eq!(final_tag, InsDelTag::Del); +// cursor.replace_range(PositionRun::new_ins(len as _)); +// } +// +// debug_assert!(len > 0); +// patch.consume(len); +// Some(PositionalComponent { +// pos: content_pos, +// len, +// content_known: false, +// tag: op_type.into(), +// }) +// } +// +// /// Note this takes in the position as a raw position, because otherwise we can't distinguish +// /// where an insert happened amidst a sea of deletes. +// pub(crate) fn update_from_insert(&mut self, raw_pos: usize, len: usize) { +// let mut cursor = self.map.mut_cursor_at_offset_pos(raw_pos, true); +// let e = cursor.get_raw_entry(); +// match e.tag { +// NotInsertedYet | Inserted => { +// cursor.insert(PositionRun::new_upstream(len, len)); +// } +// Upstream => { +// // Just modify the entry in-place. 
+// let new_entry = PositionRun::new_upstream( +// e.final_len + len, +// e.content_len + len +// ); +// cursor.replace_entry_simple(new_entry); +// } +// } +// } +// +// pub(crate) fn update_from_delete(&mut self, content_pos: usize, mut len: usize) { +// let mut cursor = self.map.mut_cursor_at_content_pos(content_pos, false); +// debug_assert!(len > 0); +// loop { +// let e = cursor.get_raw_entry(); +// let len_here = usize::min(len, e.content_len - cursor.inner.offset); +// debug_assert!(len_here > 0); +// len -= len_here; +// match e.tag { +// NotInsertedYet => panic!(), +// Inserted => { +// cursor.replace_range(PositionRun::new_upstream(len_here, 0)); +// } +// Upstream => { +// let new_entry = PositionRun::new_upstream(e.final_len, e.content_len - len_here); +// cursor.replace_entry_simple(new_entry); +// } +// } +// +// if len == 0 { break; } +// +// assert!(cursor.roll_to_next_entry()); +// } +// } +// +// pub(crate) fn check(&self) { +// self.map.check(); +// } +// +// pub(crate) fn check_void(&self) { +// self.map.check(); +// for item in self.map.raw_iter() { +// assert_eq!(item.tag, MapTag::NotInsertedYet); +// } +// for d in self.double_deletes.iter() { +// assert_eq!(d.1.excess_deletes, 0); +// } +// } +// +// pub(crate) fn check_upstream(&self, list: &ListCRDT) { +// // dbg!(&self.map); +// self.map.check(); +// for item in self.map.raw_iter() { +// assert_eq!(item.tag, MapTag::Upstream); +// } +// +// // dbg!(self.double_deletes.iter_raw().collect::>()); +// // dbg!(list.double_deletes.iter_raw().collect::>()); +// assert!(self.double_deletes.iter_merged().eq(list.double_deletes.iter_merged())); +// } // } - - - -#[cfg(test)] -mod test { - use rle::test_splitable_methods_valid; - use super::*; - use crate::test_helpers::*; - - #[test] - fn positionrun_is_splitablespan() { - test_splitable_methods_valid(PositionRun::new_void(5)); - test_splitable_methods_valid(PositionRun::new_ins(5)); - } - - fn check_doc(list: &ListCRDT) { - // We should be able 
to go forward from void to upstream. - let mut map = PositionMap::new_void(list); - for patch in list.patch_iter() { - // dbg!(&patch); - map.advance_all_by_range(list, patch); - } - // dbg!(&map); - map.check_upstream(list); - - // And go back from upstream to void, by iterating backwards through all changes. - let mut map = PositionMap::new_upstream(list); - for patch in list.patch_iter_rev() { - map.retreat_all_by_range(list, patch); - } - map.check_void(); - } - - #[test] - fn foo() { - let mut doc = ListCRDT::new(); - doc.get_or_create_agent_id("seph"); - doc.local_insert(0, 0, "aa"); - doc.local_insert(0, 0, "bb"); - // doc.local_delete(0, 2, 3); - // doc.local_insert(0, 0, "hi there"); - // doc.local_delete(0, 2, 3); - - let map = PositionMap::new_at_version(&doc, &[1]); - dbg!(&map); - } - - #[test] - fn fuzz_walk_single_docs() { - let iter = RandomSingleDocIter::new(2, 10).take(1000); - for doc in iter { - check_doc(&doc); - } - } - - #[test] - fn fuzz_walk_multi_docs() { - for i in 0..30 { - let docs = gen_complex_docs(i, 20); - check_doc(&docs[0]); // I could do this every iteration of each_complex, but its slow. - } - } - - #[test] - #[ignore] - fn fuzz_walk_multi_docs_forever() { - for i in 0.. { - if i % 1000 == 0 { println!("{}", i); } - // println!("{}", i); - let docs = gen_complex_docs(i, 20); - check_doc(&docs[0]); // I could do this every iteration of each_complex, but its slow. 
- } - } -} \ No newline at end of file +// +// +// // #[derive(Debug)] +// // pub(crate) struct OrderToRawInsertMap<'a>(Vec<(&'a RangeTreeLeaf, u32)>); +// // +// // impl<'a> OrderToRawInsertMap<'a> { +// // fn ord_refs(a: &RangeTreeLeaf, b: &RangeTreeLeaf) -> Ordering { +// // let a_ptr = a as *const _; +// // let b_ptr = b as *const _; +// // +// // if a_ptr == b_ptr { Ordering::Equal } +// // else if a_ptr < b_ptr { Ordering::Less } +// // else { Ordering::Greater } +// // } +// // +// // fn new(range_tree: &'a RangeTree) -> (Self, u32) { +// // let mut nodes = Vec::new(); +// // let mut insert_position = 0; +// // +// // for node in range_tree.node_iter() { +// // nodes.push((node, insert_position)); +// // let len_here: u32 = node.as_slice().iter().map(|e| e.order_len()).sum(); +// // insert_position += len_here; +// // } +// // +// // nodes.sort_unstable_by(|a, b| { +// // Self::ord_refs(a.0, b.0) +// // }); +// // +// // // dbg!(nodes.iter().map(|n| n.0 as *const _).collect::>()); +// // +// // (Self(nodes), insert_position) +// // } +// // +// // /// Returns the raw insert position (as if no deletes ever happened) of the requested item. The +// // /// returned range always starts with the requested order and the end is the maximum range. +// // fn order_to_raw(&self, doc: &ListCRDT, ins_order: Order) -> (InsDelTag, Range) { +// // let marker = doc.marker_at(ins_order); +// // +// // let leaf = unsafe { marker.as_ref() }; +// // if cfg!(debug_assertions) { +// // // The requested item must be in the returned leaf. +// // leaf.find(ins_order).unwrap(); +// // } +// // +// // // TODO: Check if this is actually more efficient compared to a linear scan. 
+// // let idx = self.0.binary_search_by(|elem| { +// // Self::ord_refs(elem.0, leaf) +// // }).unwrap(); +// // +// // let mut start_position = self.0[idx].1; +// // for e in leaf.as_slice() { +// // if let Some(offset) = e.contains(ins_order) { +// // let tag = if e.is_activated() { InsDelTag::Ins } else { InsDelTag::Del }; +// // return (tag, (start_position + offset as u32)..(start_position + e.order_len())); +// // } else { +// // start_position += e.order_len(); +// // } +// // } +// // +// // unreachable!("Marker tree is invalid"); +// // } +// // +// // // /// Same as raw_insert_order, but constrain the return value based on the length +// // // fn raw_insert_order_limited(&self, doc: &ListCRDT, order: Order, max_len: Order) -> Range { +// // // let mut result = self.raw_insert_order(list, order); +// // // result.end = result.end.min(result.start + max_len); +// // // result +// // // } +// // } +// +// +// +// #[cfg(test)] +// mod test { +// use rle::test_splitable_methods_valid; +// use super::*; +// use crate::test_helpers::*; +// +// #[test] +// fn positionrun_is_splitablespan() { +// test_splitable_methods_valid(PositionRun::new_void(5)); +// test_splitable_methods_valid(PositionRun::new_ins(5)); +// } +// +// fn check_doc(list: &ListCRDT) { +// // We should be able to go forward from void to upstream. +// let mut map = PositionMap::new_void(list); +// for patch in list.patch_iter() { +// // dbg!(&patch); +// map.advance_all_by_range(list, patch); +// } +// // dbg!(&map); +// map.check_upstream(list); +// +// // And go back from upstream to void, by iterating backwards through all changes. 
+// let mut map = PositionMap::new_upstream(list); +// for patch in list.patch_iter_rev() { +// map.retreat_all_by_range(list, patch); +// } +// map.check_void(); +// } +// +// #[test] +// fn foo() { +// let mut doc = ListCRDT::new(); +// doc.get_or_create_agent_id("seph"); +// doc.local_insert(0, 0, "aa"); +// doc.local_insert(0, 0, "bb"); +// // doc.local_delete(0, 2, 3); +// // doc.local_insert(0, 0, "hi there"); +// // doc.local_delete(0, 2, 3); +// +// let map = PositionMap::new_at_version(&doc, &[1]); +// dbg!(&map); +// } +// +// #[test] +// fn fuzz_walk_single_docs() { +// let iter = RandomSingleDocIter::new(2, 10).take(1000); +// for doc in iter { +// check_doc(&doc); +// } +// } +// +// #[test] +// fn fuzz_walk_multi_docs() { +// for i in 0..30 { +// let docs = gen_complex_docs(i, 20); +// check_doc(&docs[0]); // I could do this every iteration of each_complex, but its slow. +// } +// } +// +// #[test] +// #[ignore] +// fn fuzz_walk_multi_docs_forever() { +// for i in 0.. { +// if i % 1000 == 0 { println!("{}", i); } +// // println!("{}", i); +// let docs = gen_complex_docs(i, 20); +// check_doc(&docs[0]); // I could do this every iteration of each_complex, but its slow. 
+// } +// } +// } \ No newline at end of file diff --git a/crates/diamond-types-crdt/src/ost/content_tree.rs b/crates/diamond-types-crdt/src/ost/content_tree.rs new file mode 100644 index 0000000..13b96e5 --- /dev/null +++ b/crates/diamond-types-crdt/src/ost/content_tree.rs @@ -0,0 +1,2097 @@ +use std::cmp::Ordering; +use std::fmt::Debug; +use std::mem::{replace, take}; +use std::ops::{Index, IndexMut, Range}; +use rle::{HasLength, MergableSpan, MergeableIterator, Searchable, SplitableSpan, SplitableSpanHelpers}; +use crate::dtrange::DTRange; +use crate::list::stats::{cache_hit, cache_miss, marker_a, marker_b, marker_c}; +use crate::ost::{LEAF_CHILDREN, LeafIdx, LenPair, LenUpdate, NODE_CHILDREN, NodeIdx, remove_from_array, remove_from_array_fill, update_by}; + +pub(crate) trait Content: SplitableSpan + MergableSpan + Copy + HasLength { + /// The length of the item. If IS_CUR then this is the "current length". Otherwise, this is the + /// end length of the item. + fn content_len(&self) -> usize; + fn content_len_cur(&self) -> usize { self.content_len() } + // fn content_len_end(&self) -> usize { self.content_len::() } + fn content_len_pair(&self) -> LenPair { + self.content_len() + // LenPair { + // cur: self.content_len_cur(), + // end: self.content_len_end(), + // } + } + + /// The default item must "not exist". + fn exists(&self) -> bool; + fn takes_up_space(&self) -> bool; + // fn current_len(&self) -> usize; + + // split_at_current_len() ? 
+ + // fn underwater() -> Self; + + fn none() -> Self; +} + +trait LeafMap { + fn notify(&mut self, range: DTRange, leaf_idx: LeafIdx); +} + +pub(crate) trait FlushUpdate: Default { + // fn flush_delta_len(&mut self, leaf_idx: LeafIdx, delta: LenUpdate) { + fn flush(&self, tree: &mut ContentTree, leaf_idx: LeafIdx); + + #[inline] + fn flush_and_clear(&mut self, tree: &mut ContentTree, leaf_idx: LeafIdx) { + self.flush(tree, leaf_idx); + *self = Self::default(); + } +} + +impl FlushUpdate for () { + fn flush(&self, _tree: &mut ContentTree, _leaf_idx: LeafIdx) {} +} +impl FlushUpdate for LenUpdate { + fn flush(&self, tree: &mut ContentTree, leaf_idx: LeafIdx) { + tree.flush_delta_len(leaf_idx, *self); + } +} + +#[derive(Debug, Clone)] +pub(crate) struct ContentTree { + leaves: Vec>, + nodes: Vec, + + /// The number of internal nodes between the root and the leaves. This is initialized to 0, + /// indicating we start with no internal nodes and just a single leaf. + height: usize, + + /// The root node. If height == 0, this is a leaf (and has value 0). Otherwise, this is an index + /// into the nodes vec pointing to the node representing the root. + root: usize, + total_len: LenPair, + + // cursor: ContentCursor, + /// There is a cached cursor currently at some content position, with a held delta update. + // cursor: Cell>, + // cursor: Option<(LenPair, MutContentCursor)>, + cursor: Option<(Option, ContentCursor, LenUpdate)>, + + // Linked lists. + // free_leaf_pool_head: LeafIdx, + // free_node_pool_head: NodeIdx, +} + +#[derive(Debug, Clone, Copy)] +pub(crate) struct ContentCursor { + // The item pointed to by the cursor should still be in the CPU's L1 cache. I could cache some + // properties of the cursor's leaf item here, but I think it wouldn't improve performance - + // since we wouldn't be saving any memory loads anyway. + pub leaf_idx: LeafIdx, + pub elem_idx: usize, + + /// Offset into the item. 
+ pub offset: usize, +} + +// Wouldn't need this impl if LeafIdx defaulted to 0... +impl Default for ContentCursor { + fn default() -> Self { + ContentCursor { + leaf_idx: LeafIdx(0), + elem_idx: 0, + offset: 0, + } + } +} + +pub struct DeltaCursor(pub ContentCursor, pub LenUpdate); + +// /// Same as a cursor, but with a cached delta object. This delta must be flushed whenever the +// /// cursor changes leaf node. +// #[derive(Debug, Clone, Copy)] +// pub(crate) struct MutContentCursor { +// inner: ContentCursor, +// delta: LenUpdate, +// } +// +// impl From for MutContentCursor { +// fn from(inner: ContentCursor) -> Self { +// MutContentCursor { +// inner, +// delta: Default::default(), +// } +// } +// } +// +// impl MutContentCursor { +// pub fn clone_immutable(&self) -> ContentCursor { +// self.inner +// } +// } + +// impl From for ContentCursor { +// fn from(cursor: MutCursor) -> Self { +// ContentCursor { +// leaf_idx: cursor.leaf_idx, +// elem_idx: cursor.elem_idx, +// offset: cursor.offset, +// } +// } +// } + +// const EMPTY_LEAF_DATA: (LV, LeafData) = (usize::MAX, LeafData::InsPtr(NonNull::dangling())); + +const NODE_SPLIT_POINT: usize = NODE_CHILDREN / 2; +// const LEAF_CHILDREN: usize = LEAF_SIZE - 1; +const LEAF_SPLIT_POINT: usize = LEAF_CHILDREN / 2; + +#[derive(Debug, Clone)] +pub struct ContentLeaf { + /// Each child object knows its own bounds. + /// + /// It may turn out to be more efficient to split each field in children into its own sub-array. + children: [V; LEAF_CHILDREN], + + // /// (start of range, data). Start == usize::MAX for empty entries. + // children: [(LV, V); LEAF_CHILDREN], + + // upper_bound: LV, + next_leaf: LeafIdx, + parent: NodeIdx, +} + +#[derive(Debug, Clone)] +pub struct ContentNode { + /// The index is either an index into the internal nodes or leaf nodes depending on the height. + /// + /// Children have an index of usize::MAX if the slot is unused. 
+ child_indexes: [usize; NODE_CHILDREN], + + /// Child entries point to either another node or a leaf. We disambiguate using the height. + /// The named LV is the first LV of the child data. + child_width: [LenPair; NODE_CHILDREN], + parent: NodeIdx, +} + +// fn initial_root_leaf() -> ContentLeaf { +fn initial_root_leaf() -> ContentLeaf { + // The tree is initialized with an "underwater" item covering the range. + // let mut children = [V::default(); LEAF_CHILDREN]; + // children[0] = V::underwater(); + + ContentLeaf { + children: [V::none(); LEAF_CHILDREN], + next_leaf: LeafIdx(usize::MAX), + parent: NodeIdx(usize::MAX), // This node won't exist yet - but thats ok. + } +} + +// /// A node child specifies the width of the recursive children and an index in the data +// /// structure. +// type ContentNodeChild = (LenPair, usize); +// +// const EMPTY_NODE_CHILD: ContentNodeChild = (LenPair { cur: 0, end: 0 }, usize::MAX); + +const EMPTY_LEN_PAIR: LenPair = 0;//LenPair { cur: 0, end: 0 }; + +impl ContentLeaf { + fn is_full(&self) -> bool { + self.children.last().unwrap().exists() + } + + #[inline(always)] + fn has_space(&self, space_wanted: usize) -> bool { + if space_wanted == 0 { return true; } + !self.children[LEAF_CHILDREN - space_wanted].exists() + } + + fn is_last(&self) -> bool { !self.next_leaf.exists() } + + fn next<'a>(&self, leaves: &'a [ContentLeaf]) -> Option<&'a ContentLeaf> { + if self.is_last() { None } + else { Some(&leaves[self.next_leaf.0]) } + } + + fn next_mut<'a>(&self, leaves: &'a mut [ContentLeaf]) -> Option<&'a mut ContentLeaf> { + if self.is_last() { None } + else { Some(&mut leaves[self.next_leaf.0]) } + } + + fn remove_children(&mut self, del_range: Range) { + remove_from_array_fill(&mut self.children, del_range, V::none()); + } +} + +impl ContentNode { + fn is_full(&self) -> bool { + *self.child_indexes.last().unwrap() != usize::MAX + } + + fn remove_children(&mut self, del_range: Range) { + remove_from_array_fill(&mut self.child_indexes, 
del_range.clone(), usize::MAX); + remove_from_array(&mut self.child_width, del_range.clone()); + } + + /// Returns the (local) index of the named child. Aborts if the child is not in this node. + fn idx_of_child(&self, child: usize) -> usize { + self.child_indexes + .iter() + .position(|i| *i == child) + .unwrap() + } +} + +impl Default for ContentTree { + fn default() -> Self { + Self::new() + } +} + +impl Index for ContentTree { + type Output = ContentLeaf; + + fn index(&self, index: LeafIdx) -> &Self::Output { + &self.leaves[index.0] + } +} +impl IndexMut for ContentTree { + fn index_mut(&mut self, index: LeafIdx) -> &mut Self::Output { + &mut self.leaves[index.0] + } +} +impl Index for ContentTree { + type Output = ContentNode; + + fn index(&self, index: NodeIdx) -> &Self::Output { + &self.nodes[index.0] + } +} +impl IndexMut for ContentTree { + fn index_mut(&mut self, index: NodeIdx) -> &mut Self::Output { + &mut self.nodes[index.0] + } +} + +#[inline] +fn inc_delta_update(delta_len: &mut LenUpdate, e: &V) { + delta_len.cur += e.content_len_cur() as isize; + // delta_len.end += e.content_len_end() as isize; +} +#[inline] +fn dec_delta_update(delta_len: &mut LenUpdate, e: &V) { + delta_len.cur -= e.content_len_cur() as isize; + // delta_len.end -= e.content_len_end() as isize; +} + +impl ContentCursor { + + /// Move a cursor at the end of an item to the next item. + /// + /// Returns false if there is no next item. 
+    ///
+    /// The return value is `(has_next, left_leaf)`. `left_leaf` is `Some(old_leaf)` only when
+    /// the cursor crossed into the following leaf, so the caller can flush any pending delta
+    /// against the leaf it just left.
+    pub(crate) fn roll_next_item<V: Content>(&mut self, tree: &ContentTree<V>) -> (bool, Option<LeafIdx>) {
+        let leaf = &tree[self.leaf_idx];
+        if !leaf.children[self.elem_idx].exists() {
+            debug_assert!(!leaf.next_leaf.exists());
+            return (false, None);
+        }
+
+        // Still inside the current item: nothing to do.
+        if self.offset < leaf.children[self.elem_idx].len() { return (true, None); }
+
+        self.next_entry(tree)
+    }
+
+    /// Move the cursor to offset 0 of the following entry, crossing into the next leaf if the
+    /// current leaf has no further entries.
+    ///
+    /// Returns `(has_next, left_leaf)` with the same meaning as in `roll_next_item`. When
+    /// there is no next entry the cursor is left unchanged.
+    pub(crate) fn next_entry<V: Content>(&mut self, tree: &ContentTree<V>) -> (bool, Option<LeafIdx>) {
+        let leaf = &tree[self.leaf_idx];
+
+        if !leaf.children[self.elem_idx].exists() {
+            debug_assert!(!leaf.next_leaf.exists());
+            return (false, None);
+        }
+
+        let next_elem_idx = self.elem_idx + 1;
+
+        if next_elem_idx >= leaf.children.len() || !leaf.children[next_elem_idx].exists() {
+            // Go to the next node.
+            let old_leaf = self.leaf_idx;
+            // let old_delta = take(&mut self.delta);
+
+            if !leaf.next_leaf.exists() {
+                return (false, None);
+            }
+
+            // Point to the start of the next leaf.
+            self.leaf_idx = leaf.next_leaf;
+            self.offset = 0;
+            self.elem_idx = 0;
+
+            // flush.flush_and_clear(tree, old_leaf);
+            // self.flush_delta_len(old_leaf, old_delta);
+
+            (self.leaf_idx.exists(), Some(old_leaf))
+        } else {
+            self.elem_idx = next_elem_idx;
+            self.offset = 0;
+            (true, None)
+        }
+    }
+
+
+    /// Advance the cursor by one position inside the current entry. In debug builds this
+    /// asserts that the cursor is not already at the end of the entry.
+    pub(crate) fn inc_offset<V: Content>(&mut self, tree: &ContentTree<V>) {
+        if cfg!(debug_assertions) {
+            let leaf = &tree[self.leaf_idx];
+            let e = &leaf.children[self.elem_idx];
+            // assert!(e.takes_up_space::());
+            assert!(self.offset < e.len());
+        }
+
+        self.offset += 1;
+    }
+
+    /// Apply `delta` to the tree, starting at this cursor's leaf.
+    fn flush_delta<V: Content>(&self, tree: &mut ContentTree<V>, delta: LenUpdate) {
+        tree.flush_delta_len(self.leaf_idx, delta);
+    }
+
+    /// Returns the entry under the cursor and the cursor's offset in it, or `None` if the
+    /// cursor's slot is out of range or empty.
+    pub fn try_get_item<'a, V: Content>(&self, tree: &'a ContentTree<V>) -> Option<(&'a V, usize)> {
+        let leaf = &tree[self.leaf_idx];
+        if self.elem_idx >= leaf.children.len() || !leaf.children[self.elem_idx].exists() {
+            None
+        } else {
+            Some((&leaf.children[self.elem_idx], self.offset))
+        }
+    }
+
+    /// Unchecked variant of `try_get_item`: indexes the slot directly (and so panics if
+    /// `elem_idx` is out of range) and does not test whether the entry exists.
+    pub fn get_item<'a, V: Content>(&self, tree: &'a ContentTree<V>) -> (&'a V, usize) {
+        let leaf = &tree[self.leaf_idx];
+        (&leaf.children[self.elem_idx], self.offset)
+    }
+
+    /// Get the current position of the cursor. This is inefficient, and it should not normally be
+    /// called.
+    ///
+    /// Note that any outstanding delta is not relevant, as the delta position only affects the pos
+    /// of later items. The cursor itself is (just) early enough to be unaffected.
+    pub(crate) fn get_pos<V: Content>(&self, tree: &ContentTree<V>) -> LenPair {
+        let mut result = LenPair::default();
+
+        let leaf = &tree[self.leaf_idx];
+        let e = &leaf.children[self.elem_idx];
+        if e.takes_up_space() { result += self.offset; }
+        // if e.takes_up_space::() { result.end += self.offset; }
+
+        // Everything before the cursor within its own leaf.
+        for c in leaf.children[0..self.elem_idx].iter() {
+            result += c.content_len_pair();
+        }
+
+        // Then recurse up.
+        let mut p = leaf.parent;
+        let mut last_child = self.leaf_idx.0;
+
+        // If the tree holds a cached cursor, its unflushed delta is not yet reflected in the
+        // node widths. `c` tracks (child, parent) along the cached cursor's ancestor chain so
+        // the delta can be added once, when that chain is found among earlier siblings.
+        let (upd, mut c) = if let Some((_, cached_cursor, upd)) = tree.cursor.as_ref() {
+            let parent = tree[cached_cursor.leaf_idx].parent;
+            (upd.clone(), Some((cached_cursor.leaf_idx.0, parent)))
+        } else {
+            (LenUpdate::default(), None)
+        };
+
+
+        while !p.is_root() {
+            let node = &tree[p];
+
+            // Sum the widths of all siblings to the left of the child we came from.
+            for i in 0..node.child_indexes.len() {
+                if node.child_indexes[i] == last_child { break; }
+                result += node.child_width[i];
+
+                if let Some((c_child, c_parent)) = c {
+                    if c_parent == p && c_child == node.child_indexes[i] {
+                        update_by(&mut result, upd.cur);
+                        c = None;
+                    }
+                }
+            }
+
+
+            // Walk the cached cursor's chain up one level in lock step.
+            if let Some((_c_child, c_parent)) = c.take() {
+                c = Some((c_parent.0, tree[c_parent].parent));
+            }
+
+            last_child = p.0;
+            p = node.parent;
+
+        }
+
+        result
+    }
+
+    /// Order two cursors by their location in `tree`. Cursors in the same leaf compare by
+    /// `(elem_idx, offset)`; otherwise by the order of their ancestors under the closest
+    /// common ancestor node.
+    pub fn cmp<V: Content>(&self, other: &Self, tree: &ContentTree<V>) -> Ordering {
+        if self.leaf_idx == other.leaf_idx {
+            self.elem_idx.cmp(&other.elem_idx)
+                .then(self.offset.cmp(&other.offset))
+        } else {
+            // Recursively walk up the trees to find a common ancestor. Because a b-tree is always
+            // perfectly balanced, we can walk in lock step until both nodes are the same.
+            let mut c1 = self.leaf_idx.0;
+            let mut n1 = tree[self.leaf_idx].parent;
+            let mut c2 = other.leaf_idx.0;
+            let mut n2 = tree[other.leaf_idx].parent;
+
+            while n1 != n2 {
+                // Go up the tree.
+                c1 = n1.0;
+                n1 = tree[n1].parent;
+                c2 = n2.0;
+                n2 = tree[n2].parent;
+
+                debug_assert!(!n1.is_root());
+                debug_assert!(!n2.is_root());
+            }
+
+            // Find the relative order of c1 and c2.
+            debug_assert_eq!(n1, n2);
+            debug_assert_ne!(c1, c2);
+            let node = &tree[n1];
+            node.idx_of_child(c1).cmp(&node.idx_of_child(c2))
+        }
+    }
+}
+
+// A `DeltaCursor` pairs a `ContentCursor` (field 0) with a pending `LenUpdate` (field 1).
+// Whenever the inner cursor leaves a leaf, the pending delta is flushed against that leaf.
+impl DeltaCursor {
+    /// `ContentCursor::roll_next_item`, flushing the pending delta if a leaf was left.
+    /// Returns whether there is a next item.
+    pub(crate) fn roll_next_item<V: Content>(&mut self, tree: &mut ContentTree<V>) -> bool {
+        let (has_next, flush_leaf) = self.0.roll_next_item(tree);
+        if let Some(flush_leaf) = flush_leaf {
+            tree.flush_delta_and_clear(flush_leaf, &mut self.1);
+        }
+
+        has_next
+    }
+
+    /// `ContentCursor::next_entry`, flushing the pending delta if a leaf was left.
+    /// Returns whether there is a next entry.
+    pub(crate) fn next_entry<V: Content>(&mut self, tree: &mut ContentTree<V>) -> bool {
+        let (has_next, flush_leaf) = self.0.next_entry(tree);
+        if let Some(flush_leaf) = flush_leaf {
+            tree.flush_delta_and_clear(flush_leaf, &mut self.1);
+        }
+
+        has_next
+    }
+
+    /// Consume the cursor, applying its pending delta to the tree.
+    pub fn flush<V: Content>(self, tree: &mut ContentTree<V>) {
+        tree.flush_delta_len(self.0.leaf_idx, self.1);
+    }
+
+    /// Apply the pending delta to the tree and reset it, keeping the cursor usable.
+    pub fn flush_delta_and_clear<V: Content>(&mut self, tree: &mut ContentTree<V>) {
+        tree.flush_delta_and_clear(self.0.leaf_idx, &mut self.1);
+    }
+}
+
+
+impl<V: Content> ContentTree<V> {
+    /// Create an empty tree: one empty leaf, no internal nodes, height 0.
+    pub fn new() -> Self {
+        debug_assert_eq!(V::none().content_len_pair(), LenPair::default());
+        // debug_assert_eq!(V::none().len(), 0);
+        debug_assert_eq!(V::none().exists(), false);
+
+        Self {
+            leaves: vec![initial_root_leaf()],
+            nodes: vec![],
+            // upper_bound: 0,
+            height: 0,
+            root: 0,
+            cursor: Default::default(),
+            total_len: Default::default(),
+            // free_leaf_pool_head: LeafIdx(usize::MAX),
+            // free_node_pool_head: NodeIdx(usize::MAX),
+        }
+    }
+
+    /// Reset the tree to the same state `new()` produces, reusing the existing vectors.
+    pub fn clear(&mut self) {
+        self.leaves.clear();
+        self.nodes.clear();
+        self.height = 0;
+        self.root = 0;
+        self.cursor = Default::default();
+        self.total_len = Default::default();
+        // self.free_leaf_pool_head = LeafIdx(usize::MAX);
+        // self.free_node_pool_head = NodeIdx(usize::MAX);
+
+        self.leaves.push(initial_root_leaf());
+    }
+
+    /// Store `item` as the first entry of leaf 0 and set the total length from it. `notify`
+    /// is called once with `(item, LeafIdx(0))`. Debug builds assert that the tree is empty
+    /// and that no cursor is cached.
+    pub fn set_single_item_notify<F>(&mut self, item: V, notify: F)
+        where F: FnOnce(V, LeafIdx)
+    {
+        debug_assert!(self.is_empty());
+        debug_assert!(self.cursor.is_none());
+
+        self.total_len = item.content_len_pair();
+        notify(item, LeafIdx(0));
+        self.leaves[0].children[0] = item;
+    }
+
+    /// Grow the tree by one level: push a new root node with exactly two children,
+    /// `child_a` followed by `child_b`. `b_size` is the width of `child_b`; `child_a` gets
+    /// the rest of `total_len`. Returns the index of the new root.
+    // fn create_new_root_node(&mut self, child_a: usize, child_b: usize, split_point: LenPair) -> NodeIdx {
+    fn create_new_root_node(&mut self, child_a: usize, child_b: usize, b_size: LenPair) -> NodeIdx {
+        self.height += 1;
+        let mut new_root = ContentNode {
+            child_indexes: [usize::MAX; NODE_CHILDREN],
+            child_width: [Default::default(); NODE_CHILDREN],
+            parent: Default::default(),
+        };
+
+        new_root.child_indexes[0] = child_a;
+        new_root.child_indexes[1] = child_b;
+        new_root.child_width[0] = self.total_len - b_size;
+        new_root.child_width[1] = b_size;
+
+        let new_idx = self.nodes.len();
+        // println!("Setting root to {new_idx}");
+        self.root = new_idx;
+        self.nodes.push(new_root);
+        NodeIdx(new_idx)
+    }
+
+    /// Insert `item` at `cursor`, always notifying for the inserted item
+    /// (`insert` with `notify_here = true`).
+    pub fn insert_notify<N>(&mut self, item: V, cursor: &mut DeltaCursor, notify: &mut N)
+        where N: FnMut(V, LeafIdx)
+    {
+        // let mut delta_len = LenUpdate::default();
+        self.insert(item, cursor, true, notify);
+        // self.flush_delta_len(cursor.leaf_idx, delta_len);
+
+        // if cfg!(debug_assertions) {
+        //     self.dbg_check();
+        // }
+    }
+
+    /// Total content length of the tree, including the not-yet-flushed delta of the cached
+    /// cursor (if any).
+    pub fn total_len(&self) -> LenPair {
+        let mut len = self.total_len;
+        // TODO: Try rewriting this into branch-free code.
+        if let Some((_, _, flush)) = self.cursor.as_ref() {
+            update_by(&mut len, flush.cur);
+        }
+        len
+    }
+
+    /// Mutate in-place up to replace_max items in the next entry pointed at by the cursor.
+    ///
+    /// The cursor ends up right after the modified item.
+    ///
+    /// `map_fn` is applied to the portion being replaced, which is split off from the entry
+    /// when the cursor is mid-entry or the entry is longer than `replace_max`. Returns
+    /// `(len, r)`: the number of items covered by the mutation and the value `map_fn` returned.
+    ///
+    /// # Panics
+    /// Panics if the cursor is at the end of the data structure.
+    pub(crate) fn mutate_entry<N, MapFn, R>(&mut self, dc: &mut DeltaCursor, replace_max: usize, notify: &mut N, map_fn: MapFn) -> (usize, R)
+        where N: FnMut(V, LeafIdx), MapFn: FnOnce(&mut V) -> R
+    {
+        if !dc.roll_next_item(self) { panic!("Cannot mutate at end of data structure") }
+        let DeltaCursor(cursor, delta) = dc;
+        // TODO: Make a variant of roll_next_item that doesn't roll delta.
+
+        let leaf = &mut self.leaves[cursor.leaf_idx.0];
+        let entry = &mut leaf.children[cursor.elem_idx];
+        let entry_len = entry.len();
+        if cursor.offset == 0 && entry_len <= replace_max {
+            // Replace in-place.
+            dec_delta_update(delta, entry);
+            let r = map_fn(entry);
+            inc_delta_update(delta, entry);
+            // self.flush_delta_len(cursor.leaf_idx, cursor.delta);
+            cursor.offset = entry_len;
+
+            // We'll also do a brief best-effort attempt at merging this modified item with
+            // subsequent items in the leaf.
+            let mut entry = leaf.children[cursor.elem_idx];
+            let scan_start = cursor.elem_idx + 1;
+            let mut elem_idx2 = scan_start;
+            while elem_idx2 < LEAF_CHILDREN {
+                let entry2 = &leaf.children[elem_idx2];
+                if !entry2.exists() || !entry.can_append(entry2) { break; }
+
+                entry.append(*entry2);
+                elem_idx2 += 1;
+            }
+            if elem_idx2 > scan_start {
+                leaf.children[cursor.elem_idx] = entry;
+                remove_from_array_fill(&mut leaf.children, scan_start..elem_idx2, V::none());
+            }
+
+            return (entry_len, r);
+        }
+
+        // Otherwise we've got ourselves a situation.
+        let (len, r) = if cursor.offset > 0 {
+            let mut rest = entry.truncate(cursor.offset);
+            dec_delta_update(delta, &rest);
+
+            let len = rest.len();
+            if len <= replace_max {
+                // Not so bad. Just splice in the replaced item. This will automatically try and
+                // join the item to nearby items.
+                let r = map_fn(&mut rest);
+                self.insert(rest, dc, false, notify);
+                (len, r)
+            } else {
+                // Ugh. We're modifying the middle of this item. We'll use splice_in_internal, which
+                // does not try and join the updated item - since its more convenient, and we
+                // probably can't join it to nearby items anyway.
+                let remainder = rest.truncate(replace_max);
+                let r = map_fn(&mut rest);
+                cursor.offset = replace_max; // Cursor ends up after the item.
+                let (leaf_idx, elem_idx) = self.splice_in_internal(
+                    rest, Some(remainder),
+                    cursor.leaf_idx, cursor.elem_idx + 1, delta,
+                    false, notify
+                );
+                cursor.leaf_idx = leaf_idx;
+                cursor.elem_idx = elem_idx;
+                (replace_max, r)
+            }
+        } else {
+            debug_assert!(entry_len > replace_max);
+            // In this case, we need to cut the existing item down and modify the start of it.
+            // There's a few ways to do this. The simplest is to just chop out the modified bit and
+            // re-insert it.
+            let mut e = entry.truncate_keeping_right(replace_max);
+            dec_delta_update(delta, &e);
+            // The cursor offset is already at 0.
+            let r = map_fn(&mut e);
+            self.insert(e, dc, false, notify);
+            // splice_in will try and join the item to the previous item - which is what we want
+            // here. And the cursor will be moved to right after the item in all cases.
+            (replace_max, r)
+        };
+
+        // self.flush_delta_len(cursor.leaf_idx, delta_len);
+        (len, r)
+    }
+
+    /// Insert `item` at the cursor position, leaving the cursor right after the inserted
+    /// item.
+    ///
+    /// The item is merged into a neighbour where possible: appended to the entry the cursor
+    /// sits at the end of, or prepended to the following entry. Otherwise it is spliced in
+    /// as a new entry, splitting the current entry when the cursor is in its middle.
+    ///
+    /// Length changes are accumulated in the cursor's delta rather than written to the tree.
+    /// `notify_here` controls whether `notify` is called for entries that stay in the
+    /// cursor's leaf.
+    pub fn insert<N>(&mut self, item: V, DeltaCursor(cursor, delta): &mut DeltaCursor, notify_here: bool, notify: &mut N)
+        where N: FnMut(V, LeafIdx)
+    {
+        debug_assert!(item.exists());
+        let mut leaf_idx = cursor.leaf_idx;
+        let mut elem_idx = cursor.elem_idx;
+        let mut offset = cursor.offset;
+
+        let node = &mut self[leaf_idx];
+        debug_assert_ne!(offset, usize::MAX);
+
+        if leaf_idx.0 != 0 || offset != 0 {
+            debug_assert!(node.children[elem_idx].exists());
+            // debug_assert!(node.children[elem_idx].content_len() >= offset);
+        }
+
+        let remainder = if offset == 0 && elem_idx > 0 {
+            // Roll the cursor back to opportunistically see if we can append.
+            elem_idx -= 1;
+            offset = node.children[elem_idx].len(); // blerp could be cleaner.
+            None
+        } else if offset == node.children[elem_idx].len() || offset == 0 {
+            None
+        } else {
+            // We could also roll back to the previous leaf node if offset == 0 and
+            // elem_idx == 0 but when I tried it, it didn't make any difference in practice
+            // because insert() is always called with stick_end.
+
+            // Remainder is the trimmed off returned value.
+            // splice the item into the current cursor location.
+            let entry: &mut V = &mut node.children[elem_idx];
+            let remainder = entry.truncate(offset);
+            dec_delta_update(delta, &remainder);
+            // We don't need to update cursor since its already where it needs to be.
+
+            Some(remainder)
+        };
+
+        if offset != 0 {
+            // We're at the end of an element. Try and append here.
+            debug_assert_eq!(offset, node.children[elem_idx].len());
+            // Try and append as much as we can after the current entry
+            let cur_entry: &mut V = &mut node.children[elem_idx];
+            if cur_entry.can_append(&item) {
+                inc_delta_update(delta, &item);
+                // flush_marker += next.content_len() as isize;
+                if notify_here { notify(item, leaf_idx); }
+                cur_entry.append(item);
+                cursor.elem_idx = elem_idx;
+                cursor.offset = cur_entry.len();
+
+                if let Some(remainder) = remainder {
+                    let (leaf_idx_2, elem_idx_2) = self.splice_in_internal(remainder, None, leaf_idx, elem_idx + 1, delta, notify_here, notify);
+                    // If the remainder was inserted into a new item, we might need to update the
+                    // cursor.
+                    if leaf_idx_2 != leaf_idx {
+                        if elem_idx_2 > 0 {
+                            // This is a bit of a hack. Move the cursor to the item before the
+                            // remainder.
+                            cursor.leaf_idx = leaf_idx_2;
+                            cursor.elem_idx = elem_idx_2 - 1;
+                        } else {
+                            // The remainder is on a subsequent element. This is fine, but now delta
+                            // refers to the item the remainder is on, not the cursor element.
+                            // So we need to flush it.
+                            // TODO: Urgh this is gross. Rewrite me!
+                            self.flush_delta_and_clear(leaf_idx_2, delta);
+                        }
+                    }
+                }
+                return;
+            }
+
+            // Insert in the next slot.
+
+            elem_idx += 1; // NOTE: Cursor might point past the end of the node.
+            // offset = 0; // Offset isn't used anymore anyway.
+
+            // Try and prepend to the start of the next item.
+            // This optimization improves performance when the user hits backspace. We end up
+            // merging all the deleted elements together. This adds complexity in exchange for
+            // making the tree simpler. (For real edit sequences (like the automerge-perf data
+            // set) this gives about an 8% performance increase on an earlier version of this code)
+
+            if remainder.is_none()
+                // This is the same as the two lines below. TODO: Check which the compiler prefers.
+                // && node.children.get(elem_idx).is_some_and(|v| v.exists())
+                && elem_idx < node.children.len()
+                && node.children[elem_idx].exists()
+            {
+                // It may be worth being more aggressive here. We're currently not trying this trick
+                // when the cursor is at the end of the current node. That might be worth trying!
+                let cur_entry = &mut node.children[elem_idx];
+                if item.can_append(cur_entry) {
+                    inc_delta_update(delta, &item);
+                    // Always notify for the item itself.
+                    if notify_here { notify(item, leaf_idx); }
+                    // trailing_offset += item.len();
+                    cursor.elem_idx = elem_idx;
+                    cursor.offset = item.len();
+                    cur_entry.prepend(item);
+                    debug_assert!(remainder.is_none());
+                    return;
+                }
+            }
+        }
+
+        cursor.offset = item.len();
+        (leaf_idx, elem_idx) = self.splice_in_internal(item, remainder, leaf_idx, elem_idx, delta, notify_here, notify);
+        cursor.leaf_idx = leaf_idx;
+        cursor.elem_idx = elem_idx;
+    }
+
+    /// Splice in an item, and optionally remainder afterwards. Returns the (leaf_idx, elem_idx) of
+    /// the inserted item, but NOT the remainder.
+    ///
+    /// `notify` is called for the item when `notify_here` is set or when the item lands in a
+    /// different leaf than the `leaf_idx` passed in. For the remainder it is called only when
+    /// the leaf changed. Both lengths are added to `delta`.
+    fn splice_in_internal<N>(&mut self, item: V, remainder: Option<V>, mut leaf_idx: LeafIdx, mut elem_idx: usize, delta: &mut LenUpdate, notify_here: bool, notify: &mut N) -> (LeafIdx, usize)
+        where N: FnMut(V, LeafIdx)
+    {
+        let space_needed = 1 + remainder.is_some() as usize;
+        let (new_leaf_idx, new_elem_idx) = self.make_space_in_leaf_for(space_needed, leaf_idx, elem_idx, delta, notify);
+        // Only call notify if either we're notifying in all cases, or if the item is inserted into
+        // a different leaf than we were passed.
+        let moved = new_leaf_idx != leaf_idx;
+        if notify_here || moved { notify(item, new_leaf_idx); }
+
+        (leaf_idx, elem_idx) = (new_leaf_idx, new_elem_idx);
+
+        let leaf = &mut self.leaves[leaf_idx.0];
+        inc_delta_update(delta, &item);
+        leaf.children[elem_idx] = item;
+
+        if let Some(remainder) = remainder {
+            if moved { notify(remainder, leaf_idx); }
+            inc_delta_update(delta, &remainder);
+            leaf.children[elem_idx + 1] = remainder;
+        }
+
+        (leaf_idx, elem_idx)
+    }
+
+    /// Apply `delta` to the width recorded for `leaf_idx` in every ancestor node, and to the
+    /// tree's `total_len`. Does nothing for an empty delta.
+    fn flush_delta_len(&mut self, leaf_idx: LeafIdx, delta: LenUpdate) {
+        if delta.is_empty() { return; }
+
+        let mut idx = self.leaves[leaf_idx.0].parent;
+        let mut child = leaf_idx.0;
+        while !idx.is_root() {
+            let n = &mut self.nodes[idx.0];
+            let pos = n.idx_of_child(child);
+            debug_assert!(pos < n.child_width.len());
+
+            // The `%` never changes `pos` (see the debug_assert above).
+            // NOTE(review): presumably it is there so the compiler can elide the bounds
+            // check - confirm.
+            // n.child_width[pos % n.child_width.len()].update_by(delta);
+            update_by(&mut n.child_width[pos % n.child_width.len()], delta.cur);
+
+            child = idx.0;
+            idx = n.parent;
+        }
+
+        update_by(&mut self.total_len, delta.cur);
+    }
+
+    /// Flush `delta` as in `flush_delta_len`, leaving `delta` reset to its default value.
+    #[inline]
+    fn flush_delta_and_clear(&mut self, leaf_idx: LeafIdx, delta: &mut LenUpdate) {
+        self.flush_delta_len(leaf_idx, take(delta));
+    }
+
+    // #[inline]
+    // pub fn flush_cursor_delta(&mut self, cursor: MutContentCursor) {
+    //     self.flush_delta_len(cursor.leaf_idx, cursor.delta);
+    // }
+    // #[inline]
+    // fn flush_cursor_delta_and_clear(&mut self, cursor: &mut MutContentCursor) {
+    //     self.flush_delta_len(cursor.inner.leaf_idx, take(&mut cursor.delta));
+    // }
+
+
+    /// Open up `space_wanted` (1 or 2) free slots at `elem_idx` in the given leaf, splitting
+    /// the leaf first if it does not have room. Returns the `(leaf, elem_idx)` where the free
+    /// slots now start, which is in the newly created leaf if the insert position fell in the
+    /// second half of a split leaf.
+    ///
+    /// The vacated slots still hold their old contents; the caller must overwrite them.
+    /// When a split happens, `delta_len` is flushed beforehand.
+    fn make_space_in_leaf_for<F>(&mut self, space_wanted: usize, mut leaf_idx: LeafIdx, mut elem_idx: usize, delta_len: &mut LenUpdate, notify: &mut F) -> (LeafIdx, usize)
+        where F: FnMut(V, LeafIdx)
+    {
+        assert!(space_wanted == 1 || space_wanted == 2);
+
+        if self.leaves[leaf_idx.0].has_space(space_wanted) {
+            let leaf = &mut self.leaves[leaf_idx.0];
+
+            // Could scan to find the actual length of the children, then only memcpy that many. But
+            // memcpy is cheap.
+            leaf.children.copy_within(elem_idx..LEAF_CHILDREN - space_wanted, elem_idx + space_wanted);
+        } else {
+            self.flush_delta_and_clear(leaf_idx, delta_len);
+            let new_node = self.split_leaf(leaf_idx, notify);
+
+            if elem_idx >= LEAF_SPLIT_POINT {
+                // We're inserting into the newly created node.
+                (leaf_idx, elem_idx) = (new_node, elem_idx - LEAF_SPLIT_POINT);
+            }
+
+            let leaf = &mut self.leaves[leaf_idx.0];
+            leaf.children.copy_within(elem_idx..LEAF_SPLIT_POINT, elem_idx + space_wanted);
+        }
+        (leaf_idx, elem_idx)
+    }
+
+    /// This method always splits a node in the middle. This isn't always optimal, but its simpler.
+    /// TODO: Try splitting at the "correct" point and see if that makes any difference to
+    /// performance.
+    ///
+    /// The children from `NODE_SPLIT_POINT` onwards move to a freshly pushed node, and their
+    /// parent pointers are updated (`children_are_leaves` selects which array they live in).
+    /// The new node is then linked into the parent, or a new root is created if `old_idx` was
+    /// the root. Returns the index of the new node.
+    fn split_node(&mut self, old_idx: NodeIdx, children_are_leaves: bool) -> NodeIdx {
+        // Split a full internal node into 2 nodes.
+        let new_node_idx = self.nodes.len();
+        // println!("split node -> {new_node_idx}");
+        let old_node = &mut self.nodes[old_idx.0];
+        // The old leaf must be full before we split it.
+        debug_assert!(old_node.is_full());
+
+        let split_size: LenPair = old_node.child_width[NODE_SPLIT_POINT..].iter().copied().sum();
+
+        // eprintln!("split node {:?} -> {:?} + {:?} (leaves: {children_are_leaves})", old_idx, old_idx, new_node_idx);
+        // eprintln!("split start {:?} / {:?}", &old_node.children[..NODE_SPLIT_POINT], &old_node.children[NODE_SPLIT_POINT..]);
+
+        let mut new_node = ContentNode {
+            child_indexes: [usize::MAX; NODE_CHILDREN],
+            child_width: [LenPair::default(); NODE_CHILDREN],
+            parent: NodeIdx(usize::MAX), // Overwritten below.
+        };
+
+        new_node.child_indexes[0..NODE_SPLIT_POINT].copy_from_slice(&old_node.child_indexes[NODE_SPLIT_POINT..]);
+        new_node.child_width[0..NODE_SPLIT_POINT].copy_from_slice(&old_node.child_width[NODE_SPLIT_POINT..]);
+        // Only the indexes are cleared; the old node's widths past the split point are left
+        // as they were.
+        old_node.child_indexes[NODE_SPLIT_POINT..].fill(usize::MAX);
+
+        if children_are_leaves {
+            for idx in &new_node.child_indexes[..NODE_SPLIT_POINT] {
+                self.leaves[*idx].parent = NodeIdx(new_node_idx);
+            }
+        } else {
+            for idx in &new_node.child_indexes[..NODE_SPLIT_POINT] {
+                self.nodes[*idx].parent = NodeIdx(new_node_idx);
+            }
+        }
+
+        debug_assert_eq!(new_node_idx, self.nodes.len());
+        // let split_point_lv = new_node.children[0].0;
+        self.nodes.push(new_node);
+
+        // It would be much nicer to do this above earlier - and in earlier versions I did.
+        // The problem is that both create_new_root_node and insert_into_node can insert new items
+        // into self.nodes. If that happens, the new node index we're expecting to use is used by
+        // another node. Hence, we need to call self.nodes.push() before calling any other function
+        // which modifies the node list.
+        let old_node = &self.nodes[old_idx.0];
+        if old_idx.0 == self.root {
+            // We'll make a new root.
+            let parent = self.create_new_root_node(old_idx.0, new_node_idx, split_size);
+            self.nodes[old_idx.0].parent = parent;
+            self.nodes[new_node_idx].parent = parent
+        } else {
+            let parent = old_node.parent;
+            self.nodes[new_node_idx].parent = self.split_child_of_node(parent, old_idx.0, new_node_idx, split_size, false);
+        }
+
+        NodeIdx(new_node_idx)
+    }
+
+    /// Register `new_child_idx` in `node_idx` directly after the existing child `child_idx`,
+    /// moving `stolen_len` of width from the old child to the new one. If the node is full it
+    /// is split first. Returns the node that ends up holding the new child, which the caller
+    /// must store as the new child's parent.
+    #[must_use]
+    fn split_child_of_node(&mut self, mut node_idx: NodeIdx, child_idx: usize, new_child_idx: usize, stolen_len: LenPair, children_are_leaves: bool) -> NodeIdx {
+        let mut node = &mut self[node_idx];
+
+        // Where will the child go? I wonder if the compiler can do anything smart with this...
+        let mut child_pos = node.child_indexes
+            .iter()
+            .position(|idx| { *idx == child_idx })
+            .unwrap() % node.child_width.len();
+
+        if node.is_full() {
+            let new_node = self.split_node(node_idx, children_are_leaves);
+
+            if child_pos >= NODE_SPLIT_POINT {
+                // Actually we're inserting into the new node.
+                child_pos -= NODE_SPLIT_POINT;
+                node_idx = new_node;
+            }
+            // Technically this only needs to be reassigned in the if() above, but reassigning it
+            // in all cases is necessary for the borrowck.
+            node = &mut self[node_idx];
+        }
+
+        node.child_width[child_pos] -= stolen_len;
+
+        let insert_pos = (child_pos + 1) % node.child_width.len();
+
+        // dbg!(&node);
+        // println!("insert_into_node n={:?} after_child {after_child} pos {insert_pos}, new_child {:?}", node_idx, new_child);
+
+
+        // Could scan to find the actual length of the children, then only memcpy that many. But
+        // memcpy is cheap.
+        node.child_indexes.copy_within(insert_pos..NODE_CHILDREN - 1, insert_pos + 1);
+        node.child_indexes[insert_pos] = new_child_idx;
+
+        node.child_width.copy_within(insert_pos..NODE_CHILDREN - 1, insert_pos + 1);
+        node.child_width[insert_pos] = stolen_len;
+
+        node_idx
+    }
+
+    /// Split a leaf in the middle. Entries from `LEAF_SPLIT_POINT` onwards move to a new
+    /// leaf, which is pushed to `self.leaves` and linked in directly after the old one.
+    /// `notify` is called for each moved entry with the new leaf's index. Returns the new
+    /// leaf's index.
+    fn split_leaf<F>(&mut self, old_idx: LeafIdx, notify: &mut F) -> LeafIdx
+        where F: FnMut(V, LeafIdx)
+    {
+        // This function splits a full leaf node in the middle, into 2 new nodes.
+        // The result is two nodes - old_leaf with items 0..N/2 and new_leaf with items N/2..N.
+
+        let old_height = self.height;
+        // TODO: This doesn't currently use the pool of leaves that we have so carefully prepared.
+
+        let new_leaf_idx = self.leaves.len(); // Weird instruction order for borrowck.
+        let mut old_leaf = &mut self.leaves[old_idx.0];
+        // debug_assert!(old_leaf.is_full());
+        debug_assert!(!old_leaf.has_space(2));
+
+        let mut new_size = LenPair::default();
+        for v in &old_leaf.children[LEAF_SPLIT_POINT..] {
+            // This index isn't actually valid yet, but because we've borrowed self mutably
+            // here, the borrow checker will make sure that doesn't matter.
+            if v.exists() {
+                notify(v.clone(), LeafIdx(new_leaf_idx));
+                new_size += v.content_len_pair();
+            } else { break; } // TODO: This probably makes the code slower?
+        }
+
+        let parent = if old_height == 0 {
+            // Insert this leaf into a new root node. This has to be the first node.
+            let parent = self.create_new_root_node(old_idx.0, new_leaf_idx, new_size);
+            old_leaf = &mut self.leaves[old_idx.0]; // borrowck
+            debug_assert_eq!(parent, NodeIdx(0));
+            // let parent = NodeIdx(self.nodes.len());
+            old_leaf.parent = NodeIdx(0);
+            // debug_assert_eq!(old_leaf.parent, NodeIdx(0)); // Ok because its the default.
+            // old_leaf.parent = NodeIdx(0); // Could just default nodes to have a parent of 0.
+
+            NodeIdx(0)
+        } else {
+            let mut parent = old_leaf.parent;
+            // The parent may change by calling insert_into_node - since the node we're inserting
+            // into may split off.
+
+            parent = self.split_child_of_node(parent, old_idx.0, new_leaf_idx, new_size, true);
+            old_leaf = &mut self.leaves[old_idx.0]; // borrowck.
+            old_leaf.parent = parent; // If the node was split, we may have a new parent.
+            parent
+        };
+
+        // The old leaf must be full before we split it.
+        // debug_assert!(old_leaf.data.last().unwrap().is_some());
+
+        let mut new_leaf = ContentLeaf {
+            children: [V::none(); LEAF_CHILDREN],
+            next_leaf: old_leaf.next_leaf,
+            parent,
+        };
+
+        // We'll steal the second half of the items in OLD_LEAF.
+        // Could use ptr::copy_nonoverlapping but this is safe, and they compile to the same code.
+        new_leaf.children[0..LEAF_SPLIT_POINT].copy_from_slice(&old_leaf.children[LEAF_SPLIT_POINT..]);
+
+        // Needed to mark that these items are gone now.
+        old_leaf.children[LEAF_SPLIT_POINT..].fill(V::none());
+
+        // old_leaf.upper_bound = split_lv;
+        old_leaf.next_leaf = LeafIdx(new_leaf_idx);
+
+        self.leaves.push(new_leaf);
+        debug_assert_eq!(self.leaves.len() - 1, new_leaf_idx);
+
+        LeafIdx(new_leaf_idx)
+    }
+
+    /// This function blindly assumes the item is definitely in the recursive children.
+    ///
+    /// Returns (child index, len_remaining). A position exactly at the end of a child
+    /// resolves to that child (`<=` comparison).
+    fn find_pos_in_node(node: &ContentNode, mut at_pos: usize) -> (usize, usize) {
+        for i in 0..NODE_CHILDREN {
+            let width = node.child_width[i];
+            if at_pos <= width { return (node.child_indexes[i], at_pos); }
+            at_pos -= width;
+        }
+        panic!("Position not in node");
+    }
+
+    // /// This function blindly assumes the item is definitely in the recursive children.
+    // ///
+    // /// Returns (child index, relative position, requested len remaining).
+    // fn find_pos_in_node_2(node: &ContentNode, at_pos: usize) -> (usize, LenPair) {
+    //     let mut offset = LenPair::default();
+    //     for i in 0..NODE_CHILDREN {
+    //         let width = node.child_width[i];
+    //         if at_pos <= offset.get::() + width.get::() {
+    //             return (node.child_indexes[i], offset);
+    //         }
+    //         offset += width;
+    //     }
+    //     panic!("Position not in node");
+    // }
+
+    /// This function blindly assumes the item is definitely in the recursive children.
+    ///
+    /// Returns (child index, position remaining inside that child). The comparison is strict
+    /// (`<`), so a position exactly at the end of a child resolves to the following child.
+    ///
+    /// # Panics
+    /// Panics if the position is not covered by any child.
+    fn find_cur_pos_in_node(node: &ContentNode, mut at_cur_pos: usize) -> (usize, usize) {
+        // let mut end_pos_offset = 0;
+        for i in 0..NODE_CHILDREN {
+            let width = node.child_width[i];
+            // if at_cur_pos <= width.cur {
+            if at_cur_pos < width {
+                return (node.child_indexes[i], at_cur_pos);
+            }
+            at_cur_pos -= width;
+            // end_pos_offset += width.end;
+        }
+        panic!("Position not in node");
+    }
+
+    /// Returns (index, offset). A position exactly at the end of an entry resolves to that
+    /// entry (`<=` comparison).
+    ///
+    /// # Panics
+    /// Panics if the position is not covered by any entry.
+    fn find_pos_in_leaf(leaf: &ContentLeaf<V>, mut at_pos: usize) -> (usize, usize) {
+        for i in 0..LEAF_CHILDREN {
+            let width = leaf.children[i].content_len();
+            if at_pos <= width { return (i, at_pos); }
+            at_pos -= width;
+        }
+        panic!("Position not in leaf");
+    }
+
+    /// Returns (index, offset). The comparison is strict (`<`), so entries with zero width
+    /// are skipped and a position at the end of an entry resolves to a later entry.
+    ///
+    /// # Panics
+    /// Panics if the position is not covered by any entry.
+    fn find_cur_pos_in_leaf(leaf: &ContentLeaf<V>, mut at_cur_pos: usize) -> (usize, usize) {
+        for i in 0..LEAF_CHILDREN {
+            let width = leaf.children[i].content_len_pair();
+            // if at_cur_pos <= width.cur {
+            if at_cur_pos < width {
+                // We return the end pos of the offset position, not just the start of this child.
+                return (i, at_cur_pos);
+            }
+            at_cur_pos -= width;
+        }
+        panic!("Position not in leaf");
+    }
+
+    // /// Returns (index, relative position in leaf, offset in item).
+    // fn find_pos_in_leaf_2(leaf: &ContentLeaf, at_pos: usize) -> (usize, LenPair, usize) {
+    //     let mut offset = LenPair::default();
+    //     for i in 0..LEAF_CHILDREN {
+    //         let width = leaf.children[i].content_len_pair();
+    //         if at_pos <= offset.get::() + width.get::() {
+    //             if width.end { offset.end +=
+    //             return (i, offset);
+    //         }
+    //         // if at_pos <= width { return (i, at_pos); }
+    //         // at_pos -= width;
+    //         offset += width;
+    //     }
+    //     panic!("Position not in leaf");
+    // }
+
+    // fn check_cursor_at(&self, cursor: ContentCursor, lv: LV, at_end: bool) {
+    //     assert!(cfg!(debug_assertions));
+    //     let leaf = &self.leaves[cursor.leaf_idx.0];
+    //     let lower_bound = leaf.bounds[cursor.elem_idx];
+    //
+    //     let next = cursor.elem_idx + 1;
+    //     let upper_bound = if next < LEAF_CHILDREN && leaf.bounds[next] != usize::MAX {
+    //         leaf.bounds[next]
+    //     } else {
+    //         self.leaf_upper_bound(leaf)
+    //     };
+    //     assert!(lv >= lower_bound);
+    //
+    //     if at_end {
+    //         assert_eq!(lv, upper_bound);
+    //     } else {
+    //         assert!(lv < upper_bound, "Cursor is not within expected bound. Expect {lv} / upper_bound {upper_bound}");
+    //     }
+    // }
+
+    // fn cursor_to_next(&self, cursor: &mut ContentCursor) {
+    //     let leaf = &self.leaves[cursor.leaf_idx.0];
+    //     let next_idx = cursor.elem_idx + 1;
+    //     if next_idx >= LEAF_CHILDREN || leaf.bounds[next_idx] == usize::MAX {
+    //         cursor.elem_idx = 0;
+    //         cursor.leaf_idx = leaf.next_leaf;
+    //     } else {
+    //         cursor.elem_idx += 1;
+    //     }
+    // }
+
+    /// If the cursor sits at the end of an entry, or on an entry that takes up no space,
+    /// move it forward to offset 0 of the next entry that does take up space. Nothing is
+    /// returned.
+    ///
+    /// Each time the cursor leaves a leaf, `flush.flush_and_clear` is called for that leaf.
+    ///
+    /// # Panics
+    /// Panics ("Unreachable?") if the end of the list is reached without finding such an
+    /// entry.
+    // NOTE(review): the generic parameter list was lost in extraction. The bound must be
+    // whichever trait provides `flush_and_clear(&mut self, tree, leaf_idx)`. `FlushUpdate`
+    // is assumed here - confirm against the trait definition.
+    pub fn slide_cursor_to_next_content<F: FlushUpdate>(&mut self, cursor: &mut ContentCursor, flush: &mut F) {
+        let mut leaf = &self.leaves[cursor.leaf_idx.0];
+        let e = &leaf.children[cursor.elem_idx];
+        // if cursor.offset < e.len()
+        if !e.exists() || (e.takes_up_space() && cursor.offset < e.len()) { return; }
+
+        cursor.elem_idx += 1;
+        cursor.offset = 0;
+
+        loop {
+            // This walks linearly through the nodes. It would be "big-O faster" to walk up and down
+            // the tree in this case, but I think this will usually be faster in practice.
+            if cursor.elem_idx >= leaf.children.len() || !leaf.children[cursor.elem_idx].exists() {
+                // Go to next leaf.
+                let next_leaf = leaf.next_leaf;
+                if next_leaf.exists() {
+                    flush.flush_and_clear(self, cursor.leaf_idx);
+                    // self.flush_cursor_delta_and_clear(cursor);
+                    cursor.leaf_idx = next_leaf;
+                    leaf = &self.leaves[cursor.leaf_idx.0];
+                    cursor.elem_idx = 0;
+                } else {
+                    // The cursor points past the end of the list. !@#?
+                    panic!("Unreachable?");
+                }
+            }
+
+            let e = &leaf.children[cursor.elem_idx];
+            if e.takes_up_space() {
+                break;
+            }
+
+            cursor.elem_idx += 1;
+        }
+    }
+
+    /// Return a cursor at the very start of the tree. Any cached cursor is dropped and its
+    /// pending delta flushed first.
+    pub fn cursor_at_start(&mut self) -> ContentCursor {
+        // I'm never using the cached cursor here because it may have slid to the next content.
+        if let Some((_, cursor, delta)) = self.cursor.take() {
+            self.flush_delta_len(cursor.leaf_idx, delta);
+            // self.flush_cursor_delta(cursor)
+        }
+
+        // This is always valid because there is always at least 1 leaf item, and its always
+        // the first item in the tree.
+        ContentCursor::default().into()
+    }
+
+    /// Return a cursor at the end of the last entry of the last leaf, or at offset 0 of
+    /// slot 0 if that leaf is empty. Debug builds assert that no cursor is cached.
+    pub fn cursor_at_end(&self) -> ContentCursor {
+        debug_assert!(self.cursor.is_none());
+        // if let Some((_, cursor, delta)) = self.cursor.take() {
+        //     self.flush_delta_len(cursor.leaf_idx, delta);
+        // }
+
+        // Descend from the root, always taking the last used child.
+        let mut idx = self.root;
+        for _h in 0..self.height {
+            let node = &self.nodes[idx];
+            // Find the last child.
+            let mut i = 0;
+            for child_idx in node.child_indexes {
+                if child_idx == usize::MAX { break; }
+                else { i = child_idx; }
+            }
+            idx = i;
+        }
+
+        // Make a cursor at the last entry of the leaf.
+        let leaf = &self.leaves[idx];
+        let mut elem_idx = 0;
+        for i in 0..leaf.children.len() {
+            if !leaf.children[i].exists() { break; }
+            elem_idx = i;
+        }
+
+        ContentCursor {
+            leaf_idx: LeafIdx(idx),
+            elem_idx,
+            offset: if leaf.children[elem_idx].exists() { leaf.children[elem_idx].len() } else { 0 },
+        }
+    }
+
+    /// Like `cursor_at_end`, but first drops and flushes any cached cursor, and returns a
+    /// `DeltaCursor` with an empty delta.
+    pub fn mut_cursor_at_end(&mut self) -> DeltaCursor {
+        if let Some((_, cursor, delta)) = self.cursor.take() {
+            self.flush_delta_len(cursor.leaf_idx, delta);
+        }
+
+        let cursor = self.cursor_at_end();
+        DeltaCursor(cursor, LenUpdate::default())
+    }
+
+    /// Return a cursor at the start of the tree without touching the cached cursor. Debug
+    /// builds assert that no cursor is cached.
+    pub fn cursor_at_start_nothing_emplaced(&self) -> ContentCursor {
+        debug_assert!(self.cursor.is_none());
+        ContentCursor::default().into()
+    }
+
+    /// `cursor_at_start` wrapped in a `DeltaCursor` with an empty delta.
+    pub fn mut_cursor_at_start(&mut self) -> DeltaCursor {
+        DeltaCursor(self.cursor_at_start(), Default::default())
+    }
+
+    // fn cursor_at_content_pos(&self, content_pos: usize) -> (LenUpdate, ContentCursor) {
+
+    /// Create and return a cursor pointing to (just before) the specified content item. The item
+    /// must take up space (cur pos size).
+    ///
+    /// Returns a `DeltaCursor`. If the cached cursor was emplaced at exactly `content_pos`
+    /// it is reused, together with its pending delta. Otherwise the cached cursor is flushed
+    /// and a new cursor with an empty delta is built by descending from the root.
+    ///
+    /// We never "stick end" - ie, the cursor is moved to the start of the next item with actual
+    /// content.
+    pub fn mut_cursor_before_cur_pos(&mut self, content_pos: usize) -> DeltaCursor {
+        if let Some((pos, mut cursor, mut delta)) = self.cursor.take() {
+            if let Some(mut pos) = pos {
+                if pos == content_pos {
+                    cache_hit();
+                    self.slide_cursor_to_next_content(&mut cursor, &mut delta);
+                    return DeltaCursor(cursor, delta);
+                }
+            }
+            cache_miss();
+
+            // Throw the old cursor away.
+            self.flush_delta_len(cursor.leaf_idx, delta);
+        }
+
+        // Make a cursor by descending from the root.
+        let mut idx = self.root;
+        let mut content_pos_remaining = content_pos;
+
+        for _h in 0..self.height {
+            let n = &self.nodes[idx];
+
+            let (child_idx, cpr) = Self::find_cur_pos_in_node(n, content_pos_remaining);
+            content_pos_remaining = cpr;
+            idx = child_idx;
+        }
+
+        // let (elem_idx, offset) = Self::find_pos_in_leaf::(&self.leaves[idx], pos_remaining);
+        let (elem_idx, offset) = Self::find_cur_pos_in_leaf(&self.leaves[idx], content_pos_remaining);
+        // We're guaranteed that the item under elem_idx has size in CUR. Well, unless the tree is empty.
+        debug_assert!(
+            (content_pos == 0 && self.is_empty())
+                || self.leaves[idx].children[elem_idx].takes_up_space());
+
+        DeltaCursor(ContentCursor {
+            leaf_idx: LeafIdx(idx),
+            elem_idx,
+            offset,
+        }, Default::default())
+    }
+
+    // fn advance_cursor_by_len(&self, cursor: &mut MutCursor, len: usize) {
+    //
+    // }
+
+    /// Cache the cursor (and its unflushed delta) in the tree, tagged with the position it
+    /// points at. Debug builds verify `pos` against `ContentCursor::get_pos`.
+    ///
+    /// # Panics
+    /// Panics if a cursor is already cached.
+    pub(crate) fn emplace_cursor(&mut self, pos: LenPair, DeltaCursor(cursor, delta): DeltaCursor) {
+        assert!(self.cursor.is_none());
+
+        if cfg!(debug_assertions) {
+            // let actual_pos = cursor.clone().unwrap().1.get_pos(self);
+            let actual_pos = cursor.get_pos(self);
+            assert_eq!(pos, actual_pos);
+        }
+
+        self.cursor = Some((Some(pos), cursor, delta));
+    }
+
+    /// Cache the cursor (and its unflushed delta) without a known position. Such a cursor is
+    /// never reused by `mut_cursor_before_cur_pos`, but its delta is still flushed later.
+    ///
+    /// # Panics
+    /// Panics if a cursor is already cached.
+    pub(crate) fn emplace_cursor_unknown(&mut self, DeltaCursor(cursor, delta): DeltaCursor) {
+        assert!(self.cursor.is_none());
+        self.cursor = Some((None, cursor, delta));
+    }
+
+    /// Build a cursor pointing at item `id`, which must be stored in leaf `leaf_idx`. The
+    /// leaf's entries are scanned in order, and the first one for which `get_offset(id)`
+    /// returns an offset wins.
+    ///
+    /// # Panics
+    /// Panics if no entry in the leaf contains `id`.
+    pub(crate) fn cursor_before_item(&self, id: V::Item, leaf_idx: LeafIdx) -> ContentCursor where V: Searchable {
+        // debug_assert!(self.cursor.is_none());
+
+        let leaf = &self[leaf_idx];
+
+        let mut elem_idx = usize::MAX;
+        let mut offset = usize::MAX;
+        for (idx, e) in leaf.children.iter().enumerate() {
+            if let Some(off) = e.get_offset(id) {
+                elem_idx = idx;
+                offset = off;
+                break;
+            }
+        }
+
+        assert_ne!(elem_idx, usize::MAX, "Could not find element in leaf");
+
+        ContentCursor { leaf_idx, elem_idx, offset }
+    }
+
+    // pub(crate) fn try_find_item(&mut self, id: V::Item) ->
Option + // where V: Searchable + // { + // if let Some((_pos, cursor, delta)) = self.cursor.as_ref() { + // let leaf = &self[cursor.leaf_idx]; + // + // for (elem_idx, e) in leaf.children.iter().enumerate() { + // if let Some(offset) = e.get_offset(id) { + // // Yeeeee we found it! + // let leaf_idx = cursor.leaf_idx; + // let delta = *delta; + // self.cursor = None; + // return Some(DeltaCursor( + // ContentCursor { leaf_idx, elem_idx, offset }, + // delta + // )); + // } + // } + // + // // self.flush_delta_len(cursor.leaf_idx, delta); + // } + // None + // } + + pub(crate) fn try_find_item(&mut self, id: V::Item) -> Option + where V: Searchable + { + if let Some((_pos, cursor, delta)) = self.cursor.take() { + let leaf = &self[cursor.leaf_idx]; + + for (elem_idx, e) in leaf.children.iter().enumerate() { + if let Some(offset) = e.get_offset(id) { + // Yeeeee we found it! + cache_hit(); + return Some(DeltaCursor( + ContentCursor { + leaf_idx: cursor.leaf_idx, + elem_idx, + offset, + }, + delta + )); + } + } + + self.flush_delta_len(cursor.leaf_idx, delta); + } + cache_miss(); + None + } + + pub(crate) fn mut_cursor_before_item(&mut self, id: V::Item, leaf_idx: LeafIdx) -> DeltaCursor + where V: Searchable + { + if let Some((mut pos, mut cursor, delta)) = self.cursor.take() { + let (item, cur_offset) = cursor.get_item(self); + if let Some(actual_offset) = item.get_offset(id) { + // The cursor already points to the item. + + // TODO: Rewrite this to use wrapping_add and non-branching code. + if let Some(pos) = pos.as_mut() { + if item.takes_up_space() { + *pos -= cur_offset; + *pos += actual_offset; + } + } + cursor.offset = actual_offset; + + if let Some(pos) = pos { + debug_assert_eq!(cursor.get_pos(self), pos); + } + + cache_hit(); + return DeltaCursor(cursor, delta); + } else if cursor.elem_idx > 0 { + // Try the previous item. 
+ let leaf = &self.leaves[cursor.leaf_idx.0]; + let prev_elem = &leaf.children[cursor.elem_idx - 1]; + if let Some(actual_offset) = prev_elem.get_offset(id) { + // Ok. + if let Some(pos) = pos.as_mut() { + if item.takes_up_space() { + *pos -= cur_offset; + } + if prev_elem.takes_up_space() { + *pos -= prev_elem.content_len() - actual_offset; + } + } + + cursor.elem_idx -= 1; + cursor.offset = actual_offset; + + if let Some(pos) = pos { + debug_assert_eq!(cursor.get_pos(self), pos); + } + + cache_hit(); + return DeltaCursor(cursor, delta); + } + } + + // marker_a(); + + // Throw the old cursor away. + self.flush_delta_len(cursor.leaf_idx, delta); + } else { + // marker_b(); + } + + cache_miss(); + // Otherwise just make a fresh cursor. + DeltaCursor(self.cursor_before_item(id, leaf_idx), LenUpdate::default()) + } + + fn first_leaf(&self) -> LeafIdx { + if cfg!(debug_assertions) { + // dbg!(&self); + let mut idx = self.root; + for _ in 0..self.height { + idx = self.nodes[idx].child_indexes[0]; + } + debug_assert_eq!(idx, 0); + } + LeafIdx(0) + } + + pub fn is_empty(&self) -> bool { + let first_leaf = &self.leaves[self.first_leaf().0]; + !first_leaf.children[0].exists() + } + + // pub fn count_items(&self) -> usize { + // let mut count = 0; + // let mut leaf = &self[self.first_leaf()]; + // loop { + // // SIMD should make this fast. + // count += leaf.bounds.iter().filter(|b| **b != usize::MAX).count(); + // + // // There is always at least one leaf. + // if leaf.is_last() { break; } + // else { + // leaf = &self[leaf.next_leaf]; + // } + // } + // + // count + // } + + /// Iterate over the contents of the index. Note the index tree may contain extra entries + /// for items within the range, with a value of V::default. + pub fn iter(&self) -> ContentTreeIter { + ContentTreeIter { + tree: self, + // If the iterator points to a valid leaf, it should never be empty. This makes the + // iteration logic simpler. 
+ leaf_idx: if self.is_empty() { LeafIdx::default() } else { self.first_leaf() }, + elem_idx: 0, + } + } + + pub fn iter_rle(&self) -> impl Iterator + '_ { + self.iter().merge_spans() + } + + pub fn to_vec(&self) -> Vec { + self.iter().collect::>() + } + + pub fn count_entries(&self) -> usize { + let mut count = 0; + for (_idx, children) in self.iter_leaves() { + for c in children.iter() { + if !c.exists() { break; } + count += 1; + } + } + count + } + + + /// On the walk this returns the size of all children (recursive) and the expected next visited + /// leaf idx. + fn dbg_check_walk_internal(&self, idx: usize, height: usize, mut expect_next_leaf_idx: LeafIdx, expect_parent: NodeIdx) -> (LenPair, LeafIdx, Option) { + if height == self.height { + assert!(idx < self.leaves.len()); + // The item is a leaf node. Check that the previous leaf is correct. + let leaf = &self.leaves[idx]; + assert_eq!(leaf.parent, expect_parent); + assert_eq!(idx, expect_next_leaf_idx.0); + + let leaf_size: LenPair = leaf.children.iter() + .filter(|c| c.exists()) + .map(|c| c.content_len_pair()) + .sum(); + + let mut delta = None; + if let Some((_pos, cursor, c_delta)) = self.cursor.as_ref() { + if cursor.leaf_idx.0 == idx { + delta = Some(*c_delta); + } + } + + // assert_eq!(leaf_size, expect_size); + + (leaf_size, leaf.next_leaf, delta) + } else { + assert!(idx < self.nodes.len()); + let node = &self.nodes[idx]; + assert_eq!(node.parent, expect_parent); + + let mut actual_node_size = LenPair::default(); + let mut delta = None; + + for i in 0..node.child_indexes.len() { + let child_idx = node.child_indexes[i]; + if child_idx == usize::MAX { + assert!(i >= 1); // All nodes have at least 1 child. + // All subsequent child_indexes must be usize::MAX. 
+ assert!(node.child_indexes[i..].iter().all(|i| *i == usize::MAX)); + break; + } + + let (actual_child_size, idx, d) = self.dbg_check_walk_internal(child_idx, height + 1, expect_next_leaf_idx, NodeIdx(idx)); + expect_next_leaf_idx = idx; + + if d.is_some() { + assert!(replace(&mut delta, d).is_none()); + } + + let mut expect_child_size = node.child_width[i]; + update_by(&mut expect_child_size, d.unwrap_or_default().cur); + assert_eq!(actual_child_size, expect_child_size); + + actual_node_size += expect_child_size; + } + // assert_eq!(actual_node_size, expect_size); + + (actual_node_size, expect_next_leaf_idx, delta) + } + } + + fn dbg_check_walk(&self) { + let (actual_len, last_next_ptr, delta) = self.dbg_check_walk_internal(self.root, 0, LeafIdx(0), NodeIdx(usize::MAX)); + // dbg!(actual_len, delta, self.total_len); + let mut total_len = self.total_len; + update_by(&mut total_len, delta.unwrap_or_default().cur); + assert_eq!(actual_len, total_len); + + assert_eq!(last_next_ptr.0, usize::MAX); + } + + + #[allow(unused)] + pub(crate) fn dbg_check(&self) { + // Invariants: + // - Except for the root item, all leaves must have at least 1 data entry. + // - The next pointers iterate through all items in sequence + // - There is at least 1 leaf node + // - The width of all items is correct. + + // This code does 2 traversals of the data structure: + // 1. We walk the leaves by following next_leaf pointers in each leaf node + // 2. We recursively walk the tree + + // Walk the tree structure in the nodes. + self.dbg_check_walk(); + + // Walk the leaves in sequence. + let mut leaves_visited = 0; + let mut leaf_idx = self.first_leaf(); + loop { + let leaf = &self[leaf_idx]; + leaves_visited += 1; + + if leaf_idx == self.first_leaf() { + // First leaf. This can be empty - but only if the whole data structure is empty. 
+ if !leaf.children[0].exists() { + assert!(!leaf.next_leaf.exists()); + assert_eq!(self.total_len, LenPair::default()); + } + } else { + assert!(leaf.children[0].exists(), "Only the first leaf can be empty"); + } + + // The size is checked in dbg_check_walk(). + + if leaf.is_last() { break; } + else { + let next_leaf = &self[leaf.next_leaf]; + // assert!(next_leaf.bounds[0] > prev); + // assert_eq!(leaf.upper_bound, next_leaf.bounds[0]); + } + leaf_idx = leaf.next_leaf; + } + assert_eq!(leaves_visited, self.leaves.len()); + + // let mut leaf_pool_size = 0; + // let mut i = self.free_leaf_pool_head; + // while i.0 != usize::MAX { + // leaf_pool_size += 1; + // i = self.leaves[i.0].next_leaf; + // } + // assert_eq!(leaves_visited + leaf_pool_size, self.leaves.len()); + // + // if self.height == 0 { + // assert!(self.root < self.leaves.len()); + // } else { + // assert!(self.root < self.nodes.len()); + // } + + + // let (lv, cursor) = self.cursor.get(); + // self.check_cursor_at(cursor, lv, false); + } + + pub(crate) fn iter_leaves(&self) -> ContentLeafIter<'_, V> { + ContentLeafIter { + tree: self, + leaf_idx: self.first_leaf(), + } + } +} + +#[derive(Debug)] +pub struct ContentTreeIter<'a, V: Content> { + tree: &'a ContentTree, + leaf_idx: LeafIdx, + // leaf: &'a ContentLeaf, + elem_idx: usize, +} + +impl<'a, V: Content> Iterator for ContentTreeIter<'a, V> { + // type Item = (DTRange, V); + type Item = V; + + fn next(&mut self) -> Option { + // if self.leaf_idx.0 == usize::MAX { + debug_assert!(self.elem_idx < LEAF_CHILDREN); + if self.leaf_idx.0 >= self.tree.leaves.len() || self.elem_idx >= LEAF_CHILDREN { // Avoid a bounds check. 
+ return None; + } + + let leaf = &self.tree[self.leaf_idx]; + + let data = leaf.children[self.elem_idx].clone(); + + self.elem_idx += 1; + if self.elem_idx >= LEAF_CHILDREN || !leaf.children[self.elem_idx].exists() { + self.leaf_idx = leaf.next_leaf; + self.elem_idx = 0; + } + + Some(data) + } +} + +#[derive(Debug, Copy, Clone)] +pub struct ContentLeafIter<'a, V: Content> { + tree: &'a ContentTree, + leaf_idx: LeafIdx, +} + +impl<'a, V: Content> Iterator for ContentLeafIter<'a, V> { + // type Item = (LeafIdx, &'a ContentLeaf); + type Item = (LeafIdx, &'a [V; LEAF_CHILDREN]); + + fn next(&mut self) -> Option { + if !self.leaf_idx.exists() { return None; } + + let cur_leaf = self.leaf_idx; + let leaf = &self.tree[cur_leaf]; + self.leaf_idx = leaf.next_leaf; + + Some((cur_leaf, &leaf.children)) + } +} + + +#[cfg(test)] +mod test { + use std::fmt::Debug; + use std::ops::Range; + use std::pin::Pin; + use rand::rngs::SmallRng; + use rand::{Rng, SeedableRng}; + use content_tree::{ContentLength, ContentTreeRaw, FullMetricsUsize}; + use rle::{HasLength, HasRleKey, MergableSpan, SplitableSpan, SplitableSpanHelpers}; + use super::*; + + /// This is a simple span object for testing. 
+ #[derive(Debug, Clone, Copy, Eq, PartialEq)] + struct TestRange { + id: u32, + len: u32, + inserted: bool, + exists: bool, + } + + impl Default for TestRange { + fn default() -> Self { + Self { + id: u32::MAX, + len: u32::MAX, + inserted: false, + exists: false, + } + } + } + + impl HasLength for TestRange { + fn len(&self) -> usize { self.len as usize } + } + impl SplitableSpanHelpers for TestRange { + fn truncate_h(&mut self, at: usize) -> Self { + assert!(at > 0 && at < self.len as usize); + assert!(self.exists); + let other = Self { + id: self.id + at as u32, + len: self.len - at as u32, + inserted: self.inserted, + exists: self.exists, + }; + self.len = at as u32; + other + } + + fn truncate_keeping_right_h(&mut self, at: usize) -> Self { + let mut other = *self; + *self = other.truncate(at); + other + } + } + impl MergableSpan for TestRange { + fn can_append(&self, other: &Self) -> bool { + assert!(self.exists); + other.id == self.id + self.len && other.inserted == self.inserted + } + + fn append(&mut self, other: Self) { + assert!(self.can_append(&other)); + self.len += other.len; + } + + fn prepend(&mut self, other: Self) { + assert!(other.can_append(self)); + self.len += other.len; + self.id = other.id; + } + } + + impl HasRleKey for TestRange { + fn rle_key(&self) -> usize { + self.id as usize + } + } + + impl Content for TestRange { + fn content_len(&self) -> usize { + if self.inserted && self.exists { self.len as _ } else { 0 } + } + + fn exists(&self) -> bool { + self.exists + } + + fn takes_up_space(&self) -> bool { + if !self.exists { false } + else { self.inserted } + } + + fn none() -> Self { + Self::default() + } + } + + fn null_notify(_v: V, _idx: LeafIdx) {} + fn debug_notify(v: V, idx: LeafIdx) { + println!("Notify {:?} at {:?}", v, idx); + } + fn panic_notify(_v: V, _idx: LeafIdx) { + panic!("Notify erroneously called") + } + + #[test] + fn simple_inserts() { + let mut tree: ContentTree = ContentTree::new(); + tree.dbg_check(); + + // let 
mut cursor = tree.cursor_at_content_pos::(0); + let mut cursor = tree.mut_cursor_at_start(); + + tree.insert_notify(TestRange { + id: 123, + len: 10, + inserted: false, + exists: true, + }, &mut cursor, &mut debug_notify); + // tree.dbg_check(); // checking here doesn't work because we have an outstanding cursor. + // dbg!(&cursor); + + cursor.0.offset = 2; + tree.insert_notify(TestRange { + id: 321, + len: 20, + inserted: true, + exists: true, + }, &mut cursor, &mut debug_notify); + tree.emplace_cursor(20, cursor); + // tree.emplace_cursor((20, 2 + 20).into(), cursor); + // tree.flush_cursor(cursor); + tree.dbg_check(); + + // dbg!(&cursor); + + // dbg!(&tree); + + // dbg!(tree.iter().collect::>()); + assert!(tree.iter().eq([ + TestRange { id: 123, len: 2, inserted: false, exists: true }, + TestRange { id: 321, len: 20, inserted: true, exists: true }, + TestRange { id: 125, len: 8, inserted: false, exists: true }, + ].into_iter())); + } + + #[test] + fn replace_item() { + let mut tree: ContentTree = ContentTree::new(); + // let mut cursor = tree.cursor_at_start(); + let mut cursor = tree.mut_cursor_at_start(); + + tree.insert_notify(TestRange { + id: 123, + len: 10, + inserted: true, + exists: true, + }, &mut cursor, &mut null_notify); + tree.emplace_cursor(10, cursor); + tree.dbg_check(); + + let mut cursor = tree.mut_cursor_before_cur_pos(2); + // assert_eq!(tree.get_cursor_pos(&cursor), LenPair::new(2, 2)); + // cursor.offset = 2; + let (len, _r) = tree.mutate_entry(&mut cursor, 5, &mut panic_notify, |e| { + assert_eq!(e.id, 125); + assert_eq!(e.len, 5); + e.inserted = false; + }); + assert_eq!(len, 5); + tree.emplace_cursor(2, cursor); + + tree.dbg_check(); + + // dbg!(tree.get_cursor_pos(&cursor)); + // dbg!(tree.iter().collect::>()); + assert!(tree.iter().eq([ + TestRange { id: 123, len: 2, inserted: true, exists: true }, + TestRange { id: 125, len: 5, inserted: false, exists: true }, + TestRange { id: 130, len: 3, inserted: true, exists: true }, + 
].into_iter())); + + // Now re-activate part of the middle item. + // let (end_pos, mut cursor) = tree.mut_cursor_at_end_pos(5); + // I can't get a cursor where I want it. This is dirty as anything. + + let mut cursor = tree.mut_cursor_before_cur_pos(1); + cursor.0.elem_idx += 1; cursor.0.offset = 3; // hack hack hack. + let (len, _r) = tree.mutate_entry(&mut cursor, 5, &mut panic_notify, |e| { + // dbg!(&e); + e.inserted = true; + }); + assert!(tree.iter().eq([ + TestRange { id: 123, len: 2, inserted: true, exists: true }, + TestRange { id: 125, len: 3, inserted: false, exists: true }, + TestRange { id: 128, len: 5, inserted: true, exists: true }, + ].into_iter())); + assert_eq!(len, 2); + // dbg!(tree.iter().collect::>()); + + tree.emplace_cursor(4, cursor); + tree.dbg_check(); + } + + impl ContentLength for TestRange { + fn content_len(&self) -> usize { self.content_len_cur() } + + fn content_len_at_offset(&self, offset: usize) -> usize { + if self.inserted { offset } else { 0 } + } + } + + fn random_entry(rng: &mut SmallRng) -> TestRange { + TestRange { + id: rng.gen_range(0..10), + len: rng.gen_range(1..10), + inserted: rng.gen_bool(0.5), + exists: true, + } + } + + fn fuzz(seed: u64, mut verbose: bool) { + verbose = verbose; // suppress mut warning. + let mut rng = SmallRng::seed_from_u64(seed); + let mut tree = ContentTree::::new(); + // let mut check_tree: Pin>, RawPositionMetricsUsize>>> = ContentTreeRaw::new(); + let mut check_tree: Pin>> = ContentTreeRaw::new(); + const START_JUNK: u32 = 1_000_000; + check_tree.replace_range_at_offset(0, TestRange { + id: START_JUNK, + len: START_JUNK, + inserted: false, + exists: false, + }); + + for _i in 0..1000 { + if verbose { println!("i: {}", _i); } + // println!("i: {}", _i); + + // if _i == 31 { + // println!("asdf"); + // // verbose = true; + // } + + if tree.total_len() == 0 || rng.gen_bool(0.6) { + + // tree.dbg_check(); + // Insert something. 
+ let cur_pos = rng.gen_range(0..=tree.total_len()); + let item = random_entry(&mut rng); + + if verbose { println!("inserting {:?} at {}", item, cur_pos); } + + // Insert into check tree + { + // check_tree.check(); + // check_tree.print_ptr_tree(); + let mut cursor = check_tree.mut_cursor_at_content_pos(cur_pos, true); + cursor.insert(item); + assert_eq!(cursor.count_content_pos(), cur_pos + item.content_len_cur()); + } + + // Insert into our tree. + { + // if verbose { dbg!(&tree); } + + // This code mirrors the equivalent code in merge.rs + let mut cursor = if cur_pos == 0 { + tree.mut_cursor_at_start() + } else { + // // Equivalent of getting a cursor with stick_end: true. + // let (end_pos, mut cursor) = tree.mut_cursor_before_cur_pos(cur_pos - 1); + // tree.emplace_cursor((cur_pos - 1, end_pos).into(), cursor); + // + // let (end_pos, mut cursor) = tree.mut_cursor_before_cur_pos(cur_pos - 1); + // tree.cursor_inc_offset(&mut cursor); + // tree.emplace_cursor((cur_pos, end_pos + 1).into(), cursor); + + + let mut cursor = tree.mut_cursor_before_cur_pos(cur_pos - 1); + cursor.0.inc_offset(&tree); + cursor + }; + // let mut cursor = tree.cursor_at_content_pos::(pos); + // dbg!(&cursor); + let pre_pos = cur_pos; + tree.insert_notify(item, &mut cursor, &mut null_notify); + // dbg!(&cursor); + + // if verbose { dbg!(&tree); } + // tree.dbg_check(); + + // This will check that the position makes sense. + tree.emplace_cursor(pre_pos + item.content_len_pair(), cursor); + + // let post_pos = tree.get_cursor_pos(&cursor); + // // dbg!(pre_pos, item.content_len_pair(), post_pos); + // assert_eq!(pre_pos + item.content_len_pair(), post_pos); + } + } else { + + let gen_range = |rng: &mut SmallRng, range: Range| { + if range.is_empty() { range.start } + else { rng.gen_range(range) } + }; + + // Modify something. + // + // Note this has a subtle sort-of flaw: The first item we touch will always be + // active. But we might make some later items active again in the range. 
+ let modify_len = gen_range(&mut rng, 1..20.min(tree.total_len())); + // let modify_len = 1; + debug_assert!(modify_len <= tree.total_len()); + let pos = gen_range(&mut rng, 0..tree.total_len() - modify_len); + let new_is_active = rng.gen_bool(0.5); + + // The chunking of the two tree implementations might differ, so we'll run modify + // in a loop. + { + let mut len_remaining = modify_len; + let mut cursor = check_tree.mut_cursor_at_content_pos(pos, false); + while len_remaining > 0 { + let (changed, _) = cursor.mutate_single_entry_notify(len_remaining, content_tree::null_notify, |e| { + e.inserted = new_is_active; + }); + cursor.roll_to_next_entry(); + len_remaining -= changed; + } + } + + { + let mut len_remaining = modify_len; + // let mut cursor = tree.cursor_at_content_pos::(pos); + let mut cursor = tree.mut_cursor_before_cur_pos(pos); + let mut cursor_pos = pos; + + while len_remaining > 0 { + // let pre_pos = tree.get_cursor_pos(&cursor); + let (changed, len_here) = tree.mutate_entry(&mut cursor, len_remaining, &mut null_notify, |e| { + e.inserted = new_is_active; + e.content_len_pair() + }); + cursor_pos += len_here; + // let post_pos = tree.get_cursor_pos(&cursor); + // assert_eq!(pre_pos.end + changed, post_pos.end); + len_remaining -= changed; + } + + tree.emplace_cursor(cursor_pos, cursor); + } + } + + // Check that both trees have identical content. 
+ tree.dbg_check(); + assert!(check_tree.iter().filter(|e| e.id < START_JUNK) + .eq(tree.iter_rle())); + } + } + + #[test] + fn content_tree_fuzz_once() { + // fuzz(3322, true); + // for seed in 8646911284551352000..8646911284551353000 { + // + // fuzz(seed, true); + // } + fuzz(0, true); + } + + // #[test] + // #[ignore] + // fn content_tree_fuzz_forever() { + // fuzz_multithreaded(u64::MAX, |seed| { + // if seed % 100 == 0 { + // println!("Iteration {}", seed); + // } + // fuzz(seed, false); + // }) + // } +} diff --git a/crates/diamond-types-crdt/src/ost/ct_old.rs b/crates/diamond-types-crdt/src/ost/ct_old.rs new file mode 100644 index 0000000..88614c4 --- /dev/null +++ b/crates/diamond-types-crdt/src/ost/ct_old.rs @@ -0,0 +1,419 @@ + +use std::ops::{Index, IndexMut}; +use content_tree::ContentLength; +use rle::HasLength; +use crate::listmerge::yjsspan::CRDTSpan; +use crate::ost::{LEAF_CHILDREN, LeafIdx, LenPair, NODE_CHILDREN, NodeIdx}; + +// const LEAF_CHILDREN: usize = LEAF_SIZE - 1; + +#[derive(Debug)] +pub(super) struct ContentTree { + // The order of these vectors is arbitrary. I'm using Vec essentially as a simple memory pool. + leaves: Vec, + nodes: Vec, + /// This counts the number of levels of internal nodes. + height: usize, + root: usize, + // cursor: Option, + cursor: Option, + + total_len: LenPair, +} + +#[derive(Debug, Clone)] +struct ContentCursor { + leaf: LeafIdx, + + /// The global starting position of the leaf node we're pointing to. This is used when the + /// cursor is cached. + /// + /// This is a "current pos". + leaf_global_start: usize, + // leaf_start: LenPair, + + // The cursor points to a specific location within the specified leaf node. + elem_idx: usize, + offset: usize, + + /// This is the distance from the start of the tree to the current element / offset position. + /// This is a "current pos". 
+ offset_global_pos: usize, +} + +#[derive(Debug, Clone)] +struct CachedContentCursor { + c: ContentCursor, + leaf_current_end: usize, +} + +#[derive(Debug, Clone, Default)] +pub(super) struct ContentLeaf { + /// Data entries are filled from the left. All unused entries have an empty ID. + /// + /// TODO: Try replacing this with a gap buffer. + data: [CRDTSpan; LEAF_CHILDREN], + + // Redundant information. Could look at the parent. But this is useful for cursor + // calculations. + // size: LenPair, + cur_size: usize, + + // TODO: Consider adding prev_leaf as well. + + /// usize::MAX for the last leaf node. + next_leaf: LeafIdx, + + parent: NodeIdx, +} + +// #[derive(Debug, Clone, Copy, Eq, PartialEq)] +// struct ItemSize { +// current: usize, +// end: usize, +// } + +#[derive(Debug, Clone, Default)] +pub(super) struct ContentNode { + // SoA or AoS? + + child_indexes: [usize; NODE_CHILDREN], + + /// The size (width) of each child item at the current point in time. + cur_size: [usize; NODE_CHILDREN], + + /// The size (width) of each child item after all items have been merged. + end_size: [usize; NODE_CHILDREN], +} + +impl ContentLeaf { + /// The number of children of this node "in use". Might be worth caching? Hard to tell. + fn num_children(&self) -> usize { + // TODO: SIMD accelerate me. + for (i, e) in self.data.iter().enumerate() { + if e.is_empty() { return i; } + } + return self.data.len() + } +} + +impl ContentNode { + fn iter(&self) -> impl Iterator + '_ { + // TODO: Would this generate better code with .copied() ? + self.child_indexes.iter() + .zip(self.cur_size.iter()) + .zip(self.end_size.iter()) + .take_while(|((idx, _), _)| **idx != usize::MAX) + .map(|((&idx, &cur), &end)| (idx, LenPair { cur, end })) + } + + fn len_of_child(&self, i: usize) -> LenPair { + LenPair { cur: self.cur_size[i], end: self.end_size[i] } + } + + // /// The number of children of this node "in use". Might be worth caching? Hard to tell. 
+ // fn num_children(&self) -> usize { + // // TODO: SIMD accelerate me. + // for (i, idx) in self.child_indexes.iter().enumerate() { + // if *idx == usize::MAX { return i; } + // } + // return self.child_indexes.len() + // } +} + +impl Index for ContentTree { + type Output = ContentLeaf; + + fn index(&self, index: LeafIdx) -> &Self::Output { + &self.leaves[index.0] + } +} +impl IndexMut for ContentTree { + fn index_mut(&mut self, index: LeafIdx) -> &mut Self::Output { + &mut self.leaves[index.0] + } +} +impl Index for ContentTree { + type Output = ContentNode; + + fn index(&self, index: NodeIdx) -> &Self::Output { + &self.nodes[index.0] + } +} +impl IndexMut for ContentTree { + fn index_mut(&mut self, index: NodeIdx) -> &mut Self::Output { + &mut self.nodes[index.0] + } +} + +impl Default for ContentTree { + fn default() -> Self { + Self::new() + } +} + +impl ContentTree { + pub(super) fn new() -> Self { + Self { + leaves: vec![ContentLeaf::default()], + nodes: vec![], + height: 0, + root: 0, + cursor: None, + total_len: Default::default(), + } + } + + /// Resets the tree to the same logical state as a freshly constructed one: a single empty + /// leaf, no internal nodes, no cached cursor and a zero total length. The backing vectors + /// are emptied in place via `Vec::clear` rather than being replaced. + pub(super) fn clear(&mut self) { + self.leaves.clear(); + self.nodes.clear(); + self.height = 0; + self.root = 0; + self.cursor = None; + // The length summary must be reset as well; otherwise it keeps describing the old + // contents and no longer matches the (now empty) leaf checked by dbg_check(). + self.total_len = Default::default(); + self.leaves.push(ContentLeaf::default()); + } + + fn cursor_within_leaf(&self, req_pos: usize, leaf_idx: LeafIdx, leaf_global_start: usize, stick_end: bool) -> ContentCursor { + let mut p = leaf_global_start; + let leaf = &self[leaf_idx]; + + for (i, e) in leaf.data.iter().enumerate() { + let c_len = e.content_len(); + let next_pos = p + c_len; + if next_pos > req_pos || stick_end && next_pos == req_pos { + return ContentCursor { + leaf: leaf_idx, + leaf_global_start, + elem_idx: i, + offset: req_pos - p, + offset_global_pos: req_pos, + } + } + p = next_pos; + } + unreachable!("Cursor metadata is invalid"); + } + + fn try_cursor_at_current_cached(&self, req_cur_pos: usize, stick_end: bool) -> Option { + // First check if we can use the cached cursor. 
+ if let Some(c) = self.cursor.as_ref() { + // if req_cur_pos == c.c.leaf_global_start + c.c.offset_global_pos { + if req_cur_pos == c.c.offset_global_pos { + // The cursor is exactly where we expect. Take it! + // TODO: Try this with &mut self and cursor.take(). + return Some(c.c.clone()); + } + + if req_cur_pos >= c.c.leaf_global_start { + if req_cur_pos < c.leaf_current_end { + return Some(self.cursor_within_leaf(req_cur_pos, c.c.leaf, c.c.leaf_global_start, stick_end)); + } else if req_cur_pos == c.leaf_current_end { + let leaf = &self[c.c.leaf]; + // The cursor points to the end of the last item in this node. + if stick_end { + // Make a cursor here. + let last_idx = leaf.num_children() - 1; + + return Some(ContentCursor { + leaf: c.c.leaf, + leaf_global_start: c.c.leaf_global_start, + elem_idx: last_idx, + offset: leaf.data[last_idx].len(), + offset_global_pos: c.leaf_current_end, + }) + } else { + // Make a cursor at the start of the subsequent node. NOTE(review): for the last leaf, next_leaf is presumably the usize::MAX sentinel - confirm callers never hit this at the end of the document. + return Some(ContentCursor { + leaf: leaf.next_leaf, + leaf_global_start: c.leaf_current_end, + elem_idx: 0, + offset: 0, + offset_global_pos: c.leaf_current_end, + }) + } + } + } + } + + None + } + + fn cursor_at_current(&self, req_pos: usize, stick_end: bool) -> ContentCursor { + if let Some(c) = self.try_cursor_at_current_cached(req_pos, stick_end) { + return c; + } + + // Scan the tree. + let mut idx = self.root; + let mut req_pos_remaining = req_pos; + + 'outer: for _h in 0..self.height { + // Scan down this internal node. + let n = &self.nodes[idx]; + + // Scan across. + // TODO: SIMD somehow? + for i in 0..n.child_indexes.len() { + // Running out of children before reaching the requested position means the tree is corrupt. + debug_assert_ne!(n.child_indexes[i], usize::MAX); + + if n.cur_size[i] > req_pos_remaining { + // Go down. + idx = n.child_indexes[i]; + continue 'outer; + } else { + req_pos_remaining -= n.cur_size[i]; + } + + } + unreachable!("Could not find child element. Tree is corrupt."); + } + + // preload the leaf? + + // Scan the leaf. 
+ return self.cursor_within_leaf(req_pos, LeafIdx(idx), req_pos - req_pos_remaining, stick_end); + } + + fn cache_cursor(&mut self, c: ContentCursor) { + let n = &self[c.leaf]; + self.cursor = Some(CachedContentCursor { + leaf_current_end: c.leaf_global_start + n.cur_size, + c, + }); + } + + // fn insert_at_cursor(&mut self, e: CRDTSpan, c: &ContentCursor, notify: &mut N) + // where N: FnMut(&CRDTSpan, LeafIdx) + // { + // let leaf = &mut self[c.leaf]; + // let width = e.len_pair(); + // + // let mut idx = c.elem_idx; + // let mut slot = &mut leaf.data[idx]; + // let mut offset = c.offset; + // + // // cur, end. + // let mut size_update = LenUpdate::default(); + // + // let remainder = if offset == 0 && idx > 0 { + // // We'll roll the cursor back to opportunistically see if we can append. + // idx -= 1; + // slot = &mut leaf.data[idx]; + // offset = slot.len(); + // None + // } else if offset < slot.len() { + // // Splice off the end of the current item. + // let remainder = slot.truncate(offset); + // size_update.dec_by(&remainder); + // Some(remainder) + // } else { None }; + // + // if offset != 0 { + // // Try and append the inserted item here. + // if slot.can_append(&e) { + // size_update.inc_by(&e); + // notify(&e, c.leaf); + // slot.append(e); + // } else { + // offset = 0; + // // Go to the next slot. + // if idx + 1 < leaf.data.len() { + // idx += 1; + // slot = &mut leaf.data[idx]; + // } + // } + // } + // + // // if let remainder = + // + // leaf.cur_size += width.cur; + // + // + // // let mut slot = &mut leaf.data[c.elem_idx]; + // if leaf.data[c.elem_idx].is_empty() { + // // The cursor points to the end of the node. + // debug_assert!(c.elem_idx == 0 || !leaf.data[c.elem_idx - 1].is_empty(), "Invalid cursor"); + // + // leaf.data[c.elem_idx] = e; + // } + // } + + /// Check is implemented recursively. Because why not. 
+ fn dbg_check_walk(&self, idx: usize, height: usize, expect_len: LenPair, global_cpos: usize) { + if height != 0 { + assert!(idx < self.nodes.len()); + let node = &self.nodes[idx]; + + let mut actual_child_len = LenPair::default(); + + // Count the size and recurse. + let mut finished = false; + for (i, &child_idx) in node.child_indexes.iter().enumerate() { + if child_idx == usize::MAX { + finished = true; + } else { + assert_eq!(finished, false); + let child_len = node.len_of_child(i); + self.dbg_check_walk(child_idx, height - 1, child_len, global_cpos + actual_child_len.cur); + actual_child_len += child_len; + } + } + + assert_eq!(actual_child_len, expect_len); + } else { + // Look at the leaf at idx. + assert!(idx < self.leaves.len()); + let leaf = &self.leaves[idx]; + + // Count the size. + let mut actual_len = LenPair::default(); + let mut finished = false; + for d in &leaf.data { + // Any empty elements should be at the end. + if d.is_empty() { + finished = true; + } else { + assert_eq!(finished, false); + actual_len += d.len_pair(); + } + } + + assert_eq!(actual_len, expect_len); + assert_eq!(actual_len.cur, leaf.cur_size); + + // Check the cached cursor. + if let Some(c) = self.cursor.as_ref() { + if c.c.leaf.0 == idx { + // Check the cursor makes sense in this leaf. + assert_eq!(c.c.leaf_global_start, global_cpos); + assert_eq!(c.leaf_current_end, global_cpos + leaf.cur_size); + + // Check the offset position. + let mut offset_pos = global_cpos; + for e in &leaf.data[0..c.c.elem_idx] { + offset_pos += e.content_len(); + } + assert_eq!(c.c.offset_global_pos, offset_pos + c.c.offset); + } + } + } + } + + #[allow(unused)] + pub(super) fn dbg_check(&self) { + // Invariants: + // - All content sizes match + // - The root item contains all other items + // - Next pointers make sense in the leaves + if self.height == 0 { + assert!(self.root < self.leaves.len()); + } else { + assert!(self.root < self.nodes.len()); + } + + // Walk the content tree. 
+ self.dbg_check_walk(self.root, self.height, self.total_len, 0); + } +} + diff --git a/crates/diamond-types-crdt/src/ost/index_tree.rs b/crates/diamond-types-crdt/src/ost/index_tree.rs new file mode 100644 index 0000000..e5036f7 --- /dev/null +++ b/crates/diamond-types-crdt/src/ost/index_tree.rs @@ -0,0 +1,1902 @@ +//! This file contains an implementation of [`IndexTree`] - which is a run-length encoded, in-memory +//! BTree mapping from integers to some value type. +//! +//! The merging algorithm uses this type to find the item which stores a specific local version +//! value. + +use std::cell::Cell; +use std::cmp::Ordering; +use std::fmt::Debug; +use std::mem; +use std::ops::{Index, IndexMut, Range}; + +use rle::{HasLength, MergableSpan, RleDRun, SplitableSpan, SplitableSpanHelpers}; + +use crate::dtrange::DTRange; +use crate::list::LV; +use crate::ost::{LEAF_CHILDREN, LeafIdx, NODE_CHILDREN, NodeIdx, remove_from_array, remove_from_array_fill}; + +#[derive(Debug, Clone)] +pub(crate) struct IndexTree { + leaves: Vec>, + nodes: Vec, + // upper_bound: LV, + height: usize, + root: usize, + // cursor: IndexCursor, + cursor: Cell<(LV, IndexCursor)>, + + // Linked lists. + free_leaf_pool_head: LeafIdx, + free_node_pool_head: NodeIdx, +} + +#[derive(Debug, Clone, Copy)] +struct IndexCursor { + // The item pointed to by the cursor should still be in the CPU's L1 cache. I could cache some + // properties of the cursor's leaf item here, but I think it wouldn't improve performance - + // since we wouldn't be saving any memory loads anyway. + leaf_idx: LeafIdx, + elem_idx: usize, +} + +// Wouldn't need this impl if LeafIdx defaulted to 0... 
+impl Default for IndexCursor { + fn default() -> Self { + IndexCursor { + leaf_idx: LeafIdx(0), + elem_idx: 0, + } + } +} + +const NODE_SPLIT_POINT: usize = NODE_CHILDREN / 2; +// const LEAF_CHILDREN: usize = LEAF_SIZE - 1; +const LEAF_SPLIT_POINT: usize = LEAF_CHILDREN / 2; + +#[derive(Debug, Clone)] +pub struct IndexLeaf { + /// The bounds is usize::MAX for unused items. The last item has an upper bound equal to the + /// start bound of the first item in the next leaf. This is also cached in upper_bound. + bounds: [LV; LEAF_CHILDREN], + children: [V; LEAF_CHILDREN], + // /// (start of range, data). Start == usize::MAX for empty entries. + // children: [(LV, V); LEAF_CHILDREN], + + // upper_bound: LV, + next_leaf: LeafIdx, + parent: NodeIdx, +} + +#[derive(Debug, Clone)] +pub struct IndexNode { + /// Child entries point to either another node or a leaf. We disambiguate using the height. + /// The named LV is the first LV of the child data. + /// + /// Children are (usize::MAX, usize::MAX) if they are unset. + children: [NodeChild; NODE_CHILDREN], + parent: NodeIdx, +} + +fn initial_root_leaf() -> IndexLeaf { + // The tree is initialized with V::Default covering the entire range. This means we don't need + // to have any special handling for the size of the tree. Set operations "carve out" their + // specified value. + let mut bounds = [usize::MAX; LEAF_CHILDREN]; + bounds[0] = 0; + + IndexLeaf { + bounds, + children: [V::default(); LEAF_CHILDREN], + // upper_bound: usize::MAX, // The bounds of the last item is (functionally) infinity. + next_leaf: LeafIdx(usize::MAX), + parent: NodeIdx(usize::MAX), // This node won't exist yet - but thats ok. + } +} + +/// A node child specifies the LV of the (recursive) first element and an index in the data +/// structure. The index is either an index into the internal nodes or leaf nodes depending on the +/// height. 
+type NodeChild = (LV, usize); + +const EMPTY_NODE_CHILD: NodeChild = (usize::MAX, usize::MAX); + +impl IndexLeaf { + fn is_full(&self) -> bool { + *self.bounds.last().unwrap() != usize::MAX + } + + #[inline(always)] + fn has_space(&self, space_wanted: usize) -> bool { + if space_wanted == 0 { return true; } + self.bounds[LEAF_CHILDREN - space_wanted] == usize::MAX + } + + // fn bound_for_idx(&self, idx: usize) -> usize { + // let next_idx = idx + 1; + // if next_idx >= LEAF_CHILDREN { + // self.upper_bound + // } else { + // let bound = self.bounds[next_idx]; + // // If bound == usize::MAX, this item isn't used. Default to bound. + // if bound == usize::MAX { self.upper_bound } else { bound } + // } + // } + + fn is_last(&self) -> bool { !self.next_leaf.exists() } + + fn next<'a>(&self, leaves: &'a [IndexLeaf]) -> Option<&'a IndexLeaf> { + if self.is_last() { None } + else { Some(&leaves[self.next_leaf.0]) } + } + + fn next_mut<'a>(&self, leaves: &'a mut [IndexLeaf]) -> Option<&'a mut IndexLeaf> { + if self.is_last() { None } + else { Some(&mut leaves[self.next_leaf.0]) } + } + + fn remove_children(&mut self, del_range: Range) { + remove_from_array_fill(&mut self.bounds, del_range.clone(), usize::MAX); + remove_from_array(&mut self.children, del_range.clone()); + + // self.children.copy_within(del_range.end..LEAF_CHILDREN, del_range.start); + } +} + +impl IndexNode { + fn is_full(&self) -> bool { + self.children.last().unwrap().1 != usize::MAX + } + + fn remove_children(&mut self, del_range: Range) { + remove_from_array_fill(&mut self.children, del_range.clone(), EMPTY_NODE_CHILD); + } +} + +impl Default for IndexTree { + fn default() -> Self { + Self::new() + } +} + +impl Index for IndexTree { + type Output = IndexLeaf; + + fn index(&self, index: LeafIdx) -> &Self::Output { + &self.leaves[index.0] + } +} +impl IndexMut for IndexTree { + fn index_mut(&mut self, index: LeafIdx) -> &mut Self::Output { + &mut self.leaves[index.0] + } +} +impl Index for IndexTree { + 
type Output = IndexNode; + + fn index(&self, index: NodeIdx) -> &Self::Output { + &self.nodes[index.0] + } +} +impl IndexMut for IndexTree { + fn index_mut(&mut self, index: NodeIdx) -> &mut Self::Output { + &mut self.nodes[index.0] + } +} + + +/// I'm not sure if this is a good idea. The index stores its base positions separate from the +/// content. +/// +/// Essentially index content must splitable & mergable be such that .truncate() / .append() are +/// no-ops. .can_append will also need the base & offset. +// pub trait IndexContent: Debug + Copy + Eq { +pub trait IndexContent: Debug + Copy { + /// Try to append other to self. If possible, self is modified (if necessary) and true is + /// returned. + fn try_append(&mut self, offset: usize, other: &Self, other_len: usize) -> bool; + // fn try_append(&mut self, offset: usize, other: &Self, other_len: usize) -> bool { + // debug_assert!(offset > 0); + // debug_assert!(other_len > 0); + // &self.at_offset(offset) == other + // } + + fn at_offset(&self, offset: usize) -> Self; + + fn eq(&self, other: &Self, upto_len: usize) -> bool; +} + +fn split_rle(val: RleDRun, offset: usize) -> (RleDRun, RleDRun) { + debug_assert!(offset > 0); + debug_assert!(offset < (val.end - val.start)); + + (RleDRun { + start: val.start, + end: val.start + offset, + val: val.val, + }, RleDRun { + start: val.start + offset, + end: val.end, + val: val.val.at_offset(offset), + }) +} + +impl IndexTree { + pub fn new() -> Self { + Self { + leaves: vec![initial_root_leaf()], + nodes: vec![], + // upper_bound: 0, + height: 0, + root: 0, + cursor: Default::default(), + free_leaf_pool_head: LeafIdx(usize::MAX), + free_node_pool_head: NodeIdx(usize::MAX), + } + } + + pub fn clear(&mut self) { + self.leaves.clear(); + self.nodes.clear(); + self.height = 0; + self.root = 0; + self.cursor = Default::default(); + self.free_leaf_pool_head = LeafIdx(usize::MAX); + self.free_node_pool_head = NodeIdx(usize::MAX); + + self.leaves.push(initial_root_leaf()); + 
} + + fn create_new_root_node(&mut self, lower_bound: usize, child_a: usize, split_point: LV, child_b: usize) -> NodeIdx { + self.height += 1; + let mut new_root = IndexNode { + children: [EMPTY_NODE_CHILD; NODE_CHILDREN], + parent: Default::default(), + }; + new_root.children[0] = (lower_bound, child_a); + new_root.children[1] = (split_point, child_b); + + let new_idx = self.nodes.len(); + // println!("Setting root to {new_idx}"); + self.root = new_idx; + self.nodes.push(new_root); + NodeIdx(new_idx) + } + + /// This method always splits a node in the middle. This isn't always optimal, but its simpler. + fn split_node(&mut self, old_idx: NodeIdx, children_are_leaves: bool) -> NodeIdx { + // Split a full internal node into 2 nodes. + let new_node_idx = self.nodes.len(); + // println!("split node -> {new_node_idx}"); + let old_node = &mut self.nodes[old_idx.0]; + let split_lv = old_node.children[NODE_SPLIT_POINT].0; + + // The old leaf must be full before we split it. + debug_assert!(old_node.is_full()); + + // eprintln!("split node {:?} -> {:?} + {:?} (leaves: {children_are_leaves})", old_idx, old_idx, new_node_idx); + // eprintln!("split start {:?} / {:?}", &old_node.children[..NODE_SPLIT_POINT], &old_node.children[NODE_SPLIT_POINT..]); + + let mut new_node = IndexNode { + children: [EMPTY_NODE_CHILD; NODE_CHILDREN], + parent: NodeIdx(usize::MAX), // Overwritten below. 
+ }; + + new_node.children[0..NODE_SPLIT_POINT].copy_from_slice(&old_node.children[NODE_SPLIT_POINT..]); + old_node.children[NODE_SPLIT_POINT..].fill(EMPTY_NODE_CHILD); + + if children_are_leaves { + for (_, idx) in &new_node.children[..NODE_SPLIT_POINT] { + self.leaves[*idx].parent = NodeIdx(new_node_idx); + } + } else { + for (_, idx) in &new_node.children[..NODE_SPLIT_POINT] { + self.nodes[*idx].parent = NodeIdx(new_node_idx); + } + } + + debug_assert_eq!(new_node_idx, self.nodes.len()); + // let split_point_lv = new_node.children[0].0; + self.nodes.push(new_node); + + // It would be much nicer to do this above earlier - and in earlier versions I did. + // The problem is that both create_new_root_node and insert_into_node can insert new items + // into self.nodes. If that happens, the new node index we're expecting to use is used by + // another node. Hence, we need to call self.nodes.push() before calling any other function + // which modifies the node list. + let old_node = &self.nodes[old_idx.0]; + if old_idx.0 == self.root { + let lower_bound = old_node.children[0].0; + // We'll make a new root. + let parent = self.create_new_root_node( + lower_bound, old_idx.0, + split_lv, new_node_idx); + self.nodes[old_idx.0].parent = parent; + self.nodes[new_node_idx].parent = parent + } else { + let parent = old_node.parent; + self.nodes[new_node_idx].parent = self.insert_into_node(parent, (split_lv, new_node_idx), old_idx.0, false); + } + + NodeIdx(new_node_idx) + } + + #[must_use] + fn insert_into_node(&mut self, mut node_idx: NodeIdx, new_child: NodeChild, after_child: usize, children_are_leaves: bool) -> NodeIdx { + let mut node = &mut self[node_idx]; + + // Where will the child go? I wonder if the compiler can do anything smart with this... 
+ let mut insert_pos = node.children + .iter() + .position(|(_, idx)| { *idx == after_child }) + .unwrap() + 1; + + // dbg!(&node); + // println!("insert_into_node n={:?} after_child {after_child} pos {insert_pos}, new_child {:?}", node_idx, new_child); + + if node.is_full() { + let new_node = self.split_node(node_idx, children_are_leaves); + + if insert_pos >= NODE_SPLIT_POINT { + // Actually we're inserting into the new node. + insert_pos -= NODE_SPLIT_POINT; + node_idx = new_node; + } + // Technically this only needs to be reassigned in the if() above, but reassigning it + // in all cases is necessary for the borrowck. + node = &mut self[node_idx]; + } + + // Could scan to find the actual length of the children, then only memcpy that many. But + // memcpy is cheap. + node.children.copy_within(insert_pos..NODE_CHILDREN - 1, insert_pos + 1); + node.children[insert_pos] = new_child; + + if insert_pos == 0 { + let parent = node.parent; + Self::recursively_update_nodes(&mut self.nodes, parent, node_idx.0, new_child.0); + } + + node_idx + } + + fn split_leaf(&mut self, old_idx: LeafIdx) -> LeafIdx { + // This function splits a full leaf node in the middle, into 2 new nodes. + // The result is two nodes - old_leaf with items 0..N/2 and new_leaf with items N/2..N. + + let old_height = self.height; + // TODO: This doesn't currently use the pool of leaves that we have so carefully prepared. + // It would be good to fix this, but it currently never actually happens in any of the + // benchmarking data. + let new_leaf_idx = self.leaves.len(); // Weird instruction order for borrowck. + let mut old_leaf = &mut self.leaves[old_idx.0]; + // debug_assert!(old_leaf.is_full()); + debug_assert!(!old_leaf.has_space(2)); + + // let parent = old_leaf.parent; + let split_lv = old_leaf.bounds[LEAF_SPLIT_POINT]; + + let parent = if old_height == 0 { + // Insert this leaf into a new root node. This has to be the first node. 
+ let lower_bound = old_leaf.bounds[0]; + let parent = self.create_new_root_node( + lower_bound, old_idx.0, + split_lv, new_leaf_idx); + old_leaf = &mut self.leaves[old_idx.0]; + debug_assert_eq!(parent, NodeIdx(0)); + // let parent = NodeIdx(self.nodes.len()); + old_leaf.parent = NodeIdx(0); + // debug_assert_eq!(old_leaf.parent, NodeIdx(0)); // Ok because its the default. + // old_leaf.parent = NodeIdx(0); // Could just default nodes to have a parent of 0. + + NodeIdx(0) + } else { + let mut parent = old_leaf.parent; + // The parent may change by calling insert_into_node - since the node we're inserting + // into may split off. + + parent = self.insert_into_node(parent, (split_lv, new_leaf_idx), old_idx.0, true); + old_leaf = &mut self.leaves[old_idx.0]; // borrowck. + parent + }; + + // The old leaf must be full before we split it. + // debug_assert!(old_leaf.data.last().unwrap().is_some()); + + let mut new_leaf = IndexLeaf { + bounds: [usize::MAX; LEAF_CHILDREN], + children: [V::default(); LEAF_CHILDREN], + // upper_bound: old_leaf.upper_bound, + next_leaf: old_leaf.next_leaf, + parent, + }; + + // We'll steal the second half of the items in OLD_LEAF. + // Could use ptr::copy_nonoverlapping but this is safe, and they compile to the same code. + new_leaf.children[0..LEAF_SPLIT_POINT].copy_from_slice(&old_leaf.children[LEAF_SPLIT_POINT..]); + new_leaf.bounds[0..LEAF_SPLIT_POINT].copy_from_slice(&old_leaf.bounds[LEAF_SPLIT_POINT..]); + + // The old leaf's new bound is the first copied item's position. + // old_leaf.upper_bound = split_lv; + old_leaf.bounds[LEAF_SPLIT_POINT..].fill(usize::MAX); + + // Ignore any danging children in release mode. They don't matter. + if cfg!(debug_assertions) { + // This behaviour shouldn't be depended on... its nice while debugging though. 
+ old_leaf.children[LEAF_SPLIT_POINT..].fill(V::default()); + } + + // old_leaf.upper_bound = split_lv; + old_leaf.next_leaf = LeafIdx(new_leaf_idx); + + self.leaves.push(new_leaf); + + LeafIdx(new_leaf_idx) + } + + fn make_space_in_leaf_for(&mut self, mut leaf_idx: LeafIdx, mut elem_idx: usize) -> (LeafIdx, usize) { + assert!(SIZE == 1 || SIZE == 2); + + if !self.leaves[leaf_idx.0].has_space(SIZE) { + let new_node = self.split_leaf(leaf_idx); + + if elem_idx >= LEAF_SPLIT_POINT { + // We're inserting into the newly created node. + leaf_idx = new_node; + elem_idx -= LEAF_SPLIT_POINT; + } + } + + let leaf = &mut self.leaves[leaf_idx.0]; + + // Could scan to find the actual length of the children, then only memcpy that many. But + // memcpy is cheap. + // Could also memcpy fewer items if we split it - since we know then the max will be + // LEAF_SPLIT_POINT. But I don't think that'll make any difference. + leaf.bounds.copy_within(elem_idx..LEAF_CHILDREN - SIZE, elem_idx + SIZE); + leaf.children.copy_within(elem_idx..LEAF_CHILDREN - SIZE, elem_idx + SIZE); + + (leaf_idx, elem_idx) + } + + /// This function blindly assumes the item is definitely in the recursive children. + fn find_lv_in_node(node: &IndexNode, needle: LV) -> usize { + // TODO: Speed up using SIMD. + node.children[1..].iter() + // Looking for the first child which contains the needle. + .position(|(lv, _)| { needle < *lv }) + .unwrap_or(NODE_CHILDREN - 1) + // .expect("Invalid search in index node") + // node.children.iter() + // // Looking for the first child which contains the needle. + // .position(|(lv, _)| { needle >= *lv }) + // .expect("Invalid search in index node") + } + + fn find_child_idx_in_node(node: &IndexNode, child: usize) -> usize { + // TODO: Speed up using SIMD. 
+ node.children.iter() + .position(|(_, idx)| { child == *idx }) + .expect("Invalid search in index node") + } + + fn find_in_leaf(leaf: &IndexLeaf, needle: LV) -> usize { + // Find the index of the first item where the needle is *not* in the range, and then return + // the previous item. + + // debug_assert!(leaf.is_last() || needle < leaf.upper_bound, "leaf: {:?} / needle {needle}", leaf); + + // There are much faster ways to write this using SIMD. + leaf.bounds[1..].iter() + // We're looking for the first item past the needle. + .position(|bound| *bound == usize::MAX || needle < *bound) + .unwrap_or(LEAF_CHILDREN - 1) + } + + // /// returns (index, at_end); + // fn find_in_leaf_2(&self, leaf: &IndexLeaf, needle: LV, end_ok: bool) -> (usize, bool) { + // for i in 1..LEAF_CHILDREN { + // let b = leaf.bounds[i]; + // if b == usize::MAX { + // let upper_bound = self.leaf_upper_bound(leaf); + // debug_assert!(upper_bound >= needle); + // return (i - 1, needle == upper_bound); + // } else if needle < b { + // return (i - 1, false); + // } else if needle == b { + // return if end_ok { (i - 1, true) } else { (i, false) }; + // } + // } + // + // let upper_bound = self.leaf_upper_bound(leaf); + // debug_assert!(upper_bound >= needle); + // return (LEAF_CHILDREN - 1, needle == upper_bound); + // } + + #[inline] + fn leaf_upper_bound(&self, leaf: &IndexLeaf) -> LV { + Self::leaf_upper_bound_2(&self.leaves, leaf) + } + + #[inline] + fn leaf_upper_bound_2(leaves: &Vec>, leaf: &IndexLeaf) -> LV { + if leaf.is_last() { + usize::MAX + } else { + leaves[leaf.next_leaf.0].bounds[0] + } + } + + fn check_cursor_at(&self, cursor: IndexCursor, lv: LV, at_end: bool) { + assert!(cfg!(debug_assertions)); + let leaf = &self.leaves[cursor.leaf_idx.0]; + let lower_bound = leaf.bounds[cursor.elem_idx]; + + let next = cursor.elem_idx + 1; + let upper_bound = if next < LEAF_CHILDREN && leaf.bounds[next] != usize::MAX { + leaf.bounds[next] + } else { + self.leaf_upper_bound(leaf) + }; + 
assert!(lv >= lower_bound); + + if at_end { + assert_eq!(lv, upper_bound); + } else { + assert!(lv < upper_bound, "Cursor is not within expected bound. Expect {lv} / upper_bound {upper_bound}"); + } + } + + fn cursor_to_next(&self, cursor: &mut IndexCursor) { + let leaf = &self.leaves[cursor.leaf_idx.0]; + let next_idx = cursor.elem_idx + 1; + if next_idx >= LEAF_CHILDREN || leaf.bounds[next_idx] == usize::MAX { + cursor.elem_idx = 0; + cursor.leaf_idx = leaf.next_leaf; + } else { + cursor.elem_idx += 1; + } + } + + /// Generate a cursor which points at the specified LV. + fn cursor_at(&self, lv: LV) -> IndexCursor { + debug_assert!(lv < usize::MAX); + + let (cursor_lv, cursor) = self.cursor.get(); + if cursor_lv == lv { + // println!("1"); + // println!("HIT"); + if cfg!(debug_assertions) { + self.check_cursor_at(cursor, lv, false); + } + + return cursor; + } + + let leaf = &self[cursor.leaf_idx]; + // TODO: Consider caching the upper bound of the subsequent element in the cursor. + + // This is correct, but doesn't improve performance. + // if lv >= leaf.bounds[cursor.elem_idx] { + // let next_elem = cursor.elem_idx + 1; + // let upper_bound = if next_elem >= LEAF_CHILDREN || leaf.bounds[next_elem] == usize::MAX { + // self.leaf_upper_bound(leaf) + // } else { + // leaf.bounds[next_elem] + // }; + // if lv < upper_bound { + // return cursor; + // } + // } + + if lv >= leaf.bounds[0] { + // There are 3 cases: + // - The lv is less than the bound (or this is the last node) + // - The lv is exactly the same as the upper bound. Use the start of the next leaf + // - Or the LV is something else. Scan normally. + + // TODO: Take advantage of elem_idx in the cursor. 
+ let upper_bound = self.leaf_upper_bound(leaf); + // let rel = self.upper_bound(leaf).map(|bound| lv.cmp(&bound)).unwrap_or(Ordering::Less); + + if lv < upper_bound { // || end_ok && lv == upper_bound + // println!("2"); + // println!("..."); + return IndexCursor { + leaf_idx: cursor.leaf_idx, + elem_idx: Self::find_in_leaf(leaf, lv), + }; + } else if lv == upper_bound { + // println!("3"); + // println!("..."); + return IndexCursor { + leaf_idx: leaf.next_leaf, + elem_idx: 0, // Has to be. + }; + } + } + + // println!("MISS"); + + // Make a cursor by descending from the root. + let mut idx = self.root; + for _h in 0..self.height { + let n = &self.nodes[idx]; + let slot = Self::find_lv_in_node(n, lv); + idx = n.children[slot].1; + } + + // dbg!(&self, lv, idx); + + // Now idx will point to the leaf node. Search there. + // println!("4"); + IndexCursor { + leaf_idx: LeafIdx(idx), + elem_idx: Self::find_in_leaf(&self.leaves[idx], lv), + } + } + + // // pub fn get_entry(&mut self, lv: LV, hint_fwd: bool) -> (V, LV) { + // /// Returns (value, upper bound) + // pub fn get_entry_mut(&mut self, lv: LV) -> RleDRun { + // let cursor = self.cursor_at(lv); + // self.cursor.set(cursor); + // let leaf = &self.leaves[cursor.leaf_idx.0]; + // let val = leaf.children[cursor.elem_idx]; + // let lower_bound = leaf.bounds[cursor.elem_idx]; + // + // let next_elem = cursor.elem_idx + 1; + // let upper_bound = if next_elem >= LEAF_CHILDREN { + // self.leaf_upper_bound(leaf) + // } else { + // leaf.bounds[next_elem] + // }; + // + // RleDRun { + // start: lower_bound, + // end: upper_bound, + // val + // } + // } + + /// Get the entry at the specified offset. This will return the largest run of values which + /// contains the specified index. 
+ pub fn get_entry(&self, lv: LV) -> RleDRun { + let cursor = self.cursor_at(lv); + + if cfg!(debug_assertions) { + self.check_cursor_at(cursor, lv, false); + } + + self.cursor.set((lv, cursor)); + + let leaf = &self.leaves[cursor.leaf_idx.0]; + let val = leaf.children[cursor.elem_idx]; + let lower_bound = leaf.bounds[cursor.elem_idx]; + + let next_elem = cursor.elem_idx + 1; + let upper_bound = if next_elem >= LEAF_CHILDREN || leaf.bounds[next_elem] == usize::MAX { + self.leaf_upper_bound(leaf) + } else { + leaf.bounds[next_elem] + }; + debug_assert!(lv >= lower_bound && lv < upper_bound); + + RleDRun { + start: lower_bound, + end: upper_bound, + val + } + } + + /// After the first item in a leaf has been modified, we need to walk up the node tree to update + /// the start LV values. + fn recursively_update_nodes(nodes: &mut Vec, mut node_idx: NodeIdx, mut child: usize, new_start: LV) { + while node_idx.0 != usize::MAX { + let node = &mut nodes[node_idx.0]; + let child_idx = Self::find_child_idx_in_node(node, child); + node.children[child_idx].0 = new_start; + if child_idx != 0 { + // We're done here. This is the most likely case. + break; + } + + // Otherwise continue up the tree until we hit the root. + child = node_idx.0; + node_idx = node.parent; + } + } + + #[inline] + fn get_leaf_and_bound(&mut self, idx: LeafIdx) -> (&mut IndexLeaf, LV) { + Self::get_leaf_and_bound_2(&mut self.leaves, idx) + } + + fn get_leaf_and_bound_2(leaves: &mut Vec>, idx: LeafIdx) -> (&mut IndexLeaf, LV) { + let leaf = &leaves[idx.0]; + let upper_bound = Self::leaf_upper_bound_2(leaves, leaf); + (&mut leaves[idx.0], upper_bound) + } + + + /// Returns true if we need to keep trimming stuff after this leaf. 
+ fn trim_leaf_end(&mut self, leaf_idx: LeafIdx, elem_idx: usize, end: LV) -> bool { + debug_assert!(elem_idx >= 1); + // debug_assert!(elem_idx < LEAF_CHILDREN); + // let leaf = &mut self.leaves[leaf_idx.0]; + let (leaf, leaf_upper_bound) = self.get_leaf_and_bound(leaf_idx); + // dbg!(leaf_idx, elem_idx, end, leaf_upper_bound, &leaf); + // debug_assert!(end > leaf.bounds[elem_idx]); // This element will not be removed. + + if cfg!(debug_assertions) { + // Check the bounds + let mut prev = leaf.bounds[0]; + for &b in &leaf.bounds[1..elem_idx] { + if b != usize::MAX { + assert!(b > prev, "Bounds does not monotonically increase b={:?}", &leaf.bounds); + } + prev = b; + } + } + + if elem_idx >= LEAF_CHILDREN || leaf.bounds[elem_idx] == usize::MAX { + // The cat is already out of the bag. Continue trimming after this leaf. + return end > leaf_upper_bound; + } + + // This function wouldn't be called if we had nothing to do. (Though if this were the + // case, we could return immediately). + // debug_assert!(leaf.bounds[elem_idx] < end); + + let mut del_to = elem_idx; + // let mut last_idx = i; + + // let mut stop_here = false; + + loop { + // The bounds of element i. + let next = del_to + 1; + let mut b = if next > LEAF_CHILDREN { + break; + } else if next == LEAF_CHILDREN { + leaf_upper_bound + } else { + leaf.bounds[next] + }; + // Which may be usize::MAX. + + // Ugh this is so gross. If we hit the last in-use item, the bound is + // leaf_upper_bound and stop after this one. + if b == usize::MAX { + b = leaf_upper_bound; + } + + // if b == usize::MAX { del_to = LEAF_CHILDREN; break; } + + match end.cmp(&b) { + Ordering::Less => { + // println!("Trim {del_to} to {end}"); + // Trim the current item and stop here. 
+ // let b = b.min(leaf_upper_bound); + debug_assert!(leaf.bounds[del_to] < end); + leaf.children[del_to] = leaf.children[del_to].at_offset(end - leaf.bounds[del_to]); + leaf.bounds[del_to] = end; + // stop_here = true; + break; + } + Ordering::Equal => { + // The current item is the last item to delete. + del_to += 1; + break; + } + Ordering::Greater => { + // Keep scanning. + del_to += 1; + } + } + + // Bleh! + if next < LEAF_CHILDREN && leaf.bounds[next] == usize::MAX { break; } + } + + if del_to >= LEAF_CHILDREN || leaf.bounds[del_to] == usize::MAX { + // Delete the rest of this leaf and bubble up. + leaf.bounds[elem_idx..].fill(usize::MAX); + // if end > leaf_upper_bound { + // let parent = leaf.parent; + // if self.height > 0 { + // self.extend_range_in_node(parent, leaf_idx.0, end); + // } + // } + + end > leaf_upper_bound + } else { + let trimmed_items = del_to - elem_idx; + + if trimmed_items >= 1 { + // println!("trim {elem_idx} <- {del_to}.."); + + // Hold onto your hats, its time to delete some items. + leaf.remove_children(elem_idx..del_to); + // leaf.children.copy_within(del_to.., elem_idx); + // leaf.bounds.copy_within(del_to.., elem_idx); + // leaf.bounds[LEAF_CHILDREN - trimmed_items..].fill(usize::MAX); + } + false + } + } + + fn upper_bound_scan(&self, mut idx: usize, mut height: usize) -> usize { + while height > 0 { + // Descend to the last child of this item. + let node = &self.nodes[idx]; + + debug_assert!(node.children[0].1 != usize::MAX, "Node is empty. idx: {idx}"); + + let last_child_idx = node.children.iter() + .rfind(|(_, idx)| *idx != usize::MAX) + .expect("Invalid state: Node is empty") + .1; + + height -= 1; + idx = last_child_idx; + } + + // idx is now pointing to a leaf. 
+ self.leaf_upper_bound(&self.leaves[idx]) + } + + // fn discard_leaf_internal(leaves: &mut Vec>, leaf_pool_head: &mut LeafIdx, leaf_idx: LeafIdx) { + // let leaf = &mut leaves[leaf_idx.0]; + // leaf.next_leaf = *leaf_pool_head; + // *leaf_pool_head = leaf_idx; + // } + + fn discard_leaf(&mut self, leaf_idx: LeafIdx) { + // println!("Discard leaf {:?}", leaf_idx); + + // Self::discard_leaf_internal(&mut self.leaves, &mut self.free_leaf_pool_head, leaf_idx); + let leaf = &mut self.leaves[leaf_idx.0]; + leaf.next_leaf = self.free_leaf_pool_head; + self.free_leaf_pool_head = leaf_idx; + + if cfg!(debug_assertions) { + // Make sure discarded leaves aren't added multiple times to the discard queue. + assert_ne!(leaf.parent, NodeIdx(0xfefe)); + leaf.parent = NodeIdx(0xfefe); + } + } + + fn discard_node(&mut self, idx: usize, height: usize) { + if height == 0 { + self.discard_leaf(LeafIdx(idx)); + } else { + // println!("DISCARD NODE {idx}"); + // Move it to the free list. + let node = &mut self.nodes[idx]; + node.parent = self.free_node_pool_head; + self.free_node_pool_head = NodeIdx(idx); + + let old_children = mem::replace(&mut node.children, [EMPTY_NODE_CHILD; NODE_CHILDREN]); + + for (_, child_idx) in old_children { + if child_idx == usize::MAX { break; } + self.discard_node(child_idx, height - 1); + } + } + } + + fn remove_and_queue_node_children(&mut self, node_idx: NodeIdx, child_range: Range, height: usize) { + // This is horrible. + for i in child_range.clone() { + // TODO: Benchmark this against just copying out the children we care about. + let child_idx = self.nodes[node_idx.0].children[i].1; // boooo. + self.discard_node(child_idx, height - 1); + } + // Bleh. I want to do this but the borrowck suuucks. 
+ // for (_, idx) in &node.children[..keep_child_idx] { + // self.discard_node(*idx, height - 1); + // } + + self.nodes[node_idx.0].remove_children(child_range); + } + + fn trim_node_start(&mut self, mut idx: usize, end: LV, mut height: usize) -> LeafIdx { + while height > 0 { + let mut node = &mut self.nodes[idx]; + + if end > node.children[0].0 { + let keep_child_idx = Self::find_lv_in_node(node, end); + + if cfg!(debug_assertions) { + let i = node.children[keep_child_idx].1; + debug_assert!(self.upper_bound_scan(i, height - 1) > end); + node = &mut self.nodes[idx]; + } + + if keep_child_idx >= 1 { + self.remove_and_queue_node_children(NodeIdx(idx), 0..keep_child_idx, height); + // for i in 0..keep_child_idx { + // // TODO: Benchmark this against just copying out the children we care about. + // let child_idx = self.nodes[idx].children[i].1; // boooo. + // self.discard_node(child_idx, height - 1); + // } + // node = &mut self.nodes[idx]; + // node.remove_children(0..keep_child_idx); + + node = &mut self.nodes[idx]; // borrowck. + } + + node.children[0].0 = end; + // dbg!(height, end, &node.children, keep_child_idx, node.children[keep_child_idx].1); + idx = node.children[0].1; + } else { + // dbg!(height, end, &node.children, node.children[0].1); + idx = node.children[0].1; + } + + height -= 1; + } + + // Ok, now drop the first however many items from the leaf. + let leaf = &mut self.leaves[idx]; + let keep_elem_idx = Self::find_in_leaf(leaf, end); + if keep_elem_idx >= 1 { + leaf.remove_children(0..keep_elem_idx); + } + leaf.children[0] = leaf.children[0].at_offset(end - leaf.bounds[0]); + leaf.bounds[0] = end; + + if cfg!(debug_assertions) { + let leaf = &self.leaves[idx]; + let leaf_upper_bound = self.leaf_upper_bound(leaf); + assert!(leaf_upper_bound >= end); + } + + LeafIdx(idx) + } + + /// Change the upper bound of the child of this node to end. 
+ fn trim_node_end_after_child(&mut self, node_idx: NodeIdx, child: usize, end: LV, height: usize) -> LeafIdx { + debug_assert!(height >= 1); + + // We're going to keep at least 1 child, so this node (and its recursive parents) won't be + // deleted. + let mut node = &mut self.nodes[node_idx.0]; + let idx = Self::find_child_idx_in_node(node, child); + + // let mut i = idx + 1; + // while i < NODE_CHILDREN { + + let del_start = idx + 1; + + if cfg!(debug_assertions) { + let child_idx = node.children[idx].1; + let up = self.upper_bound_scan(child_idx, height - 1); + assert!(end > up); + node = &mut self.nodes[node_idx.0]; + if del_start < NODE_CHILDREN && node.children[del_start].1 != usize::MAX { + // assert_eq!(node.children[del_start].0, up); + assert!(end > up); + } + } + // debug_assert!(del_start >= NODE_CHILDREN || end >= node.children[del_start].0, + // "del_start: {del_start} / end: {end}" + // ); + + for i in del_start..NODE_CHILDREN { + let (_lower_bound, child_idx) = node.children[i]; + + // if idx == usize::MAX { i = NODE_CHILDREN; break; } + if child_idx == usize::MAX { break; } + + // This is a little bit inefficient. It might be better to search from the end, or + // binary search or something. But given how rarely this will all run, I think its ok. + let upper_bound = if i + 1 < NODE_CHILDREN && node.children[i + 1].1 != usize::MAX { + // This is a shortcut. + + if cfg!(debug_assertions) { + let n = &self.nodes[node_idx.0]; + debug_assert_eq!(n.children[i + 1].0, self.upper_bound_scan(child_idx, height - 1)); + node = &mut self.nodes[node_idx.0]; // borrowck. + } + + node.children[i + 1].0 + } else { + self.upper_bound_scan(child_idx, height - 1) + }; + + if end < upper_bound { + // end < upper_bound. Trim the start of this child. + node = &mut self.nodes[node_idx.0]; // borrowck. + node.children[i].0 = end; // Update the lower bound of this child. + + let del_end = i; + if del_end > del_start { + // Delete skipped over elements. 
+ // self.remove_and_queue_node_children(node_idx, del_start..del_end, height); + // node = &mut self.nodes[node_idx.0]; // borrowck. + + // These items will already have been discarded by discard_node, below. + node.remove_children(del_start..del_end); + } + + return self.trim_node_start(child_idx, end, height - 1); + } else { + // Remove this child. + self.discard_node(child_idx, height - 1); + } + + // Borrowck. + node = &mut self.nodes[node_idx.0]; + } + + node.children[del_start..].fill(EMPTY_NODE_CHILD); + + // Recurse up. + debug_assert!(node.parent.0 != usize::MAX, "Invalid bounds"); + let parent = node.parent; + self.trim_node_end_after_child(parent, node_idx.0, end, height + 1) + + // if i == NODE_CHILDREN { + // debug_assert!(node.parent.0 != usize::MAX, "Invalid bounds"); + // self.trim_node_end_after_child(node.parent, node_idx.0, end, height + 1) + // } + } + + /// This method clears everything out of the way for the specified element, to set its + /// upper bound correctly. + fn extend_upper_range(&mut self, leaf_idx: LeafIdx, elem_idx: usize, end: LV) { + // This may need to do a lot of work: + // - The leaf we're currently inside of needs to be trimmed, from elem_idx onwards + // - If we continue, the parent leaf needs to be trimmed, and its parent and so on. This may + // cause some leaves and nodes to be discarded entirely. + // - Then some nodes and a leaf may need the first few elements removed. + + // We'll always call this with the "next" elem_idx. So the leaf thats being trimmed will + // never itself be removed. + debug_assert!(elem_idx >= 1); + + // First, trim the end of this leaf if we can. 
+ if !self.trim_leaf_end(leaf_idx, elem_idx, end) || self.height == 0 { return; } + + let parent = self.leaves[leaf_idx.0].parent; + debug_assert!(parent.0 != usize::MAX); + + let new_next_leaf = self.trim_node_end_after_child(parent, leaf_idx.0, end, 1); + self.leaves[leaf_idx.0].next_leaf = new_next_leaf; + } + + pub fn set_range_2(&mut self, range: Range, data: V) { + self.set_range(range.into(), data); + } + + pub fn set_range(&mut self, range: DTRange, data: V) { + // println!(" SET RANGE {:?} = {:?}", range, data); + if range.is_empty() { return; } + let cursor = self.cursor_at(range.start); + if cfg!(debug_assertions) { + self.check_cursor_at(cursor, range.start, false); + } + + // self.cursor.set((range.start, cursor)); + // The cursor may move. + let (mut cursor, at_end) = self.set_range_internal(cursor, range, data); + + if cfg!(debug_assertions) { + // println!("check cursor {:?} {}, {}", cursor, range.end, at_end); + self.check_cursor_at(cursor, range.end, at_end); + } + + // if hint_fwd { + if at_end { + self.cursor_to_next(&mut cursor); + if cfg!(debug_assertions) { + self.check_cursor_at(cursor, range.end, false); + } + } + self.cursor.set((range.end, cursor)); + } + + // returns resulting cursor, whether its at the end of the element. + fn set_range_internal(&mut self, cursor: IndexCursor, range: DTRange, mut data: V) -> (IndexCursor, bool) { + // Setting a range can involve deleting some number of data items, and inserting an item. + // + // For now, I'm never going to leave a leaf empty just so I can avoid needing to deal with + // ever deleting nodes. 
+ + let IndexCursor { mut leaf_idx, mut elem_idx } = cursor; + let DTRange { mut start, mut end } = range; + // let range = (); + // let cursor = (); + + // let dbg_upper_bound = self.upper_bound(&self.leaves[leaf_idx.0]); + // let mut leaf = &mut self.leaves[leaf_idx.0]; + let (mut leaf, mut leaf_upper_bound) = self.get_leaf_and_bound(leaf_idx); + + debug_assert!(leaf.bounds[elem_idx] != usize::MAX); + debug_assert!(start >= leaf.bounds[0] || leaf_idx.0 == 0); + debug_assert!(start < leaf_upper_bound); + // debug_assert!(elem_idx == LEAF_CHILDREN - 1 || start < leaf.bounds[elem_idx + 1]); + // And the range should be < the upper bound. + + // debug_assert!(leaf.is_last() || start < leaf.upper_bound); + + assert!(elem_idx < LEAF_CHILDREN); + + let mut cur_start = leaf.bounds[elem_idx]; + + if cur_start == start && elem_idx > 0 { + // Try and append it to the previous item. This is unnecessary, but should help with + // perf. + let prev_idx = elem_idx - 1; + let prev_start = leaf.bounds[prev_idx]; + if leaf.children[prev_idx].try_append(cur_start - prev_start, &data, end - start) { + // Ok! + self.extend_upper_range(leaf_idx, elem_idx, end); + + // Note extend_upper_range might have nuked the current element. Since the stored + // cursor always points to the *next* element, we'll roll the cursor forward in this + // case here. + let leaf = &self.leaves[leaf_idx.0]; + if leaf.bounds[elem_idx] == usize::MAX { + // println!("A1"); + return (IndexCursor { leaf_idx: leaf.next_leaf, elem_idx: 0}, false); + } else { + // println!("A2"); + // self.check_cursor_at(cursor, range.end, false); + return (cursor, false); + } + } + } + + // TODO: Probably worth a short-circuit check here to see if the value even changed. + + let mut cur_end = if elem_idx >= LEAF_CHILDREN - 1 { + leaf_upper_bound + } else { + // This is pretty gnarly. 
+ let b = leaf.bounds[elem_idx + 1]; + if b == usize::MAX { leaf_upper_bound } else { b } + }; + + // If we can append the item to the current item, do that. + if cur_start < start { + let mut d = leaf.children[elem_idx]; + if d.try_append(start - cur_start, &data, end - start) { + data = d; + start = cur_start; + } + } + + let mut end_is_end = true; + + if end < cur_end { + // Try to append the end of the current element. + if data.try_append(end - start, &leaf.children[elem_idx].at_offset(end - cur_start), cur_end - end) { + // Nice. We'll handle this in the special case below. + end = cur_end; + end_is_end = false; + } else { + // In this case, the item is replacing a prefix of the target slot. We'll just hardcode + // these cases, since otherwise we need to deal with remainders below and thats a pain. + if cur_start < start { + // We need to "splice in" this item. Eg, x -> xyx. This will result in 2 + // inserted items. + + // The resulting behaviour should be that: + // b1 (x) b2 ----> b1 (x) start (y) range.end (x) b2 + + // The item at elem_idx is the start of the item we're splitting. Leave it + // alone. We'll replace elem_idx + 1 with data and elem_idx + 2 with remainder. + + (leaf_idx, elem_idx) = self.make_space_in_leaf_for::<2>(leaf_idx, elem_idx); + let leaf = &mut self.leaves[leaf_idx.0]; + + assert!(elem_idx + 2 < LEAF_CHILDREN); + leaf.bounds[elem_idx + 1] = start; + leaf.children[elem_idx + 1] = data; + leaf.bounds[elem_idx + 2] = end; + // This will be a no-op for many types of data because of the memcpy. + leaf.children[elem_idx + 2] = leaf.children[elem_idx].at_offset(end - cur_start); + + // We modified elem_idx +1 and +2, so we can't have modified index 0. No parent update. + // println!("b"); + // self.check_cursor_at(IndexCursor { leaf_idx, elem_idx: elem_idx + 1 }, range.end, true); + return (IndexCursor { leaf_idx, elem_idx: elem_idx + 1 }, true); + } else { + // Preserve the end of this item. Eg, x -> yx. 
+ debug_assert!(cur_start == start); + debug_assert!(end < cur_end); + + (leaf_idx, elem_idx) = self.make_space_in_leaf_for::<1>(leaf_idx, elem_idx); + let leaf = &mut self.leaves[leaf_idx.0]; + + // This should be true, but V doesn't impl Eq. + // debug_assert_eq!(leaf.children[elem_idx + 1], leaf.children[elem_idx]); + + debug_assert_eq!(leaf.bounds[elem_idx], start); + assert!(elem_idx + 1 < LEAF_CHILDREN); + leaf.children[elem_idx] = data; + leaf.bounds[elem_idx + 1] = end; + leaf.children[elem_idx + 1] = leaf.children[elem_idx + 1].at_offset(end - start); + + // Since start == lower bound, the parents won't need updating. + // println!("c"); + return (IndexCursor { leaf_idx, elem_idx }, true); + } + } + } + + if end == cur_end { + // Special case. Might not be worth it. + if start == cur_start { + // Nuke the existing item. + leaf.children[elem_idx] = data; + + // Since start == lower bound, the parents don't need updating. + } else { + // Preserve the start of the item. x -> xy. + debug_assert!(start > cur_start); + + (leaf_idx, elem_idx) = self.make_space_in_leaf_for::<1>(leaf_idx, elem_idx); + let leaf = &mut self.leaves[leaf_idx.0]; + + elem_idx += 1; + assert!(elem_idx < LEAF_CHILDREN); + leaf.children[elem_idx] = data; + leaf.bounds[elem_idx] = start; + // We didn't modify [0], so no parent update. + } + // println!("d"); + return (IndexCursor { leaf_idx, elem_idx }, end_is_end); + } + + // This element overlaps with some other elements. + debug_assert!(end > cur_end); + debug_assert!(start < cur_end); + + if cur_start < start { + // Trim the current item alone and modify the next item. + // If we get here then: cur_start < start < cur_end < end. + debug_assert!(cur_start < start && start < cur_end && cur_end < end); + + elem_idx += 1; + + // Alternately, we could just use make_space_in_leaf here - though it would need to be + // adjusted to allow the elem_idx to be = LEAF_CHILDREN. 
+ if elem_idx >= LEAF_CHILDREN || leaf.bounds[elem_idx] == usize::MAX { + // This is the end of the leaf node. + // leaf.upper_bound = start; + + if leaf.is_last() { + panic!("I don't think this can happen"); + // // Split the last element and insert. + // leaf_idx = self.split_leaf(leaf_idx); + // let new_leaf = &mut self.leaves[leaf_idx.0]; + // + // new_leaf.children[LEAF_SPLIT_POINT] = data; + // new_leaf.bounds[LEAF_SPLIT_POINT] = start; + // // new_leaf.upper_bound = range.end; + // return; + } else { + // We've trimmed this leaf node. Roll the cursor to the next item. + leaf_idx = leaf.next_leaf; + (leaf, leaf_upper_bound) = Self::get_leaf_and_bound_2(&mut self.leaves, leaf_idx); + // leaf = &mut self.leaves[leaf_idx.0]; + elem_idx = 0; + // TODO: Fuzz check, but I think this line isn't necessary. + // leaf.bounds[0] = range.start; + + // We're going to replace the leaf's starting item. + let parent = leaf.parent; + Self::recursively_update_nodes(&mut self.nodes, parent, leaf_idx.0, start); + } + } + + debug_assert_eq!(leaf.bounds[elem_idx], cur_end); + debug_assert!(start < leaf.bounds[elem_idx]); + + // debug_assert!(start < leaf.bounds[elem_idx]); + + // Right now leaf.children[elem_idx] contains an item from cur_end > start. + + // We've moved forward. Try and append the existing item to data. + cur_start = cur_end; + cur_end = if elem_idx >= LEAF_CHILDREN - 1 { + leaf_upper_bound + } else { + let b = leaf.bounds[elem_idx + 1]; + if b == usize::MAX { leaf_upper_bound } else { b } + }; + + leaf.bounds[elem_idx] = start; + + // debug_assert!(cur_start < end); + + // Current constraints here: + // start < cur_start < cur_end + // cur_start < end + debug_assert!(start < cur_start && cur_start < cur_end); + debug_assert!(cur_start < end); + + if end < cur_end { + // Try to prepend the new item to the start of the existing item. + if data.try_append(cur_start - start, &leaf.children[elem_idx], cur_end - cur_start) { + // Ok! 
+ leaf.children[elem_idx] = data; + // println!("e"); + return (IndexCursor { leaf_idx, elem_idx }, false); + } else { + (leaf_idx, elem_idx) = self.make_space_in_leaf_for::<1>(leaf_idx, elem_idx); + leaf = &mut self.leaves[leaf_idx.0]; + leaf.children[elem_idx] = data; + leaf.bounds[elem_idx + 1] = end; + leaf.children[elem_idx + 1] = leaf.children[elem_idx + 1].at_offset(end - cur_start); + // println!("f"); + return (IndexCursor { leaf_idx, elem_idx }, end_is_end); + } + } else if end == cur_end { + // This item fits perfectly. + leaf.children[elem_idx] = data; + // println!("g"); + return (IndexCursor { leaf_idx, elem_idx }, end_is_end); + } + + cur_start = start; // Since we've pushed down the item bounds. + } + + debug_assert!(end > cur_end); + debug_assert_eq!(cur_start, start); + + // We don't care about the current element at all. Just overwrite it and extend + // the bounds. + leaf.children[elem_idx] = data; + self.extend_upper_range(leaf_idx, elem_idx + 1, end); + + // println!("h"); + (IndexCursor { leaf_idx, elem_idx }, end_is_end) + } + + fn first_leaf(&self) -> LeafIdx { + if cfg!(debug_assertions) { + // dbg!(&self); + let mut idx = self.root; + for _ in 0..self.height { + idx = self.nodes[idx].children[0].1; + } + debug_assert_eq!(idx, 0); + } + LeafIdx(0) + } + + pub fn is_empty(&self) -> bool { + let first_leaf = &self.leaves[self.first_leaf().0]; + first_leaf.bounds[0] == usize::MAX + } + + pub fn count_items(&self) -> usize { + let mut count = 0; + let mut leaf = &self[self.first_leaf()]; + loop { + // SIMD should make this fast. + count += leaf.bounds.iter().filter(|b| **b != usize::MAX).count(); + + // There is always at least one leaf. + if leaf.is_last() { break; } + else { + leaf = &self[leaf.next_leaf]; + } + } + + count + } + + /// returns number of internal nodes, leaves. 
+ pub fn count_obj_pool(&self) -> (usize, usize) { + let mut nodes = 0; + let mut leaves = 0; + + let mut idx = self.free_node_pool_head; + while idx.0 != usize::MAX { + nodes += 1; + idx = self.nodes[idx.0].parent; + } + let mut idx = self.free_leaf_pool_head; + while idx.0 != usize::MAX { + leaves += 1; + idx = self.leaves[idx.0].next_leaf; + } + + (nodes, leaves) + } + + /// Iterate over the contents of the index. Note the index tree may contain extra entries + /// for items within the range, with a value of V::default. + pub fn iter(&self) -> IndexTreeIter { + IndexTreeIter { + tree: self, + leaf_idx: self.first_leaf(), + // leaf: &self.leaves[self.first_leaf()], + elem_idx: 0, + } + } + + pub fn to_vec(&self) -> Vec> { + self.iter().collect::>() + } + + + fn dbg_check_walk(&self, idx: usize, height: usize, expect_start: Option, expect_parent: NodeIdx) { + if height != 0 { + // Visiting a node. + assert!(idx < self.nodes.len()); + let node = &self.nodes[idx]; + + // dbg!(&self.nodes, self.root, self.height, expect_parent); + assert_eq!(node.parent, expect_parent); + + // The first child must be in use. + assert_ne!(node.children[0].1, usize::MAX); + // The first child must start at expect_start. + if let Some(expect_start) = expect_start { + // dbg!(&self.nodes, self.root, self.height); + assert_eq!(node.children[0].0, expect_start); + } + + let mut finished = false; + let mut prev_start = usize::MAX; + for &(start, child_idx) in &node.children { + if child_idx == usize::MAX { finished = true; } + else { + assert!(prev_start == usize::MAX || prev_start < start, "prev_start {prev_start} / start {start}"); + prev_start = start; + + assert_eq!(finished, false); + self.dbg_check_walk(child_idx, height - 1, Some(start), NodeIdx(idx)); + } + } + } else { + // Visiting a leaf. + assert!(idx < self.leaves.len()); + let leaf = &self.leaves[idx]; + + // dbg!(&self, idx); + assert_eq!(leaf.parent, expect_parent); + + // We check that the first child is in use below. 
+ if leaf.bounds[0] != usize::MAX { + if let Some(expect_start) = expect_start { + assert_eq!(leaf.bounds[0], expect_start); + } + } + } + } + + #[allow(unused)] + pub(crate) fn dbg_check(&self) { + // Invariants: + // - All index markers point to the node which contains the specified item. + // - Except for the root item, all leaves must have at least 1 data entry. + // - The "left edge" of items should all have a lower bound of 0 + // - The last leaf node should have an upper bound and node_next of usize::MAX. + + // This code does 2 traversals of the data structure: + // 1. We walk the leaves by following next_leaf pointers in each leaf node + // 2. We recursively walk the tree + + // Walk the leaves. + let mut leaves_visited = 0; + let mut leaf_idx = self.first_leaf(); + loop { + let leaf = &self[leaf_idx]; + leaves_visited += 1; + + if leaf_idx == self.first_leaf() { + // First leaf. This can be empty - but only if the whole data structure is empty. + if leaf.bounds[0] == usize::MAX { + assert!(!leaf.next_leaf.exists()); + } + } else { + assert_ne!(leaf.bounds[0], usize::MAX, "Only the first leaf can be empty"); + } + + // Make sure the bounds are all sorted. + let mut prev = leaf.bounds[0]; + let mut finished = false; + for &b in &leaf.bounds[1..] 
{ + if b == usize::MAX { + finished = true; + } else { + assert!(b > prev, "Bounds does not monotonically increase b={:?}", &leaf.bounds); + // assert!(b < leaf.upper_bound); + // assert!(b < self.upper_bound); + prev = b; + assert!(!finished, "All in-use children must come before all null children"); + } + } + + if leaf.is_last() { break; } + else { + let next_leaf = &self[leaf.next_leaf]; + assert!(next_leaf.bounds[0] > prev); + // assert_eq!(leaf.upper_bound, next_leaf.bounds[0]); + } + leaf_idx = leaf.next_leaf; + } + + let mut leaf_pool_size = 0; + let mut i = self.free_leaf_pool_head; + while i.0 != usize::MAX { + leaf_pool_size += 1; + i = self.leaves[i.0].next_leaf; + } + assert_eq!(leaves_visited + leaf_pool_size, self.leaves.len()); + + if self.height == 0 { + assert!(self.root < self.leaves.len()); + } else { + assert!(self.root < self.nodes.len()); + } + + // And walk the tree structure in the nodes + self.dbg_check_walk(self.root, self.height, None, NodeIdx(usize::MAX)); + + let (lv, cursor) = self.cursor.get(); + // self.check_cursor_at(cursor, lv, false); + } + + #[allow(unused)] + pub(crate) fn dbg_check_eq_2(&self, other: impl IntoIterator>) { + self.dbg_check(); + + let mut tree_iter = self.iter(); + // let mut expect_iter = expect.into_iter(); + + // while let Some(expect_val) = expect_iter.next() { + let mut actual_remainder = None; + for mut expect in other.into_iter() { + loop { + let mut actual = actual_remainder.take().unwrap_or_else(|| { + tree_iter.next().expect("Tree missing item") + }); + + // Skip anything before start. 
+ if actual.end <= expect.start { + continue; + } + + // Trim the start of actual_next + if actual.start < expect.start { + (_, actual) = split_rle(actual, expect.start - actual.start); + } else if expect.start < actual.start { + panic!("Missing element"); + } + + assert_eq!(actual.start, expect.start); + let r = DTRange { start: actual.start, end: actual.start + usize::min(actual.len(), expect.len()) }; + assert!(expect.val.eq(&actual.val, usize::min(actual.len(), expect.len())), + "at {:?}: expect {:?} != actual {:?} (len={})", r, &expect.val, &actual.val, usize::min(actual.len(), expect.len())); + // assert_eq!(expect.val, actual.val, "{:?}", &tree_iter); + + if actual.end > expect.end { + // We don't need to split it here because that'll happen on the next iteration anyway. + actual_remainder = Some(actual); + // actual_remainder = Some(split_rle(actual, expect.end - actual.start).1); + break; + } else if actual.end >= expect.end { + break; + } else { + // actual.end < expect.end + // Keep the rest of expect for the next iteration. + (_, expect) = split_rle(expect, actual.end - expect.start); + debug_assert_eq!(expect.start, actual.end); + // And continue with this expected item. + } + } + } + } + + #[allow(unused)] + pub(crate) fn dbg_check_eq<'a>(&self, vals: impl IntoIterator>) where V: 'a { + self.dbg_check_eq_2(vals.into_iter().copied()); + } + +} + +#[derive(Debug)] +pub struct IndexTreeIter<'a, V: Copy> { + tree: &'a IndexTree, + leaf_idx: LeafIdx, + // leaf: &'a IndexLeaf, + elem_idx: usize, +} + +impl<'a, V: Copy> Iterator for IndexTreeIter<'a, V> { + // type Item = (DTRange, V); + type Item = RleDRun; + + fn next(&mut self) -> Option { + // if self.leaf_idx.0 == usize::MAX { + // debug_assert!(self.elem_idx < LEAF_CHILDREN); + if self.leaf_idx.0 >= self.tree.leaves.len() || self.elem_idx >= LEAF_CHILDREN { // Avoid a bounds check. 
+ return None; + } + + let mut leaf = &self.tree[self.leaf_idx]; + // if self.elem_idx >= LEAF_CHILDREN || leaf.bounds[self.elem_idx] == usize::MAX { + // debug_assert!(leaf.is_last()); + // return None; + // } + + let data = leaf.children[self.elem_idx].clone(); + let start = leaf.bounds[self.elem_idx]; + if start == usize::MAX { + // This will happen when the tree is empty. + debug_assert_eq!(self.elem_idx, 0); + debug_assert_eq!(self.leaf_idx.0, 0); + return None; + } + + self.elem_idx += 1; + + let end = 'block: { + if self.elem_idx >= LEAF_CHILDREN || leaf.bounds[self.elem_idx] == usize::MAX { + // Try to move to the next leaf. + self.leaf_idx = leaf.next_leaf; + // if self.leaf_idx.0 == usize::MAX { + if self.leaf_idx.0 >= self.tree.leaves.len() { + break 'block usize::MAX; + } + self.elem_idx = 0; + + leaf = &self.tree[self.leaf_idx]; + leaf.bounds[0] + } else { + leaf.bounds[self.elem_idx] + } + }; + + Some(RleDRun::new(start..end, data)) + } +} + +#[cfg(test)] +mod test { + use std::pin::Pin; + + use rand::{Rng, SeedableRng}; + use rand::prelude::SmallRng; + + use content_tree::{ContentTreeRaw, RawPositionMetricsUsize}; + use Foo::*; + + // use crate::list_fuzzer_tools::fuzz_multithreaded; + use super::*; + + #[derive(Debug, Copy, Clone, Eq, PartialEq)] + enum Foo { A, B, C } + + #[derive(Debug, Copy, Clone, Eq, PartialEq, Default)] + struct X(usize); + impl IndexContent for X { + fn try_append(&mut self, offset: usize, other: &Self, other_len: usize) -> bool { + debug_assert!(offset > 0); + debug_assert!(other_len > 0); + &self.at_offset(offset) == other + } + + fn at_offset(&self, offset: usize) -> Self { + X(self.0 + offset) + } + + fn eq(&self, other: &Self, _upto_len: usize) -> bool { + self.0 == other.0 + } + } + + #[test] + fn empty_tree_is_empty() { + let tree = IndexTree::::new(); + + tree.dbg_check_eq(&[]); + } + + #[test] + fn overlapping_sets() { + let mut tree = IndexTree::new(); + + tree.set_range((5..10).into(), X(100)); + 
tree.dbg_check_eq(&[RleDRun::new(5..10, X(100))]); + // assert_eq!(tree.to_vec(), &[((5..10).into(), Some(A))]); + // dbg!(&tree.leaves[0]); + tree.set_range((5..11).into(), X(200)); + tree.dbg_check_eq(&[RleDRun::new(5..11, X(200))]); + + tree.set_range((5..10).into(), X(100)); + tree.dbg_check_eq(&[ + RleDRun::new(5..10, X(100)), + RleDRun::new(10..11, X(205)), + ]); + + tree.set_range((2..50).into(), X(300)); + // dbg!(&tree.leaves); + tree.dbg_check_eq(&[RleDRun::new(2..50, X(300))]); + + } + + #[test] + fn split_values() { + let mut tree = IndexTree::new(); + tree.set_range((10..20).into(), X(100)); + tree.set_range((12..15).into(), X(200)); + tree.dbg_check_eq(&[ + RleDRun::new(10..12, X(100)), + RleDRun::new(12..15, X(200)), + RleDRun::new(15..20, X(105)), + ]); + } + + #[test] + fn set_inserts_1() { + let mut tree = IndexTree::new(); + + tree.set_range((5..10).into(), X(100)); + tree.dbg_check_eq(&[RleDRun::new(5..10, X(100))]); + + tree.set_range((5..10).into(), X(200)); + tree.dbg_check_eq(&[RleDRun::new(5..10, X(200))]); + + // dbg!(&tree); + tree.set_range((15..20).into(), X(300)); + // dbg!(tree.iter().collect::>()); + tree.dbg_check_eq(&[ + RleDRun::new(5..10, X(200)), + RleDRun::new(15..20, X(300)), + ]); + + // dbg!(&tree); + // dbg!(tree.iter().collect::>()); + } + + #[test] + fn set_inserts_2() { + let mut tree = IndexTree::new(); + tree.set_range((5..10).into(), X(100)); + tree.set_range((1..5).into(), X(200)); + // dbg!(&tree); + tree.dbg_check_eq(&[ + RleDRun::new(1..5, X(200)), + RleDRun::new(5..10, X(100)), + ]); + dbg!(&tree.leaves[0]); + + tree.set_range((3..8).into(), X(300)); + // dbg!(&tree); + // dbg!(tree.iter().collect::>()); + tree.dbg_check_eq(&[ + RleDRun::new(1..3, X(200)), + RleDRun::new(3..8, X(300)), + RleDRun::new(8..10, X(103)), + ]); + } + + #[test] + fn split_leaf() { + let mut tree = IndexTree::new(); + // Using 10, 20, ... so they don't merge. 
+ tree.set_range(10.into(), X(100)); + tree.dbg_check(); + tree.set_range(20.into(), X(200)); + tree.set_range(30.into(), X(100)); + tree.set_range(40.into(), X(200)); + tree.dbg_check(); + // dbg!(&tree); + tree.set_range(50.into(), X(100)); + tree.dbg_check(); + + // dbg!(&tree); + // dbg!(tree.iter().collect::>()); + + tree.dbg_check_eq(&[ + RleDRun::new(10..11, X(100)), + RleDRun::new(20..21, X(200)), + RleDRun::new(30..31, X(100)), + RleDRun::new(40..41, X(200)), + RleDRun::new(50..51, X(100)), + ]); + } + + #[test] + fn clear_range() { + // for i in 2..20 { + for i in 2..50 { + eprintln!("i: {i}"); + let mut tree = IndexTree::new(); + for base in 0..i { + tree.set_range((base*3..base*3+2).into(), X(base + 100)); + } + // dbg!(tree.iter().collect::>()); + + let ceil = i*3 - 2; + // dbg!(ceil); + // dbg!(&tree); + tree.dbg_check(); + tree.set_range((1..ceil).into(), X(99)); + // dbg!(tree.iter().collect::>()); + + tree.dbg_check_eq(&[ + RleDRun::new(0..1, X(100)), + RleDRun::new(1..ceil, X(99)), + RleDRun::new(ceil..ceil+1, X(i - 1 + 100 + 1)), + ]); + } + } + + fn fuzz(seed: u64, verbose: bool) { + let mut rng = SmallRng::seed_from_u64(seed); + let mut tree = IndexTree::new(); + // let mut check_tree: Pin>, RawPositionMetricsUsize>>> = ContentTreeRaw::new(); + let mut check_tree: Pin>> = ContentTreeRaw::new(); + const START_JUNK: usize = 1_000_000; + check_tree.replace_range_at_offset(0, (START_JUNK..START_JUNK *2).into()); + + for _i in 0..1000 { + if verbose { println!("i: {}", _i); } + // This will generate some overlapping ranges sometimes but not too many. 
+ let val = rng.gen_range(0..100) + 100; + // let start = rng.gen_range(0..3); + let start = rng.gen_range(0..1000); + let len = rng.gen_range(0..100) + 1; + // let start = rng.gen_range(0..100); + // let len = rng.gen_range(0..100) + 1; + + // dbg!(&tree, start, len, val); + // if _i == 19 { + // println!("blerp"); + // } + + // if _i == 14 { + // dbg!(val, start, len); + // dbg!(tree.iter().collect::>()); + // } + tree.set_range((start..start+len).into(), X(val)); + // dbg!(&tree); + tree.dbg_check(); + + // dbg!(check_tree.iter().collect::>()); + + check_tree.replace_range_at_offset(start, (val..val+len).into()); + + // if _i == 14 { + // dbg!(tree.iter().collect::>()); + // dbg!(check_tree.iter_with_pos().filter_map(|(pos, r)| { + // if r.start >= START_JUNK { return None; } + // Some(RleDRun::new(pos..pos+r.len(), X(r.start))) + // }).collect::>()); + // } + + // check_tree.iter + tree.dbg_check_eq_2(check_tree.iter_with_pos().filter_map(|(pos, r)| { + if r.start >= START_JUNK { return None; } + Some(RleDRun::new(pos..pos+r.len(), X(r.start))) + })); + } + } + + #[test] + fn fuzz_once() { + fuzz(22, true); + } + + // #[test] + // #[ignore] + // fn tree_fuzz_forever() { + // fuzz_multithreaded(u64::MAX, |seed| { + // if seed % 100 == 0 { + // println!("Iteration {}", seed); + // } + // fuzz(seed, false); + // }) + // } +} diff --git a/crates/diamond-types-crdt/src/ost/mod.rs b/crates/diamond-types-crdt/src/ost/mod.rs new file mode 100644 index 0000000..5b2d883 --- /dev/null +++ b/crates/diamond-types-crdt/src/ost/mod.rs @@ -0,0 +1,221 @@ +//! This module exists as a future planned replacement for the content-tree crate. It has a few +//! advantages: +//! +//! - I have two separate data structures, one for the index and one for content. Content-tree uses +//! the same b-tree data structure for both +//! - These btree implementations store data in a Vec / Vec pair rather than using raw +//! pointers. 
Surprisingly, this turns out to perform better - because the CPU ends up caching +//! runs of nodes. It also means this works with no unsafe {} blocks. +//! - There's less abstraction here. Way less abstraction. I went a bit overboard with content-tree +//! and as a result, its much harder to read. However, the code here has more duplication. Eh. +//! - The resulting wasm size is a little smaller. + +mod index_tree; +pub(crate) mod content_tree; +pub(crate) mod recording_index_tree; +// mod content_tree; + +use std::cmp::Ordering; +use std::iter::Sum; +pub(crate) use index_tree::{IndexContent, IndexTree}; + +use std::ops::{Add, AddAssign, Index, IndexMut, Range, Sub, SubAssign}; +use rle::{HasLength, MergableSpan, SplitableSpan}; +// use crate::ost::content_tree::ContentTree; +// Some utility types. + +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; +use ::content_tree::ContentLength; +use crate::crdtspan::CRDTSpan; + + +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub struct LeafIdx(pub(crate) usize); + +impl Default for LeafIdx { + fn default() -> Self { Self(usize::MAX) } +} +impl LeafIdx { + pub fn exists(&self) -> bool { self.0 != usize::MAX } +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct NodeIdx(usize); + +impl Default for NodeIdx { + fn default() -> Self { Self(usize::MAX) } +} + +impl NodeIdx { + fn is_root(&self) -> bool { self.0 == usize::MAX } +} + +// #[derive(Copy, Clone, Eq, PartialEq, Debug)] +// enum LenType { CURRENT, END } + +pub type LenPair = usize; + +fn update_by(len: &mut usize, upd: isize) { + *len = len.wrapping_add_signed(upd) +} + +// #[derive(Copy, Clone, Eq, PartialEq, Debug, Default)] +// pub struct LenPair { +// pub cur: usize, +// // pub end: usize, +// } +// +// impl LenPair { +// fn get(&self) -> usize { +// self.cur +// } +// +// #[inline] +// fn update_by(&mut self, upd: LenUpdate) { +// self.cur = self.cur.wrapping_add_signed(upd.cur); 
+// // self.end = self.end.wrapping_add_signed(upd.end); +// } +// +// pub fn new(cur: usize) -> Self { +// LenPair { cur, } +// } +// } +// +// impl From for LenPair { +// fn from((cur, end): (usize, usize)) -> Self { +// Self::new(cur, end) +// } +// } +// +// impl AddAssign for LenPair { +// #[inline] +// fn add_assign(&mut self, rhs: Self) { +// self.cur += rhs.cur; +// self.end += rhs.end; +// } +// } +// +// impl SubAssign for LenPair { +// #[inline] +// fn sub_assign(&mut self, rhs: Self) { +// self.cur -= rhs.cur; +// self.end -= rhs.end; +// } +// } +// +// impl Add for LenPair { +// type Output = LenPair; +// +// fn add(self, rhs: Self) -> Self::Output { +// LenPair { +// cur: self.cur + rhs.cur, +// end: self.end + rhs.end, +// } +// } +// } +// +// impl Sub for LenPair { +// type Output = LenPair; +// +// fn sub(self, rhs: Self) -> Self::Output { +// Self { +// cur: self.cur - rhs.cur, +// end: self.end - rhs.end, +// } +// } +// } +// +// impl Sum for LenPair { +// fn sum>(iter: I) -> Self { +// let mut aggregate = Self::default(); +// for i in iter { aggregate += i; } +// aggregate +// } +// } + +#[derive(Copy, Clone, Eq, PartialEq, Debug, Default)] +pub struct LenUpdate { + pub cur: isize, +} + +impl LenUpdate { + fn inc_by(&mut self, e: &CRDTSpan) { + self.cur += e.content_len() as isize; + } + + fn dec_by(&mut self, e: &CRDTSpan) { + self.cur -= e.content_len() as isize; + } + + fn is_empty(&self) -> bool { + self.cur == 0 + } +} + +// In debug mode, nodes are kept intentionally small to exercise the node splitting / joining code +// more. +#[cfg(debug_assertions)] +const NODE_CHILDREN: usize = 4; +#[cfg(debug_assertions)] +const LEAF_CHILDREN: usize = 4; + +// Figured out with benchmarking. 
+#[cfg(not(debug_assertions))] +const NODE_CHILDREN: usize = 16; +#[cfg(not(debug_assertions))] +const LEAF_CHILDREN: usize = 32; + + +// type LeafData = crate::listmerge::markers::Marker; +// #[derive(Debug, Default)] +// struct OrderStatisticTree { +// content: ContentTree, +// index: IndexTree<()>, +// } +// +// impl OrderStatisticTree { +// pub fn new() -> Self { +// Self { +// content: ContentTree::new(), +// index: IndexTree::new(), +// } +// } +// +// // fn insert(&mut self, +// +// pub fn clear(&mut self) { +// self.index.clear(); +// self.content.clear(); +// } +// +// #[allow(unused)] +// fn dbg_check(&self) { +// self.content.dbg_check(); +// self.index.dbg_check(); +// +// // Invariants: +// // - All index markers point to the node which contains the specified item. +// } +// } + +/// Utility method for tree implementations. +/// +/// Remove the items in `a[del_range]`, sliding back items later in the array. +#[inline(always)] +fn remove_from_array(a: &mut [T; S], del_range: Range) { + a.copy_within(del_range.end..S, del_range.start); +} + +/// Utility method for tree implementations. +/// +/// Remove the items in `a[del_range]`, sliding back items later in the array. The end of the array +/// is filled with the provided default value. +#[inline(always)] +fn remove_from_array_fill(a: &mut [T; S], del_range: Range, default: T) { + a.copy_within(del_range.end..S, del_range.start); + a[S - del_range.len()..S].fill(default); +} + + diff --git a/crates/diamond-types-crdt/src/ost/recording_index_tree.rs b/crates/diamond-types-crdt/src/ost/recording_index_tree.rs new file mode 100644 index 0000000..c9b3cc8 --- /dev/null +++ b/crates/diamond-types-crdt/src/ost/recording_index_tree.rs @@ -0,0 +1,106 @@ +//! This file is for debugging. It provides an implementation of index_tree which can record +//! and play back operations made to an index tree. 
+ +use std::cell::RefCell; + +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +use rle::RleDRun; + +use crate::dtrange::DTRange; +use crate::list::LV; +use crate::ost::{IndexContent, IndexTree}; + +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, Copy)] +pub(crate) enum TreeCommand { + GetEntry(LV), + SetRange(DTRange, V), + Clear, +} + +#[derive(Debug, Clone)] +pub(crate) struct RecordingTree { + inner: IndexTree, + pub actions: RefCell>>, +} + +impl Default for RecordingTree { + fn default() -> Self { + Self { + inner: IndexTree::default(), + actions: RefCell::new(vec![]), + } + } +} + +impl RecordingTree { + pub fn new() -> Self { + Default::default() + } + + pub fn get_entry(&self, lv: LV) -> RleDRun { + self.actions.borrow_mut().push(TreeCommand::GetEntry(lv)); + self.inner.get_entry(lv) + } + + pub fn clear(&mut self) { + self.actions.get_mut().push(TreeCommand::Clear); + self.inner.clear(); + } + + pub fn set_range(&mut self, range: DTRange, data: V) { + self.actions.get_mut().push(TreeCommand::SetRange(range, data)); + self.inner.set_range(range, data) + } + + pub fn dbg_check(&self) { + self.inner.dbg_check() + } + + #[cfg(feature = "gen_test_data")] + pub fn actions_to_json(&self) -> Vec where V: Serialize { + // serde_json::to_vec_pretty(self.actions.borrow().deref()).unwrap() + serde_json::to_vec(self.actions.borrow().deref()).unwrap() + } + + #[cfg(feature = "gen_test_data")] + pub fn stats(&self) { + let set_acts = self.actions.borrow().iter() + .filter(|a| if let TreeCommand::SetRange(_, _) = a { true } else { false }) + .count(); + dbg!(set_acts); + + let get_acts = self.actions.borrow().iter() + .filter(|a| if let TreeCommand::GetEntry(_) = a { true } else { false }) + .count(); + dbg!(get_acts); + } +} + +#[derive(Debug, Clone)] +pub struct IndexTreeReplay(Vec>); + +// #[cfg(feature = "serde")] +#[cfg(feature = "expose_benchmarking")] +impl IndexTreeReplay { + pub fn from_json(json: &[u8]) -> 
Self { + Self(serde_json::from_slice(json).unwrap()) + } + + pub fn replay(&self) { + let mut tree = IndexTree::new(); + + for action in self.0.iter() { + match action { + TreeCommand::GetEntry(lv) => { black_box(tree.get_entry(*lv)); }, + TreeCommand::SetRange(range, val) => tree.set_range(*range, *val), + TreeCommand::Clear => tree.clear(), + } + } + + black_box(tree); + } +} + diff --git a/crates/diamond-types-crdt/tests/fuzzer.rs b/crates/diamond-types-crdt/tests/fuzzer.rs index a3daf6b..8d4ba1e 100644 --- a/crates/diamond-types-crdt/tests/fuzzer.rs +++ b/crates/diamond-types-crdt/tests/fuzzer.rs @@ -38,7 +38,8 @@ fn make_random_change(doc: &mut ListCRDT, rope: Option<&mut JumpRope>, agent: Ag if let Some(rope) = rope { rope.insert(pos, content.as_str()); } - doc.local_insert(agent, pos, &content) + doc.local_insert(agent, pos, &content); + doc.check(false); } else { // Delete something let pos = rng.gen_range(0..doc_len); @@ -49,11 +50,11 @@ fn make_random_change(doc: &mut ListCRDT, rope: Option<&mut JumpRope>, agent: Ag if let Some(rope) = rope { rope.remove(pos..pos + span); } - doc.local_delete(agent, pos, span) + doc.local_delete(agent, pos, span); + doc.check(false); } // dbg!(&doc.markers); // doc.check(true); - doc.check(false); } #[test] @@ -88,6 +89,7 @@ fn random_single_replicate() { for _ in 0..100 { make_random_change(&mut doc, Some(&mut expected_content), agent, &mut rng); } + // println!("i {_i}"); let mut doc_2 = ListCRDT::new(); // dbg!(&doc.content_tree); @@ -122,6 +124,8 @@ fn run_fuzzer_iteration(seed: u64) { } for _i in 0..100 { + println!("i {_i}"); + // Generate some operations for _j in 0..5 { let doc_idx = rng.gen_range(0..docs.len()); @@ -157,8 +161,24 @@ fn run_fuzzer_iteration(seed: u64) { } } + // println!("======================="); + // a.debug_print_ids(); + // println!("---"); + // b.debug_print_ids(); + // println!("{} -> {}", a_idx, b_idx); a.replicate_into(b); + + + // println!("----------"); + // a.debug_print_ids(); + // 
println!("---"); + // b.debug_print_ids(); + + if _i == 1 { + println!(); + } + // println!("{} -> {}", b_idx, a_idx); b.replicate_into(a); @@ -175,6 +195,10 @@ fn run_fuzzer_iteration(seed: u64) { if a != b { println!("Docs {} and {} after {} iterations:", a_idx, b_idx, _i); + println!("Content '{:?}' / '{:?}'", + a.text_content.as_ref().map(|b| b.to_string()), + b.text_content.as_ref().map(|b| b.to_string()), + ); a.debug_print_ids(); println!("---"); b.debug_print_ids(); @@ -198,7 +222,7 @@ fn run_fuzzer_iteration(seed: u64) { #[test] fn fuzz_quick() { - run_fuzzer_iteration(0); + run_fuzzer_iteration(22); } #[test] diff --git a/crates/diamond-types-crdt/tests/realworld.rs b/crates/diamond-types-crdt/tests/realworld.rs index 685a3a4..4678d8c 100644 --- a/crates/diamond-types-crdt/tests/realworld.rs +++ b/crates/diamond-types-crdt/tests/realworld.rs @@ -74,12 +74,12 @@ fn replicate() { assert_eq!(local_doc, remote_doc); } -#[ignore] -#[test] -fn doc_to_position_updates() { - // let test_data = load_testing_data("../../benchmark_data/seph-blog1.json.gz"); - let test_data = load_testing_data("../../benchmark_data/sveltecomponent.json.gz"); - let local_doc = load_into_doc(test_data); - let patches = local_doc.iter_original_patches().collect::>(); - dbg!(patches.len()); -} \ No newline at end of file +// #[ignore] +// #[test] +// fn doc_to_position_updates() { +// // let test_data = load_testing_data("../../benchmark_data/seph-blog1.json.gz"); +// let test_data = load_testing_data("../../benchmark_data/sveltecomponent.json.gz"); +// let local_doc = load_into_doc(test_data); +// let patches = local_doc.iter_original_patches().collect::>(); +// dbg!(patches.len()); +// } \ No newline at end of file diff --git a/crates/diamond-types-crdt/tests/realworld_positional.rs b/crates/diamond-types-crdt/tests/realworld_positional.rs index c6d30df..de1ad4b 100644 --- a/crates/diamond-types-crdt/tests/realworld_positional.rs +++ 
b/crates/diamond-types-crdt/tests/realworld_positional.rs @@ -19,84 +19,84 @@ use diamond_types_crdt::root_id; // } // } -#[test] -#[ignore] -fn test_xml_trace_data() { - let mut doc = ListCRDT::new(); - // let d = load_nl_testing_data("/home/seph/src/crdt-benchmarks/xml/out/G1-3.json"); - // let d = load_nl_testing_data("/home/seph/src/crdt-benchmarks/xml/out/Serie-1.json"); - let d = load_nl_testing_data("/home/seph/src/crdt-benchmarks/xml/out/G1-1.json"); - - let mut positional: Vec = Vec::with_capacity(3); - let mut content = String::new(); - - // Sooooo the sequence numbers in the file don't line up with the way I use sequence numbers in - // DT. In the file they're linear from 1-n. Here they count from 0 and go up by the size of the - // change. - let mut seq_map: Vec> = vec![]; - - let convert_id = |id: &NLId, seq_map: &mut Vec>| -> RemoteId { - RemoteId { - agent: id.agent.to_string().into(), - seq: seq_map[id.agent as usize][id.seq - 1] - } - }; - - for op in d.ops { - let agent_str = op.id.agent.to_string(); - let agent_id = doc.get_or_create_agent_id(&agent_str); - let seq = doc.get_next_agent_seq(agent_id); - // doc.ge - - // dbg!(&op); - let id = RemoteId { - agent: agent_str.into(), - seq - }; - while seq_map.len() <= op.id.agent as usize { - seq_map.push(vec![]); - } - assert_eq!(seq_map[op.id.agent as usize].len(), op.id.seq as usize - 1); - let op_len = op.patch.1 + op.patch.2.chars().count(); - seq_map[op.id.agent as usize].push(id.seq + op_len - 1); - - // dbg!(&id); - - let mut parents = op.parents.iter().map(|p| convert_id(p, &mut seq_map)).collect::>(); - if parents.len() == 0 { - // The root operation(s). 
- parents.push(root_id()); - } - // dbg!(&parents); - - positional.clear(); - content.clear(); - - let TestPatch(pos, del_span, ins_content) = op.patch; - if del_span > 0 { - positional.push(PositionalComponent { - pos, - len: del_span, - content_known: false, - tag: InsDelTag::Del - }); - } - - if !ins_content.is_empty() { - positional.push(PositionalComponent { - pos, - len: ins_content.chars().count(), - content_known: true, - tag: InsDelTag::Ins - }); - content.push_str(ins_content.as_str()); - } - - doc.apply_remote_patch_at_version(&id, &parents, PositionalOpRef { - components: &positional, - content: content.as_str(), - }); - } - - println!("{}", doc.to_string()); -} \ No newline at end of file +// #[test] +// #[ignore] +// fn test_xml_trace_data() { +// let mut doc = ListCRDT::new(); +// // let d = load_nl_testing_data("/home/seph/src/crdt-benchmarks/xml/out/G1-3.json"); +// // let d = load_nl_testing_data("/home/seph/src/crdt-benchmarks/xml/out/Serie-1.json"); +// let d = load_nl_testing_data("/home/seph/src/crdt-benchmarks/xml/out/G1-1.json"); +// +// let mut positional: Vec = Vec::with_capacity(3); +// let mut content = String::new(); +// +// // Sooooo the sequence numbers in the file don't line up with the way I use sequence numbers in +// // DT. In the file they're linear from 1-n. Here they count from 0 and go up by the size of the +// // change. 
+// let mut seq_map: Vec> = vec![]; +// +// let convert_id = |id: &NLId, seq_map: &mut Vec>| -> RemoteId { +// RemoteId { +// agent: id.agent.to_string().into(), +// seq: seq_map[id.agent as usize][id.seq - 1] +// } +// }; +// +// for op in d.ops { +// let agent_str = op.id.agent.to_string(); +// let agent_id = doc.get_or_create_agent_id(&agent_str); +// let seq = doc.get_next_agent_seq(agent_id); +// // doc.ge +// +// // dbg!(&op); +// let id = RemoteId { +// agent: agent_str.into(), +// seq +// }; +// while seq_map.len() <= op.id.agent as usize { +// seq_map.push(vec![]); +// } +// assert_eq!(seq_map[op.id.agent as usize].len(), op.id.seq as usize - 1); +// let op_len = op.patch.1 + op.patch.2.chars().count(); +// seq_map[op.id.agent as usize].push(id.seq + op_len - 1); +// +// // dbg!(&id); +// +// let mut parents = op.parents.iter().map(|p| convert_id(p, &mut seq_map)).collect::>(); +// if parents.len() == 0 { +// // The root operation(s). +// parents.push(root_id()); +// } +// // dbg!(&parents); +// +// positional.clear(); +// content.clear(); +// +// let TestPatch(pos, del_span, ins_content) = op.patch; +// if del_span > 0 { +// positional.push(PositionalComponent { +// pos, +// len: del_span, +// content_known: false, +// tag: InsDelTag::Del +// }); +// } +// +// if !ins_content.is_empty() { +// positional.push(PositionalComponent { +// pos, +// len: ins_content.chars().count(), +// content_known: true, +// tag: InsDelTag::Ins +// }); +// content.push_str(ins_content.as_str()); +// } +// +// doc.apply_remote_patch_at_version(&id, &parents, PositionalOpRef { +// components: &positional, +// content: content.as_str(), +// }); +// } +// +// println!("{}", doc.to_string()); +// } \ No newline at end of file diff --git a/crates/run_on_old/Cargo.toml b/crates/run_on_old/Cargo.toml index 62d8e33..c1c8bee 100644 --- a/crates/run_on_old/Cargo.toml +++ b/crates/run_on_old/Cargo.toml @@ -7,7 +7,7 @@ edition = "2021" [dependencies] diamond-types = {path = "../..", 
features = ["ops_to_old"]} -diamond-types-crdt = {path = "../diamond-types-crdt"} +diamond-types-crdt = {path = "../diamond-types-crdt", features = []} smallvec = "2.0.0-alpha.6" rle = {path = "../rle", features = ["smallvec"]} criterion = { version = "0.5.1", features = ["html_reports"] } @@ -24,3 +24,4 @@ brotli-decompressor = "2.5.0" [features] memusage = ["trace-alloc", "trace-alloc/memusage", "dep:serde", "dep:serde_json"] bench = [] +stats = ["diamond-types-crdt/stats"] \ No newline at end of file diff --git a/crates/run_on_old/src/main.rs b/crates/run_on_old/src/main.rs index 19b0347..b49a135 100644 --- a/crates/run_on_old/src/main.rs +++ b/crates/run_on_old/src/main.rs @@ -1,8 +1,9 @@ #[cfg(feature = "memusage")] use std::collections::HashMap; +use std::hint::black_box; #[cfg(feature = "bench")] -use criterion::{BenchmarkId, black_box, Criterion}; +use criterion::{BenchmarkId, Criterion}; #[cfg(feature = "memusage")] use serde::Serialize; use smallvec::{smallvec, SmallVec}; @@ -174,8 +175,6 @@ pub fn get_txns_from_oplog(oplog: &ListOpLog) -> Vec { result } - -#[cfg(feature = "bench")] // const DATASETS: &[&str] = &["automerge-paper", "seph-blog1", "friendsforever", "clownschool", "node_nodecc", "git-makefile", "egwalker"]; // const DATASETS: &[&str] = &["automerge-paperx3", "seph-blog1x3", "node_nodeccx1", "friendsforeverx25", "clownschoolx25", "egwalkerx1", "git-makefilex2"]; const DATASETS: &[&str] = & ["S1", "S2", "S3", "C1", "C2", "A1", "A2"]; @@ -216,6 +215,31 @@ fn bench_process(c: &mut Criterion) { } } +#[cfg(feature = "stats")] +fn stats() { + // for &name in DATASETS { + // for &name in &["S1"] { + for &name in &["S1", "S2", "S3"] { + let txns = get_txns_from_file(&format!("../reg-paper/datasets/{}.dt", name)); + // dbg!(txns.len()); + // dbg!(txns.iter().map(|t| t.ops.len()).sum::()); + // dbg!(txns.iter() + // .map(|t| t.ops.iter().map(|o| o.len()).sum::()) + // .sum::()); + + // dbg!(txns.iter().take(10).collect::>()); + + // 
diamond_types_crdt::take_stats(); + let mut old_oplog = diamond_types_crdt::list::ListCRDT::new(); + for txn in txns.iter() { + old_oplog.apply_remote_txn(txn); + } + + let (hits, misses) = diamond_types_crdt::take_stats(); + println!("Trace {name}: Hits: {hits} misses {misses} / total {}", hits + misses); + } +} + #[cfg(feature = "memusage")] #[derive(Debug, Clone, Copy, Serialize)] struct MemUsage { @@ -267,6 +291,9 @@ fn main() { bench_process(&mut c); c.final_summary(); } + + #[cfg(feature = "stats")] + stats(); } // fn main() { diff --git a/src/ost/content_tree.rs b/src/ost/content_tree.rs index 8b38980..4f471de 100644 --- a/src/ost/content_tree.rs +++ b/src/ost/content_tree.rs @@ -1328,6 +1328,37 @@ impl ContentTree { } return (DeltaCursor(cursor, delta), pos); + } else if cursor.elem_idx > 0 { + // Try the previous item. + let leaf = &self.leaves[cursor.leaf_idx.0]; + let prev_elem = &leaf.children[cursor.elem_idx - 1]; + if let Some(actual_offset) = prev_elem.get_offset(id) { + // Ok. + if let Some(pos) = pos.as_mut() { + if item.takes_up_space::() { + pos.end -= cur_offset; + } + if item.takes_up_space::() { + pos.cur -= cur_offset; + } + if prev_elem.takes_up_space::() { + pos.end -= prev_elem.len() - actual_offset; + } + if prev_elem.takes_up_space::() { + pos.cur -= prev_elem.len() - actual_offset; + } + } + + cursor.elem_idx -= 1; + cursor.offset = actual_offset; + + if let Some(pos) = pos { + debug_assert_eq!(cursor.get_pos(self), pos); + } + + // cache_hit(); + return (DeltaCursor(cursor, delta), pos); + } } // Throw the old cursor away.