Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions vortex-duckdb/cpp/include/duckdb_vx/vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,19 @@
extern "C" {
#endif

/// Slice to a dictionary vector.
/// Slice the vector to a new dictionary vector, using the current vector's values and
/// the provided selection vector.
///
/// A dictionary slice holds a strong reference to all memory it uses.
void duckdb_vx_vector_slice_to_dictionary(duckdb_vector ffi_vector, duckdb_selection_vector selection_vector,
idx_t selection_vector_length);

/// Turn vector into a dictionary vector. In contrast to `duckdb_vx_vector_slice_to_dictionary` this
/// call creates a dictionary that holds a strong reference to its children.
/// Creates a dictionary vector for a given values vector and selection vector.
///
/// A dictionary holds a strong reference to all memory it uses.
///
/// `dictionary` differs from `slice_to_dictionary` in that it initializes hash caching:
/// https://github.com/duckdb/duckdb/blob/0dcf633f603a629981d089202f93b9080cb1a3e9/src/common/types/vector.cpp#L293
void duckdb_vx_vector_dictionary(duckdb_vector ffi_vector, duckdb_vector ffi_dict, idx_t dictionary_size,
duckdb_selection_vector ffi_sel_vec, idx_t count);

Expand Down
54 changes: 52 additions & 2 deletions vortex-duckdb/src/duckdb/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ impl Vector {

/// Slice the vector to a new dictionary vector, using the current vector's values and
/// the provided selection vector.
///
/// A dictionary slice holds a strong reference to all memory it uses.
pub fn slice_to_dictionary(&mut self, sel_vec: SelectionVector, sel_vec_length: usize) {
unsafe {
cpp::duckdb_vx_vector_slice_to_dictionary(
Expand All @@ -63,8 +65,12 @@ impl Vector {
}
}

/// Turn vector into a dictionary vector. In contrast to `slice_to_dictionary` this
/// call creates a dictionary that holds a strong reference to its children.
/// Creates a dictionary vector for a given values vector and selection vector.
///
/// A dictionary holds a strong reference to all memory it uses.
///
/// `dictionary` differs from `slice_to_dictionary` in that it initializes hash caching.
/// See: <https://github.com/duckdb/duckdb/blob/0dcf633f603a629981d089202f93b9080cb1a3e9/src/common/types/vector.cpp#L293>
pub fn dictionary(
&self,
dict: &Vector,
Expand Down Expand Up @@ -562,4 +568,48 @@ mod tests {
assert!(validity.is_valid(62), "Row 62 should be valid");
assert!(validity.is_valid(65), "Row 65 should be valid");
}

#[test]
fn test_slice_to_dictionary() {
let len = 2;
let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER);
let mut vector = Vector::with_capacity(logical_type, len);

let slice = unsafe { vector.as_slice_mut::<i32>(len) };
slice[0] = 10;
slice[1] = 20;

let mut sel_vec = SelectionVector::with_capacity(2);
let sel_slice = unsafe { sel_vec.as_slice_mut(2) };
sel_slice[0] = 1;
sel_slice[1] = 0;

vector.slice_to_dictionary(sel_vec, 2);
Copy link
Contributor

@connortsui20 connortsui20 Sep 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What would happen if the selection vector here were also `[0, 1, 0]`, like in the test below? I'm still trying to wrap my head around the difference between `dictionary` and `slice_to_dictionary`...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They're the same except for the hash-cache initialization that `Dictionary` does. I just stepped through the code in the debugger to double-check.

void Vector::Dictionary(Vector &dict, idx_t dictionary_size, const SelectionVector &sel, idx_t count) {
	if (DictionaryVector::CanCacheHashes(dict.GetType()) && !dict.cached_hashes) {
		// Create an empty hash vector for this dictionary, potentially to be used for caching hashes later
		// This needs to happen here, as we need to add "cached_hashes" to the original input Vector "dict"
		dict.cached_hashes = make_buffer<VectorChildBuffer>(Vector(LogicalType::HASH, false, false, 0));
	}
	Reference(dict);
	Dictionary(dictionary_size, sel, count);
}

calls into

void Vector::Dictionary(idx_t dictionary_size, const SelectionVector &sel, idx_t count) {
	Slice(sel, count);
	if (GetVectorType() == VectorType::DICTIONARY_VECTOR) {
		buffer->Cast<DictionaryBuffer>().SetDictionarySize(dictionary_size);
	}
}

which in turn calls

void Vector::Slice(const SelectionVector &sel, idx_t count) {
    //...

vector.flatten(2);

assert_eq!(vector.as_slice_with_len::<i32>(2), &[20, 10]);
}

#[test]
fn test_dictionary() {
    // Checks that `Vector::dictionary` builds a dictionary vector which, once
    // flattened, yields the dictionary values picked out by the selection vector.
    let int_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER);

    // Dictionary values: [100, 200].
    let mut dict = Vector::with_capacity(int_type.clone(), 2);
    // SAFETY: `dict` was created with capacity 2 just above, so a 2-element
    // `i32` view is assumed to stay within its allocation.
    let dict_values = unsafe { dict.as_slice_mut::<i32>(2) };
    for (slot, value) in dict_values.iter_mut().zip([100, 200]) {
        *slot = value;
    }

    // Target vector that is turned into the dictionary vector.
    let vector = Vector::with_capacity(int_type, 3);

    // Selection indices into the dictionary: [0, 1, 0].
    let mut sel_vec = SelectionVector::with_capacity(3);
    // SAFETY: `sel_vec` was created with capacity 3 just above, so a 3-element
    // view is assumed to stay within its allocation.
    let sel_indices = unsafe { sel_vec.as_slice_mut(3) };
    for (slot, index) in sel_indices.iter_mut().zip([0, 1, 0]) {
        *slot = index;
    }

    vector.dictionary(&dict, 2, &sel_vec, 3);
    // Flatten so the selected values can be read back as a plain slice.
    vector.flatten(3);

    let expected = &[100, 200, 100];
    assert_eq!(vector.as_slice_with_len::<i32>(3), expected);
}
}
Loading