Skip to content

Commit

Permalink
Handle hashing list of struct arrays
Browse files Browse the repository at this point in the history
  • Loading branch information
thinkharderdev committed Feb 22, 2024
1 parent cb3341f commit 425247e
Showing 1 changed file with 31 additions and 0 deletions.
31 changes: 31 additions & 0 deletions datafusion/common/src/hash_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,33 @@ where
Ok(())
}

fn hash_struct_array(
array: &StructArray,
random_state: &RandomState,
hashes_buffer: &mut [u64],
) -> Result<()> {
let nulls = array.nulls();
let row_len = array.len();

let valid_row_indices: Vec<usize> = if let Some(nulls) = nulls {
nulls.valid_indices().collect()
} else {
(0..row_len).collect()
};

// Create hashes for each row that combines the hashes over all the column at that row.
let mut values_hashes = vec![0u64; row_len];
create_hashes(array.columns(), random_state, &mut values_hashes)?;

for i in valid_row_indices {
let hash = &mut hashes_buffer[i];
*hash = combine_hashes(*hash, values_hashes[i]);
}

Ok(())
}


/// Test version of `create_hashes` that produces the same value for
/// all hashes (to test collisions)
///
Expand Down Expand Up @@ -327,6 +354,10 @@ pub fn create_hashes<'a>(
array => hash_dictionary(array, random_state, hashes_buffer, rehash)?,
_ => unreachable!()
}
DataType::Struct(_) => {
let array = as_struct_array(array);
hash_struct_array(array, random_state, hashes_buffer)?;
}
DataType::List(_) => {
let array = as_list_array(array);
hash_list_array(array, random_state, hashes_buffer)?;
Expand Down

0 comments on commit 425247e

Please sign in to comment.