Skip to content

Commit f2b5349

Browse files
committed
address cr.
1 parent ce1cdc8 commit f2b5349

File tree

1 file changed

+7
-4
lines changed
  • datafusion/physical-plan/src/aggregates/group_values/single_group_by

1 file changed

+7
-4
lines changed

datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,10 +81,13 @@ hash_float!(f16, f32, f64);
8181
pub struct GroupValuesPrimitive<T: ArrowPrimitiveType> {
8282
/// The data type of the output array
8383
data_type: DataType,
84-
/// Stores the group index based on the hash of its value
84+
/// Stores the `(group_index, hash)` based on the hash of its value
85+
///
86+
/// We also store `hash` is for reducing cost of rehashing. Such cost
87+
/// is obvious in high cardinality group by situation.
88+
/// More details can see:
89+
/// <https://github.com/apache/datafusion/issues/15961>
8590
///
86-
/// We don't store the hashes as hashing fixed width primitives
87-
/// is fast enough for this not to benefit performance
8891
map: HashTable<(usize, u64)>,
8992
/// The group index of the null value if any
9093
null_group: Option<usize>,
@@ -148,7 +151,7 @@ where
148151
}
149152

150153
fn size(&self) -> usize {
151-
self.map.capacity() * size_of::<usize>() + self.values.allocated_size()
154+
self.map.capacity() * size_of::<(usize, u64)>() + self.values.allocated_size()
152155
}
153156

154157
fn is_empty(&self) -> bool {

0 commit comments

Comments
 (0)