Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions velox/exec/VectorHasher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,27 @@ uint64_t hashOne(DecodedVector& decoded, vector_size_t index) {
// Virtual function call for complex type.
return decoded.base()->hashValueAt(decoded.index(index));
}
// folly::hasher returns the same hash value for double/float -0.0 and 0.0,
// but in Presto the hash values of -0.0 and 0.0 are not equal. To match that,
// -0.0 is first reinterpreted as its integer bit pattern (long bits), and
// folly::hasher is then applied to that integer to get the hash value.
if constexpr (Kind == TypeKind::DOUBLE) {
double val = decoded.valueAt<double>(index);
int64_t* buf = reinterpret_cast<int64_t*>(&val);
// double -0.0 => *buf = -9223372036854775808
if (*buf == -9223372036854775808) {
uint64_t res = folly::hasher<int64_t>()(*buf);
return res;
}
}
if constexpr (Kind == TypeKind::REAL) {
float val = decoded.valueAt<float>(index);
int32_t* buf = reinterpret_cast<int32_t*>(&val);
// float -0.0 => *buf = -2147483648
if (*buf == -2147483648) {
uint64_t res = folly::hasher<int32_t>()(*buf);
return res;
}
}
// Inlined for scalars.
using T = typename KindToFlatVector<Kind>::HashRowType;
return folly::hasher<T>()(decoded.valueAt<T>(index));
Expand Down
34 changes: 34 additions & 0 deletions velox/exec/tests/AggregationTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3577,6 +3577,40 @@ TEST_F(AggregationTest, ignoreOrderBy) {
.assertResults("SELECT c0, sum(c1), avg(c1) FROM tmp GROUP BY 1");
}

// Runs a partial + final aggregation grouped on a DOUBLE key column that
// holds -0.0 and 0.0, and checks that the output has one row per key:
// (-0.0, 1) and (0.0, 2).
TEST_F(AggregationTest, testGroupByDoubleNegativeZero) {
  // Input: c0 is the grouping key, c1 is the value being summed.
  auto keys = makeFlatVector<double>({-0.0, 0.0});
  auto values = makeFlatVector<int64_t>({1, 2});
  auto input = makeRowVector({keys, values});

  createDuckDbTable({input});

  PlanBuilder builder;
  builder.values({input})
      .partialAggregation({"c0"}, {"sum(c1)"})
      .finalAggregation();
  auto plan = builder.planNode();

  // Expected output mirrors the input: each zero keeps its own sum.
  auto expectedKeys = makeFlatVector<double>({-0.0, 0.0});
  auto expectedSums = makeFlatVector<int64_t>({1, 2});
  std::vector<RowVectorPtr> expected{
      makeRowVector({expectedKeys, expectedSums})};
  test::assertQuery(plan, expected);
}

// Runs a partial + final aggregation grouped on a REAL (float) key column
// that holds -0.0 and 0.0, and checks that the output has one row per key:
// (-0.0, 1) and (0.0, 2).
TEST_F(AggregationTest, testGroupByFloatNegativeZero) {
  // Input: c0 is the grouping key, c1 is the value being summed.
  auto keys = makeFlatVector<float>({-0.0, 0.0});
  auto values = makeFlatVector<int64_t>({1, 2});
  auto input = makeRowVector({keys, values});

  createDuckDbTable({input});

  PlanBuilder builder;
  builder.values({input})
      .partialAggregation({"c0"}, {"sum(c1)"})
      .finalAggregation();
  auto plan = builder.planNode();

  // Expected output mirrors the input: each zero keeps its own sum.
  auto expectedKeys = makeFlatVector<float>({-0.0, 0.0});
  auto expectedSums = makeFlatVector<int64_t>({1, 2});
  std::vector<RowVectorPtr> expected{
      makeRowVector({expectedKeys, expectedSums})};
  test::assertQuery(plan, expected);
}

class TestAccumulator {
public:
~TestAccumulator() {
Expand Down
32 changes: 32 additions & 0 deletions velox/exec/tests/VectorHasherTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,38 @@ TEST_F(VectorHasherTest, flat) {
}
}

// Hashes a two-row DOUBLE vector {-0.0, 0.0} through VectorHasher and pins
// the resulting hash of each row: -0.0 must produce a non-zero hash while
// 0.0 hashes to 0.
TEST_F(VectorHasherTest, testHashDoubleNegativeZero) {
  constexpr vector_size_t kNumRows = 2;

  auto hasher = exec::VectorHasher::create(DOUBLE(), 1);
  ASSERT_EQ(hasher->channel(), 1);
  ASSERT_EQ(hasher->typeKind(), TypeKind::DOUBLE);

  auto zeros = makeFlatVector<double>({-0.0, 0.0});

  // Start from all-zero hashes so the assertions below only see values
  // written by hash().
  raw_vector<uint64_t> hashes(kNumRows);
  std::fill(hashes.begin(), hashes.end(), 0);

  SelectivityVector rows(kNumRows);
  hasher->decode(*zeros, rows);
  hasher->hash(rows, false, hashes);

  // Row 0 is -0.0, row 1 is 0.0.
  EXPECT_EQ(hashes[0], 4316648529147585864);
  EXPECT_EQ(hashes[1], 0);
}

// Hashes a two-row REAL (float) vector {-0.0, 0.0} through VectorHasher and
// pins the resulting hash of each row: -0.0 must produce a non-zero hash
// while 0.0 hashes to 0.
TEST_F(VectorHasherTest, testHashFloatNegativeZero) {
  constexpr vector_size_t kNumRows = 2;

  auto hasher = exec::VectorHasher::create(REAL(), 1);
  ASSERT_EQ(hasher->channel(), 1);
  ASSERT_EQ(hasher->typeKind(), TypeKind::REAL);

  auto zeros = makeFlatVector<float>({-0.0, 0.0});

  // Start from all-zero hashes so the assertions below only see values
  // written by hash().
  raw_vector<uint64_t> hashes(kNumRows);
  std::fill(hashes.begin(), hashes.end(), 0);

  SelectivityVector rows(kNumRows);
  hasher->decode(*zeros, rows);
  hasher->hash(rows, false, hashes);

  // Row 0 is -0.0, row 1 is 0.0.
  EXPECT_EQ(hashes[0], 3269171733);
  EXPECT_EQ(hashes[1], 0);
}

TEST_F(VectorHasherTest, nonNullConstant) {
auto hasher = exec::VectorHasher::create(INTEGER(), 1);
auto vector = BaseVector::createConstant(INTEGER(), 123, 100, pool());
Expand Down