Skip to content

Commit da99378

Browse files
zuochunweizhejiangxiaomai
authored andcommitted
[ORC] Add max/min support for ORC decimal reader (#301)
1 parent fb6dd42 commit da99378

File tree

9 files changed

+120
-13
lines changed

9 files changed

+120
-13
lines changed

velox/common/memory/MallocAllocator.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#pragma once
1818

19+
#include "velox/common/memory/Memory.h"
1920
#include "velox/common/memory/MemoryAllocator.h"
2021

2122
namespace facebook::velox::memory {
@@ -25,7 +26,10 @@ class MallocAllocator : public MemoryAllocator {
2526
MallocAllocator();
2627

2728
~MallocAllocator() override {
28-
VELOX_CHECK((numAllocated_ == 0) && (numMapped_ == 0), "{}", toString());
29+
if (numAllocated_ != 0 || numMapped_ != 0) {
30+
VELOX_MEM_LOG(WARNING)
31+
<< "Unreleased allocation detected: " << toString();
32+
}
2933
}
3034

3135
Kind kind() const override {

velox/common/memory/MemoryAllocator.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
#include <numeric>
2222

2323
#include "velox/common/base/BitUtil.h"
24-
#include "velox/common/memory/Memory.h"
2524

2625
namespace facebook::velox::memory {
2726

velox/dwio/common/SelectiveColumnReader.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -634,6 +634,10 @@ namespace facebook::velox::dwio::common {
634634
// Template parameter to indicate no hook in fast scan path. This is
635635
// referenced in decoders, thus needs to be declared in a header.
636636
struct NoHook : public ValueHook {
637+
std::string toString() const override {
638+
return "NoHook";
639+
}
640+
637641
void addValue(vector_size_t /*row*/, const void* FOLLY_NULLABLE /*value*/)
638642
override {}
639643
};

velox/dwio/dwrf/reader/SelectiveLongDecimalColumnReader.h

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,27 @@ class SelectiveLongDecimalColumnReader
115115
private:
116116
template <bool dense>
117117
void processValueHook(RowSet rows, ValueHook* hook) {
118-
VELOX_FAIL("TODO: orc long decimal process ValueHook");
118+
switch (hook->kind()) {
119+
case aggregate::AggregationHook::kLongDecimalMax:
120+
readHelper<dense, velox::common::AlwaysTrue>(
121+
&dwio::common::alwaysTrue(),
122+
rows,
123+
dwio::common::ExtractToHook<
124+
aggregate::MinMaxHook<int128_t, false>>(hook));
125+
break;
126+
case aggregate::AggregationHook::kLongDecimalMin:
127+
readHelper<dense, velox::common::AlwaysTrue>(
128+
&dwio::common::alwaysTrue(),
129+
rows,
130+
dwio::common::ExtractToHook<
131+
aggregate::MinMaxHook<int128_t, true>>(hook));
132+
break;
133+
default:
134+
readHelper<dense, velox::common::AlwaysTrue>(
135+
&dwio::common::alwaysTrue(),
136+
rows,
137+
dwio::common::ExtractToGenericHook(hook));
138+
}
119139
}
120140

121141
template <bool dense, typename ExtractValues>

velox/dwio/dwrf/reader/SelectiveShortDecimalColumnReader.h

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,27 @@ class SelectiveShortDecimalColumnReader
121121
private:
122122
template <bool dense>
123123
void processValueHook(RowSet rows, ValueHook* hook) {
124-
VELOX_FAIL("TODO: orc short decimal process ValueHook");
124+
switch (hook->kind()) {
125+
case aggregate::AggregationHook::kShortDecimalMax:
126+
readHelper<dense, velox::common::AlwaysTrue>(
127+
&dwio::common::alwaysTrue(),
128+
rows,
129+
dwio::common::ExtractToHook<
130+
aggregate::MinMaxHook<int64_t, false>>(hook));
131+
break;
132+
case aggregate::AggregationHook::kShortDecimalMin:
133+
readHelper<dense, velox::common::AlwaysTrue>(
134+
&dwio::common::alwaysTrue(),
135+
rows,
136+
dwio::common::ExtractToHook<
137+
aggregate::MinMaxHook<int64_t, true>>(hook));
138+
break;
139+
default:
140+
readHelper<dense, velox::common::AlwaysTrue>(
141+
&dwio::common::alwaysTrue(),
142+
rows,
143+
dwio::common::ExtractToGenericHook(hook));
144+
}
125145
}
126146

127147
template <bool dense, typename ExtractValues>

velox/exec/AggregationHook.h

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ class AggregationHook : public ValueHook {
3535
static constexpr Kind kFloatMin = 8;
3636
static constexpr Kind kDoubleMax = 9;
3737
static constexpr Kind kDoubleMin = 10;
38+
static constexpr Kind kShortDecimalMax = 11;
39+
static constexpr Kind kShortDecimalMin = 12;
40+
static constexpr Kind kLongDecimalMax = 13;
41+
static constexpr Kind kLongDecimalMin = 14;
3842

3943
// Make null behavior known at compile time. This is useful when
4044
// templating a column decoding loop with a hook.
@@ -53,6 +57,12 @@ class AggregationHook : public ValueHook {
5357
groups_(groups),
5458
numNulls_(numNulls) {}
5559

60+
std::string toString() const override {
61+
char buf[256];
62+
sprintf(buf, "AggregationHook kind:%d", (int)kind());
63+
return buf;
64+
}
65+
5666
bool acceptsNulls() const override final {
5767
return false;
5868
}
@@ -119,6 +129,17 @@ class SumHook final : public AggregationHook {
119129
uint64_t* numNulls)
120130
: AggregationHook(offset, nullByte, nullMask, groups, numNulls) {}
121131

132+
std::string toString() const override {
133+
char buf[256];
134+
sprintf(
135+
buf,
136+
"SumHook kind:%d TValue:%s TAggregate:%s",
137+
(int)kind(),
138+
typeid(TValue).name(),
139+
typeid(TAggregate).name());
140+
return buf;
141+
}
142+
122143
Kind kind() const override {
123144
if (std::is_same_v<TAggregate, double>) {
124145
if (std::is_same_v<TValue, double>) {
@@ -160,6 +181,18 @@ class SimpleCallableHook final : public AggregationHook {
160181
: AggregationHook(offset, nullByte, nullMask, groups, numNulls),
161182
updateSingleValue_(updateSingleValue) {}
162183

184+
std::string toString() const override {
185+
char buf[256];
186+
sprintf(
187+
buf,
188+
"SimpleCallableHook kind:%d TValue:%s TAggregate:%s UpdateSingleValue:%s",
189+
(int)kind(),
190+
typeid(TValue).name(),
191+
typeid(TAggregate).name(),
192+
typeid(UpdateSingleValue).name());
193+
return buf;
194+
}
195+
163196
Kind kind() const override {
164197
return kGeneric;
165198
}
@@ -187,6 +220,17 @@ class MinMaxHook final : public AggregationHook {
187220
uint64_t* numNulls)
188221
: AggregationHook(offset, nullByte, nullMask, groups, numNulls) {}
189222

223+
std::string toString() const override {
224+
char buf[256];
225+
sprintf(
226+
buf,
227+
"MinMaxHook kind:%d T:%s isMin:%d",
228+
(int)kind(),
229+
typeid(T).name(),
230+
(int)isMin);
231+
return buf;
232+
}
233+
190234
Kind kind() const override {
191235
if (isMin) {
192236
if (std::is_same_v<T, int64_t>) {
@@ -198,6 +242,12 @@ class MinMaxHook final : public AggregationHook {
198242
if (std::is_same_v<T, double>) {
199243
return kDoubleMin;
200244
}
245+
if (std::is_same_v<T, int64_t>) {
246+
return kShortDecimalMin;
247+
}
248+
if (std::is_same_v<T, int128_t>) {
249+
return kLongDecimalMin;
250+
}
201251
} else {
202252
if (std::is_same_v<T, int64_t>) {
203253
return kBigintMax;
@@ -208,6 +258,12 @@ class MinMaxHook final : public AggregationHook {
208258
if (std::is_same_v<T, double>) {
209259
return kDoubleMax;
210260
}
261+
if (std::is_same_v<T, int64_t>) {
262+
return kShortDecimalMax;
263+
}
264+
if (std::is_same_v<T, int128_t>) {
265+
return kLongDecimalMax;
266+
}
211267
}
212268
return kGeneric;
213269
}

velox/functions/sparksql/DecimalArithmetic.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -519,7 +519,15 @@ class Multiply {
519519
// Since deltaScale is greater than zero, result can now be at most
520520
// ((2^64 - 1) * (2^63 - 1)) / 10, which is less than
521521
// BasicDecimal128::kMaxValue, so there cannot be any overflow.
522-
r = res / R(DecimalUtil::kPowersOfTen[deltaScale]);
522+
DecimalUtilOp::divideWithRoundUp<R, R, R>(
523+
r,
524+
res,
525+
R(velox::DecimalUtil::kPowersOfTen[deltaScale]),
526+
false,
527+
0,
528+
0,
529+
overflow);
530+
VELOX_DCHECK(!overflow);
523531
} else {
524532
// We are multiplying decimal(38, 38) by decimal(38, 38). The result
525533
// should be a

velox/substrait/tests/Substrait2VeloxPlanConversionTest.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -304,14 +304,6 @@ TEST_F(Substrait2VeloxPlanConversionTest, ifthenTest) {
304304

305305
// Convert to Velox PlanNode.
306306
auto planNode = planConverter_->toVeloxPlan(substraitPlan);
307-
ASSERT_EQ(
308-
"-- Project[expressions: ] ->\n"
309-
" -- TableScan[table: hive_table, range filters: [(hd_demo_sk, Filter(IsNotNull,"
310-
" deterministic, null not allowed)), (hd_vehicle_count, BigintRange: [1, 9223372036854775807] no nulls)],"
311-
" remaining filter: (and(or(equalto(\"hd_buy_potential\",\">10000\"),equalto(\"hd_buy_potential\",\"unknown\")),"
312-
"if(greaterthan(\"hd_vehicle_count\",0),greaterthan(divide(cast \"hd_dep_count\" as DOUBLE,"
313-
"cast \"hd_vehicle_count\" as DOUBLE),1.2))))] -> n0_0:BIGINT, n0_1:VARCHAR, n0_2:BIGINT, n0_3:BIGINT\n",
314-
planNode->toString(true, true));
315307
ASSERT_EQ(
316308
"-- Project[expressions: ] -> \n "
317309
"-- TableScan[table: hive_table, range filters: [(hd_demo_sk, Filter(IsNotNull, deterministic, null not allowed)),"

velox/vector/LazyVector.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ class ValueHook {
4545

4646
virtual ~ValueHook() = default;
4747

48+
virtual std::string toString() const {
49+
return "ValueHook";
50+
}
51+
4852
virtual bool acceptsNulls() const {
4953
return false;
5054
}

0 commit comments

Comments
 (0)