Skip to content

Commit d18b455

Browse files
zuochunweizuochunweizhejiangxiaomai
committed
add decimal column reader support (oap-project#254)
add decimal column reader support. --------- Co-authored-by: zuochunwei <zuochunwei@meituan.com> Co-authored-by: zhejiangxiaomai <zhenhui.zhao@intel.com>
1 parent bbab8b3 commit d18b455

9 files changed

+731
-1
lines changed

velox/dwio/dwrf/common/FileMetadata.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,13 @@ TypeKind TypeWrapper::kind() const {
9292
return TypeKind::VARCHAR;
9393
case proto::orc::Type_Kind_DATE:
9494
return TypeKind::DATE;
95-
case proto::orc::Type_Kind_DECIMAL:
95+
case proto::orc::Type_Kind_DECIMAL: {
96+
if (orcPtr()->precision() <= 18) {
97+
return TypeKind::SHORT_DECIMAL;
98+
} else {
99+
return TypeKind::LONG_DECIMAL;
100+
}
101+
}
96102
case proto::orc::Type_Kind_CHAR:
97103
case proto::orc::Type_Kind_TIMESTAMP_INSTANT:
98104
DWIO_RAISE(

velox/dwio/dwrf/reader/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ add_library(
2727
SelectiveStringDirectColumnReader.cpp
2828
SelectiveStringDictionaryColumnReader.cpp
2929
SelectiveTimestampColumnReader.cpp
30+
SelectiveShortDecimalColumnReader.cpp
31+
SelectiveLongDecimalColumnReader.cpp
3032
SelectiveStructColumnReader.cpp
3133
SelectiveRepeatedColumnReader.cpp
3234
StripeDictionaryCache.cpp

velox/dwio/dwrf/reader/DwrfReader.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,12 @@ std::optional<size_t> DwrfRowReader::estimatedRowSizeHelper(
518518
}
519519
return totalEstimate;
520520
}
521+
case TypeKind::SHORT_DECIMAL: {
522+
return valueCount * sizeof(uint64_t);
523+
}
524+
case TypeKind::LONG_DECIMAL: {
525+
return valueCount * sizeof(uint128_t);
526+
}
521527
default:
522528
return std::nullopt;
523529
}

velox/dwio/dwrf/reader/ReaderBase.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,12 @@ std::shared_ptr<const Type> ReaderBase::convertType(
314314
// child doesn't hold.
315315
return ROW(std::move(names), std::move(tl));
316316
}
317+
case TypeKind::LONG_DECIMAL:
318+
return LONG_DECIMAL(
319+
type.getOrcPtr()->precision(), type.getOrcPtr()->scale());
320+
case TypeKind::SHORT_DECIMAL:
321+
return SHORT_DECIMAL(
322+
type.getOrcPtr()->precision(), type.getOrcPtr()->scale());
317323
default:
318324
DWIO_RAISE("Unknown type kind");
319325
}

velox/dwio/dwrf/reader/SelectiveDwrfReader.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@
2222
#include "velox/dwio/dwrf/reader/SelectiveFloatingPointColumnReader.h"
2323
#include "velox/dwio/dwrf/reader/SelectiveIntegerDictionaryColumnReader.h"
2424
#include "velox/dwio/dwrf/reader/SelectiveIntegerDirectColumnReader.h"
25+
#include "velox/dwio/dwrf/reader/SelectiveLongDecimalColumnReader.h"
2526
#include "velox/dwio/dwrf/reader/SelectiveRepeatedColumnReader.h"
27+
#include "velox/dwio/dwrf/reader/SelectiveShortDecimalColumnReader.h"
2628
#include "velox/dwio/dwrf/reader/SelectiveStringDictionaryColumnReader.h"
2729
#include "velox/dwio/dwrf/reader/SelectiveStringDirectColumnReader.h"
2830
#include "velox/dwio/dwrf/reader/SelectiveStructColumnReader.h"
@@ -126,6 +128,12 @@ std::unique_ptr<SelectiveColumnReader> SelectiveDwrfReader::build(
126128
case TypeKind::TIMESTAMP:
127129
return std::make_unique<SelectiveTimestampColumnReader>(
128130
requestedType, params, scanSpec);
131+
case TypeKind::SHORT_DECIMAL:
132+
return std::make_unique<SelectiveShortDecimalColumnReader>(
133+
requestedType, dataType->type, params, scanSpec);
134+
case TypeKind::LONG_DECIMAL:
135+
return std::make_unique<SelectiveLongDecimalColumnReader>(
136+
requestedType, dataType->type, params, scanSpec);
129137
default:
130138
DWIO_RAISE(
131139
"buildReader unhandled type: " +
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
/*
2+
* Copyright (c) Facebook, Inc. and its affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include "velox/dwio/dwrf/reader/SelectiveLongDecimalColumnReader.h"
18+
#include "velox/dwio/common/BufferUtil.h"
19+
#include "velox/dwio/dwrf/common/DecoderUtil.h"
20+
#include "velox/dwio/dwrf/reader/SelectiveShortDecimalColumnReader.h"
21+
22+
namespace facebook::velox::dwrf {
23+
24+
using namespace dwio::common;
25+
26+
// Reads the rows in 'rows' starting at 'offset' and routes them either to the
// scan spec's value hook or through its filter (an always-true filter when the
// scan spec has none).
void SelectiveLongDecimalColumnReader::read(
    vector_size_t offset,
    RowSet rows,
    const uint64_t* incomingNulls) {
  // The read is prepared with int64_t because that is the type of the scale.
  prepareRead<int64_t>(offset, rows, incomingNulls);

  // The row set is treated as dense when its last entry equals size - 1.
  const bool dense = rows.back() == rows.size() - 1;
  velox::common::Filter* activeFilter =
      scanSpec_->filter() ? scanSpec_->filter() : &alwaysTrue();

  // Values are not kept: only evaluate the filter and drop the values.
  if (!scanSpec_->keepValues()) {
    if (dense) {
      processFilter<true>(activeFilter, DropValues(), rows);
    } else {
      processFilter<false>(activeFilter, DropValues(), rows);
    }
    return;
  }

  // A value hook takes precedence over filtering into the reader.
  if (scanSpec_->valueHook()) {
    if (dense) {
      processValueHook<true>(rows, scanSpec_->valueHook());
    } else {
      processValueHook<false>(rows, scanSpec_->valueHook());
    }
    return;
  }

  // Values are kept and there is no hook: extract passing values into this
  // reader.
  if (dense) {
    processFilter<true>(activeFilter, ExtractToReader(this), rows);
  } else {
    processFilter<false>(activeFilter, ExtractToReader(this), rows);
  }
}
60+
61+
namespace {
62+
// Rescales 'value' in place from 'currentScale' to 'scale'.
//
// When 'scale' is larger, 'value' is multiplied by 10^(scale - currentScale).
// When 'scale' is smaller, 'value' is divided by 10^(currentScale - scale);
// integer division truncates toward zero, so the dropped digits are discarded
// rather than rounded. The adjustment is applied in steps of at most
// SelectiveShortDecimalColumnReader::MAX_PRECISION_64 digits, the value used
// to index SelectiveShortDecimalColumnReader::POWERS_OF_TEN.
//
// NOTE(review): the multiplication is not checked for int128_t overflow; this
// assumes callers only request scales that keep the result representable.
void scaleInt128(int128_t& value, uint32_t scale, uint32_t currentScale) {
  // Scale up. This loop does nothing when scale <= currentScale.
  while (scale > currentScale) {
    const uint32_t scaleAdjust = std::min(
        SelectiveShortDecimalColumnReader::MAX_PRECISION_64,
        scale - currentScale);
    value *= SelectiveShortDecimalColumnReader::POWERS_OF_TEN[scaleAdjust];
    currentScale += scaleAdjust;
  }

  // Scale down. Previously this loop only decremented currentScale and left
  // 'value' untouched, so down-scaled values came back unscaled.
  while (currentScale > scale) {
    const uint32_t scaleAdjust = std::min(
        SelectiveShortDecimalColumnReader::MAX_PRECISION_64,
        currentScale - scale);
    value /= SelectiveShortDecimalColumnReader::POWERS_OF_TEN[scaleAdjust];
    currentScale -= scaleAdjust;
  }
}
85+
} // namespace
86+
87+
void SelectiveLongDecimalColumnReader::getValues(
88+
RowSet rows,
89+
VectorPtr* result) {
90+
auto nullsPtr = nullsInReadRange_
91+
? (returnReaderNulls_ ? nullsInReadRange_->as<uint64_t>()
92+
: rawResultNulls_)
93+
: nullptr;
94+
95+
auto decimalValues =
96+
AlignedBuffer::allocate<UnscaledLongDecimal>(numValues_, &memoryPool_);
97+
auto rawDecimalValues = decimalValues->asMutable<UnscaledLongDecimal>();
98+
99+
auto scales = scaleBuffer_->as<int64_t>();
100+
auto values = values_->as<int128_t>();
101+
102+
// transfer to UnscaledLongDecimal
103+
for (vector_size_t i = 0; i < numValues_; i++) {
104+
if (!nullsPtr || !bits::isBitNull(nullsPtr, i)) {
105+
int32_t currentScale = scales[i];
106+
int128_t value = values[i];
107+
108+
scaleInt128(value, scale_, currentScale);
109+
110+
rawDecimalValues[i] = UnscaledLongDecimal(value);
111+
}
112+
}
113+
114+
values_ = decimalValues;
115+
rawValues_ = values_->asMutable<char>();
116+
getFlatValues<UnscaledLongDecimal, UnscaledLongDecimal>(
117+
rows, result, type_, true);
118+
}
119+
120+
} // namespace facebook::velox::dwrf

0 commit comments

Comments
 (0)