Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions c++/src/Statistics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -181,13 +181,13 @@ namespace orc {

ColumnStatisticsImpl::ColumnStatisticsImpl(const proto::ColumnStatistics& pb) {
_stats.setNumberOfValues(pb.numberofvalues());
_stats.setHasNull(pb.hasnull());
_stats.setHasNull(pb.has_hasnull() ? pb.hasnull() : true);
}

BinaryColumnStatisticsImpl::BinaryColumnStatisticsImpl(const proto::ColumnStatistics& pb,
const StatContext& statContext) {
_stats.setNumberOfValues(pb.numberofvalues());
_stats.setHasNull(pb.hasnull());
_stats.setHasNull(pb.has_hasnull() ? pb.hasnull() : true);
if (pb.has_binarystatistics() && statContext.correctStats) {
_stats.setHasTotalLength(pb.binarystatistics().has_sum());
_stats.setTotalLength(static_cast<uint64_t>(pb.binarystatistics().sum()));
Expand All @@ -197,7 +197,7 @@ namespace orc {
BooleanColumnStatisticsImpl::BooleanColumnStatisticsImpl(const proto::ColumnStatistics& pb,
const StatContext& statContext) {
_stats.setNumberOfValues(pb.numberofvalues());
_stats.setHasNull(pb.hasnull());
_stats.setHasNull(pb.has_hasnull() ? pb.hasnull() : true);
if (pb.has_bucketstatistics() && statContext.correctStats) {
_hasCount = true;
_trueCount = pb.bucketstatistics().count(0);
Expand All @@ -210,7 +210,7 @@ namespace orc {
DateColumnStatisticsImpl::DateColumnStatisticsImpl(const proto::ColumnStatistics& pb,
const StatContext& statContext) {
_stats.setNumberOfValues(pb.numberofvalues());
_stats.setHasNull(pb.hasnull());
_stats.setHasNull(pb.has_hasnull() ? pb.hasnull() : true);
if (!pb.has_datestatistics() || !statContext.correctStats) {
// hasMinimum_ is false by default;
// hasMaximum_ is false by default;
Expand All @@ -227,7 +227,7 @@ namespace orc {
DecimalColumnStatisticsImpl::DecimalColumnStatisticsImpl(const proto::ColumnStatistics& pb,
const StatContext& statContext) {
_stats.setNumberOfValues(pb.numberofvalues());
_stats.setHasNull(pb.hasnull());
_stats.setHasNull(pb.has_hasnull() ? pb.hasnull() : true);
if (pb.has_decimalstatistics() && statContext.correctStats) {
const proto::DecimalStatistics& stats = pb.decimalstatistics();
_stats.setHasMinimum(stats.has_minimum());
Expand All @@ -242,7 +242,7 @@ namespace orc {

DoubleColumnStatisticsImpl::DoubleColumnStatisticsImpl(const proto::ColumnStatistics& pb) {
_stats.setNumberOfValues(pb.numberofvalues());
_stats.setHasNull(pb.hasnull());
_stats.setHasNull(pb.has_hasnull() ? pb.hasnull() : true);
if (!pb.has_doublestatistics()) {
_stats.setMinimum(0);
_stats.setMaximum(0);
Expand All @@ -261,7 +261,7 @@ namespace orc {

IntegerColumnStatisticsImpl::IntegerColumnStatisticsImpl(const proto::ColumnStatistics& pb) {
_stats.setNumberOfValues(pb.numberofvalues());
_stats.setHasNull(pb.hasnull());
_stats.setHasNull(pb.has_hasnull() ? pb.hasnull() : true);
if (!pb.has_intstatistics()) {
_stats.setMinimum(0);
_stats.setMaximum(0);
Expand All @@ -281,7 +281,7 @@ namespace orc {
StringColumnStatisticsImpl::StringColumnStatisticsImpl(const proto::ColumnStatistics& pb,
const StatContext& statContext) {
_stats.setNumberOfValues(pb.numberofvalues());
_stats.setHasNull(pb.hasnull());
_stats.setHasNull(pb.has_hasnull() ? pb.hasnull() : true);
if (!pb.has_stringstatistics() || !statContext.correctStats) {
_stats.setTotalLength(0);
} else {
Expand All @@ -299,7 +299,7 @@ namespace orc {
TimestampColumnStatisticsImpl::TimestampColumnStatisticsImpl(const proto::ColumnStatistics& pb,
const StatContext& statContext) {
_stats.setNumberOfValues(pb.numberofvalues());
_stats.setHasNull(pb.hasnull());
_stats.setHasNull(pb.has_hasnull() ? pb.hasnull() : true);
if (!pb.has_timestampstatistics() || !statContext.correctStats) {
_stats.setMinimum(0);
_stats.setMaximum(0);
Expand Down Expand Up @@ -365,7 +365,7 @@ namespace orc {
CollectionColumnStatisticsImpl::CollectionColumnStatisticsImpl(
const proto::ColumnStatistics& pb) {
_stats.setNumberOfValues(pb.numberofvalues());
_stats.setHasNull(pb.hasnull());
_stats.setHasNull(pb.has_hasnull() ? pb.hasnull() : true);
if (!pb.has_collectionstatistics()) {
_stats.setMinimum(0);
_stats.setMaximum(0);
Expand Down
30 changes: 18 additions & 12 deletions c++/src/sargs/PredicateLeaf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -390,10 +390,15 @@ namespace orc {

DIAGNOSTIC_POP

static bool col_stats_hasnull(const proto::ColumnStatistics& stats) {
// for foward compatibility, if hasnull is not set, assume that the column has nulls
return stats.has_hasnull() ? stats.hasnull() : true;
}

static TruthValue evaluateBoolPredicate(const PredicateLeaf::Operator op,
const std::vector<Literal>& literals,
const proto::ColumnStatistics& stats) {
bool hasNull = stats.hasnull();
bool hasNull = col_stats_hasnull(stats);
if (!stats.has_bucketstatistics() || stats.bucketstatistics().count_size() == 0) {
// does not have bool stats
return hasNull ? TruthValue::YES_NO_NULL : TruthValue::YES_NO;
Expand Down Expand Up @@ -513,7 +518,7 @@ namespace orc {
colStats.intstatistics().has_maximum()) {
const auto& stats = colStats.intstatistics();
result = evaluatePredicateRange(mOperator, literal2Long(mLiterals), stats.minimum(),
stats.maximum(), colStats.hasnull());
stats.maximum(), col_stats_hasnull(colStats));
}
break;
}
Expand All @@ -522,10 +527,10 @@ namespace orc {
colStats.doublestatistics().has_maximum()) {
const auto& stats = colStats.doublestatistics();
if (!std::isfinite(stats.sum())) {
result = colStats.hasnull() ? TruthValue::YES_NO_NULL : TruthValue::YES_NO;
result = col_stats_hasnull(colStats) ? TruthValue::YES_NO_NULL : TruthValue::YES_NO;
} else {
result = evaluatePredicateRange(mOperator, literal2Double(mLiterals), stats.minimum(),
stats.maximum(), colStats.hasnull());
stats.maximum(), col_stats_hasnull(colStats));
}
}
break;
Expand All @@ -536,7 +541,7 @@ namespace orc {
colStats.stringstatistics().has_maximum()) {
const auto& stats = colStats.stringstatistics();
result = evaluatePredicateRange(mOperator, literal2String(mLiterals), stats.minimum(),
stats.maximum(), colStats.hasnull());
stats.maximum(), col_stats_hasnull(colStats));
}
break;
}
Expand All @@ -545,7 +550,7 @@ namespace orc {
colStats.datestatistics().has_maximum()) {
const auto& stats = colStats.datestatistics();
result = evaluatePredicateRange(mOperator, literal2Date(mLiterals), stats.minimum(),
stats.maximum(), colStats.hasnull());
stats.maximum(), col_stats_hasnull(colStats));
}
break;
}
Expand All @@ -564,7 +569,7 @@ namespace orc {
stats.maximumutc() / 1000,
static_cast<int32_t>((stats.maximumutc() % 1000) * 1000000) + maxNano);
result = evaluatePredicateRange(mOperator, literal2Timestamp(mLiterals), minTimestamp,
maxTimestamp, colStats.hasnull());
maxTimestamp, col_stats_hasnull(colStats));
}
break;
}
Expand All @@ -574,7 +579,7 @@ namespace orc {
const auto& stats = colStats.decimalstatistics();
result = evaluatePredicateRange(mOperator, literal2Decimal(mLiterals),
Decimal(stats.minimum()), Decimal(stats.maximum()),
colStats.hasnull());
col_stats_hasnull(colStats));
}
break;
}
Expand All @@ -589,7 +594,7 @@ namespace orc {
}

// make sure null literal is respected for IN operator
if (mOperator == Operator::IN && colStats.hasnull()) {
if (mOperator == Operator::IN && col_stats_hasnull(colStats)) {
for (const auto& literal : mLiterals) {
if (literal.isNull()) {
result = TruthValue::YES_NO_NULL;
Expand Down Expand Up @@ -698,20 +703,21 @@ namespace orc {
}
}

bool allNull = colStats.hasnull() && colStats.numberofvalues() == 0;
bool allNull = col_stats_hasnull(colStats) && colStats.numberofvalues() == 0;
if (mOperator == Operator::IS_NULL ||
((mOperator == Operator::EQUALS || mOperator == Operator::NULL_SAFE_EQUALS) &&
mLiterals.at(0).isNull())) {
// IS_NULL operator does not need to check min/max stats and bloom filter
return allNull ? TruthValue::YES : (colStats.hasnull() ? TruthValue::YES_NO : TruthValue::NO);
return allNull ? TruthValue::YES
: (col_stats_hasnull(colStats) ? TruthValue::YES_NO : TruthValue::NO);
} else if (allNull) {
// if we don't have any value, everything must have been null
return TruthValue::IS_NULL;
}

TruthValue result = evaluatePredicateMinMax(colStats);
if (shouldEvaluateBloomFilter(mOperator, result, bloomFilter)) {
return evaluatePredicateBloomFiter(bloomFilter, colStats.hasnull());
return evaluatePredicateBloomFiter(bloomFilter, col_stats_hasnull(colStats));
} else {
return result;
}
Expand Down