Skip to content

YQL-17502 Date32 from/to string conversion #960

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Jan 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ydb/library/yql/core/sql_types/simple_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ std::optional<std::string_view> LookupSimpleTypeBySqlAlias(const std::string_vie
{"tzdate", "TzDate"},
{"tzdatetime", "TzDatetime"},
{"tztimestamp", "TzTimestamp"},

{"date32", "Date32"},
};

// new types (or aliases) should be added here
Expand Down
3 changes: 2 additions & 1 deletion ydb/library/yql/core/yql_expr_type_annotation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5112,7 +5112,8 @@ TMaybe<ui32> GetDataFixedSize(const TTypeAnnotationNode* typeAnnotation) {
return 2;
}

if (EDataSlot::Datetime == dataSlot || EDataSlot::Uint32 == dataSlot || EDataSlot::Int32 == dataSlot
if (EDataSlot::Datetime == dataSlot || EDataSlot::Date32 == dataSlot
|| EDataSlot::Uint32 == dataSlot || EDataSlot::Int32 == dataSlot
|| EDataSlot::Float == dataSlot) {
return 4;
}
Expand Down
1 change: 1 addition & 0 deletions ydb/library/yql/core/yql_opt_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1570,6 +1570,7 @@ ui64 GetTypeWeight(const TTypeAnnotationNode& type) {
case NUdf::EDataSlot::Int32:
case NUdf::EDataSlot::Uint32:
case NUdf::EDataSlot::Float:
case NUdf::EDataSlot::Date32:
case NUdf::EDataSlot::Datetime: return 4;

case NUdf::EDataSlot::TzDatetime: return 5;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -819,6 +819,14 @@ void RegisterWideToDateCastsImpl(IBuiltinFunctionRegistry& registry) {
RegisterFunctionImpl<TWideToShort<typename TInput::TLayout, typename TOutput::TLayout, UpperBound>, TUnaryArgsWithNullableResultOpt<TInput, TOutput, true>, TUnaryWrap>(registry, integral);
}

// Registers two `integral` overloads for the TInput -> TOutput cast. Both are backed by the same
// TWideToShort kernel instantiated with the supplied bounds; they differ only in the argument
// descriptor (<..., false> with TUnaryStub, <..., true> with TUnaryWrap).
// NOTE(review): the bounds are only forwarded to TWideToShort here; whether they are inclusive is
// defined by that kernel - confirm there.
template <typename TInput, typename TOutput,
    typename TInput::TLayout UpperBound = std::numeric_limits<typename TInput::TLayout>::max(),
    typename TInput::TLayout LowerBound = std::numeric_limits<typename TInput::TLayout>::min()>
void RegisterWideToBigDateCastsImpl(IBuiltinFunctionRegistry& registry) {
    using TKernel = TWideToShort<typename TInput::TLayout, typename TOutput::TLayout, UpperBound, LowerBound>;
    using TPlainArgs = TUnaryArgsWithNullableResultOpt<TInput, TOutput, false>;
    using TWrappedArgs = TUnaryArgsWithNullableResultOpt<TInput, TOutput, true>;

    RegisterFunctionImpl<TKernel, TPlainArgs, TUnaryStub>(registry, integral);
    RegisterFunctionImpl<TKernel, TWrappedArgs, TUnaryWrap>(registry, integral);
}

void RegisterWideToIntervalCasts(IBuiltinFunctionRegistry& registry) {
constexpr auto TimestampLimit = static_cast<i64>(NUdf::MAX_TIMESTAMP - 1ULL);
RegisterFunctionImpl<TWideToShort<i64, i64, TimestampLimit, -TimestampLimit>, TUnaryArgsWithNullableResultOpt<NUdf::TDataType<i64>, NUdf::TDataType<NUdf::TInterval>, false>, TUnaryStub>(registry, integral);
Expand Down Expand Up @@ -869,6 +877,13 @@ void RegisterWideToDatetimeCasts(IBuiltinFunctionRegistry& registry) {
RegisterWideToDateCastsImpl<NUdf::TDataType<ui64>, NUdf::TDataType<NUdf::TTzDatetime>, NUdf::MAX_DATETIME - 1U>(registry);
}

// Registers the 32/64-bit integer -> Date32 casts.
// Signed sources pass both NUdf::MAX_DATE32 and NUdf::MIN_DATE32; unsigned sources go through the
// generic date helper and pass only the upper bound.
void RegisterWideToBigDateCasts(IBuiltinFunctionRegistry& registry) {
    using TTarget = NUdf::TDataType<NUdf::TDate32>;

    // signed inputs: bounded on both sides
    RegisterWideToBigDateCastsImpl<NUdf::TDataType<i32>, TTarget, NUdf::MAX_DATE32, NUdf::MIN_DATE32>(registry);
    RegisterWideToBigDateCastsImpl<NUdf::TDataType<i64>, TTarget, NUdf::MAX_DATE32, NUdf::MIN_DATE32>(registry);

    // unsigned inputs: upper bound only
    RegisterWideToDateCastsImpl<NUdf::TDataType<ui32>, TTarget, NUdf::MAX_DATE32>(registry);
    RegisterWideToDateCastsImpl<NUdf::TDataType<ui64>, TTarget, NUdf::MAX_DATE32>(registry);
}

void RegisterWideToTimestampCasts(IBuiltinFunctionRegistry& registry) {
RegisterWideToDateCastsImpl<NUdf::TDataType<i8>, NUdf::TDataType<NUdf::TTimestamp>>(registry);
RegisterWideToDateCastsImpl<NUdf::TDataType<i16>, NUdf::TDataType<NUdf::TTimestamp>>(registry);
Expand Down Expand Up @@ -922,6 +937,11 @@ void RegisterWideToShortIntegralCasts(IBuiltinFunctionRegistry& registry) {
RegisterWideToShortCastsImpl<NUdf::TDataType<NUdf::TDate>, NUdf::TDataType<ui8>>(registry);
RegisterWideToShortCastsImpl<NUdf::TDataType<NUdf::TDate>, NUdf::TDataType<i16>>(registry);

RegisterWideToShortCastsImpl<NUdf::TDataType<NUdf::TDate32>, NUdf::TDataType<i8>>(registry);
RegisterWideToShortCastsImpl<NUdf::TDataType<NUdf::TDate32>, NUdf::TDataType<ui8>>(registry);
RegisterWideToShortCastsImpl<NUdf::TDataType<NUdf::TDate32>, NUdf::TDataType<i16>>(registry);
RegisterWideToShortCastsImpl<NUdf::TDataType<NUdf::TDate32>, NUdf::TDataType<ui16>>(registry);

RegisterWideToShortCastsImpl<NUdf::TDataType<NUdf::TTzDate>, NUdf::TDataType<i8>>(registry);
RegisterWideToShortCastsImpl<NUdf::TDataType<NUdf::TTzDate>, NUdf::TDataType<ui8>>(registry);
RegisterWideToShortCastsImpl<NUdf::TDataType<NUdf::TTzDate>, NUdf::TDataType<i16>>(registry);
Expand Down Expand Up @@ -958,6 +978,7 @@ void RegisterWideToShortIntegralCasts(IBuiltinFunctionRegistry& registry) {
RegisterWideToUnsignedCasts<NUdf::TDataType<NUdf::TInterval>>(registry);

RegisterWideToDateCasts(registry);
RegisterWideToBigDateCasts(registry);
RegisterWideToDatetimeCasts(registry);
RegisterWideToTimestampCasts(registry);
RegisterWideToIntervalCasts(registry);
Expand Down Expand Up @@ -1008,6 +1029,13 @@ void RegisterToDateConvert(IBuiltinFunctionRegistry& registry) {
RegisterConvert<NUdf::TDataType<i64>, NUdf::TDataType<NUdf::TInterval>>(registry);
}

// Registers the conversions from 8- and 16-bit integers (signed and unsigned) to Date32.
void RegisterToBigDateConvert(IBuiltinFunctionRegistry& registry) {
    using TTarget = NUdf::TDataType<NUdf::TDate32>;

    RegisterConvert<NUdf::TDataType<i8>, TTarget>(registry);
    RegisterConvert<NUdf::TDataType<ui8>, TTarget>(registry);
    RegisterConvert<NUdf::TDataType<i16>, TTarget>(registry);
    RegisterConvert<NUdf::TDataType<ui16>, TTarget>(registry);
}

template <typename TInput, typename TOutput, bool Tz = false>
void RegisterRescaleOpt(IBuiltinFunctionRegistry& registry) {
RegisterFunctionImpl<TDatetimeRescale<typename TInput::TLayout, typename TOutput::TLayout, Tz>, TUnaryArgsOpt<TInput, TOutput, false>, TUnaryStub>(registry, convert);
Expand Down Expand Up @@ -1115,6 +1143,7 @@ void RegisterConvert(IBuiltinFunctionRegistry& registry) {
RegisterStringConvert<NUdf::TDataType<NUdf::TJson>, NUdf::TDataType<NUdf::TUtf8>>(registry);

RegisterFromDateConvert<NUdf::TDataType<NUdf::TDate>>(registry);
RegisterFromDateConvert<NUdf::TDataType<NUdf::TDate32>>(registry);
RegisterFromDateConvert<NUdf::TDataType<NUdf::TDatetime>>(registry);
RegisterFromDateConvert<NUdf::TDataType<NUdf::TTimestamp>>(registry);
RegisterFromDateConvert<NUdf::TDataType<NUdf::TInterval>>(registry);
Expand All @@ -1125,6 +1154,7 @@ void RegisterConvert(IBuiltinFunctionRegistry& registry) {
RegisterTzDateimeConvert(registry);
RegisterDatetimeRescale(registry);
RegisterToDateConvert(registry);
RegisterToBigDateConvert(registry);

RegisterDecimalConvert(registry);

Expand Down
146 changes: 141 additions & 5 deletions ydb/library/yql/minikql/mkql_type_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,12 +160,14 @@ bool IsValidValue(NUdf::EDataSlot type, const NUdf::TUnboxedValuePod& value) {
MKQL_ENSURE(false, "Incorrect data slot: " << (ui32)type);
}

bool IsLeapYear(ui32 year) {
// Returns true when `year` is a leap year under the 4/100/400 divisibility rule.
// Negative years are shifted up by one before the test, so -1 is evaluated like 0, -5 like -4, etc.
// NOTE(review): presumably because the year numbering used here has no year 0 - confirm with callers.
bool IsLeapYear(i32 year) {
    const i32 shifted = Y_UNLIKELY(year < 0) ? year + 1 : year;

    // Century years are leap only when divisible by 400.
    if (shifted % 100 == 0) {
        return shifted % 400 == 0;
    }

    return shifted % 4 == 0;
}

Expand Down Expand Up @@ -201,7 +203,7 @@ ui32 LeapDaysSinceEpoch(ui32 yearsSinceEpoch) {
return leapDaysCount;
}

void WriteDate(IOutputStream& out, ui32 year, ui32 month, ui32 day) {
// Writes `year-MM-DD` to `out`: the year is printed as-is (no padding), month and day are
// left-padded with '0' to two characters.
void WriteDate(IOutputStream& out, i32 year, ui32 month, ui32 day) {
    out << year;
    out << '-' << LeftPad(month, 2, '0');
    out << '-' << LeftPad(day, 2, '0');
}

Expand All @@ -215,6 +217,17 @@ bool WriteDate(IOutputStream& out, ui16 value) {
return true;
}

// Formats a Date32 day number as text.
// Returns false and writes nothing when SplitDate32 rejects the value; otherwise writes the date
// via WriteDate and returns true.
bool WriteDate32(IOutputStream& out, i32 value) {
    i32 year;
    ui32 month, day;

    const bool splitOk = SplitDate32(value, year, month, day);
    if (splitOk) {
        WriteDate(out, year, month, day);
    }

    return splitOk;
}

void SplitTime(ui32 value, ui32& hour, ui32& min, ui32& sec) {
hour = value / 3600;
value -= hour * 3600;
Expand Down Expand Up @@ -392,6 +405,12 @@ NUdf::TUnboxedValuePod ValueToString(NUdf::EDataSlot type, NUdf::TUnboxedValuePo
}
break;

case NUdf::EDataSlot::Date32:
if (!WriteDate32(out, value.Get<i32>())) {
return NUdf::TUnboxedValuePod();
}
break;

case NUdf::EDataSlot::Datetime:
if (!WriteDatetime(out, value.Get<ui32>())) {
return NUdf::TUnboxedValuePod();
Expand Down Expand Up @@ -600,6 +619,9 @@ bool SplitDateUncached(ui16 value, ui32& year, ui32& month, ui32& day) {

namespace {

// Number of days in one full 400-year leap-rule cycle. The TDateTable constructor asserts that
// the day counts of SOLAR_CYCLE_YEARS consecutive years add up to exactly this value.
constexpr i32 SOLAR_CYCLE_DAYS = 146097;
// Number of years after which the leap-year pattern implemented by IsLeapYear repeats.
constexpr i32 SOLAR_CYCLE_YEARS = 400;

class TDateTable {
public:
TDateTable() {
Expand All @@ -611,6 +633,24 @@ class TDateTable {
ui32 weekOfYear = 52;
ui32 weekOfYearIso8601 = 1;

auto yearDays = 0u;
for (auto i = 0u; i < Years_.size(); ++i) {
Years_[i] = yearDays;
yearDays += IsLeapYear(i + NUdf::MIN_YEAR) ? 366 : 365;
}
Y_ASSERT(yearDays == SOLAR_CYCLE_DAYS);

Months_[0] = 0;
LeapMonths_[0] = 0;
ui16 monthDays = 0;
ui16 leapMonthDays = 0;
for (auto month = 1u; month < Months_.size(); ++month) {
Months_[month] = monthDays;
LeapMonths_[month] = leapMonthDays;
monthDays += GetMonthLength(month, false);
leapMonthDays += GetMonthLength(month, true);
}

for (ui16 date = 0; date < Days_.size(); ++date) {
ui32 year, month, day;
Y_ABORT_UNLESS(SplitDateUncached(date, year, month, day));
Expand Down Expand Up @@ -649,6 +689,28 @@ class TDateTable {
return true;
}

// Splits a signed day number into year, month and day.
// The value is reduced to a non-negative offset inside one 400-year cycle, then resolved through
// the cumulative Years_ table and the Months_/LeapMonths_ tables.
// Years computed as zero or below are decremented by one, so year 0 is never produced.
// Always returns true; the input range is not validated here.
bool SplitDate32(i32 value, i32& year, ui32& month, ui32& day) const {
// `/` and `%` truncate toward zero, so a negative remainder is fixed up below to get floor semantics.
auto solarCycles = value / SOLAR_CYCLE_DAYS;
value = value % SOLAR_CYCLE_DAYS;
if (Y_UNLIKELY(value < 0)) {
solarCycles -= 1;
value += SOLAR_CYCLE_DAYS;
}
// Last table entry whose start offset is <= value, i.e. the year inside the cycle.
auto y = std::upper_bound(Years_.cbegin(), Years_.cend(), value) - 1;
Y_ASSERT(y >= Years_.cbegin());
// From here on `value` is the zero-based day within the year.
value -= *y;
year = NUdf::MIN_YEAR + SOLAR_CYCLE_YEARS * solarCycles + std::distance(Years_.cbegin(), y);
// Skip over year 0: computed 0 becomes -1, -1 becomes -2, and so on.
if (Y_UNLIKELY(year <= 0)) {
--year;
}
auto& months = IsLeapYear(year) ? LeapMonths_ : Months_;
// The search starts at index 1 because entry 0 is a placeholder; the result index is the 1-based month.
auto m = std::upper_bound(months.cbegin() + 1, months.cend(), value) - 1;
Y_ASSERT(m >= months.cbegin());
month = std::distance(months.cbegin(), m);
day = 1 + value - *m;
return true;
}

bool GetDateOffset(ui32 year, ui32 month, ui32 day, ui16& value) const {
if (Y_UNLIKELY(year < NUdf::MIN_YEAR - 1U || year > NUdf::MAX_YEAR
|| (year == NUdf::MAX_YEAR && (day > 1U || month > 1U))
Expand Down Expand Up @@ -692,6 +754,31 @@ class TDateTable {
return true;
}

// Converts a calendar date into a signed Date32 day number.
//   year  - must be non-zero and inside [NUdf::MIN_YEAR32, NUdf::MAX_YEAR32)
//   month - 1..12
//   day   - 1..length of the given month in the given year
//   value - receives the day number; left untouched on failure
// Returns false for any out-of-range component, true otherwise.
bool MakeDate32(i32 year, ui32 month, ui32 day, i32& value) const {
    if (Y_UNLIKELY(year == 0 || year < NUdf::MIN_YEAR32 || year >= NUdf::MAX_YEAR32)) {
        return false;
    }

    // Validate the month up front: it is handed to GetMonthLength and later used as an index into
    // the 13-entry Months_/LeapMonths_ tables, so anything outside 1..12 must be rejected here.
    if (Y_UNLIKELY(month < 1 || month > 12)) {
        return false;
    }

    auto isLeap = IsLeapYear(year);
    auto monthLength = GetMonthLength(month, isLeap);

    if (Y_UNLIKELY(day < 1 || day > monthLength)) {
        return false;
    }

    // Close the gap left by the missing year 0 so that years form a contiguous sequence.
    if (Y_UNLIKELY(year < 0)) {
        year += 1;
    }
    // Re-base to the first year covered by the Years_ table.
    year -= NUdf::MIN_YEAR;
    if (Y_LIKELY(year%SOLAR_CYCLE_YEARS >= 0)) {
        value = (year / SOLAR_CYCLE_YEARS) * SOLAR_CYCLE_DAYS + Years_[year % SOLAR_CYCLE_YEARS];
    } else {
        // `%` truncates toward zero: a negative remainder means the year lies before the table's
        // first year, so step one whole cycle back and index from the end of the table.
        value = (year / SOLAR_CYCLE_YEARS - 1) * SOLAR_CYCLE_DAYS + Years_[SOLAR_CYCLE_YEARS + year % SOLAR_CYCLE_YEARS];
    }
    value += isLeap ? LeapMonths_[month] : Months_[month];
    value += day - 1;
    return true;
}

bool EnrichByOffset(ui16 value, ui32& dayOfYear, ui32& weekOfYear, ui32& weekOfYearIso8601, ui32& dayOfWeek) const {
if (Y_UNLIKELY(value >= Days_.size())) {
return false;
Expand Down Expand Up @@ -729,6 +816,9 @@ class TDateTable {

std::array<ui16, NUdf::MAX_YEAR - NUdf::MIN_YEAR + 1> YearsOffsets_; // start of linear date for each year
std::array<TDayInfo, NUdf::MAX_DATE + 2> Days_; // packed info for each date
std::array<ui32, SOLAR_CYCLE_YEARS> Years_; // start of linear date for each year in [1970, 2370) - one solar cycle period (400 entries, end exclusive)
std::array<ui16, 13> Months_; // Months_[m]: days preceding month m (1-based) in a non-leap year; entry 0 is a placeholder set to 0
std::array<ui16, 13> LeapMonths_; // same as Months_, but for a leap year
};

}
Expand All @@ -737,10 +827,18 @@ bool SplitDate(ui16 value, ui32& year, ui32& month, ui32& day) {
return TDateTable::Instance().SplitDate(value, year, month, day);
}

// Free-function entry point: forwards to the shared TDateTable instance.
bool SplitDate32(i32 value, i32& year, ui32& month, ui32& day) {
    auto&& table = TDateTable::Instance();
    return table.SplitDate32(value, year, month, day);
}

// Free-function entry point: forwards to the shared TDateTable instance.
bool MakeDate(ui32 year, ui32 month, ui32 day, ui16& value) {
    auto&& table = TDateTable::Instance();
    return table.MakeDate(year, month, day, value);
}

// Free-function entry point: forwards to the shared TDateTable instance.
bool MakeDate32(i32 year, ui32 month, ui32 day, i32& value) {
    auto&& table = TDateTable::Instance();
    return table.MakeDate32(year, month, day, value);
}

bool SplitDatetime(ui32 value, ui32& year, ui32& month, ui32& day, ui32& hour, ui32& min, ui32& sec) {
if (value >= NUdf::MAX_DATETIME) {
return false;
Expand Down Expand Up @@ -988,6 +1086,45 @@ NUdf::TUnboxedValuePod ParseDate(NUdf::TStringRef buf) {
return NUdf::TUnboxedValuePod(value);
}

// Parses a Date32 literal of the form `[+|-]<year>-<month>-<day>`.
// An optional leading '-' negates the year; a leading '+' is accepted and ignored. The three
// numeric fields are read with ParseNumber using the limits 6, 2 and 2 respectively
// (NOTE(review): presumably the maximum digit count - confirm against ParseNumber).
// Returns the day number wrapped in a TUnboxedValuePod, or an empty TUnboxedValuePod when the
// text is malformed, has trailing characters, or MakeDate32 rejects the date.
NUdf::TUnboxedValuePod ParseDate32(NUdf::TStringRef buf) {
    ui32 year = 0;
    ui32 month = 0;
    ui32 day = 0;
    ui32 pos = 0;
    bool beforeChrist = false;
    if (pos < buf.Size()) {
        char c = buf.Data()[pos];
        if (c == '-') {
            beforeChrist = true;
            ++pos;
        } else if (c == '+') {
            ++pos;
        }
    }

    if (!ParseNumber(pos, buf, year, 6) || pos == buf.Size() || buf.Data()[pos] != '-') {
        return NUdf::TUnboxedValuePod();
    }
    // Convert to signed before negating: negating the unsigned value would wrap around and then
    // depend on an implementation-defined unsigned -> signed conversion.
    const i32 signedYear = static_cast<i32>(year);
    const i32 iyear = beforeChrist ? -signedYear : signedYear;

    // skip '-'
    ++pos;
    if (!ParseNumber(pos, buf, month, 2) || pos == buf.Size() || buf.Data()[pos] != '-') {
        return NUdf::TUnboxedValuePod();
    }

    // skip '-'
    ++pos;
    if (!ParseNumber(pos, buf, day, 2) || pos != buf.Size()) {
        return NUdf::TUnboxedValuePod();
    }

    i32 value;
    if (Y_LIKELY(MakeDate32(iyear, month, day, value))) {
        return NUdf::TUnboxedValuePod(value);
    }

    return NUdf::TUnboxedValuePod();
}

NUdf::TUnboxedValuePod ParseTzDate(NUdf::TStringRef str) {
TStringBuf full = str;
TStringBuf buf;
Expand Down Expand Up @@ -1705,8 +1842,7 @@ NUdf::TUnboxedValuePod ValueFromString(NUdf::EDataSlot type, NUdf::TStringRef bu
}

case NUdf::EDataSlot::Date32:
//TODO
return {};
return ParseDate32(buf);

case NUdf::EDataSlot::Datetime64:
//TODO
Expand Down
4 changes: 3 additions & 1 deletion ydb/library/yql/minikql/mkql_type_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ TStringBuf AdaptLegacyYqlType(const TStringBuf& type);

bool IsValidValue(NUdf::EDataSlot type, const NUdf::TUnboxedValuePod& value);

bool IsLeapYear(ui32 year);
bool IsLeapYear(i32 year);

ui32 GetMonthLength(ui32 month, bool isLeap);

Expand All @@ -32,8 +32,10 @@ bool ParseUuid(NUdf::TStringRef buf, void* output, bool shortForm=false);
bool IsValidDecimal(NUdf::TStringRef buf);

bool MakeDate(ui32 year, ui32 month, ui32 day, ui16& value);
// Builds a signed Date32 day number from year/month/day. Returns false when year is 0 or outside
// [NUdf::MIN_YEAR32, NUdf::MAX_YEAR32), or when day is not valid for the given month.
bool MakeDate32(i32 year, ui32 month, ui32 day, i32& value);
bool MakeTime(ui32 hour, ui32 minute, ui32 second, ui32& value);
bool SplitDate(ui16 value, ui32& year, ui32& month, ui32& day);
// Splits a signed Date32 day number into year/month/day; the resulting year is never 0.
bool SplitDate32(i32 value, i32& year, ui32& month, ui32& day);
bool SplitDatetime(ui32 value, ui32& year, ui32& month, ui32& day, ui32& hour, ui32& min, ui32& sec);
bool SplitTimestamp(ui64 value, ui32& year, ui32& month, ui32& day, ui32& hour, ui32& min, ui32& sec, ui32& usec);
bool SplitInterval(i64 value, bool& sign, ui32& day, ui32& hour, ui32& min, ui32& sec, ui32& usec);
Expand Down
Loading