Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions ydb/core/external_sources/object_storage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ struct TObjectStorageExternalSource : public IExternalSource {
for (const auto& column: json.GetArray()) {
*objectStorage.add_partitioned_by() = column;
}
} else if (IsIn({"file_pattern"sv, "data.interval.unit"sv, "data.datetime.format_name"sv, "data.datetime.format"sv, "data.timestamp.format_name"sv, "data.timestamp.format"sv, "csv_delimiter"sv}, lowerKey)) {
} else if (IsIn({"file_pattern"sv, "data.interval.unit"sv, "data.datetime.format_name"sv, "data.datetime.format"sv, "data.timestamp.format_name"sv, "data.timestamp.format"sv, "data.date.format"sv, "csv_delimiter"sv}, lowerKey)) {
objectStorage.mutable_format_setting()->insert({lowerKey, value});
} else {
ythrow TExternalSourceException() << "Unknown attribute " << key;
Expand Down Expand Up @@ -196,7 +196,7 @@ struct TObjectStorageExternalSource : public IExternalSource {
continue;
}

if (IsIn({ "data.datetime.format_name"sv, "data.datetime.format"sv, "data.timestamp.format_name"sv, "data.timestamp.format"sv}, key)) {
if (IsIn({ "data.datetime.format_name"sv, "data.datetime.format"sv, "data.timestamp.format_name"sv, "data.timestamp.format"sv, "data.date.format"sv}, key)) {
continue;
}

Expand Down Expand Up @@ -257,6 +257,10 @@ struct TObjectStorageExternalSource : public IExternalSource {
continue;
}

if (key == "data.date.format"sv) {
continue;
}

if (matchAllSettings) {
issues.AddIssue(MakeErrorIssue(Ydb::StatusIds::BAD_REQUEST, "unknown format setting " + key));
}
Expand Down
4 changes: 4 additions & 0 deletions ydb/library/yql/providers/pq/provider/yql_pq_datasource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,10 @@ class TPqDataSourceProvider : public TDataProviderBase {
settings.Add(ctx.NewList(read.Pos(), std::move(pair)));
}

if (topicKeyParser.GetDateFormat()) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

А здесь для date нет общеизвестных форматов POSIX/ISO, как для Timestamp?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

POSIX/ISO для даты выглядят одинаково — %Y-%m-%d

По совместительству это совпадает с дефолтным парсингом, поэтому показалось бессмысленным добавлять несколько одинаковых форматов.

settings.Add(topicKeyParser.GetDateFormat());
}

auto builder = Build<TPqReadTopic>(ctx, read.Pos())
.World(read.World())
.DataSource(read.DataSource())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ class TPqDataSourceTypeAnnotationTransformer : public TVisitorTransformerBase {
}

TStatus HandleReadTopic(TExprBase input, TExprContext& ctx) {
if (!EnsureMinMaxArgsCount(input.Ref(), 6, 8, ctx)) {
if (!EnsureMinMaxArgsCount(input.Ref(), 6, 9, ctx)) {
return TStatus::Error;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ bool TTopicKeyParser::Parse(const TExprNode& expr, TExprNode::TPtr readSettings,
TimestampFormat = readSettings->Child(i);
continue;
}
if (readSettings->Child(i)->Head().IsAtom("data.date.format")) {
DateFormat = readSettings->Child(i);
continue;
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ class TTopicKeyParser {
return TimestampFormat;
}

// Returns the DateFormat member. TTopicKeyParser::Parse assigns it the
// read-settings child whose head atom is "data.date.format".
// NOTE(review): presumably empty when no such setting was parsed; the caller
// tests the result before adding it to the settings — confirm.
TExprNode::TPtr GetDateFormat() {
return DateFormat;
}

bool Parse(const TExprNode& expr, TExprNode::TPtr readSettings, TExprContext& ctx);

private:
Expand All @@ -60,6 +64,7 @@ class TTopicKeyParser {
TExprNode::TPtr DateTimeFormat;
TExprNode::TPtr TimestampFormatName;
TExprNode::TPtr TimestampFormat;
TExprNode::TPtr DateFormat;
TExprNode::TPtr UserSchema;
TExprNode::TPtr ColumnOrder;
};
Expand Down
4 changes: 4 additions & 0 deletions ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2247,6 +2247,10 @@ std::pair<NYql::NDq::IDqComputeActorAsyncInput*, IActor*> CreateS3ReadActor(
readSpec->Settings.timestamp_format = it->second;
}

if (const auto it = settings.find("data.date.format"); settings.cend() != it) {
readSpec->Settings.date_format = it->second;
}

if (readSpec->Settings.date_time_format_name == NDB::FormatSettings::DateTimeFormat::Unspecified && readSpec->Settings.date_time_format.empty()) {
readSpec->Settings.date_time_format_name = NDB::FormatSettings::DateTimeFormat::POSIX;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,15 @@ class TS3DataSinkTypeAnnotationTransformer : public TVisitorTransformerBase {
return true;
}

if (name == "data.date.format") {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

дописал

const auto& value = setting.Tail();
if (!EnsureAtom(value, ctx)) {
return false;
}

return true;
}

if (name == "csvdelimiter") {
const auto& value = setting.Tail();
if (!EnsureAtom(value, ctx)) {
Expand All @@ -292,7 +301,7 @@ class TS3DataSinkTypeAnnotationTransformer : public TVisitorTransformerBase {
return true;
};

if (!EnsureValidSettings(*input->Child(TS3Target::idx_Settings), {"compression", "partitionedby", "mode", "userschema", "data.datetime.formatname", "data.datetime.format", "data.timestamp.formatname", "data.timestamp.format", "csvdelimiter", "filepattern"}, validator, ctx)) {
if (!EnsureValidSettings(*input->Child(TS3Target::idx_Settings), {"compression", "partitionedby", "mode", "userschema", "data.datetime.formatname", "data.datetime.format", "data.timestamp.formatname", "data.timestamp.format", "data.date.format", "csvdelimiter", "filepattern"}, validator, ctx)) {
return TStatus::Error;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -708,6 +708,14 @@ class TS3DataSourceTypeAnnotationTransformer : public TVisitorTransformerBase {
return true;
}

if (name == "data.date.format"sv) {
TStringBuf unused;
if (!ExtractSettingValue(setting.Tail(), "data.date.format"sv, format, {}, ctx, unused)) {
return false;
}
return true;
}

if (name == "readmaxbytes"sv) {
TStringBuf unused;
if (!ExtractSettingValue(setting.Tail(), "read_max_bytes"sv, format, "raw"sv, ctx, unused)) {
Expand Down Expand Up @@ -785,7 +793,7 @@ class TS3DataSourceTypeAnnotationTransformer : public TVisitorTransformerBase {
};
if (!EnsureValidSettings(*input->Child(TS3Object::idx_Settings),
{ "compression"sv, "partitionedby"sv, "projection"sv, "data.interval.unit"sv, "constraints"sv,
"data.datetime.formatname"sv, "data.datetime.format"sv, "data.timestamp.formatname"sv, "data.timestamp.format"sv,
"data.datetime.formatname"sv, "data.datetime.format"sv, "data.timestamp.formatname"sv, "data.timestamp.format"sv, "data.date.format"sv,
"readmaxbytes"sv, "csvdelimiter"sv, "directories"sv, "filepattern"sv, "pathpattern"sv, "pathpatternvariant"sv }, validator, ctx))
{
return TStatus::Error;
Expand Down
8 changes: 8 additions & 0 deletions ydb/library/yql/providers/s3/provider/yql_s3_phy_opt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ TExprNode::TPtr GetTimestampFormat(const TExprNode& settings) {
return GetSetting(settings, "data.timestamp.format"sv);
}

// Looks up the "data.date.format" entry of `settings` through GetSetting and
// returns whatever GetSetting yields.
// NOTE(review): presumably empty when the setting is absent; the caller tests
// the result before forwarding it to the sink settings — confirm.
TExprNode::TPtr GetDateFormat(const TExprNode& settings) {
return GetSetting(settings, "data.date.format"sv);
}

TExprNode::TListType GetPartitionKeys(const TExprNode::TPtr& partBy) {
if (partBy) {
auto children = partBy->ChildrenList();
Expand Down Expand Up @@ -171,6 +175,10 @@ class TS3PhysicalOptProposalTransformer : public TOptimizeTransformerBase {
sinkOutputSettingsBuilder.Add(ctx.NewList(target.Pos(), std::move(pair)));
}

if (auto dateFormat = GetDateFormat(settings)) {
sinkOutputSettingsBuilder.Add(std::move(dateFormat));
}

const TStringBuf format = target.Format();
if (format != "raw" && format != "json_list") { // multipart
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -876,6 +876,11 @@ NDB::FormatSettings GetFormatSettings(const std::string_view& view) {
auto format = json["data.timestamp.format"].getString();
settings.timestamp_format = format;
}

if (json.has("data.date.format")) {
auto format = json["data.date.format"].getString();
settings.date_format = format;
}
}
return settings;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,41 @@
namespace NDB
{

void SerializationDate::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
namespace
{
writeDateText(DayNum(assert_cast<const ColumnUInt16 &>(column).getData()[row_num]), ostr);

/// Reads a date from `istr` into `date`.
/// When `settings.date_format` is non-empty the value is read with
/// readDateTextFormat using that format string; otherwise readDateText is used.
inline void readText(DayNum & date, ReadBuffer & istr, const FormatSettings & settings)
{
    const auto & custom_format = settings.date_format;

    if (custom_format.empty())
        readDateText(date, istr);
    else
        readDateTextFormat(date, istr, custom_format);
}

}

/// Writes the date stored at `row_num` of `column` to `ostr` as text.
/// A non-empty `settings.date_format` selects writeDateTextFormat with that
/// format string; an empty one selects writeDateText.
void SerializationDate::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
    const auto & days = assert_cast<const ColumnType &>(column).getData();
    auto value = DayNum(days[row_num]);

    if (settings.date_format.empty())
        writeDateText(value, ostr);
    else
        writeDateTextFormat(value, ostr, settings.date_format);
}

/// Forwards to deserializeTextEscaped with the same column, buffer and settings.
void SerializationDate::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextEscaped(column, istr, settings);
}

void SerializationDate::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
void SerializationDate::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
DayNum x;
readDateText(x, istr);
readText(x, istr, settings);
assert_cast<ColumnUInt16 &>(column).getData().push_back(x);
}

Expand All @@ -41,11 +62,11 @@ void SerializationDate::serializeTextQuoted(const IColumn & column, size_t row_n
writeChar('\'', ostr);
}

void SerializationDate::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
void SerializationDate::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
DayNum x;
assertChar('\'', istr);
readDateText(x, istr);
readText(x, istr, settings);
assertChar('\'', istr);
assert_cast<ColumnUInt16 &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
}
Expand All @@ -57,11 +78,11 @@ void SerializationDate::serializeTextJSON(const IColumn & column, size_t row_num
writeChar('"', ostr);
}

void SerializationDate::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
void SerializationDate::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
DayNum x;
assertChar('"', istr);
readDateText(x, istr);
readText(x, istr, settings);
assertChar('"', istr);
assert_cast<ColumnUInt16 &>(column).getData().push_back(x);
}
Expand All @@ -73,11 +94,24 @@ void SerializationDate::serializeTextCSV(const IColumn & column, size_t row_num,
writeChar('"', ostr);
}

void SerializationDate::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
void SerializationDate::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
LocalDate value;
readCSV(value, istr);
assert_cast<ColumnUInt16 &>(column).getData().push_back(value.getDayNum());
DayNum x;

if (istr.eof())
throwReadAfterEOF();

char maybe_quote = *istr.position();

if (maybe_quote == '\'' || maybe_quote == '\"')
++istr.position();

readText(x, istr, settings);

if (maybe_quote == '\'' || maybe_quote == '\"')
assertChar(maybe_quote, istr);

assert_cast<ColumnUInt16 &>(column).getData().push_back(x);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,42 @@

namespace NDB
{
void SerializationDate32::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const

namespace
{

inline void readText(ExtendedDayNum & date, ReadBuffer & istr, const FormatSettings & settings)
{
writeDateText(ExtendedDayNum(assert_cast<const ColumnInt32 &>(column).getData()[row_num]), ostr);
if (!settings.date_format.empty()) {
readDateTextFormat(date, istr, settings.date_format);
return;
}

readDateText(date, istr);
}

}

/// Writes the date stored at `row_num` of `column` to `ostr` as text.
/// A non-empty `settings.date_format` selects writeDateTextFormat with that
/// format string; an empty one selects writeDateText.
void SerializationDate32::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
    const auto & days = assert_cast<const ColumnType &>(column).getData();
    auto value = ExtendedDayNum(days[row_num]);

    if (settings.date_format.empty())
        writeDateText(value, ostr);
    else
        writeDateTextFormat(value, ostr, settings.date_format);
}

/// Forwards to deserializeTextEscaped with the same column, buffer and settings.
void SerializationDate32::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextEscaped(column, istr, settings);
}

void SerializationDate32::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
void SerializationDate32::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
ExtendedDayNum x;
readDateText(x, istr);
readText(x, istr, settings);
assert_cast<ColumnInt32 &>(column).getData().push_back(x);
}

Expand All @@ -37,11 +59,11 @@ void SerializationDate32::serializeTextQuoted(const IColumn & column, size_t row
writeChar('\'', ostr);
}

void SerializationDate32::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
void SerializationDate32::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
ExtendedDayNum x;
assertChar('\'', istr);
readDateText(x, istr);
readText(x, istr, settings);
assertChar('\'', istr);
assert_cast<ColumnInt32 &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
}
Expand All @@ -53,11 +75,11 @@ void SerializationDate32::serializeTextJSON(const IColumn & column, size_t row_n
writeChar('"', ostr);
}

void SerializationDate32::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
void SerializationDate32::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
ExtendedDayNum x;
assertChar('"', istr);
readDateText(x, istr);
readText(x, istr, settings);
assertChar('"', istr);
assert_cast<ColumnInt32 &>(column).getData().push_back(x);
}
Expand All @@ -69,10 +91,23 @@ void SerializationDate32::serializeTextCSV(const IColumn & column, size_t row_nu
writeChar('"', ostr);
}

void SerializationDate32::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
void SerializationDate32::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
LocalDate value;
readCSV(value, istr);
assert_cast<ColumnInt32 &>(column).getData().push_back(value.getExtenedDayNum());
ExtendedDayNum x;

if (istr.eof())
throwReadAfterEOF();

char maybe_quote = *istr.position();

if (maybe_quote == '\'' || maybe_quote == '\"')
++istr.position();

readText(x, istr, settings);

if (maybe_quote == '\'' || maybe_quote == '\"')
assertChar(maybe_quote, istr);

assert_cast<ColumnInt32 &>(column).getData().push_back(x);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ struct FormatSettings
bool decimal_trailing_zeros = false;
String date_time_format;
String timestamp_format;
String date_format;

enum class DateTimeFormat {
Unspecified,
Expand Down
Loading
Loading