Skip to content

Commit 1f95ceb

Browse files
committed
fix: interpret timestamp strings as micros or nanos
Resolves: apache#8971 (comment)
1 parent 5f30948 commit 1f95ceb

File tree

2 files changed

+80
-12
lines changed

2 files changed

+80
-12
lines changed

api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java

+15-6
Original file line number | Diff line number | Diff line change
@@ -49,10 +49,15 @@ public class ExpressionUtil {
4949
private static final Pattern DATE = Pattern.compile("\\d{4}-\\d{2}-\\d{2}");
5050
private static final Pattern TIME = Pattern.compile("\\d{2}:\\d{2}(:\\d{2}(.\\d{1,9})?)?");
5151
private static final Pattern TIMESTAMP =
52-
Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,9})?)?");
52+
Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,6})?)?");
53+
private static final Pattern TIMESTAMPNS =
54+
Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{7,9})?)?");
5355
private static final Pattern TIMESTAMPTZ =
5456
Pattern.compile(
55-
"\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,9})?)?([-+]\\d{2}:\\d{2}|Z)");
57+
"\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,6})?)?([-+]\\d{2}:\\d{2}|Z)");
58+
private static final Pattern TIMESTAMPTZNS =
59+
Pattern.compile(
60+
"\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{7,9})?)?([-+]\\d{2}:\\d{2}|Z)");
5661
static final int LONG_IN_PREDICATE_ABBREVIATION_THRESHOLD = 10;
5762
private static final int LONG_IN_PREDICATE_ABBREVIATION_MIN_GAIN = 5;
5863

@@ -534,10 +539,8 @@ private static String sanitize(Literal<?> literal, long nowMillis, int today) {
534539
} else if (literal instanceof Literals.DateLiteral) {
535540
return sanitizeDate(((Literals.DateLiteral) literal).value(), today);
536541
} else if (literal instanceof Literals.TimestampLiteral) {
537-
return sanitizeTimestamp(
538-
((Literals.TimestampLiteral) literal).unit(),
539-
((Literals.TimestampLiteral) literal).value(),
540-
nowMillis);
542+
Literals.TimestampLiteral tsLiteral = ((Literals.TimestampLiteral) literal);
543+
return sanitizeTimestamp(tsLiteral.unit(), tsLiteral.value(), nowMillis);
541544
} else if (literal instanceof Literals.TimeLiteral) {
542545
return "(time)";
543546
} else if (literal instanceof Literals.IntegerLiteral) {
@@ -609,9 +612,15 @@ private static String sanitizeString(CharSequence value, long nowMillis, int tod
609612
Literal<Integer> date = Literal.of(value).to(Types.DateType.get());
610613
return sanitizeDate(date.value(), today);
611614
} else if (TIMESTAMP.matcher(value).matches()) {
615+
Literal<Long> ts = Literal.of(value).to(Types.TimestampType.microsWithoutZone());
616+
return sanitizeTimestamp(ChronoUnit.MICROS, ts.value(), nowMillis);
617+
} else if (TIMESTAMPNS.matcher(value).matches()) {
612618
Literal<Long> ts = Literal.of(value).to(Types.TimestampType.nanosWithoutZone());
613619
return sanitizeTimestamp(ChronoUnit.NANOS, ts.value(), nowMillis);
614620
} else if (TIMESTAMPTZ.matcher(value).matches()) {
621+
Literal<Long> ts = Literal.of(value).to(Types.TimestampType.microsWithZone());
622+
return sanitizeTimestamp(ChronoUnit.MICROS, ts.value(), nowMillis);
623+
} else if (TIMESTAMPTZNS.matcher(value).matches()) {
615624
Literal<Long> ts = Literal.of(value).to(Types.TimestampType.nanosWithZone());
616625
return sanitizeTimestamp(ChronoUnit.NANOS, ts.value(), nowMillis);
617626
} else if (TIME.matcher(value).matches()) {

api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java

+65-6
Original file line number | Diff line number | Diff line change
@@ -43,11 +43,12 @@ public class TestExpressionUtil {
4343
Types.NestedField.required(2, "val", Types.IntegerType.get()),
4444
Types.NestedField.required(3, "val2", Types.IntegerType.get()),
4545
Types.NestedField.required(4, "ts", Types.TimestampType.microsWithoutZone()),
46-
Types.NestedField.required(5, "date", Types.DateType.get()),
47-
Types.NestedField.required(6, "time", Types.DateType.get()),
48-
Types.NestedField.optional(7, "data", Types.StringType.get()),
49-
Types.NestedField.optional(8, "measurement", Types.DoubleType.get()),
50-
Types.NestedField.optional(9, "test", Types.IntegerType.get()));
46+
Types.NestedField.required(5, "tsns", Types.TimestampType.nanosWithoutZone()),
47+
Types.NestedField.required(6, "date", Types.DateType.get()),
48+
Types.NestedField.required(7, "time", Types.DateType.get()),
49+
Types.NestedField.optional(8, "data", Types.StringType.get()),
50+
Types.NestedField.optional(9, "measurement", Types.DoubleType.get()),
51+
Types.NestedField.optional(10, "test", Types.IntegerType.get()));
5152

5253
private static final Types.StructType STRUCT = SCHEMA.asStruct();
5354

@@ -461,7 +462,9 @@ public void testSanitizeTimestamp() {
461462
"2022-04-29T23:49:51",
462463
"2022-04-29T23:49:51.123456",
463464
"2022-04-29T23:49:51-07:00",
464-
"2022-04-29T23:49:51.123456+01:00")) {
465+
"2022-04-29T23:49:51.123456+01:00",
466+
"2022-04-29T23:49:51.123456789",
467+
"2022-04-29T23:49:51.123456789+01:00")) {
465468
assertEquals(
466469
Expressions.equal("test", "(timestamp)"),
467470
ExpressionUtil.sanitize(Expressions.equal("test", timestamp)));
@@ -497,6 +500,13 @@ public void testSanitizeTimestampAboutNow() {
497500
Expression.Operation.EQ,
498501
"test",
499502
Literal.of(nowLocal).to(Types.TimestampType.microsWithoutZone()))));
503+
assertEquals(
504+
Expressions.equal("test", "(timestamp-about-now)"),
505+
ExpressionUtil.sanitize(
506+
Expressions.predicate(
507+
Expression.Operation.EQ,
508+
"test",
509+
Literal.of(nowLocal).to(Types.TimestampType.nanosWithoutZone()))));
500510

501511
assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", nowLocal)))
502512
.as("Sanitized string should be identical except for descriptive literal")
@@ -523,6 +533,13 @@ public void testSanitizeTimestampPast() {
523533
Expression.Operation.EQ,
524534
"test",
525535
Literal.of(ninetyMinutesAgoLocal).to(Types.TimestampType.microsWithoutZone()))));
536+
assertEquals(
537+
Expressions.equal("test", "(timestamp-1-hours-ago)"),
538+
ExpressionUtil.sanitize(
539+
Expressions.predicate(
540+
Expression.Operation.EQ,
541+
"test",
542+
Literal.of(ninetyMinutesAgoLocal).to(Types.TimestampType.nanosWithoutZone()))));
526543

527544
assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", ninetyMinutesAgoLocal)))
528545
.as("Sanitized string should be identical except for descriptive literal")
@@ -549,6 +566,13 @@ public void testSanitizeTimestampLastWeek() {
549566
Expression.Operation.EQ,
550567
"test",
551568
Literal.of(lastWeekLocal).to(Types.TimestampType.microsWithoutZone()))));
569+
assertEquals(
570+
Expressions.equal("test", "(timestamp-7-days-ago)"),
571+
ExpressionUtil.sanitize(
572+
Expressions.predicate(
573+
Expression.Operation.EQ,
574+
"test",
575+
Literal.of(lastWeekLocal).to(Types.TimestampType.nanosWithoutZone()))));
552576

553577
assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", lastWeekLocal)))
554578
.as("Sanitized string should be identical except for descriptive literal")
@@ -576,6 +600,13 @@ public void testSanitizeTimestampFuture() {
576600
"test",
577601
Literal.of(ninetyMinutesFromNowLocal)
578602
.to(Types.TimestampType.microsWithoutZone()))));
603+
assertEquals(
604+
Expressions.equal("test", "(timestamp-1-hours-from-now)"),
605+
ExpressionUtil.sanitize(
606+
Expressions.predicate(
607+
Expression.Operation.EQ,
608+
"test",
609+
Literal.of(ninetyMinutesFromNowLocal).to(Types.TimestampType.nanosWithoutZone()))));
579610

580611
assertThat(
581612
ExpressionUtil.toSanitizedString(Expressions.equal("test", ninetyMinutesFromNowLocal)))
@@ -599,6 +630,13 @@ public void testSanitizeTimestamptzAboutNow() {
599630
Expression.Operation.EQ,
600631
"test",
601632
Literal.of(nowUtc).to(Types.TimestampType.microsWithZone()))));
633+
assertEquals(
634+
Expressions.equal("test", "(timestamp-about-now)"),
635+
ExpressionUtil.sanitize(
636+
Expressions.predicate(
637+
Expression.Operation.EQ,
638+
"test",
639+
Literal.of(nowUtc).to(Types.TimestampType.nanosWithZone()))));
602640

603641
assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", nowUtc)))
604642
.as("Sanitized string should be identical except for descriptive literal")
@@ -620,6 +658,13 @@ public void testSanitizeTimestamptzPast() {
620658
Expression.Operation.EQ,
621659
"test",
622660
Literal.of(ninetyMinutesAgoUtc).to(Types.TimestampType.microsWithZone()))));
661+
assertEquals(
662+
Expressions.equal("test", "(timestamp-1-hours-ago)"),
663+
ExpressionUtil.sanitize(
664+
Expressions.predicate(
665+
Expression.Operation.EQ,
666+
"test",
667+
Literal.of(ninetyMinutesAgoUtc).to(Types.TimestampType.nanosWithZone()))));
623668

624669
assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", ninetyMinutesAgoUtc)))
625670
.as("Sanitized string should be identical except for descriptive literal")
@@ -641,6 +686,13 @@ public void testSanitizeTimestamptzLastWeek() {
641686
Expression.Operation.EQ,
642687
"test",
643688
Literal.of(lastWeekUtc).to(Types.TimestampType.microsWithZone()))));
689+
assertEquals(
690+
Expressions.equal("test", "(timestamp-7-days-ago)"),
691+
ExpressionUtil.sanitize(
692+
Expressions.predicate(
693+
Expression.Operation.EQ,
694+
"test",
695+
Literal.of(lastWeekUtc).to(Types.TimestampType.nanosWithZone()))));
644696

645697
assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", lastWeekUtc)))
646698
.as("Sanitized string should be identical except for descriptive literal")
@@ -662,6 +714,13 @@ public void testSanitizeTimestamptzFuture() {
662714
Expression.Operation.EQ,
663715
"test",
664716
Literal.of(ninetyMinutesFromNowUtc).to(Types.TimestampType.microsWithZone()))));
717+
assertEquals(
718+
Expressions.equal("test", "(timestamp-1-hours-from-now)"),
719+
ExpressionUtil.sanitize(
720+
Expressions.predicate(
721+
Expression.Operation.EQ,
722+
"test",
723+
Literal.of(ninetyMinutesFromNowUtc).to(Types.TimestampType.nanosWithZone()))));
665724

666725
assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", ninetyMinutesFromNowUtc)))
667726
.as("Sanitized string should be identical except for descriptive literal")

0 commit comments

Comments (0)