Skip to content

Commit

Permalink
[orc] Optimize ORC timestamp type read (apache#2333)
Browse files Browse the repository at this point in the history
  • Loading branch information
JingsongLi authored Nov 17, 2023
1 parent 25db8b4 commit 88d5311
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 35 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ public static int toInternal(java.sql.Time time) {
return (int) (ts % MILLIS_PER_DAY);
}

/**
 * Builds an internal {@link Timestamp} from an epoch-millisecond value plus a nanosecond part.
 *
 * <p>The offset that {@code LOCAL_TZ} reports for {@code millis} is added to {@code millis}
 * before the value is handed to {@link Timestamp#fromEpochMillis(long, int)}; {@code nanos} is
 * forwarded unchanged.
 *
 * <p>NOTE(review): {@code nanos} is presumably the nano-of-millisecond part rather than the
 * full nano-of-second value - confirm against {@code Timestamp.fromEpochMillis}.
 *
 * @param millis epoch milliseconds to convert
 * @param nanos nanosecond part passed through as the second argument of {@code fromEpochMillis}
 * @return the resulting internal timestamp
 */
public static Timestamp toInternal(long millis, int nanos) {
    long shiftedMillis = millis + LOCAL_TZ.getOffset(millis);
    return Timestamp.fromEpochMillis(shiftedMillis, nanos);
}

/**
 * Converts a {@link LocalDate} to its internal {@code int} form by passing its year, month
 * and day-of-month to {@code ymdToUnixDate}.
 *
 * <p>NOTE(review): the result is presumably the number of days since the Unix epoch - confirm
 * against {@code ymdToUnixDate}.
 *
 * @param date the date to convert
 * @return the value computed by {@code ymdToUnixDate} for the date's components
 */
public static int toInternal(LocalDate date) {
    int year = date.getYear();
    int month = date.getMonthValue();
    int dayOfMonth = date.getDayOfMonth();
    return ymdToUnixDate(year, month, dayOfMonth);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

package org.apache.paimon.utils;

import org.apache.paimon.data.Timestamp;

import org.junit.jupiter.api.Test;

import java.time.LocalDateTime;
Expand All @@ -42,4 +44,18 @@ public void testFormatLocalDateTime() {
.isEqualTo(expectations[precision]);
}
}

/**
 * Checks that {@code DateTimeUtils.toInternal(long, int)} yields the same value as
 * {@code Timestamp.fromSQLTimestamp} for 2000 timestamps spaced one minute apart, starting
 * from the current wall-clock time.
 */
@Test
public void testTimestamp() {
    final int extraNanos = 100;
    final int stepMillis = 60 * 1000;
    java.sql.Timestamp sqlTs = new java.sql.Timestamp(System.currentTimeMillis());
    for (int step = 0; step < 2000; step++) {
        // Advance by one minute, starting from a fresh object built from epoch millis only.
        long nextMillis = sqlTs.getTime() + stepMillis;
        sqlTs = new java.sql.Timestamp(nextMillis);
        // Add a sub-millisecond component on top of the millisecond-derived nanos.
        sqlTs.setNanos(sqlTs.getNanos() + extraNanos);

        Timestamp viaSqlTimestamp = Timestamp.fromSQLTimestamp(sqlTs);
        Timestamp viaMillisAndNanos = DateTimeUtils.toInternal(sqlTs.getTime(), extraNanos);
        assertThat(viaSqlTimestamp).isEqualTo(viaMillisAndNanos);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -384,13 +384,13 @@ public void testMergeInMemory() {
"INSERT INTO T3 VALUES "
+ "(1, 2, CAST(NULL AS INT), 1.01, CAST(-1 AS TINYINT), CAST(-1 AS SMALLINT), "
+ "CAST(1000 AS BIGINT), 1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' AS DATE), "
+ "CAST('0001-01-01 01:01:01' AS TIMESTAMP)),"
+ "CAST('2021-01-01 01:01:01' AS TIMESTAMP)),"
+ "(1, 2, 2, 1.10, CAST(2 AS TINYINT), CAST(2 AS SMALLINT), "
+ "CAST(100000 AS BIGINT), -1.11, CAST(1.21 AS DOUBLE), CAST('2020-01-02' AS DATE), "
+ "CAST('0002-01-01 01:01:01' AS TIMESTAMP)), "
+ "CAST('2022-01-01 01:01:01' AS TIMESTAMP)), "
+ "(1, 2, 3, 10.00, CAST(1 AS TINYINT), CAST(1 AS SMALLINT), "
+ "CAST(10000000 AS BIGINT), 0, CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS DATE), "
+ "CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+ "CAST('2022-01-01 02:00:00' AS TIMESTAMP))");
List<Row> result = batchSql("SELECT * FROM T3");
assertThat(result)
.containsExactlyInAnyOrder(
Expand All @@ -405,23 +405,23 @@ public void testMergeInMemory() {
(float) -1.11,
-1.11,
LocalDate.of(2020, 1, 1),
LocalDateTime.of(1, 1, 1, 1, 1, 1)));
LocalDateTime.of(2021, 1, 1, 1, 1, 1)));
}

@Test
public void testMergeRead() {
batchSql(
"INSERT INTO T3 VALUES "
+ "(1, 2, CAST(NULL AS INT), 1.01, CAST(1 AS TINYINT), CAST(-1 AS SMALLINT), CAST(1000 AS BIGINT), "
+ "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' AS DATE), CAST('0001-01-01 01:01:01' AS TIMESTAMP))");
+ "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' AS DATE), CAST('2021-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T3 VALUES "
+ "(1, 2, 2, 1.10, CAST(2 AS TINYINT), CAST(2 AS SMALLINT), CAST(100000 AS BIGINT), "
+ "-1.11, CAST(1.21 AS DOUBLE), CAST('2020-01-02' AS DATE), CAST('0002-01-01 01:01:01' AS TIMESTAMP))");
+ "-1.11, CAST(1.21 AS DOUBLE), CAST('2020-01-02' AS DATE), CAST('2022-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T3 VALUES "
+ "(1, 2, 3, 10.00, CAST(-1 AS TINYINT), CAST(1 AS SMALLINT), CAST(10000000 AS BIGINT), "
+ "0, CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS DATE), CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+ "0, CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS DATE), CAST('2022-01-01 02:00:00' AS TIMESTAMP))");

List<Row> result = batchSql("SELECT * FROM T3");
assertThat(result)
Expand All @@ -437,7 +437,7 @@ public void testMergeRead() {
(float) -1.11,
-1.11,
LocalDate.of(2020, 1, 1),
LocalDateTime.of(1, 1, 1, 1, 1, 1)));
LocalDateTime.of(2021, 1, 1, 1, 1, 1)));
}

@Test
Expand All @@ -449,29 +449,29 @@ public void testMergeCompaction() {
batchSql(
"INSERT INTO T3 VALUES "
+ "(1, 2, CAST(NULL AS INT), 1.01, CAST(1 AS TINYINT), CAST(-1 AS SMALLINT), CAST(1000 AS BIGINT), "
+ "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' AS DATE), CAST('0001-01-01 01:01:01' AS TIMESTAMP))");
+ "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' AS DATE), CAST('2021-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T3 VALUES "
+ "(1, 2, 2, 1.10, CAST(2 AS TINYINT), CAST(2 AS SMALLINT), CAST(100000 AS BIGINT), "
+ "-1.11, CAST(1.21 AS DOUBLE), CAST('2020-01-02' AS DATE), CAST('0002-01-01 01:01:01' AS TIMESTAMP))");
+ "-1.11, CAST(1.21 AS DOUBLE), CAST('2020-01-02' AS DATE), CAST('2022-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T3 VALUES "
+ "(1, 2, 3, 10.00, CAST(-1 AS TINYINT), CAST(1 AS SMALLINT), CAST(10000000 AS BIGINT), "
+ "0, CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS DATE), CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+ "0, CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS DATE), CAST('2022-01-01 02:00:00' AS TIMESTAMP))");

// key 1 3
batchSql(
"INSERT INTO T3 VALUES "
+ "(1, 3, CAST(NULL AS INT), 1.01, CAST(1 AS TINYINT), CAST(-1 AS SMALLINT), CAST(1000 AS BIGINT), "
+ "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' AS DATE), CAST('0001-01-01 01:01:01' AS TIMESTAMP))");
+ "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' AS DATE), CAST('2021-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T3 VALUES "
+ "(1, 3, 6, 1.10, CAST(2 AS TINYINT), CAST(2 AS SMALLINT), CAST(100000 AS BIGINT), "
+ "-1.11, CAST(1.21 AS DOUBLE), CAST('2020-01-02' AS DATE), CAST('0002-01-01 01:01:01' AS TIMESTAMP))");
+ "-1.11, CAST(1.21 AS DOUBLE), CAST('2020-01-02' AS DATE), CAST('2022-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T3 VALUES "
+ "(1, 3, 3, 10.00, CAST(-1 AS TINYINT), CAST(1 AS SMALLINT), CAST(10000000 AS BIGINT), "
+ "0, CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS DATE), CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+ "0, CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS DATE), CAST('2022-01-01 02:00:00' AS TIMESTAMP))");

assertThat(batchSql("SELECT * FROM T3"))
.containsExactlyInAnyOrder(
Expand All @@ -486,7 +486,7 @@ public void testMergeCompaction() {
(float) -1.11,
-1.11,
LocalDate.of(2020, 1, 1),
LocalDateTime.of(1, 1, 1, 1, 1, 1)),
LocalDateTime.of(2021, 1, 1, 1, 1, 1)),
Row.of(
1,
3,
Expand All @@ -498,7 +498,7 @@ public void testMergeCompaction() {
(float) -1.11,
-1.11,
LocalDate.of(2020, 1, 1),
LocalDateTime.of(1, 1, 1, 1, 1, 1)));
LocalDateTime.of(2021, 1, 1, 1, 1, 1)));
}

@Test
Expand Down Expand Up @@ -545,13 +545,13 @@ public void testMergeInMemory() {
"INSERT INTO T2 VALUES "
+ "(1, 2, CAST(NULL AS INT), 1.01, CAST(1 AS TINYINT), CAST(-1 AS SMALLINT), "
+ "CAST(1000 AS BIGINT), 1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' AS DATE), "
+ "CAST('0001-01-01 01:01:01' AS TIMESTAMP)),"
+ "CAST('2021-01-01 01:01:01' AS TIMESTAMP)),"
+ "(1, 2, 2, 1.10, CAST(2 AS TINYINT), CAST(2 AS SMALLINT), CAST(100000 AS BIGINT), "
+ "-1.11, CAST(1.21 AS DOUBLE), CAST('2020-01-02' AS DATE), "
+ "CAST('0002-01-01 01:01:01' AS TIMESTAMP)), "
+ "CAST('2022-01-01 01:01:01' AS TIMESTAMP)), "
+ "(1, 2, 3, 10.00, CAST(1 AS TINYINT), CAST(1 AS SMALLINT), CAST(10000000 AS BIGINT), "
+ "0, CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS DATE), "
+ "CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+ "CAST('2022-01-01 02:00:00' AS TIMESTAMP))");
List<Row> result = batchSql("SELECT * FROM T2");
assertThat(result)
.containsExactlyInAnyOrder(
Expand All @@ -566,7 +566,7 @@ public void testMergeInMemory() {
(float) 1.11,
1.21,
LocalDate.of(2022, 1, 2),
LocalDateTime.of(2, 1, 1, 2, 0, 0)));
LocalDateTime.of(2022, 1, 1, 2, 0, 0)));
}

@Test
Expand All @@ -575,17 +575,17 @@ public void testMergeRead() {
"INSERT INTO T2 VALUES "
+ "(1, 2, CAST(NULL AS INT), 1.01, CAST(1 AS TINYINT), CAST(-1 AS SMALLINT), CAST(1000 AS BIGINT), "
+ "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' AS DATE), "
+ "CAST('0001-01-01 01:01:01' AS TIMESTAMP))");
+ "CAST('2021-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T2 VALUES "
+ "(1, 2, 2, 1.10, CAST(2 AS TINYINT), CAST(2 AS SMALLINT), CAST(100000 AS BIGINT), -1.11, "
+ "CAST(1.21 AS DOUBLE), CAST('2020-01-02' AS DATE), "
+ "CAST('0002-01-01 01:01:01' AS TIMESTAMP))");
+ "CAST('2022-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T2 VALUES "
+ "(1, 2, 3, 10.00, CAST(1 AS TINYINT), CAST(1 AS SMALLINT), CAST(10000000 AS BIGINT), 0, "
+ "CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS DATE), "
+ "CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+ "CAST('2022-01-01 02:00:00' AS TIMESTAMP))");

List<Row> result = batchSql("SELECT * FROM T2");
assertThat(result)
Expand All @@ -601,7 +601,7 @@ public void testMergeRead() {
(float) 1.11,
1.21,
LocalDate.of(2022, 1, 2),
LocalDateTime.of(2, 1, 1, 2, 0, 0)));
LocalDateTime.of(2022, 1, 1, 2, 0, 0)));
}

@Test
Expand All @@ -613,29 +613,29 @@ public void testMergeCompaction() {
batchSql(
"INSERT INTO T2 VALUES "
+ "(1, 2, CAST(NULL AS INT), 1.01, CAST(1 AS TINYINT), CAST(-1 AS SMALLINT), CAST(1000 AS BIGINT), "
+ "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' AS DATE), CAST('0001-01-01 01:01:01' AS TIMESTAMP))");
+ "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' AS DATE), CAST('2021-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T2 VALUES "
+ "(1, 2, 2, 1.10, CAST(2 AS TINYINT), CAST(2 AS SMALLINT), CAST(100000 AS BIGINT), -1.11, "
+ "CAST(1.21 AS DOUBLE), CAST('2020-01-02' AS DATE), CAST('0002-01-01 01:01:01' AS TIMESTAMP))");
+ "CAST(1.21 AS DOUBLE), CAST('2020-01-02' AS DATE), CAST('2022-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T2 VALUES "
+ "(1, 2, 3, 10.00, CAST(1 AS TINYINT), CAST(1 AS SMALLINT), CAST(10000000 AS BIGINT), 0, "
+ "CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS DATE), CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+ "CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS DATE), CAST('2022-01-01 02:00:00' AS TIMESTAMP))");

// key 1 3
batchSql(
"INSERT INTO T2 VALUES "
+ "(1, 3, CAST(NULL AS INT), 1.01, CAST(1 AS TINYINT), CAST(-1 AS SMALLINT), CAST(1000 AS BIGINT), "
+ "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' AS DATE), CAST('0001-01-01 01:01:01' AS TIMESTAMP))");
+ "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' AS DATE), CAST('2021-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T2 VALUES "
+ "(1, 3, 6, 1.10, CAST(2 AS TINYINT), CAST(2 AS SMALLINT), CAST(100000 AS BIGINT), -1.11, "
+ "CAST(1.21 AS DOUBLE), CAST('2020-01-02' AS DATE), CAST('0002-01-01 01:01:01' AS TIMESTAMP))");
+ "CAST(1.21 AS DOUBLE), CAST('2020-01-02' AS DATE), CAST('2022-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T2 VALUES "
+ "(1, 3, 3, 10.00, CAST(1 AS TINYINT), CAST(1 AS SMALLINT), CAST(10000000 AS BIGINT), 0, "
+ "CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS DATE), CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+ "CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS DATE), CAST('2022-01-01 02:00:00' AS TIMESTAMP))");

assertThat(batchSql("SELECT * FROM T2"))
.containsExactlyInAnyOrder(
Expand All @@ -650,7 +650,7 @@ public void testMergeCompaction() {
(float) 1.11,
1.21,
LocalDate.of(2022, 1, 2),
LocalDateTime.of(2, 1, 1, 2, 0, 0)),
LocalDateTime.of(2022, 1, 1, 2, 0, 0)),
Row.of(
1,
3,
Expand All @@ -662,7 +662,7 @@ public void testMergeCompaction() {
(float) 1.11,
1.21,
LocalDate.of(2022, 1, 2),
LocalDateTime.of(2, 1, 1, 2, 0, 0)));
LocalDateTime.of(2022, 1, 1, 2, 0, 0)));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.paimon.format.orc.reader;

import org.apache.paimon.data.Timestamp;
import org.apache.paimon.utils.DateTimeUtils;

import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
Expand All @@ -40,8 +41,6 @@ public OrcTimestampColumnVector(ColumnVector vector) {
@Override
public Timestamp getTimestamp(int i, int precision) {
int index = vector.isRepeating ? 0 : i;
java.sql.Timestamp timestamp = new java.sql.Timestamp(vector.time[index]);
timestamp.setNanos(vector.nanos[index]);
return Timestamp.fromSQLTimestamp(timestamp);
return DateTimeUtils.toInternal(vector.time[index], vector.nanos[index] % 1_000_000);
}
}

0 comments on commit 88d5311

Please sign in to comment.