Skip to content

Commit

Permalink
[Hotfix] fix http source can not read yyyy-MM-dd HH:mm:ss format bug …
Browse files Browse the repository at this point in the history
…& Improve DateTime Utils (apache#6601)
  • Loading branch information
EricJoy2048 authored Apr 9, 2024
1 parent 66d8502 commit 19888e7
Show file tree
Hide file tree
Showing 16 changed files with 795 additions and 162 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,25 @@
package org.apache.seatunnel.common.utils;

import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.SignStyle;
import java.time.temporal.TemporalAccessor;
import java.time.temporal.TemporalQueries;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import static java.time.temporal.ChronoField.DAY_OF_MONTH;
import static java.time.temporal.ChronoField.MONTH_OF_YEAR;
import static java.time.temporal.ChronoField.YEAR;

public class DateTimeUtils {

Expand All @@ -48,6 +62,162 @@ public class DateTimeUtils {
FORMATTER_MAP.put(
Formatter.YYYY_MM_DD_HH_MM_SS_ISO8601,
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_ISO8601.value));
FORMATTER_MAP.put(
Formatter.YYYY_MM_DD_HH_MM_SS_SSS_ISO8601,
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_SSS_ISO8601.value));
FORMATTER_MAP.put(
Formatter.YYYY_MM_DD_HH_MM_SS_SSSSSS_ISO8601,
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_SSSSSS_ISO8601.value));
FORMATTER_MAP.put(
Formatter.YYYY_MM_DD_HH_MM_SS_SSSSSSSSS_ISO8601,
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_SSSSSSSSS_ISO8601.value));
}

// Candidate formatters for datetime strings that are exactly 19 characters long
// (e.g. "2020-02-03 12:12:10"), keyed by the regular expression the string has to match.
// A LinkedHashMap is used, so the entries iterate in the order they were inserted.
public static final Map<Pattern, DateTimeFormatter> YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP =
new LinkedHashMap<>();
// Copy of the entries of the map above, filled by the static initializer; this is the
// collection matchDateTimeFormatter iterates for 19-character input.
// NOTE(review): public, static and not final, so any caller can reassign it - consider final.
public static Set<Map.Entry<Pattern, DateTimeFormatter>>
YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP_ENTRY_SET = new LinkedHashSet<>();

// Candidate formatters for datetime strings that are longer than 19 characters
// (e.g. "2020-02-03 12:12:10.101"), keyed by the regular expression the string has to match.
public static final Map<Pattern, DateTimeFormatter> YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP =
new LinkedHashMap<>();
// Copy of the entries of the map above, filled by the static initializer; this is the
// collection matchDateTimeFormatter iterates for input longer than 19 characters.
// NOTE(review): public, static and not final, so any caller can reassign it - consider final.
public static Set<Map.Entry<Pattern, DateTimeFormatter>>
YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP_ENTRY_SET = new LinkedHashSet<>();

// Formatter used for datetime strings that are exactly 14 characters long; it is built from
// the YYYY_MM_DD_HH_MM_SS_NO_SPLIT pattern ("yyyyMMddHHmmss").
public static final DateTimeFormatter YYYY_MM_DD_HH_MM_SS_14_FORMATTER =
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_NO_SPLIT.value);

static {
YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP.put(
Pattern.compile("\\d{4}-\\d{2}-\\d{2}\\s\\d{2}:\\d{2}:\\d{2}"),
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS.value));

YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP.put(
Pattern.compile("\\d{4}-\\d{2}-\\d{2}\\s\\d{2}:\\d{2}.*"),
new DateTimeFormatterBuilder()
.parseCaseInsensitive()
.append(DateTimeFormatter.ISO_LOCAL_DATE)
.appendLiteral(' ')
.append(DateTimeFormatter.ISO_LOCAL_TIME)
.toFormatter());

YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP.put(
Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}"),
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_ISO8601.value));

YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP.put(
Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}.*"),
DateTimeFormatter.ISO_LOCAL_DATE_TIME);

YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP.put(
Pattern.compile("\\d{4}/\\d{2}/\\d{2}\\s\\d{2}:\\d{2}:\\d{2}"),
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_SLASH.value));

YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP.put(
Pattern.compile("\\d{4}/\\d{2}/\\d{2}\\s\\d{2}:\\d{2}.*"),
new DateTimeFormatterBuilder()
.parseCaseInsensitive()
.append(
new DateTimeFormatterBuilder()
.appendValue(YEAR, 4, 10, SignStyle.EXCEEDS_PAD)
.appendLiteral('/')
.appendValue(MONTH_OF_YEAR, 2)
.appendLiteral('/')
.appendValue(DAY_OF_MONTH, 2)
.toFormatter())
.appendLiteral(' ')
.append(DateTimeFormatter.ISO_LOCAL_TIME)
.toFormatter());

YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP.put(
Pattern.compile("\\d{4}\\.\\d{2}\\.\\d{2}\\s\\d{2}:\\d{2}:\\d{2}"),
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_SPOT.value));

YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP.put(
Pattern.compile("\\d{4}\\.\\d{2}\\.\\d{2}\\s\\d{2}:\\d{2}.*"),
new DateTimeFormatterBuilder()
.parseCaseInsensitive()
.append(
new DateTimeFormatterBuilder()
.appendValue(YEAR, 4, 10, SignStyle.EXCEEDS_PAD)
.appendLiteral('.')
.appendValue(MONTH_OF_YEAR, 2)
.appendLiteral('.')
.appendValue(DAY_OF_MONTH, 2)
.toFormatter())
.appendLiteral(' ')
.append(DateTimeFormatter.ISO_LOCAL_TIME)
.toFormatter());

YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP.put(
Pattern.compile("\\d{4}年\\d{2}月\\d{2}日\\s\\d{2}时\\d{2}分\\d{2}秒"),
DateTimeFormatter.ofPattern("yyyy年MM月dd日 HH时mm分ss秒"));

YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP_ENTRY_SET.addAll(
YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP.entrySet());
YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP_ENTRY_SET.addAll(
YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP.entrySet());
}

// Shape check for the 14-character compact layout: fourteen digits and nothing else.
private static final Pattern YYYY_MM_DD_HH_MM_SS_14_PATTERN = Pattern.compile("\\d{14}");

/**
 * Finds the {@link DateTimeFormatter} that can be used to parse the given datetime string.
 *
 * <p>The lookup is driven by the length of the string:
 *
 * <ul>
 *   <li>19 characters: the first matching entry of {@link
 *       #YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP_ENTRY_SET};
 *   <li>more than 19 characters: the first matching entry of {@link
 *       #YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP_ENTRY_SET};
 *   <li>14 characters, all digits: {@link #YYYY_MM_DD_HH_MM_SS_14_FORMATTER}.
 * </ul>
 *
 * <p>The patterns registered for strings longer than 19 characters only check the prefix of
 * the string, so the returned formatter may still reject the value when it is parsed.
 *
 * @param dateTime eg: 2020-02-03 12:12:10.101
 * @return the matching DateTimeFormatter, or null when {@code dateTime} is null or does not
 *     match any supported pattern
 */
public static DateTimeFormatter matchDateTimeFormatter(String dateTime) {
    if (dateTime == null) {
        return null;
    }
    final int length = dateTime.length();
    if (length == 19) {
        return firstMatchingFormatter(YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP_ENTRY_SET, dateTime);
    }
    if (length > 19) {
        return firstMatchingFormatter(
                YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP_ENTRY_SET, dateTime);
    }
    // Verify the content as well as the length, so that an arbitrary 14-character string is
    // reported as "no match" (null) like every other unsupported input.
    if (length == 14 && YYYY_MM_DD_HH_MM_SS_14_PATTERN.matcher(dateTime).matches()) {
        return YYYY_MM_DD_HH_MM_SS_14_FORMATTER;
    }
    return null;
}

// Returns the formatter of the first candidate whose pattern matches the whole text, or null.
private static DateTimeFormatter firstMatchingFormatter(
        Set<Map.Entry<Pattern, DateTimeFormatter>> candidates, String text) {
    for (Map.Entry<Pattern, DateTimeFormatter> candidate : candidates) {
        if (candidate.getKey().matcher(text).matches()) {
            return candidate.getValue();
        }
    }
    return null;
}

/**
 * Parses a datetime string with an explicitly supplied formatter.
 *
 * <p>The text is parsed into a generic {@link TemporalAccessor}; its date and time parts are
 * then extracted with {@link TemporalQueries#localDate()} and {@link
 * TemporalQueries#localTime()} and combined into the result.
 *
 * @param dateTime the text to parse, eg: 2020-02-03 12:12:10
 * @param dateTimeFormatter the formatter describing the layout of {@code dateTime}
 * @return the parsed {@link LocalDateTime}
 */
public static LocalDateTime parse(String dateTime, DateTimeFormatter dateTimeFormatter) {
    final TemporalAccessor parsed = dateTimeFormatter.parse(dateTime);
    return LocalDateTime.of(
            parsed.query(TemporalQueries.localDate()),
            parsed.query(TemporalQueries.localTime()));
}

/**
 * Parses a datetime string into a {@link LocalDateTime}, detecting its layout automatically
 * via {@link #matchDateTimeFormatter(String)}.
 *
 * <p>Because the formatter has to be selected with regular expressions, this overload is
 * slower than passing the formatter directly. In the original author's measurement (8c16g
 * macOS, 10 million conversions) the worst case was the pattern
 * 'Pattern.compile("\\d{4}\\.\\d{2}\\.\\d{2}\\s\\d{2}:\\d{2}.*")', which went from 4.5
 * seconds to 10 seconds.
 *
 * <p>Two reasons were identified. First, that regular expression is the fourth entry of its
 * map, so three other regular expressions are tried before it matches.
 *
 * <p>Second, to support a fractional-second part of variable length (0 to 9 digits) the
 * formatter is built on {@link DateTimeFormatter#ISO_LOCAL_TIME}, which also makes the
 * conversion itself slower.
 *
 * @param dateTime eg: 2020-02-03 12:12:10.101
 * @return {@link LocalDateTime}
 * @throws NullPointerException if {@code dateTime} is null
 * @throws java.time.format.DateTimeParseException if the layout of {@code dateTime} is not
 *     supported, or the text cannot be parsed with the matched formatter
 */
public static LocalDateTime parse(String dateTime) {
    if (dateTime == null) {
        throw new NullPointerException("dateTime");
    }
    DateTimeFormatter dateTimeFormatter = matchDateTimeFormatter(dateTime);
    if (dateTimeFormatter == null) {
        // Without this guard the null formatter reaches LocalDateTime.parse and surfaces as a
        // NullPointerException that does not mention the offending input.
        throw new java.time.format.DateTimeParseException(
                "Unsupported datetime format: '" + dateTime + "'", dateTime, 0);
    }
    return LocalDateTime.parse(dateTime, dateTimeFormatter);
}

public static LocalDateTime parse(String dateTime, Formatter formatter) {
Expand Down Expand Up @@ -78,7 +248,10 @@ public enum Formatter {
YYYY_MM_DD_HH_MM_SS_SPOT("yyyy.MM.dd HH:mm:ss"),
YYYY_MM_DD_HH_MM_SS_SLASH("yyyy/MM/dd HH:mm:ss"),
YYYY_MM_DD_HH_MM_SS_NO_SPLIT("yyyyMMddHHmmss"),
YYYY_MM_DD_HH_MM_SS_ISO8601("yyyy-MM-dd'T'HH:mm:ss");
YYYY_MM_DD_HH_MM_SS_ISO8601("yyyy-MM-dd'T'HH:mm:ss"),
YYYY_MM_DD_HH_MM_SS_SSS_ISO8601("yyyy-MM-dd'T'HH:mm:ss.SSS"),
YYYY_MM_DD_HH_MM_SS_SSSSSS_ISO8601("yyyy-MM-dd'T'HH:mm:ss.SSSSSS"),
YYYY_MM_DD_HH_MM_SS_SSSSSSSSS_ISO8601("yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS");

private final String value;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,15 @@

import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.SignStyle;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

import static java.time.temporal.ChronoField.DAY_OF_MONTH;
import static java.time.temporal.ChronoField.MONTH_OF_YEAR;
import static java.time.temporal.ChronoField.YEAR;

public class DateUtils {
private static final Map<Formatter, DateTimeFormatter> FORMATTER_MAP = new HashMap<>();
Expand All @@ -36,6 +43,106 @@ public class DateUtils {
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_SLASH.value));
}

/**
 * Regular expressions of the supported date layouts. matchDateFormatter tries them in array
 * order, and each element is also the key of its formatter in {@link #DATE_FORMATTER_MAP}.
 */
public static final Pattern[] PATTERN_ARRAY = {
    Pattern.compile("\\d{4}-\\d{2}-\\d{2}"),
    Pattern.compile("\\d{4}年\\d{2}月\\d{2}日"),
    Pattern.compile("\\d{4}/\\d{2}/\\d{2}"),
    Pattern.compile("\\d{4}\\.\\d{2}\\.\\d{2}"),
    Pattern.compile("\\d{8}")
};

/**
 * Formatter for each entry of {@link #PATTERN_ARRAY}, filled by the static initializer. The
 * keys are the very Pattern instances stored in the array.
 */
public static final Map<Pattern, DateTimeFormatter> DATE_FORMATTER_MAP = new HashMap<>();

static {
DATE_FORMATTER_MAP.put(
PATTERN_ARRAY[0],
new DateTimeFormatterBuilder()
.parseCaseInsensitive()
.append(DateTimeFormatter.ISO_LOCAL_DATE)
.toFormatter());

DATE_FORMATTER_MAP.put(
PATTERN_ARRAY[1],
new DateTimeFormatterBuilder()
.parseCaseInsensitive()
.append(
new DateTimeFormatterBuilder()
.appendValue(YEAR, 4, 10, SignStyle.EXCEEDS_PAD)
.appendLiteral("年")
.appendValue(MONTH_OF_YEAR, 2)
.appendLiteral("月")
.appendValue(DAY_OF_MONTH, 2)
.appendLiteral("日")
.toFormatter())
.toFormatter());

DATE_FORMATTER_MAP.put(
PATTERN_ARRAY[2],
new DateTimeFormatterBuilder()
.parseCaseInsensitive()
.append(
new DateTimeFormatterBuilder()
.appendValue(YEAR, 4, 10, SignStyle.EXCEEDS_PAD)
.appendLiteral('/')
.appendValue(MONTH_OF_YEAR, 2)
.appendLiteral('/')
.appendValue(DAY_OF_MONTH, 2)
.toFormatter())
.toFormatter());

DATE_FORMATTER_MAP.put(
PATTERN_ARRAY[3],
new DateTimeFormatterBuilder()
.parseCaseInsensitive()
.append(
new DateTimeFormatterBuilder()
.appendValue(YEAR, 4, 10, SignStyle.EXCEEDS_PAD)
.appendLiteral('.')
.appendValue(MONTH_OF_YEAR, 2)
.appendLiteral('.')
.appendValue(DAY_OF_MONTH, 2)
.toFormatter())
.toFormatter());

DATE_FORMATTER_MAP.put(
PATTERN_ARRAY[4],
new DateTimeFormatterBuilder()
.parseCaseInsensitive()
.append(
new DateTimeFormatterBuilder()
.appendValue(YEAR, 4, 10, SignStyle.EXCEEDS_PAD)
.appendValue(MONTH_OF_YEAR, 2)
.appendValue(DAY_OF_MONTH, 2)
.toFormatter())
.toFormatter());
}

/**
 * Looks up the {@link DateTimeFormatter} that can parse the given date string.
 *
 * <p>The patterns of {@link #PATTERN_ARRAY} are tried in array order; the formatter registered
 * in {@link #DATE_FORMATTER_MAP} for the first pattern that matches the whole string is
 * returned.
 *
 * @param dateTime eg: 2020-02-03
 * @return the matching DateTimeFormatter, or null when the string matches none of the patterns
 *     in {@link #PATTERN_ARRAY}
 */
public static DateTimeFormatter matchDateFormatter(String dateTime) {
    for (Pattern candidate : PATTERN_ARRAY) {
        if (!candidate.matcher(dateTime).matches()) {
            continue;
        }
        return DATE_FORMATTER_MAP.get(candidate);
    }
    return null;
}

/**
 * Parses a date string into a {@link LocalDate}, detecting its layout automatically via
 * {@link #matchDateFormatter(String)}.
 *
 * @param date eg: 2020-02-03
 * @return the parsed {@link LocalDate}
 * @throws NullPointerException if {@code date} is null
 * @throws java.time.format.DateTimeParseException if the layout of {@code date} is not
 *     supported, or the text cannot be parsed with the matched formatter
 */
public static LocalDate parse(String date) {
    if (date == null) {
        throw new NullPointerException("date");
    }
    DateTimeFormatter dateTimeFormatter = matchDateFormatter(date);
    if (dateTimeFormatter == null) {
        // Without this guard the null formatter reaches LocalDate.parse and surfaces as a
        // NullPointerException that does not mention the offending input.
        throw new java.time.format.DateTimeParseException(
                "Unsupported date format: '" + date + "'", date, 0);
    }
    return parse(date, dateTimeFormatter);
}

/**
 * Parses a date string with an explicitly supplied formatter.
 *
 * @param date the text to parse, eg: 2020-02-03
 * @param dateTimeFormatter the formatter describing the layout of {@code date}
 * @return the parsed {@link LocalDate}
 */
public static LocalDate parse(String date, DateTimeFormatter dateTimeFormatter) {
    final LocalDate parsed = LocalDate.parse(date, dateTimeFormatter);
    return parsed;
}

/**
 * Parses a date string using the formatter registered in FORMATTER_MAP for the given
 * {@link Formatter} constant.
 *
 * @param date the text to parse
 * @param formatter the constant used as the lookup key in FORMATTER_MAP
 * @return the parsed {@link LocalDate}
 */
public static LocalDate parse(String date, Formatter formatter) {
    final DateTimeFormatter registered = FORMATTER_MAP.get(formatter);
    return LocalDate.parse(date, registered);
}
Expand Down
Loading

0 comments on commit 19888e7

Please sign in to comment.