Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(datetime): enhance datetime parsing and validation #2129

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import com.zendesk.maxwell.schema.columndef.ColumnDef;
import com.zendesk.maxwell.schema.columndef.ColumnDefCastException;
import com.zendesk.maxwell.schema.columndef.DateColumnDef;
import com.zendesk.maxwell.schema.columndef.DateTimeColumnDef;
import com.zendesk.maxwell.schema.columndef.TimeColumnDef;
import com.zendesk.maxwell.scripting.Scripting;
import org.slf4j.Logger;
Expand Down Expand Up @@ -267,6 +268,8 @@ private void setRowValues(RowMap row, ResultSet resultSet, Table table) throws S
columnValue = getTimestamp(resultSet, columnIndex);
else if ( columnDefinition instanceof DateColumnDef)
columnValue = resultSet.getString(columnIndex);
else if ( columnDefinition instanceof DateTimeColumnDef)
columnValue = resultSet.getString(columnIndex);
else
columnValue = resultSet.getObject(columnIndex);

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
package com.zendesk.maxwell.schema.columndef;

import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;

import com.zendesk.maxwell.producer.MaxwellOutputConfig;

public class DateColumnDef extends ColumnDef {
private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd");

private DateColumnDef(String name, String type, short pos) {
super(name, type, pos);
}
Expand All @@ -24,11 +30,22 @@ public String toSQL(Object value) {
@Override
public Object asJSON(Object value, MaxwellOutputConfig config) throws ColumnDefCastException {
if ( value instanceof String ) {
// bootstrapper just gives up on bothering with date processing
if ( config.zeroDatesAsNull && "0000-00-00".equals((String) value) )
return null;
else
return value;
String dateString = (String) value;

if ("0000-00-00".equals(dateString)) {
if ( config.zeroDatesAsNull )
return null;
else
return "0000-00-00";
} else {
if ( !DateValidator.isValidDateTime(dateString) )
return null;

value = parseDate(dateString);
if (value == null) {
return null;
}
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mildly, same concerns here -- is this just for bootstrapping?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am only seeing bootstrapping taking the String path. Otherwise, during normal processing it takes the Long path.

} else if ( value instanceof Long && (Long) value == Long.MIN_VALUE ) {
if ( config.zeroDatesAsNull )
return null;
Expand All @@ -42,4 +59,12 @@ public Object asJSON(Object value, MaxwellOutputConfig config) throws ColumnDefC
throw new ColumnDefCastException(this, value);
}
}

/**
 * Parses a {@code yyyy-MM-dd} string into a {@link LocalDate}.
 *
 * @param dateString candidate date text
 * @return the parsed {@link LocalDate}, or {@code null} when the text cannot be parsed
 *         or does not name a real calendar day (for example {@code "2020-02-31"})
 */
private Object parseDate(String dateString) {
	try {
		LocalDate parsed = LocalDate.parse(dateString, DATE_FORMATTER);
		// DATE_FORMATTER is built with ofPattern(), whose default SMART resolution clamps an
		// over-long day-of-month to the last day of the month ("2020-02-31" -> 2020-02-29).
		// Formatting the result back and comparing detects that silent adjustment.
		return DATE_FORMATTER.format(parsed).equals(dateString) ? parsed : null;
	} catch (DateTimeParseException e) {
		// Unparseable text is deliberately reported as null rather than as an error.
		return null;
	}
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.zendesk.maxwell.schema.columndef;

import java.sql.Timestamp;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.util.*;

Expand All @@ -23,6 +24,9 @@ public static Timestamp extractTimestamp(Object value) throws IllegalArgumentExc
} else if ( value instanceof Date ) {
Long time = ((Date) value).getTime();
return new Timestamp(time);
} else if ( value instanceof LocalDate ) {
LocalDateTime startOfDay = ((LocalDate) value).atStartOfDay();
return Timestamp.valueOf(startOfDay);
} else if ( value instanceof LocalDateTime) {
return Timestamp.valueOf((LocalDateTime) value);
} else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,12 @@
import com.zendesk.maxwell.producer.MaxwellOutputConfig;

import java.sql.Timestamp;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;

public class DateTimeColumnDef extends ColumnDefWithLength {
private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

private final boolean isTimestamp = getType().equals("timestamp");

Expand All @@ -19,7 +23,24 @@ public static DateTimeColumnDef create(String name, String type, short pos, Long

protected String formatValue(Object value, MaxwellOutputConfig config) throws ColumnDefCastException {
// special case for those broken mysql dates.
if ( value instanceof Long ) {
if ( value instanceof String) {
String dateString = (String) value;

if ( "0000-00-00 00:00:00".equals(dateString) ) {
if ( config.zeroDatesAsNull )
return null;
else
return appendFractionalSeconds("0000-00-00 00:00:00", 0, getColumnLength());
} else {
if ( !DateValidator.isValidDateTime(dateString) )
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sorry for the delay, I'm coming back to this PR since I'm prepping a release of your mariadb stuff and some other stuff too. I'm a little worried about running a regex in a very hot code-path...

But I guess this only runs on bootstrapping so it might be ok? can you confirm that?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I'm only just getting back to this to see where it stands. From my testing, the `value instanceof String` branch is only triggered during bootstrapping, not during normal processing; in my debugging, normal processing always went down the Long path. If it is just bootstrapping, are you good with it, or should I look for a more efficient way to validate the Date/DateTime than using a regex?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmmm yeah, I don't know. I had a performance benchmark set up at some point; let me see if I can measure the impact of the regex on bootstrapping.

return null;

value = parseDateTime(dateString);
if (value == null) {
return null;
}
}
} else if ( value instanceof Long ) {
Long v = (Long) value;
if ( v == Long.MIN_VALUE || (v == 0L && isTimestamp) ) {
if ( config.zeroDatesAsNull )
Expand All @@ -37,4 +58,12 @@ protected String formatValue(Object value, MaxwellOutputConfig config) throws Co
throw new ColumnDefCastException(this, value);
}
}

/**
 * Parses a {@code yyyy-MM-dd HH:mm:ss} string into a {@link LocalDateTime}.
 *
 * @param dateString candidate datetime text
 * @return the parsed {@link LocalDateTime}, or {@code null} when the text cannot be parsed
 *         or does not name a real calendar day (for example {@code "2020-02-31 00:00:00"})
 */
private Object parseDateTime(String dateString) {
	try {
		LocalDateTime parsed = LocalDateTime.parse(dateString, DATE_TIME_FORMATTER);
		// DATE_TIME_FORMATTER is built with ofPattern(), whose default SMART resolution clamps
		// an over-long day-of-month to the last day of the month instead of failing.
		// Formatting the result back and comparing detects that silent adjustment.
		return DATE_TIME_FORMATTER.format(parsed).equals(dateString) ? parsed : null;
	} catch (DateTimeParseException e) {
		// Unparseable text is deliberately reported as null rather than as an error.
		return null;
	}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package com.zendesk.maxwell.schema.columndef;

/**
 * Syntactic check for date and datetime strings of the form {@code yyyy-MM-dd},
 * optionally followed by {@code " HH:mm:ss"}.
 *
 * Only per-field ranges are checked (month 01-12, day 01-31, hour 00-23,
 * minute and second 00-59). The day is not checked against the length of the
 * month, so a string such as {@code "2020-02-31"} passes this validator.
 */
public class DateValidator {
	private static final String DATE_TIME_REGEX =
		"^\\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])" +
		"( (0[0-9]|1[0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9]))?$";

	// Compiled once: String.matches() would recompile the expression on every call.
	private static final java.util.regex.Pattern DATE_TIME_PATTERN =
		java.util.regex.Pattern.compile(DATE_TIME_REGEX);

	/**
	 * Tests whether the whole string has the date or datetime shape described on this class.
	 *
	 * @param dateString text to test; may be {@code null}
	 * @return {@code true} when the entire string matches, {@code false} otherwise
	 *         (including for {@code null})
	 */
	public static boolean isValidDateTime(String dateString) {
		if (dateString == null) {
			return false;
		}
		return DATE_TIME_PATTERN.matcher(dateString).matches();
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,46 @@ public void TestDateTimeZeroDates() throws ColumnDefCastException {
assertEquals(null, d.asJSON(Long.MIN_VALUE, config));
}

@Test
public void TestDateBadMonth() throws ColumnDefCastException {
	// A DATE string whose month is "00" is expected to serialize as null.
	ColumnDef dateColumn = build("date", true);

	MaxwellOutputConfig outputConfig = new MaxwellOutputConfig();
	outputConfig.zeroDatesAsNull = true;

	Object json = dateColumn.asJSON("2020-00-01", outputConfig);
	assertEquals(null, json);
}

@Test
public void TestDateBadDay() throws ColumnDefCastException {
	// A DATE string whose day is "00" is expected to serialize as null.
	ColumnDef dateColumn = build("date", true);

	MaxwellOutputConfig outputConfig = new MaxwellOutputConfig();
	outputConfig.zeroDatesAsNull = true;

	Object json = dateColumn.asJSON("2020-01-00", outputConfig);
	assertEquals(null, json);
}

@Test
public void TestDatetimeBadDay() throws ColumnDefCastException {
	// A DATETIME string whose day is "00" is expected to serialize as null.
	ColumnDef datetimeColumn = build("datetime", true);

	MaxwellOutputConfig outputConfig = new MaxwellOutputConfig();
	outputConfig.zeroDatesAsNull = true;

	Object json = datetimeColumn.asJSON("2020-01-00 00:00:00", outputConfig);
	assertEquals(null, json);
}

@Test
public void TestDatetimeBadMonth() throws ColumnDefCastException {
	// A DATETIME string whose month is "00" is expected to serialize as null.
	ColumnDef datetimeColumn = build("datetime", true);

	MaxwellOutputConfig outputConfig = new MaxwellOutputConfig();
	outputConfig.zeroDatesAsNull = true;

	Object json = datetimeColumn.asJSON("2020-00-01 00:00:00", outputConfig);
	assertEquals(null, json);
}

@Test
public void TestDateTimeWithTimestamp() throws ParseException, ColumnDefCastException {
ColumnDef d = build("datetime", true);
Expand Down