Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#2357] improvement(trino): Fix varchar type mapping between Gravitino JDBC catalog and Trino #2358

Merged
merged 17 commits into from
Mar 11, 2024
Merged
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
CREATE SCHEMA "test.gt_mysql".varchar_db1;

USE "test.gt_mysql".varchar_db1;

CREATE TABLE tb01 (id int, name char(20));

SHOW CREATE TABLE "test.gt_mysql".varchar_db1.tb01;

CREATE TABLE tb02 (id int, name char(255));

SHOW CREATE TABLE "test.gt_mysql".varchar_db1.tb02;

CREATE TABLE tb03 (id int, name char(256));

CREATE TABLE tb04 (id int, name varchar(250));

SHOW CREATE TABLE "test.gt_mysql".varchar_db1.tb04;

CREATE TABLE tb05 (id int, name varchar(256));

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing the test cases:

CREATE TABLE tb01 (id int, name char);
CREATE TABLE tb01 (id int, name varchar);

SHOW CREATE TABLE "test.gt_mysql".varchar_db1.tb05;

drop table "test.gt_mysql".varchar_db1.tb01;

drop table "test.gt_mysql".varchar_db1.tb02;

drop table "test.gt_mysql".varchar_db1.tb04;

drop table "test.gt_mysql".varchar_db1.tb05;

drop schema "test.gt_mysql".varchar_db1;

Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
CREATE SCHEMA

USE

CREATE TABLE

"CREATE TABLE ""test.gt_mysql"".varchar_db1.tb01 (
id integer,
name char(20)
)
COMMENT ''
WITH (
engine = 'InnoDB'
)"

CREATE TABLE

"CREATE TABLE ""test.gt_mysql"".varchar_db1.tb02 (
id integer,
name char(255)
)
COMMENT ''
WITH (
engine = 'InnoDB'
)"

<QUERY_FAILED> MySQL does not support the datatype CHAR with the length greater than 255

CREATE TABLE

"CREATE TABLE ""test.gt_mysql"".varchar_db1.tb04 (
id integer,
name varchar(250)
)
COMMENT ''
WITH (
engine = 'InnoDB'
)"

CREATE TABLE

"CREATE TABLE ""test.gt_mysql"".varchar_db1.tb05 (
id integer,
name varchar(256)
)
COMMENT ''
WITH (
engine = 'InnoDB'
)"

DROP TABLE

DROP TABLE

DROP TABLE

DROP TABLE

DROP SCHEMA
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ INSERT: 15000 rows
"Trino version: %
%
└─ TableScan[table = test.gt_postgresql:gt_db1.customer gt_db1.customer constraints=[ParameterizedExpression[expression=(""phone"") LIKE (?), parameters=[QueryParameter{jdbcType=Optional.empty, type=varchar(6), value=Optional[Slice{base=[B@%, baseOffset=0, length=6}]}]]] limit=10]
Layout: [custkey:bigint, name:varchar, address:varchar, nationkey:bigint, phone:varchar, acctbal:decimal(12,2), mktsegment:varchar, comment:varchar]
Layout: [custkey:bigint, name:varchar(25), address:varchar(40), nationkey:bigint, phone:varchar(15), acctbal:decimal(12,2), mktsegment:varchar(10), comment:varchar(117)]
%
"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ USE
CREATE TABLE

"CREATE TABLE ""test.gt_postgresql"".gt_db1.tb01 (
f1 varchar,
f2 varchar,
f1 varchar(200),
f2 char(20),
f3 varbinary,
f4 decimal(10, 3),
f5 real,
Expand All @@ -26,14 +26,14 @@ INSERT: 1 row

INSERT: 1 row

"Sample text 1","Text1","65","123.456","7.89","12.34","false","100","1000","1000","100000","2024-01-01","08:00:00.000","2024-01-01 08:00:00.000"
"Sample text 1","Text1 ","65","123.456","7.89","12.34","false","100","1000","1000","100000","2024-01-01","08:00:00.000","2024-01-01 08:00:00.000"
"","","","","","","","","","","","","",""

CREATE TABLE

"CREATE TABLE ""test.gt_postgresql"".gt_db1.tb02 (
f1 varchar NOT NULL,
f2 varchar NOT NULL,
f1 varchar(200) NOT NULL,
f2 char(20) NOT NULL,
f3 varbinary NOT NULL,
f4 decimal(10, 3) NOT NULL,
f5 real NOT NULL,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
CREATE SCHEMA "test.gt_postgresql".varchar_db1;

USE "test.gt_postgresql".varchar_db1;

CREATE TABLE tb01 (id int, name char(20));

SHOW CREATE TABLE "test.gt_postgresql".varchar_db1.tb01;

CREATE TABLE tb02 (id int, name char(65536));

SHOW CREATE TABLE "test.gt_postgresql".varchar_db1.tb02;

CREATE TABLE tb03 (id int, name char(65537));

CREATE TABLE tb04 (id int, name varchar(250));

SHOW CREATE TABLE "test.gt_postgresql".varchar_db1.tb04;

CREATE TABLE tb05 (id int, name varchar(10485760));

SHOW CREATE TABLE "test.gt_postgresql".varchar_db1.tb05;

CREATE TABLE tb06 (id int, name varchar(10485761));

SHOW CREATE TABLE "test.gt_postgresql".varchar_db1.tb06;

drop table "test.gt_postgresql".varchar_db1.tb01;

drop table "test.gt_postgresql".varchar_db1.tb02;

drop table "test.gt_postgresql".varchar_db1.tb04;

drop table "test.gt_postgresql".varchar_db1.tb05;

drop schema "test.gt_postgresql".varchar_db1;

Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
CREATE SCHEMA

USE

CREATE TABLE

"CREATE TABLE ""test.gt_postgresql"".varchar_db1.tb01 (
id integer,
name char(20)
)
COMMENT ''"

"CREATE TABLE ""test.gt_postgresql"".varchar_db1.tb02 (
id integer,
name char(65536)
)
COMMENT ''"

<QUERY_FAILED> Unknown type 'char(65537)' for column 'name'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the error message should not be understandable by the user

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error message is from Trino itself, not from the Gravitino connector.


CREATE TABLE

"CREATE TABLE ""test.gt_postgresql"".varchar_db1.tb04 (
id integer,
name varchar(250)
)
COMMENT ''"

CREATE TABLE

"CREATE TABLE ""test.gt_postgresql"".varchar_db1.tb05 (
id integer,
name varchar(10485760)
)
COMMENT ''"

<QUERY_FAILED> Failed to create table.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we show more detailed messages for the error

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I need to handle this special case to provide more error information, so I have omitted it.


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing line 25 command output, Does it still work with subsequent commands?

SHOW CREATE TABLE "test.gt_postgresql".varchar_db1.tb06;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK

DROP TABLE

DROP TABLE

DROP TABLE

DROP TABLE

DROP SCHEMA
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ public void createTable(GravitinoTable table) {
throw new TrinoException(GRAVITINO_SCHEMA_NOT_EXISTS, SCHEMA_DOES_NOT_EXIST_MSG, e);
} catch (TableAlreadyExistsException e) {
throw new TrinoException(GRAVITINO_TABLE_ALREADY_EXISTS, "Table already exists", e);
} catch (Exception e) {
throw new TrinoException(GRAVITINO_OPERATION_FAILED, "Failed to create table.", e.getCause());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,65 @@
import com.datastrato.gravitino.trino.connector.GravitinoErrorCode;
import com.datastrato.gravitino.trino.connector.util.GeneralDataTypeTransformer;
import io.trino.spi.TrinoException;
import io.trino.spi.type.CharType;

/** Type transformer between MySQL and Trino */
public class MySQLDataTypeTransformer extends GeneralDataTypeTransformer {
private static final int MYSQL_CHAR_LENGTH_LIMIT = 255;
// 65535 / 4 = 16383, in fact, MySQL limit the row size to 65535, and the utf8mb4 character set
// uses 4 bytes per character. In fact, if a row has several varchar columns, the length of each
// column should be less than 16383. For more details, please refer to
// https://dev.mysql.com/doc/refman/8.0/en/char.html
private static final int MYSQL_VARCHAR_LENGTH_LIMIT = 16383;

yuqi1129 marked this conversation as resolved.
Show resolved Hide resolved
@Override
public io.trino.spi.type.Type getTrinoType(Type type) {
if (type.name() == Name.STRING) {
return io.trino.spi.type.VarcharType.createUnboundedVarcharType();
}

return super.getTrinoType(type);
}

@Override
public Type getGravitinoType(io.trino.spi.type.Type type) {
Type gravitinoType = super.getGravitinoType(type);
if (gravitinoType.name() == Name.VARCHAR) {
if (((Types.VarCharType) gravitinoType).length() > MYSQL_CHAR_LENGTH_LIMIT) {
Class<? extends io.trino.spi.type.Type> typeClass = type.getClass();
if (typeClass == io.trino.spi.type.CharType.class) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't we use the instanceof operator?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't have any preference about instanceof and getClass(), as we will judge the type condition twice, so I think the the temporary variant and use the format == may be simple.

CharType charType = (CharType) type;
if (charType.getLength() > MYSQL_CHAR_LENGTH_LIMIT) {
throw new TrinoException(
GravitinoErrorCode.GRAVITINO_ILLEGAL_ARGUMENT,
"MySQL does not support the datatype CHAR with the length greater than "
+ MYSQL_CHAR_LENGTH_LIMIT);
}

// We do not support the CHAR without a length.
if (charType.getLength() == 0) {
throw new TrinoException(
GravitinoErrorCode.GRAVITINO_ILLEGAL_ARGUMENT,
"MySQL does not support the datatype CHAR with the length 0");
}

return Types.FixedCharType.of(charType.getLength());
} else if (typeClass == io.trino.spi.type.VarcharType.class) {
io.trino.spi.type.VarcharType varcharType = (io.trino.spi.type.VarcharType) type;

// If the length is not specified, it is a VARCHAR without length, we convert it to a string
// type.
if (varcharType.getLength().isEmpty()) {
return Types.StringType.get();
}
}

if (gravitinoType.name() == Name.FIXEDCHAR) {
if (((Types.FixedCharType) gravitinoType).length() > MYSQL_CHAR_LENGTH_LIMIT) {
int length = varcharType.getLength().get();
if (length > MYSQL_VARCHAR_LENGTH_LIMIT) {
throw new TrinoException(
GravitinoErrorCode.GRAVITINO_ILLEGAL_ARGUMENT,
"MySQL does not support the datatype CHAR with the length greater than 255");
"MySQL does not support the datatype VARCHAR with the length greater than "
+ MYSQL_VARCHAR_LENGTH_LIMIT);
}
return Types.VarCharType.of(length);
}
return gravitinoType;

return super.getGravitinoType(type);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,55 @@
import com.datastrato.gravitino.rel.types.Type;
import com.datastrato.gravitino.rel.types.Type.Name;
import com.datastrato.gravitino.rel.types.Types;
import com.datastrato.gravitino.trino.connector.GravitinoErrorCode;
import com.datastrato.gravitino.trino.connector.util.GeneralDataTypeTransformer;
import io.trino.spi.TrinoException;
import io.trino.spi.type.CharType;

/** Type transformer between PostgreSQL and Trino */
public class PostgreSQLDataTypeTransformer extends GeneralDataTypeTransformer {
private static final int POSTGRESQL_CHAR_LENGTH_LIMIT = 10485760;
// 1 GB, please refer to
// https://stackoverflow.com/questions/70785582/is-a-varchar-unlimited-in-postgresql
private static final int POSTGRESQL_VARCHAR_LENGTH_LIMIT = 10485760;

@Override
public io.trino.spi.type.Type getTrinoType(Type type) {
if (type.name() == Name.STRING) {
return io.trino.spi.type.VarcharType.createUnboundedVarcharType();
}

return super.getTrinoType(type);
}

@Override
public Type getGravitinoType(io.trino.spi.type.Type type) {
Type gravitinoType = super.getGravitinoType(type);
if (gravitinoType.name() == Name.VARCHAR || gravitinoType.name() == Name.FIXEDCHAR) {
return Types.StringType.get();
Class<? extends io.trino.spi.type.Type> typeClass = type.getClass();
if (typeClass == io.trino.spi.type.CharType.class) {
CharType charType = (CharType) type;

// Do not need to check the scenario that the length of the CHAR type is greater than
// POSTGRESQL_CHAR_LENGTH_LIMIT ,because the length of the CHAR type in Trino is no greater
// than 65536 We do not support the CHAR without a length.
if (charType.getLength() == 0) {
throw new TrinoException(
GravitinoErrorCode.GRAVITINO_ILLEGAL_ARGUMENT,
"PostgreSQL does not support the datatype CHAR with the length 0");
}

return Types.FixedCharType.of(charType.getLength());
} else if (typeClass == io.trino.spi.type.VarcharType.class) {
io.trino.spi.type.VarcharType varcharType = (io.trino.spi.type.VarcharType) type;

// If the length is not specified, it is a VARCHAR without length, we convert it to a string
// type.
if (varcharType.getLength().isEmpty()) {
return Types.StringType.get();
}

return Types.VarCharType.of(varcharType.getLength().get());
}
return gravitinoType;

return super.getGravitinoType(type);
}
}
Loading
Loading