Skip to content

Type mapping rewriting for JDBC integration #505

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 24 commits into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
ffa01fa
Refactor JDBC test files and enhance readJdbc.kt for better Dataframe…
zaleslaw Nov 13, 2023
f5d0eca
Updated test cases for JDBC connection and enhanced the logic for rea…
zaleslaw Nov 15, 2023
5a6c1fd
Updated: Removed method convertDataFromResultSet from DbType class
zaleslaw Nov 15, 2023
71705b9
Add catalogue and limit parameters to readAllSqlTables"
zaleslaw Nov 15, 2023
5c882c3
Update DbType methods and Postgres tests.
zaleslaw Nov 15, 2023
cc152c7
Handle null values in DataFrame filtering and mark code for removal
zaleslaw Nov 15, 2023
0e7f503
Removed redundant DataFrame read methods in readJdbc.kt
zaleslaw Nov 16, 2023
56395c2
Changes include handling potential duplication of column names in SQL…
zaleslaw Nov 16, 2023
dddbf30
Added support for empty tables
zaleslaw Nov 17, 2023
58d23c9
Refactor type conversions for better handling of unsupported SQL types
zaleslaw Nov 17, 2023
cf7c040
Fixed the test with new types generation
zaleslaw Nov 17, 2023
6e92bb2
Merge branch '0.12.1' into fix-jdbc-integration
zaleslaw Nov 27, 2023
55344f2
Added simple Test for JSON column handling
zaleslaw Nov 27, 2023
6495f30
disabled integration tests
zaleslaw Nov 27, 2023
2a7f1fd
Ensure non-nullability of "amount" in Sale, refactor SQL to KType map…
zaleslaw Dec 8, 2023
5d94ac0
Fix nullability check in readJdbc.kt
zaleslaw Dec 11, 2023
32b1d5a
Improve the handling of nullability of database columns when creating…
zaleslaw Dec 13, 2023
fb21cf4
Updated tests for an imdb database
zaleslaw Dec 13, 2023
32da39d
Updated tests for an mysql/mariadb database
zaleslaw Dec 13, 2023
de19f6d
Updated tests for a postgresql database
zaleslaw Dec 13, 2023
9e64d4a
Updated tests for a sqlite database
zaleslaw Dec 13, 2023
af3ff88
Updated tests for a sqlite database
zaleslaw Dec 13, 2023
2361a76
Formatted
zaleslaw Dec 13, 2023
fd73a08
Formattedd
zaleslaw Dec 13, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import org.jetbrains.kotlinx.dataframe.io.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import java.sql.ResultSet
import org.jetbrains.kotlinx.dataframe.io.TableMetadata
import kotlin.reflect.KType

/**
* The `DbType` class represents a database type used for reading dataframe from the database.
Expand All @@ -22,19 +23,10 @@ public abstract class DbType(public val dbTypeInJdbcUrl: String) {
*/
public abstract val driverClassName: String

/**
* Converts the data from the given [ResultSet] into the specified [TableColumnMetadata] type.
*
* @param rs The [ResultSet] containing the data to be converted.
* @param tableColumnMetadata The [TableColumnMetadata] representing the target type of the conversion.
* @return The converted data as an instance of [Any].
*/
public abstract fun convertDataFromResultSet(rs: ResultSet, tableColumnMetadata: TableColumnMetadata): Any?

/**
* Returns a [ColumnSchema] produced from [tableColumnMetadata].
*/
public abstract fun toColumnSchema(tableColumnMetadata: TableColumnMetadata): ColumnSchema
public abstract fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema?

/**
* Checks if the given table name is a system table for the specified database type.
Expand All @@ -52,4 +44,12 @@ public abstract class DbType(public val dbTypeInJdbcUrl: String) {
* @return the TableMetadata object representing the table metadata.
*/
public abstract fun buildTableMetadata(tables: ResultSet): TableMetadata

/**
* Converts SQL data type to a Kotlin data type.
*
* @param [tableColumnMetadata] The metadata of the table column.
* @return The corresponding Kotlin data type, or null if no mapping is found.
*/
public abstract fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType?
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@ import org.jetbrains.kotlinx.dataframe.io.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import java.sql.ResultSet
import java.util.Locale
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.io.TableMetadata
import kotlin.reflect.KType
import kotlin.reflect.typeOf

/**
Expand All @@ -21,71 +20,8 @@ public object H2 : DbType("h2") {
override val driverClassName: String
get() = "org.h2.Driver"

override fun convertDataFromResultSet(rs: ResultSet, tableColumnMetadata: TableColumnMetadata): Any? {
val name = tableColumnMetadata.name
return when (tableColumnMetadata.sqlTypeName) {
"CHARACTER", "CHAR" -> rs.getString(name)
"CHARACTER VARYING", "CHAR VARYING", "VARCHAR" -> rs.getString(name)
"CHARACTER LARGE OBJECT", "CHAR LARGE OBJECT", "CLOB" -> rs.getString(name)
"MEDIUMTEXT" -> rs.getString(name)
"VARCHAR_IGNORECASE" -> rs.getString(name)
"BINARY" -> rs.getBytes(name)
"BINARY VARYING", "VARBINARY" -> rs.getBytes(name)
"BINARY LARGE OBJECT", "BLOB" -> rs.getBytes(name)
"BOOLEAN" -> rs.getBoolean(name)
"TINYINT" -> rs.getByte(name)
"SMALLINT" -> rs.getShort(name)
"INTEGER", "INT" -> rs.getInt(name)
"BIGINT" -> rs.getLong(name)
"NUMERIC", "DECIMAL", "DEC" -> rs.getFloat(name) // not a BigDecimal
"REAL", "FLOAT" -> rs.getFloat(name)
"DOUBLE PRECISION" -> rs.getDouble(name)
"DECFLOAT" -> rs.getDouble(name)
"DATE" -> rs.getDate(name).toString()
"TIME" -> rs.getTime(name).toString()
"TIME WITH TIME ZONE" -> rs.getTime(name).toString()
"TIMESTAMP" -> rs.getTimestamp(name).toString()
"TIMESTAMP WITH TIME ZONE" -> rs.getTimestamp(name).toString()
"INTERVAL" -> rs.getObject(name).toString()
"JAVA_OBJECT" -> rs.getObject(name)
"ENUM" -> rs.getString(name)
"JSON" -> rs.getString(name) // TODO: https://github.com/Kotlin/dataframe/issues/462
"UUID" -> rs.getString(name)
else -> throw IllegalArgumentException("Unsupported H2 type: ${tableColumnMetadata.sqlTypeName}")
}
}

override fun toColumnSchema(tableColumnMetadata: TableColumnMetadata): ColumnSchema {
return when (tableColumnMetadata.sqlTypeName) {
"CHARACTER", "CHAR" -> ColumnSchema.Value(typeOf<String>())
"CHARACTER VARYING", "CHAR VARYING", "VARCHAR" -> ColumnSchema.Value(typeOf<String>())
"CHARACTER LARGE OBJECT", "CHAR LARGE OBJECT", "CLOB" -> ColumnSchema.Value(typeOf<String>())
"MEDIUMTEXT" -> ColumnSchema.Value(typeOf<String>())
"VARCHAR_IGNORECASE" -> ColumnSchema.Value(typeOf<String>())
"BINARY" -> ColumnSchema.Value(typeOf<ByteArray>())
"BINARY VARYING", "VARBINARY" -> ColumnSchema.Value(typeOf<ByteArray>())
"BINARY LARGE OBJECT", "BLOB" -> ColumnSchema.Value(typeOf<ByteArray>())
"BOOLEAN" -> ColumnSchema.Value(typeOf<Boolean>())
"TINYINT" -> ColumnSchema.Value(typeOf<Byte>())
"SMALLINT" -> ColumnSchema.Value(typeOf<Short>())
"INTEGER", "INT" -> ColumnSchema.Value(typeOf<Int>())
"BIGINT" -> ColumnSchema.Value(typeOf<Long>())
"NUMERIC", "DECIMAL", "DEC" -> ColumnSchema.Value(typeOf<Float>())
"REAL", "FLOAT" -> ColumnSchema.Value(typeOf<Float>())
"DOUBLE PRECISION" -> ColumnSchema.Value(typeOf<Double>())
"DECFLOAT" -> ColumnSchema.Value(typeOf<Double>())
"DATE" -> ColumnSchema.Value(typeOf<String>())
"TIME" -> ColumnSchema.Value(typeOf<String>())
"TIME WITH TIME ZONE" -> ColumnSchema.Value(typeOf<String>())
"TIMESTAMP" -> ColumnSchema.Value(typeOf<String>())
"TIMESTAMP WITH TIME ZONE" -> ColumnSchema.Value(typeOf<String>())
"INTERVAL" -> ColumnSchema.Value(typeOf<String>())
"JAVA_OBJECT" -> ColumnSchema.Value(typeOf<Any>())
"ENUM" -> ColumnSchema.Value(typeOf<String>())
"JSON" -> ColumnSchema.Value(typeOf<String>()) // TODO: https://github.com/Kotlin/dataframe/issues/462
"UUID" -> ColumnSchema.Value(typeOf<String>())
else -> throw IllegalArgumentException("Unsupported H2 type: ${tableColumnMetadata.sqlTypeName} for column ${tableColumnMetadata.name}")
}
override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? {
return null
}

override fun isSystemTable(tableMetadata: TableMetadata): Boolean {
Expand All @@ -99,4 +35,8 @@ public object H2 : DbType("h2") {
tables.getString("TABLE_SCHEM"),
tables.getString("TABLE_CAT"))
}

override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? {
return null
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import org.jetbrains.kotlinx.dataframe.io.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import java.sql.ResultSet
import org.jetbrains.kotlinx.dataframe.io.TableMetadata
import kotlin.reflect.KType
import kotlin.reflect.typeOf

/**
Expand All @@ -16,73 +17,8 @@ public object MariaDb : DbType("mariadb") {
override val driverClassName: String
get() = "org.mariadb.jdbc.Driver"

override fun convertDataFromResultSet(rs: ResultSet, tableColumnMetadata: TableColumnMetadata): Any? {
val name = tableColumnMetadata.name
return when (tableColumnMetadata.sqlTypeName) {
"BIT" -> rs.getBytes(name)
"TINYINT" -> rs.getInt(name)
"SMALLINT" -> rs.getInt(name)
"MEDIUMINT"-> rs.getInt(name)
"MEDIUMINT UNSIGNED" -> rs.getLong(name)
"INTEGER", "INT" -> rs.getInt(name)
"INTEGER UNSIGNED", "INT UNSIGNED" -> rs.getLong(name)
"BIGINT" -> rs.getLong(name)
"FLOAT" -> rs.getFloat(name)
"DOUBLE" -> rs.getDouble(name)
"DECIMAL" -> rs.getBigDecimal(name)
"DATE" -> rs.getDate(name).toString()
"DATETIME" -> rs.getTimestamp(name).toString()
"TIMESTAMP" -> rs.getTimestamp(name).toString()
"TIME"-> rs.getTime(name).toString()
"YEAR" -> rs.getDate(name).toString()
"VARCHAR", "CHAR" -> rs.getString(name)
"BINARY" -> rs.getBytes(name)
"VARBINARY" -> rs.getBytes(name)
"TINYBLOB"-> rs.getBytes(name)
"BLOB"-> rs.getBytes(name)
"MEDIUMBLOB" -> rs.getBytes(name)
"LONGBLOB" -> rs.getBytes(name)
"TEXT" -> rs.getString(name)
"MEDIUMTEXT" -> rs.getString(name)
"LONGTEXT" -> rs.getString(name)
"ENUM" -> rs.getString(name)
"SET" -> rs.getString(name)
else -> throw IllegalArgumentException("Unsupported MariaDB type: ${tableColumnMetadata.sqlTypeName}")
}
}

override fun toColumnSchema(tableColumnMetadata: TableColumnMetadata): ColumnSchema {
return when (tableColumnMetadata.sqlTypeName) {
"BIT" -> ColumnSchema.Value(typeOf<ByteArray>())
"TINYINT" -> ColumnSchema.Value(typeOf<Int>())
"SMALLINT" -> ColumnSchema.Value(typeOf<Int>())
"MEDIUMINT"-> ColumnSchema.Value(typeOf<Int>())
"MEDIUMINT UNSIGNED" -> ColumnSchema.Value(typeOf<Long>())
"INTEGER", "INT" -> ColumnSchema.Value(typeOf<Int>())
"INTEGER UNSIGNED", "INT UNSIGNED" -> ColumnSchema.Value(typeOf<Long>())
"BIGINT" -> ColumnSchema.Value(typeOf<Long>())
"FLOAT" -> ColumnSchema.Value(typeOf<Float>())
"DOUBLE" -> ColumnSchema.Value(typeOf<Double>())
"DECIMAL" -> ColumnSchema.Value(typeOf<Double>())
"DATE" -> ColumnSchema.Value(typeOf<String>())
"DATETIME" -> ColumnSchema.Value(typeOf<String>())
"TIMESTAMP" -> ColumnSchema.Value(typeOf<String>())
"TIME"-> ColumnSchema.Value(typeOf<String>())
"YEAR" -> ColumnSchema.Value(typeOf<String>())
"VARCHAR", "CHAR" -> ColumnSchema.Value(typeOf<String>())
"BINARY" -> ColumnSchema.Value(typeOf<ByteArray>())
"VARBINARY" -> ColumnSchema.Value(typeOf<ByteArray>())
"TINYBLOB"-> ColumnSchema.Value(typeOf<ByteArray>())
"BLOB"-> ColumnSchema.Value(typeOf<ByteArray>())
"MEDIUMBLOB" -> ColumnSchema.Value(typeOf<ByteArray>())
"LONGBLOB" -> ColumnSchema.Value(typeOf<ByteArray>())
"TEXT" -> ColumnSchema.Value(typeOf<String>())
"MEDIUMTEXT" -> ColumnSchema.Value(typeOf<String>())
"LONGTEXT" -> ColumnSchema.Value(typeOf<String>())
"ENUM" -> ColumnSchema.Value(typeOf<String>())
"SET" -> ColumnSchema.Value(typeOf<String>())
else -> throw IllegalArgumentException("Unsupported MariaDB type: ${tableColumnMetadata.sqlTypeName} for column ${tableColumnMetadata.name}")
}
override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? {
return null
}

override fun isSystemTable(tableMetadata: TableMetadata): Boolean {
Expand All @@ -95,4 +31,8 @@ public object MariaDb : DbType("mariadb") {
tables.getString("table_schem"),
tables.getString("table_cat"))
}

override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? {
return null
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import java.util.Locale
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.io.TableMetadata
import kotlin.reflect.KType
import kotlin.reflect.typeOf

/**
Expand All @@ -19,79 +20,8 @@ public object MySql : DbType("mysql") {
override val driverClassName: String
get() = "com.mysql.jdbc.Driver"

override fun convertDataFromResultSet(rs: ResultSet, tableColumnMetadata: TableColumnMetadata): Any? {
val name = tableColumnMetadata.name
return when (tableColumnMetadata.sqlTypeName) {
"BIT" -> rs.getBytes(name)
"TINYINT" -> rs.getInt(name)
"SMALLINT" -> rs.getInt(name)
"MEDIUMINT"-> rs.getInt(name)
"MEDIUMINT UNSIGNED" -> rs.getLong(name)
"INTEGER", "INT" -> rs.getInt(name)
"INTEGER UNSIGNED", "INT UNSIGNED" -> rs.getLong(name)
"BIGINT" -> rs.getLong(name)
"FLOAT" -> rs.getFloat(name)
"DOUBLE" -> rs.getDouble(name)
"DECIMAL" -> rs.getBigDecimal(name)
"DATE" -> rs.getDate(name).toString()
"DATETIME" -> rs.getTimestamp(name).toString()
"TIMESTAMP" -> rs.getTimestamp(name).toString()
"TIME"-> rs.getTime(name).toString()
"YEAR" -> rs.getDate(name).toString()
"VARCHAR", "CHAR" -> rs.getString(name)
"BINARY" -> rs.getBytes(name)
"VARBINARY" -> rs.getBytes(name)
"TINYBLOB"-> rs.getBytes(name)
"BLOB"-> rs.getBytes(name)
"MEDIUMBLOB" -> rs.getBytes(name)
"LONGBLOB" -> rs.getBytes(name)
"TEXT" -> rs.getString(name)
"MEDIUMTEXT" -> rs.getString(name)
"LONGTEXT" -> rs.getString(name)
"ENUM" -> rs.getString(name)
"SET" -> rs.getString(name)
// special mysql types
"JSON" -> rs.getString(name) // TODO: https://github.com/Kotlin/dataframe/issues/462
"GEOMETRY" -> rs.getBytes(name)
else -> throw IllegalArgumentException("Unsupported MySQL type: ${tableColumnMetadata.sqlTypeName}")
}
}

override fun toColumnSchema(tableColumnMetadata: TableColumnMetadata): ColumnSchema {
return when (tableColumnMetadata.sqlTypeName) {
"BIT" -> ColumnSchema.Value(typeOf<ByteArray>())
"TINYINT" -> ColumnSchema.Value(typeOf<Int>())
"SMALLINT" -> ColumnSchema.Value(typeOf<Int>())
"MEDIUMINT"-> ColumnSchema.Value(typeOf<Int>())
"MEDIUMINT UNSIGNED" -> ColumnSchema.Value(typeOf<Long>())
"INTEGER", "INT" -> ColumnSchema.Value(typeOf<Int>())
"INTEGER UNSIGNED", "INT UNSIGNED" -> ColumnSchema.Value(typeOf<Long>())
"BIGINT" -> ColumnSchema.Value(typeOf<Long>())
"FLOAT" -> ColumnSchema.Value(typeOf<Float>())
"DOUBLE" -> ColumnSchema.Value(typeOf<Double>())
"DECIMAL" -> ColumnSchema.Value(typeOf<Double>())
"DATE" -> ColumnSchema.Value(typeOf<String>())
"DATETIME" -> ColumnSchema.Value(typeOf<String>())
"TIMESTAMP" -> ColumnSchema.Value(typeOf<String>())
"TIME"-> ColumnSchema.Value(typeOf<String>())
"YEAR" -> ColumnSchema.Value(typeOf<String>())
"VARCHAR", "CHAR" -> ColumnSchema.Value(typeOf<String>())
"BINARY" -> ColumnSchema.Value(typeOf<ByteArray>())
"VARBINARY" -> ColumnSchema.Value(typeOf<ByteArray>())
"TINYBLOB"-> ColumnSchema.Value(typeOf<ByteArray>())
"BLOB"-> ColumnSchema.Value(typeOf<ByteArray>())
"MEDIUMBLOB" -> ColumnSchema.Value(typeOf<ByteArray>())
"LONGBLOB" -> ColumnSchema.Value(typeOf<ByteArray>())
"TEXT" -> ColumnSchema.Value(typeOf<String>())
"MEDIUMTEXT" -> ColumnSchema.Value(typeOf<String>())
"LONGTEXT" -> ColumnSchema.Value(typeOf<String>())
"ENUM" -> ColumnSchema.Value(typeOf<String>())
"SET" -> ColumnSchema.Value(typeOf<String>())
// special mysql types
"JSON" -> ColumnSchema.Value(typeOf<ColumnGroup<DataRow<String>>>()) // TODO: https://github.com/Kotlin/dataframe/issues/462
"GEOMETRY" -> ColumnSchema.Value(typeOf<ByteArray>())
else -> throw IllegalArgumentException("Unsupported MySQL type: ${tableColumnMetadata.sqlTypeName} for column ${tableColumnMetadata.name}")
}
override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? {
return null
}

override fun isSystemTable(tableMetadata: TableMetadata): Boolean {
Expand All @@ -116,4 +46,8 @@ public object MySql : DbType("mysql") {
tables.getString("table_schem"),
tables.getString("table_cat"))
}

override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? {
return null
}
}
Loading