
Added JDBC-integration #451


Merged: 63 commits, Oct 6, 2023

Commits
d67e25e
Created a module
zaleslaw Jul 17, 2023
13c0af2
Updated a module with the initial integration and test
zaleslaw Jul 17, 2023
b17fdda
Added a new complex example for reading with Native SQL Query
zaleslaw Jul 17, 2023
56b2484
Added an implementation for a new complex example for reading with Na…
zaleslaw Jul 17, 2023
24b8b2b
Added an implementation for a new complex example for reading with Na…
zaleslaw Jul 17, 2023
e477a74
Added idea for test
zaleslaw Jul 19, 2023
8a9b496
Added mariadb4j integration
zaleslaw Jul 19, 2023
b58fae6
Attempt with test containers
zaleslaw Jul 19, 2023
b97a6b5
Added the H2 support for testing database capabilities
zaleslaw Jul 21, 2023
65810a2
Set up a draft Kotlin logging
zaleslaw Jul 25, 2023
1306bdc
Started with ImportDataSchema changes
zaleslaw Jul 26, 2023
b4acfde
Started with ImportDataSchema changes
zaleslaw Jul 26, 2023
926b540
Added missed dependencies
zaleslaw Jul 28, 2023
bcfe733
Added simple generation for one table
zaleslaw Jul 28, 2023
7b7cb85
Finished simple prototype
zaleslaw Jul 28, 2023
b1ffdb1
Added some minor ideas
zaleslaw Aug 1, 2023
d5ab3a8
Fixed bug in the KNB test
zaleslaw Aug 2, 2023
fe96d02
Fixed bug in the KNB test
zaleslaw Aug 2, 2023
6f8dabf
Add JDBC support to dataframe gradle plugin
zaleslaw Aug 4, 2023
d6dcc9f
Added API methods
zaleslaw Aug 6, 2023
bdb91fc
Finished API methods
zaleslaw Aug 6, 2023
6bc8c3c
Added import data schema annotation support
zaleslaw Aug 6, 2023
81a84de
Added import data schema annotation support
zaleslaw Aug 6, 2023
a7147a3
Added force-classloading for drivers
zaleslaw Aug 7, 2023
0ebe177
Refactored jdbc
zaleslaw Aug 7, 2023
01c4e48
Updated tests
zaleslaw Aug 7, 2023
9bc0ebe
Support schema generation for SqlQuery
zaleslaw Aug 7, 2023
b647ace
Support schema generation for SqlQuery
zaleslaw Aug 7, 2023
24968e5
Added experimental methods
zaleslaw Aug 7, 2023
6498cc0
Added experimental methods
zaleslaw Aug 7, 2023
e8c9179
Added experimental methods
zaleslaw Aug 7, 2023
fd99a14
Added H2 types support
zaleslaw Aug 28, 2023
f9f3bb2
Added SQlite types support
zaleslaw Aug 28, 2023
6dc7e8d
Added initial Postgre test data
zaleslaw Aug 29, 2023
3b76f4c
Fixed PostgreSQL mapping
zaleslaw Aug 31, 2023
f40869f
Made test green
zaleslaw Aug 31, 2023
b19ca39
Added type mapping for mariadb and mysql
zaleslaw Aug 31, 2023
9123779
Add test for mariadb
zaleslaw Aug 31, 2023
9582a66
Added initial support for Mariadb
zaleslaw Sep 1, 2023
0de6f4c
Refactored sealed hierarchy and fixed SQlite tests
zaleslaw Sep 1, 2023
f4ba91d
Updated test and moved coverage above 60 percentage
zaleslaw Sep 5, 2023
ed0d960
Implemented readAllTables function for H2
zaleslaw Sep 6, 2023
4361cf5
Implemented readAllTables for other databases
zaleslaw Sep 7, 2023
9a3a4f7
Added documentation
zaleslaw Sep 8, 2023
db2af8d
Fixed in plugins
zaleslaw Sep 8, 2023
980b231
Merge branch 'master' into issue-212
zaleslaw Sep 8, 2023
4ed5bb4
Fixed in plugins
zaleslaw Sep 8, 2023
d1f3316
Refactored toml
zaleslaw Sep 11, 2023
309883a
Fixed symbol processing plugin
zaleslaw Sep 12, 2023
53e05b4
Added integration test
zaleslaw Sep 12, 2023
89313e8
Fixed inspections
zaleslaw Sep 12, 2023
3db5d28
Added documentation
zaleslaw Sep 13, 2023
8419bb0
Update SQL reading documentation
zaleslaw Sep 13, 2023
f9499b9
Merge branch 'master' into issue-212
zaleslaw Oct 2, 2023
4a4e988
Fixed Review Part 1
zaleslaw Oct 3, 2023
af0fc0a
Merge remote-tracking branch 'fork/issue-212' into issue-212
zaleslaw Oct 3, 2023
4ac9dfd
Fixed Review Part 1
zaleslaw Oct 3, 2023
f427331
Rename test files to match Kotlin conventions and refactor tests
zaleslaw Oct 3, 2023
97f68f6
Refactor readAllTables method name to readAllSqlTables
zaleslaw Oct 3, 2023
3d0906b
Enhance exception messages and add uniqueness check
zaleslaw Oct 4, 2023
4e22936
Added buildTableMetadata method to DbType and handle JSON type issues
zaleslaw Oct 4, 2023
f1763c5
Ignore test cases due to configuration issues.
zaleslaw Oct 6, 2023
42dfe6d
Remove SQL reading features
zaleslaw Oct 6, 2023
1 change: 1 addition & 0 deletions build.gradle.kts
@@ -37,6 +37,7 @@ dependencies {
api(project(":dataframe-arrow"))
api(project(":dataframe-excel"))
api(project(":dataframe-openapi"))
api(project(":dataframe-jdbc"))
}

allprojects {
@@ -8,21 +8,24 @@ import org.jetbrains.kotlinx.dataframe.io.JSON

/**
* Annotation preprocessing will generate a DataSchema interface from the data at `path`.
- * Data must be of supported format: CSV, JSON, Apache Arrow, Excel, OpenAPI (Swagger) in YAML/JSON.
+ * Data must be of supported format: CSV, JSON, Apache Arrow, Excel, OpenAPI (Swagger) in YAML/JSON, JDBC.
* Generated data schema has properties inferred from data and a companion object with `read method`.
* `read method` is either `readCSV` or `readJson` that returns `DataFrame<name>`
*
* @param name name of the generated interface
* @param path URL or relative path to data.
- * if path starts with protocol (http, https, ftp), it's considered a URL. Otherwise, it's treated as relative path.
+ * If a path starts with protocol (http, https, ftp, jdbc), it's considered a URL.
+ * Otherwise, it's treated as a relative path.
* By default, it will be resolved relatively to project dir, i.e. File(projectDir, path)
- * You can configure it by passing `dataframe.resolutionDir` option to preprocessor, see https://kotlinlang.org/docs/ksp-quickstart.html#pass-options-to-processors
+ * You can configure it by passing `dataframe.resolutionDir` option to preprocessor,
+ * see https://kotlinlang.org/docs/ksp-quickstart.html#pass-options-to-processors
* @param visibility visibility of the generated interface.
* @param normalizationDelimiters if not empty, split property names by delimiters,
* lowercase parts and join to camel case. Set empty list to disable normalization
* @param withDefaultPath if `true`, generate `defaultPath` property to the data schema's companion object and make it default argument for a `read method`
* @param csvOptions options to parse CSV data. Not used when data is not Csv
* @param jsonOptions options to parse JSON data. Not used when data is not Json
* @param jdbcOptions options to parse data from a database via JDBC. Not used when data is not stored in the database
*/
@Retention(AnnotationRetention.SOURCE)
@Target(AnnotationTarget.FILE)
@@ -35,6 +38,7 @@ public annotation class ImportDataSchema(
val withDefaultPath: Boolean = true,
val csvOptions: CsvOptions = CsvOptions(','),
val jsonOptions: JsonOptions = JsonOptions(),
val jdbcOptions: JdbcOptions = JdbcOptions(),
)

public enum class DataSchemaVisibility {
@@ -45,6 +49,12 @@ public annotation class CsvOptions(
public val delimiter: Char,
)

public annotation class JdbcOptions(
public val user: String = "", // TODO: I'm not sure about the default parameters
public val password: String = "", // TODO: I'm not sure about the default parameters
public val sqlQuery: String = ""
)

public annotation class JsonOptions(

/** Allows the choice of how to handle type clashes when reading a JSON file. */
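The new `jdbcOptions` parameter plugs into the existing `@ImportDataSchema` entry point. A minimal usage sketch, assuming the annotation shape shown in this diff; the connection URL, credentials, and query below are illustrative values, not taken from the PR:

```kotlin
// Illustrative only: database URL, user, password, and query are made up;
// only the annotation shape comes from this diff.
@file:ImportDataSchema(
    name = "Customer",
    path = "jdbc:mariadb://localhost:3306/testdb",
    jdbcOptions = JdbcOptions(
        user = "root",
        password = "secret",
        sqlQuery = "SELECT id, name FROM customer",
    ),
)

package example

import org.jetbrains.kotlinx.dataframe.annotations.ImportDataSchema
import org.jetbrains.kotlinx.dataframe.annotations.JdbcOptions
```

Because `path` starts with the `jdbc` protocol, the preprocessor would treat it as a URL rather than a file path, per the updated KDoc above.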
@@ -24,6 +24,7 @@ private const val verify = "verify" // cast(true) is obscure, i think it's bette
private const val readCSV = "readCSV"
private const val readTSV = "readTSV"
private const val readJson = "readJson"
private const val readJdbc = "readJdbc"

public abstract class AbstractDefaultReadMethod(
private val path: String?,
@@ -5,7 +5,7 @@ import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import org.jetbrains.kotlinx.dataframe.schema.CompareResult
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema

-internal class DataFrameSchemaImpl(override val columns: Map<String, ColumnSchema>) : DataFrameSchema {
+public class DataFrameSchemaImpl(override val columns: Map<String, ColumnSchema>) : DataFrameSchema {

override fun compare(other: DataFrameSchema): CompareResult {
require(other is DataFrameSchemaImpl)
@@ -169,14 +169,15 @@ internal class Integration(
if (version != null) {
dependencies(
"org.jetbrains.kotlinx:dataframe-excel:$version",
"org.jetbrains.kotlinx:dataframe-jdbc:$version",
"org.jetbrains.kotlinx:dataframe-arrow:$version",
"org.jetbrains.kotlinx:dataframe-openapi:$version",
)
}

try {
setMinimalKernelVersion(MIN_KERNEL_VERSION)
-} catch (_: NoSuchMethodError) { // will be thrown on version < 0.11.0.198
+} catch (_: NoSuchMethodError) { // will be thrown when a version < 0.11.0.198
throw IllegalStateException(
getKernelUpdateMessage(notebook.kernelVersion, MIN_KERNEL_VERSION, notebook.jupyterClientType)
)
@@ -57,7 +57,7 @@ internal class DefaultReadExcelMethod(path: String?) : AbstractDefaultReadMethod
private const val readExcel = "readExcel"

/**
- * @param sheetName sheet to read. By default, first sheet in the document
+ * @param sheetName sheet to read. By default, the first sheet in the document
* @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
* @param skipRows number of rows before header
* @param rowsCount number of rows to read.
@@ -77,7 +77,7 @@ public fun DataFrame.Companion.readExcel(
}

/**
- * @param sheetName sheet to read. By default, first sheet in the document
+ * @param sheetName sheet to read. By default, the first sheet in the document
* @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
* @param skipRows number of rows before header
* @param rowsCount number of rows to read.
@@ -97,7 +97,7 @@ public fun DataFrame.Companion.readExcel(
}

/**
- * @param sheetName sheet to read. By default, first sheet in the document
+ * @param sheetName sheet to read. By default, the first sheet in the document
* @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
* @param skipRows number of rows before header
* @param rowsCount number of rows to read.
@@ -114,7 +114,7 @@ public fun DataFrame.Companion.readExcel(
): AnyFrame = readExcel(asURL(fileOrUrl), sheetName, skipRows, columns, rowsCount, nameRepairStrategy)

/**
- * @param sheetName sheet to read. By default, first sheet in the document
+ * @param sheetName sheet to read. By default, the first sheet in the document
* @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
* @param skipRows number of rows before header
* @param rowsCount number of rows to read.
@@ -134,7 +134,7 @@ public fun DataFrame.Companion.readExcel(
}

/**
- * @param sheetName sheet to read. By default, first sheet in the document
+ * @param sheetName sheet to read. By default, the first sheet in the document
* @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
* @param skipRows number of rows before header
* @param rowsCount number of rows to read.
@@ -446,18 +446,18 @@ private fun Cell.setCellValueByGuessedType(any: Any) {

/**
* Set LocalDateTime value correctly also if date have zero value in Excel.
- * Zero date is usually used fore storing time component only,
- * is displayed as 00.01.1900 in Excel and as 30.12.1899 in LibreOffice Calc and also in POI.
+ * Zero dates are usually used for storing a time component only,
+ * are displayed as 00.01.1900 in Excel and as 30.12.1899 in LibreOffice Calc and also in POI.
* POI can not set 1899 year directly.
*/
private fun Cell.setTime(localDateTime: LocalDateTime) {
this.setCellValue(DateUtil.getExcelDate(localDateTime.plusDays(1)) - 1.0)
}

/**
- * Set Date value correctly also if date have zero value in Excel.
- * Zero date is usually used fore storing time component only,
- * is displayed as 00.01.1900 in Excel and as 30.12.1899 in LibreOffice Calc and also in POI.
+ * Set Date value correctly also if date has zero value in Excel.
+ * Zero dates are usually used for storing a time component only,
+ * are displayed as 00.01.1900 in Excel and as 30.12.1899 in LibreOffice Calc and also in POI.
* POI can not set 1899 year directly.
*/
private fun Cell.setDate(date: Date) {
43 changes: 43 additions & 0 deletions dataframe-jdbc/build.gradle.kts
@@ -0,0 +1,43 @@
plugins {
kotlin("jvm")
kotlin("libs.publisher")
id("org.jetbrains.kotlinx.kover")
kotlin("jupyter.api")
}

group = "org.jetbrains.kotlinx"

val jupyterApiTCRepo: String by project

repositories {
mavenCentral()
maven(jupyterApiTCRepo)
}

dependencies {
api(project(":core"))
implementation(libs.mariadb)
implementation(libs.kotlinLogging)
testImplementation(libs.sqlite)
testImplementation(libs.postgresql)
testImplementation(libs.mysql)
testImplementation(libs.h2db)
testImplementation(libs.junit)
testImplementation(libs.sl4j)
testImplementation(libs.kotestAssertions) {
exclude("org.jetbrains.kotlin", "kotlin-stdlib-jdk8")
}
}

kotlinPublications {
publication {
publicationName.set("dataframeJDBC")
artifactId.set(project.name)
description.set("JDBC support for Kotlin Dataframe")
packageName.set(artifactId)
}
}

kotlin {
explicitApi()
}
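For downstream projects, consuming the new module should follow the same pattern as the sibling artifacts (`dataframe-excel`, `dataframe-arrow`). A hedged sketch of a consumer build script; the `<version>` placeholder and the choice of MariaDB driver are illustrative, not part of this PR:

```kotlin
// build.gradle.kts of a hypothetical consuming project.
dependencies {
    implementation("org.jetbrains.kotlinx:dataframe-jdbc:<version>")
    // A JDBC driver for the target database must also be on the
    // runtime classpath, e.g. for MariaDB:
    runtimeOnly("org.mariadb.jdbc:mariadb-java-client:3.1.4")
}
```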
@@ -0,0 +1,53 @@
package org.jetbrains.kotlinx.dataframe.io

import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.codeGen.AbstractDefaultReadMethod
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod
import org.jetbrains.kotlinx.jupyter.api.Code
import java.io.File
import java.io.InputStream

// TODO: https://github.com/Kotlin/dataframe/issues/450
public class Jdbc : SupportedCodeGenerationFormat, SupportedDataFrameFormat {
public override fun readDataFrame(stream: InputStream, header: List<String>): AnyFrame = DataFrame.readJDBC(stream)

public override fun readDataFrame(file: File, header: List<String>): AnyFrame = DataFrame.readJDBC(file)
override fun readCodeForGeneration(
stream: InputStream,
name: String,
generateHelperCompanionObject: Boolean
): Code {
TODO("Not yet implemented")
}

override fun readCodeForGeneration(
file: File,
name: String,
generateHelperCompanionObject: Boolean
): Code {
TODO("Not yet implemented")
}

override fun acceptsExtension(ext: String): Boolean = ext == "jdbc"

override fun acceptsSample(sample: SupportedFormatSample): Boolean = true // Extension is enough

override val testOrder: Int = 40000

override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod {
return DefaultReadJdbcMethod(pathRepresentation)
}
}

private fun DataFrame.Companion.readJDBC(stream: File): DataFrame<*> {
TODO("Not yet implemented")
}

private fun DataFrame.Companion.readJDBC(stream: InputStream): DataFrame<*> {
TODO("Not yet implemented")
}

internal class DefaultReadJdbcMethod(path: String?) : AbstractDefaultReadMethod(path, MethodArguments.EMPTY, readJDBC)

private const val readJDBC = "readJDBC"
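The `Jdbc` format above is selected purely by the `jdbc` extension/URL prefix; the actual reading entry points are still `TODO` in this file. A sketch of the end-user call this module enables, using the `readSqlTable`/`readAllSqlTables` names from the commit log ("Refactor readAllTables method name to readAllSqlTables"); the exact signatures in the merged code may differ:

```kotlin
// Hypothetical usage; method names come from the commit messages,
// signatures are assumptions.
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
import java.sql.DriverManager

fun main() {
    // H2 in-memory database, as used by the PR's own tests.
    DriverManager.getConnection("jdbc:h2:mem:testdb", "sa", "").use { connection ->
        val df = DataFrame.readSqlTable(connection, "customer")
        println(df)
    }
}
```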
@@ -0,0 +1,44 @@
package org.jetbrains.kotlinx.dataframe.io.db

import org.jetbrains.kotlinx.dataframe.io.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.TableMetadata
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import java.sql.ResultSet

/**
* The `DbType` class represents a database type used for reading dataframe from the database.
*
* @property [dbTypeInJdbcUrl] The name of the database as specified in the JDBC URL.
*/
public abstract class DbType(public val dbTypeInJdbcUrl: String) {
/**
* Converts the data from the given [ResultSet] into the specified [TableColumnMetadata] type.
*
* @param rs The [ResultSet] containing the data to be converted.
* @param tableColumnMetadata The [TableColumnMetadata] representing the target type of the conversion.
* @return The converted data as an instance of [Any].
*/
public abstract fun convertDataFromResultSet(rs: ResultSet, tableColumnMetadata: TableColumnMetadata): Any?

/**
* Returns a [ColumnSchema] produced from [tableColumnMetadata].
*/
public abstract fun toColumnSchema(tableColumnMetadata: TableColumnMetadata): ColumnSchema

/**
* Checks if the given table name is a system table for the specified database type.
*
* @param [tableMetadata] the table object representing the table from the database.
* @return True if the table is a system table for the specified database type, false otherwise.
*/
public abstract fun isSystemTable(tableMetadata: TableMetadata): Boolean

/**
* Builds the table metadata based on the database type and the ResultSet from the query.
*
* @param [tables] the ResultSet containing the table's meta-information.
* @return the TableMetadata object representing the table metadata.
*/
public abstract fun buildTableMetadata(tables: ResultSet): TableMetadata
}
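Supporting a new database means subclassing `DbType` and filling in its four abstract members. An illustrative-only subclass for H2; the diff does not show the fields of `TableColumnMetadata` or `TableMetadata`, so the property names and constructor used below are assumptions:

```kotlin
// Hypothetical H2 implementation; everything beyond the DbType contract
// shown above is assumed.
import org.jetbrains.kotlinx.dataframe.io.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.TableMetadata
import org.jetbrains.kotlinx.dataframe.io.db.DbType
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import java.sql.ResultSet
import kotlin.reflect.typeOf

public object H2Sketch : DbType("h2") {
    override fun convertDataFromResultSet(rs: ResultSet, tableColumnMetadata: TableColumnMetadata): Any? =
        rs.getObject(tableColumnMetadata.name) // assumes a `name` property

    override fun toColumnSchema(tableColumnMetadata: TableColumnMetadata): ColumnSchema =
        ColumnSchema.Value(typeOf<Any?>()) // coarse fallback; real mapping is per SQL type

    override fun isSystemTable(tableMetadata: TableMetadata): Boolean =
        tableMetadata.name.startsWith("INFORMATION_SCHEMA", ignoreCase = true) // assumes a `name` property

    override fun buildTableMetadata(tables: ResultSet): TableMetadata =
        // TABLE_NAME / TABLE_SCHEM / TABLE_CAT are standard columns of
        // DatabaseMetaData.getTables result sets.
        TableMetadata(
            tables.getString("TABLE_NAME"),
            tables.getString("TABLE_SCHEM"),
            tables.getString("TABLE_CAT"),
        )
}
```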