Skip to content

Improve codegen for stdlib <-> df interop workflow #763

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jul 8, 2024
Merged
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package org.jetbrains.kotlinx.dataframe.api

import org.jetbrains.dataframe.impl.codeGen.CodeGenerator
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.codeGen.MarkerVisibility
import org.jetbrains.kotlinx.dataframe.codeGen.NameNormalizer
import org.jetbrains.kotlinx.dataframe.impl.codeGen.from

/**
 * Generates code for this [DataFrame], deriving the marker name from the reified type [T].
 *
 * The name is obtained from `markerName<T>()`; everything else is forwarded unchanged to the
 * `generateCode` overload that takes an explicit marker name.
 *
 * @param fields forwarded as the `fields` argument of the named overload.
 * @param extensionProperties forwarded as the `extensionProperties` argument of the named overload.
 * @return the generated declarations wrapped in a [CodeString].
 */
public inline fun <reified T> DataFrame<T>.generateCode(
    fields: Boolean = true,
    extensionProperties: Boolean = true,
): CodeString = generateCode(
    markerName = markerName<T>(),
    fields = fields,
    extensionProperties = extensionProperties,
)

/**
 * Generates declarations for the schema of this [DataFrame] under the given [markerName].
 *
 * A fresh [CodeGenerator] is created for every call and asked to generate an open
 * (`isOpen = true`) marker; the resulting declarations are returned as a [CodeString].
 *
 * @param markerName name passed to the generator for the generated marker.
 * @param fields passed through to [CodeGenerator] as `fields`.
 * @param extensionProperties passed through to [CodeGenerator] as `extensionProperties`.
 * @param visibility visibility passed through to the generator.
 * @return the generated declarations.
 */
public fun <T> DataFrame<T>.generateCode(
    markerName: String,
    fields: Boolean = true,
    extensionProperties: Boolean = true,
    visibility: MarkerVisibility = MarkerVisibility.IMPLICIT_PUBLIC,
): CodeString {
    val generated = CodeGenerator.create().generate(
        schema = schema(),
        name = markerName,
        fields = fields,
        extensionProperties = extensionProperties,
        isOpen = true,
        visibility = visibility,
    )
    return generated.code.declarations.toCodeString()
}

/**
 * Shortcut for `generateCode` with `fields = true` and `extensionProperties = false`,
 * using the marker name derived from the reified type [T].
 *
 * @return the generated declarations.
 */
public inline fun <reified T> DataFrame<T>.generateInterfaces(): CodeString {
    return generateCode(fields = true, extensionProperties = false)
}

/**
 * Generates data class declarations for the schema of this [DataFrame].
 *
 * The generator is invoked with `fields = true`, `isOpen = false` and `asDataClass = true`.
 *
 * @param markerName name for the generated declaration; when `null`, the name derived from
 *   the reified type [T] via `markerName<T>()` is used.
 * @param extensionProperties passed through to [CodeGenerator] as `extensionProperties`.
 * @param visibility visibility passed through to the generator.
 * @param useFqNames passed to [CodeGenerator.create].
 * @param nameNormalizer passed to the generator as `fieldNameNormalizer`.
 * @return the generated declarations.
 */
public inline fun <reified T> DataFrame<T>.generateDataClasses(
    markerName: String? = null,
    extensionProperties: Boolean = false,
    visibility: MarkerVisibility = MarkerVisibility.IMPLICIT_PUBLIC,
    useFqNames: Boolean = false,
    nameNormalizer: NameNormalizer = NameNormalizer.default,
): CodeString {
    val resolvedName = markerName ?: markerName<T>()
    val generated = CodeGenerator.create(useFqNames).generate(
        schema = schema(),
        name = resolvedName,
        fields = true,
        extensionProperties = extensionProperties,
        isOpen = false,
        visibility = visibility,
        asDataClass = true,
        fieldNameNormalizer = nameNormalizer,
    )
    return generated.code.declarations.toCodeString()
}

/**
 * Picks the marker name used by the code generation helpers for the reified type [T].
 *
 * When the class of [T] is abstract, its simple name is used. In every other case, including
 * an abstract class that has no simple name, the fallback name `"DataEntry"` is returned
 * instead of failing on a null simple name.
 *
 * @return the simple name of [T] or `"DataEntry"`.
 */
@PublishedApi
internal inline fun <reified T> markerName(): String {
    val kClass = T::class
    // simpleName is nullable; fall back to the default name rather than asserting with `!!`.
    return if (kClass.isAbstract) kClass.simpleName ?: "DataEntry" else "DataEntry"
}

/**
 * Shortcut for `generateCode` with the given [markerName], `fields = true` and
 * `extensionProperties = false`.
 *
 * @param markerName name passed on for the generated marker.
 * @return the generated declarations.
 */
public fun <T> DataFrame<T>.generateInterfaces(markerName: String): CodeString {
    return generateCode(
        markerName = markerName,
        fields = true,
        extensionProperties = false,
    )
}

/**
 * Default [NameNormalizer]: converts names delimited by a tab, a space or an underscore
 * (for example `my_name` or `my name`) to camelCase (`myName`).
 *
 * A new normalizer is built from the delimiter set on every access.
 */
public val NameNormalizer.Companion.default: NameNormalizer
    get() {
        val delimiters = setOf('\t', ' ', '_')
        return NameNormalizer.from(delimiters)
    }

/**
 * Inline wrapper around a [String] holding generated code.
 *
 * @property value the wrapped text; [toString] returns it unchanged.
 */
@JvmInline
public value class CodeString(public val value: String) {
    override fun toString(): String {
        return value
    }
}

/** Wraps this string in a [CodeString] without modifying it. */
@PublishedApi
internal fun String.toCodeString(): CodeString {
    return CodeString(value = this)
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,5 @@ public fun <T, G> GroupBy<T, G>.print(): Unit = println(this)
/** Prints this [DataFrameSchema] to standard output using `println`. */
public fun DataFrameSchema.print() {
    println(this)
}

// endregion

/** Prints this [CodeString] to standard output using `println`. */
public fun CodeString.print() {
    println(this)
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ public interface CodeGenerator : ExtensionsCodeGenerator {
knownMarkers: Iterable<Marker> = emptyList(),
readDfMethod: DefaultReadDfMethod? = null,
fieldNameNormalizer: NameNormalizer = NameNormalizer.id(),
asDataClass: Boolean = false
): CodeGenResult

public fun generate(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@ import org.jetbrains.kotlinx.dataframe.impl.codeGen.needsQuoting
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema

/**
 * Sealed set of field type descriptors.
 *
 * Every variant declared here is one of:
 * - `ValueFieldType`, which carries a type name (`typeFqName`);
 * - `FrameFieldType`, which carries a marker name and a `nullable` flag;
 * - `GroupFieldType`, which carries a marker name.
 */
public sealed interface FieldType {
public class ValueFieldType(public val typeFqName: String) : FieldType
public class FrameFieldType(public val markerName: String, public val nullable: Boolean) : FieldType
public class GroupFieldType(public val markerName: String) : FieldType
public data class ValueFieldType(public val typeFqName: String) : FieldType
public data class FrameFieldType(
public val markerName: String,
public val nullable: Boolean,
public val renderAsList: Boolean
) : FieldType
public data class GroupFieldType(public val markerName: String, public val renderAsObject: Boolean) : FieldType
}

/**
Expand Down Expand Up @@ -36,8 +40,8 @@ private fun String.toNullable() = if (this.last() == '?' || this == "*") this el
/**
 * Returns the nullable counterpart of this [FieldType].
 *
 * When [isNotNullable] holds, the same variant is rebuilt with its name (`markerName` or
 * `typeFqName`) passed through `String.toNullable()`, and the remaining properties are copied
 * as they are. Otherwise this instance is returned unchanged.
 */
public fun FieldType.toNullable(): FieldType =
if (isNotNullable()) {
when (this) {
is FieldType.FrameFieldType -> FieldType.FrameFieldType(markerName.toNullable(), nullable)
is FieldType.GroupFieldType -> FieldType.GroupFieldType(markerName.toNullable())
is FieldType.FrameFieldType -> FieldType.FrameFieldType(markerName.toNullable(), nullable, renderAsList)
is FieldType.GroupFieldType -> FieldType.GroupFieldType(markerName.toNullable(), renderAsObject)
is FieldType.ValueFieldType -> FieldType.ValueFieldType(typeFqName.toNullable())
}
} else this
Expand All @@ -55,13 +59,15 @@ public fun FieldType.toNotNullable(): FieldType =
else it.removeSuffix("?")
},
nullable = nullable,
renderAsList
)

is FieldType.GroupFieldType -> FieldType.GroupFieldType(
markerName = markerName.let {
if (it == "*") "Any"
else it.removeSuffix("?")
},
renderAsObject
)

is FieldType.ValueFieldType -> FieldType.ValueFieldType(
Expand All @@ -88,6 +94,10 @@ public class ValidFieldName private constructor(private val identifier: String,
return ValidFieldName(identifier = identifier + other.identifier, needsQuote = needsQuote || other.needsQuote)
}

/** Returns the raw `identifier` of this field name. */
override fun toString(): String = identifier

public companion object {
public fun of(name: String): ValidFieldName {
val needsQuote = name.needsQuoting()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,33 @@ import kotlin.reflect.full.withNullability
import kotlin.reflect.jvm.jvmErasure
import kotlin.reflect.typeOf

internal fun KType.shouldBeConvertedToFrameColumn(): Boolean = when (jvmErasure) {
DataFrame::class -> true
List::class -> arguments[0].type?.jvmErasure?.hasAnnotation<DataSchema>() == true
else -> false
/**
 * Classifies this type into a [FieldKind] based on its JVM erasure. Checks run in this order:
 *
 * 1. [DataFrame] -> [Frame]
 * 2. [List] whose first type argument erases to a class annotated with [DataSchema] -> [ListToFrame]
 * 3. [DataRow] -> [Group]
 * 4. any class annotated with [DataSchema] -> [ObjectToGroup]
 * 5. everything else -> [Default]
 */
internal fun KType.getFieldKind(): FieldKind {
    val erasure = jvmErasure
    return when {
        erasure == DataFrame::class -> Frame

        erasure == List::class &&
            arguments[0].type?.jvmErasure?.hasAnnotation<DataSchema>() == true -> ListToFrame

        erasure == DataRow::class -> Group

        erasure.hasAnnotation<DataSchema>() -> ObjectToGroup

        else -> Default
    }
}

internal fun KType.shouldBeConvertedToColumnGroup(): Boolean = jvmErasure.let {
it == DataRow::class || it.hasAnnotation<DataSchema>()
/**
 * Kind of a property type. Both conversion flags are `false` unless a kind overrides them.
 */
internal sealed interface FieldKind {
    /** `true` for kinds that are converted to a column group. */
    val shouldBeConvertedToColumnGroup: Boolean
        get() = false

    /** `true` for kinds that are converted to a frame column. */
    val shouldBeConvertedToFrameColumn: Boolean
        get() = false
}

/** Kind that is converted to a frame column. */
internal data object Frame : FieldKind {
    override val shouldBeConvertedToFrameColumn: Boolean
        get() = true
}

/** Second kind that is converted to a frame column. */
internal data object ListToFrame : FieldKind {
    override val shouldBeConvertedToFrameColumn: Boolean
        get() = true
}

/** Kind with neither conversion flag set. */
internal data object Default : FieldKind

/** Kind that is converted to a column group. */
internal data object Group : FieldKind {
    override val shouldBeConvertedToColumnGroup: Boolean
        get() = true
}

/** Second kind that is converted to a column group. */
internal data object ObjectToGroup : FieldKind {
    override val shouldBeConvertedToColumnGroup: Boolean
        get() = true
}

private fun String.toNullable(): String = if (endsWith("?")) this else "$this?"
Expand Down Expand Up @@ -62,18 +81,26 @@ internal object MarkersExtractor {
val type = it.returnType
val fieldType: FieldType
val clazz = type.jvmErasure
val fieldKind = type.getFieldKind()
val columnSchema = when {
type.shouldBeConvertedToColumnGroup() -> {
fieldKind.shouldBeConvertedToColumnGroup -> {
val nestedType = if (clazz == DataRow::class) type.arguments[0].type ?: typeOf<Any?>() else type
val marker = get(nestedType.jvmErasure, nullableProperties || type.isMarkedNullable)
fieldType = FieldType.GroupFieldType(marker.name)
fieldType = FieldType.GroupFieldType(
marker.name,
renderAsObject = fieldKind is ObjectToGroup
)
ColumnSchema.Group(marker.schema, nestedType)
}

type.shouldBeConvertedToFrameColumn() -> {
fieldKind.shouldBeConvertedToFrameColumn -> {
val frameType = type.arguments[0].type ?: typeOf<Any?>()
val marker = get(frameType.jvmErasure, nullableProperties || type.isMarkedNullable)
fieldType = FieldType.FrameFieldType(marker.name, type.isMarkedNullable || nullableProperties)
fieldType = FieldType.FrameFieldType(
marker.name,
type.isMarkedNullable || nullableProperties,
renderAsList = fieldKind is ListToFrame
)
ColumnSchema.Frame(marker.schema, type.isMarkedNullable, frameType)
}

Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ import org.jetbrains.kotlinx.dataframe.api.TraversePropertiesDsl
import org.jetbrains.kotlinx.dataframe.api.concat
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.api.toDataFrameFromPairs
import org.jetbrains.kotlinx.dataframe.codeGen.shouldBeConvertedToColumnGroup
import org.jetbrains.kotlinx.dataframe.codeGen.shouldBeConvertedToFrameColumn
import org.jetbrains.kotlinx.dataframe.codeGen.getFieldKind
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
import org.jetbrains.kotlinx.dataframe.impl.asList
import org.jetbrains.kotlinx.dataframe.impl.columnName
Expand Down Expand Up @@ -248,11 +247,12 @@ internal fun convertToDataFrame(
}
}
val kClass = returnType.classifier as KClass<*>
val fieldKind = returnType.getFieldKind()

val shouldCreateValueCol = (
maxDepth <= 0 &&
!returnType.shouldBeConvertedToFrameColumn() &&
!returnType.shouldBeConvertedToColumnGroup()
!fieldKind.shouldBeConvertedToFrameColumn &&
!fieldKind.shouldBeConvertedToColumnGroup
) ||
kClass == Any::class ||
kClass in preserveClasses ||
Expand Down
Loading
Loading