Skip to content

To dataframe improvements #1081

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Mar 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions core/api/core.api
Original file line number Diff line number Diff line change
Expand Up @@ -5396,6 +5396,8 @@ public final class org/jetbrains/kotlinx/dataframe/impl/api/SchemaKt {
public final class org/jetbrains/kotlinx/dataframe/impl/api/ToDataFrameKt {
public static final fun convertToDataFrame (Ljava/lang/Iterable;Lkotlin/reflect/KClass;Ljava/util/List;Ljava/util/Set;Ljava/util/Set;Ljava/util/Set;I)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun createDataFrameImpl (Ljava/lang/Iterable;Lkotlin/reflect/KClass;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun getHasProperties (Lkotlin/reflect/KClass;)Z
public static final fun isValueType (Lkotlin/reflect/KClass;)Z
}

public final class org/jetbrains/kotlinx/dataframe/impl/api/ToSequenceKt {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
import org.jetbrains.kotlinx.dataframe.impl.api.createDataFrameImpl
import org.jetbrains.kotlinx.dataframe.impl.api.hasProperties
import org.jetbrains.kotlinx.dataframe.impl.api.isValueType
import org.jetbrains.kotlinx.dataframe.impl.asList
import org.jetbrains.kotlinx.dataframe.impl.columnName
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType
Expand All @@ -26,7 +28,15 @@ import kotlin.reflect.KProperty
@Interpretable("toDataFrameDefault")
public inline fun <reified T> Iterable<T>.toDataFrame(): DataFrame<T> =
    toDataFrame {
        // Decide once, from the reified element type, how the frame is laid out.
        // `properties()` must only be called in the `else` branch: calling it
        // unconditionally would generate property-based columns for value types too
        // (e.g. a `length` column for strings) and make the check below pointless.
        if (T::class.isValueType || !T::class.hasProperties) {
            // value type (primitives, primitive arrays, datetime types etc.)
            // or a type without properties: create a single `value` column
            ValueProperty<T>::value from { it }
        } else {
            // otherwise create columns based on the properties of T
            properties()
        }
    }

@Refine
Expand Down Expand Up @@ -217,99 +227,6 @@ public inline fun <reified T> Iterable<T>.toDataFrame(columnName: String): DataF

// region toDataFrame overloads for built-in types

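/*
Without these overloads, `Iterable<String>.toDataFrame()` produces an unexpected result:
the columns come from the properties of `String` (here `length`) instead of the values themselves.

```
val string = listOf("aaa", "aa", null)
string.toDataFrame()
```
=>
  | length
0 | 3
1 | 2
2 | null
*/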

/**
 * Wraps the [Byte] elements of this iterable into a data frame
 * with the single column described by [ValueProperty.value].
 */
@JvmName("toDataFrameByte")
public inline fun <reified B : Byte?> Iterable<B>.toDataFrame(): DataFrame<ValueProperty<B>> {
    val frame = toDataFrame { ValueProperty<B>::value from { it } }
    return frame.cast()
}

/**
 * Wraps the [Short] elements of this iterable into a data frame
 * with the single column described by [ValueProperty.value].
 */
@JvmName("toDataFrameShort")
public inline fun <reified S : Short?> Iterable<S>.toDataFrame(): DataFrame<ValueProperty<S>> {
    val frame = toDataFrame { ValueProperty<S>::value from { it } }
    return frame.cast()
}

/**
 * Wraps the [Int] elements of this iterable into a data frame
 * with the single column described by [ValueProperty.value].
 */
@JvmName("toDataFrameInt")
public inline fun <reified I : Int?> Iterable<I>.toDataFrame(): DataFrame<ValueProperty<I>> {
    val frame = toDataFrame { ValueProperty<I>::value from { it } }
    return frame.cast()
}

/**
 * Wraps the [Long] elements of this iterable into a data frame
 * with the single column described by [ValueProperty.value].
 */
@JvmName("toDataFrameLong")
public inline fun <reified L : Long?> Iterable<L>.toDataFrame(): DataFrame<ValueProperty<L>> {
    val frame = toDataFrame { ValueProperty<L>::value from { it } }
    return frame.cast()
}

/**
 * Wraps the [String] elements of this iterable into a data frame
 * with the single column described by [ValueProperty.value].
 */
@JvmName("toDataFrameString")
public inline fun <reified S : String?> Iterable<S>.toDataFrame(): DataFrame<ValueProperty<S>> {
    val frame = toDataFrame { ValueProperty<S>::value from { it } }
    return frame.cast()
}

/**
 * Wraps the [Char] elements of this iterable into a data frame
 * with the single column described by [ValueProperty.value].
 */
@JvmName("toDataFrameChar")
public inline fun <reified C : Char?> Iterable<C>.toDataFrame(): DataFrame<ValueProperty<C>> {
    val frame = toDataFrame { ValueProperty<C>::value from { it } }
    return frame.cast()
}

/**
 * Wraps the [Boolean] elements of this iterable into a data frame
 * with the single column described by [ValueProperty.value].
 */
@JvmName("toDataFrameBoolean")
public inline fun <reified B : Boolean?> Iterable<B>.toDataFrame(): DataFrame<ValueProperty<B>> {
    val frame = toDataFrame { ValueProperty<B>::value from { it } }
    return frame.cast()
}

/**
 * Wraps the [Float] elements of this iterable into a data frame
 * with the single column described by [ValueProperty.value].
 */
@JvmName("toDataFrameFloat")
public inline fun <reified F : Float?> Iterable<F>.toDataFrame(): DataFrame<ValueProperty<F>> {
    val frame = toDataFrame { ValueProperty<F>::value from { it } }
    return frame.cast()
}

/**
 * Wraps the [Double] elements of this iterable into a data frame
 * with the single column described by [ValueProperty.value].
 */
@JvmName("toDataFrameDouble")
public inline fun <reified D : Double?> Iterable<D>.toDataFrame(): DataFrame<ValueProperty<D>> {
    val frame = toDataFrame { ValueProperty<D>::value from { it } }
    return frame.cast()
}

/**
 * Wraps the [UByte] elements of this iterable into a data frame
 * with the single column described by [ValueProperty.value].
 */
@JvmName("toDataFrameUByte")
public inline fun <reified U : UByte?> Iterable<U>.toDataFrame(): DataFrame<ValueProperty<U>> {
    val frame = toDataFrame { ValueProperty<U>::value from { it } }
    return frame.cast()
}

/**
 * Wraps the [UShort] elements of this iterable into a data frame
 * with the single column described by [ValueProperty.value].
 */
@JvmName("toDataFrameUShort")
public inline fun <reified U : UShort?> Iterable<U>.toDataFrame(): DataFrame<ValueProperty<U>> {
    val frame = toDataFrame { ValueProperty<U>::value from { it } }
    return frame.cast()
}

/**
 * Wraps the [UInt] elements of this iterable into a data frame
 * with the single column described by [ValueProperty.value].
 */
@JvmName("toDataFrameUInt")
public inline fun <reified U : UInt?> Iterable<U>.toDataFrame(): DataFrame<ValueProperty<U>> {
    val frame = toDataFrame { ValueProperty<U>::value from { it } }
    return frame.cast()
}

/**
 * Wraps the [ULong] elements of this iterable into a data frame
 * with the single column described by [ValueProperty.value].
 */
@JvmName("toDataFrameULong")
public inline fun <reified U : ULong?> Iterable<U>.toDataFrame(): DataFrame<ValueProperty<U>> {
    val frame = toDataFrame { ValueProperty<U>::value from { it } }
    return frame.cast()
}

@DataSchema
public interface ValueProperty<T> {
public val value: T
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ import org.jetbrains.kotlinx.dataframe.impl.projectUpTo
import org.jetbrains.kotlinx.dataframe.impl.schema.sortWithConstructor
import java.lang.reflect.InvocationTargetException
import java.lang.reflect.Method
import java.time.temporal.Temporal
import java.time.temporal.TemporalAccessor
import java.time.temporal.TemporalAmount
import kotlin.reflect.KCallable
import kotlin.reflect.KClass
import kotlin.reflect.KProperty
Expand All @@ -37,23 +38,51 @@ import kotlin.reflect.jvm.isAccessible
import kotlin.reflect.jvm.javaField
import kotlin.reflect.typeOf

// non-standard value types (not supertypes, but exact types)
private val valueTypes = setOf(
Any::class,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, doesn't `Any` cause issues with other classes? I guess this doesn't mean "a subtype of" but rather the exact type? Maybe mention this in the KDoc for our future selves.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

User can provide Iterable<Any?>.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And yeah, these are exact types.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So now, if a user has something like `listOf(cat, dog, house, table).toDataFrame()`, they will get a `value` column, right?

Unit::class,
Char::class,
UByte::class,
UShort::class,
UInt::class,
ULong::class,
String::class,
Boolean::class,
kotlin.time.Duration::class,
kotlinx.datetime.LocalDate::class,
kotlinx.datetime.LocalDateTime::class,
kotlinx.datetime.Instant::class,
kotlinx.datetime.TimeZone::class,
kotlinx.datetime.DateTimePeriod::class,
kotlinx.datetime.DateTimeUnit::class,
)

/**
 * Checks if `KClass` is a value type (number, datetime, string, etc.)
 *
 * The result is `true` when the class is contained in `valueTypes`, is a subclass of
 * [Number], [Enum], [TemporalAccessor] or [TemporalAmount], or reports `isArray`.
 *
 * Should be aligned with `ConeKotlinType.isValueType()` in
 * plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/toDataFrame.kt
 */
@PublishedApi
internal val KClass<*>.isValueType: Boolean
    get() =
        this in valueTypes ||
            this.isSubclassOf(Number::class) ||
            this.isSubclassOf(Enum::class) ||
            // all java datetime types; `java.time.temporal.Temporal` extends `TemporalAccessor`,
            // so it is already covered here and needs no separate check
            this.isSubclassOf(TemporalAccessor::class) ||
            this.isSubclassOf(TemporalAmount::class) ||
            this.isArray

/**
 * Tells whether this `KClass` exposes at least one public member property or,
 * failing that, at least one public getter-like member function (pojo-like classes).
 */
@PublishedApi
internal val KClass<*>.hasProperties: Boolean
    get() {
        val hasPublicProperty = memberProperties.any { property -> property.visibility == KVisibility.PUBLIC }
        if (hasPublicProperty) return true
        // pojo-like classes: look for public getter-like functions instead
        return memberFunctions.any { function ->
            function.visibility == KVisibility.PUBLIC && function.isGetterLike()
        }
    }

internal class CreateDataFrameDslImpl<T>(
override val source: Iterable<T>,
private val clazz: KClass<*>,
Expand Down
Loading