Skip to content

Overload toDataFrame for basic types to avoid surprising results #314

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import org.jetbrains.kotlinx.dataframe.ColumnsSelector
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
import org.jetbrains.kotlinx.dataframe.impl.api.createDataFrameImpl
Expand Down Expand Up @@ -187,6 +188,83 @@ public abstract class CreateDataFrameDsl<T> : TraversePropertiesDsl {

// endregion

// region toDataFrame overloads for built-in types

/*
Without these overloads, Iterable<String>.toDataFrame() produces an unexpected result: a `length` column instead of the string values.


```
val string = listOf("aaa", "aa", null)
string.toDataFrame()
```
=>
length
0 3
1 2
2 null
*/

/**
 * Converts an [Iterable] of [Byte] (or nullable [Byte]) elements into a [DataFrame]
 * whose only declared column, [ValueProperty.value], holds the elements themselves.
 */
@JvmName("toDataFrameByte")
public inline fun <reified B : Byte?> Iterable<B>.toDataFrame(): DataFrame<ValueProperty<B>> {
    // Map every element to itself under the `value` property of the schema.
    val frame = toDataFrame {
        ValueProperty<B>::value from { it }
    }
    return frame.cast()
}

/**
 * Converts an [Iterable] of [Short] (or nullable [Short]) elements into a [DataFrame]
 * whose only declared column, [ValueProperty.value], holds the elements themselves.
 */
@JvmName("toDataFrameShort")
public inline fun <reified S : Short?> Iterable<S>.toDataFrame(): DataFrame<ValueProperty<S>> {
    // Map every element to itself under the `value` property of the schema.
    val frame = toDataFrame {
        ValueProperty<S>::value from { it }
    }
    return frame.cast()
}

/**
 * Converts an [Iterable] of [Int] (or nullable [Int]) elements into a [DataFrame]
 * whose only declared column, [ValueProperty.value], holds the elements themselves.
 */
@JvmName("toDataFrameInt")
public inline fun <reified I : Int?> Iterable<I>.toDataFrame(): DataFrame<ValueProperty<I>> {
    // Map every element to itself under the `value` property of the schema.
    val frame = toDataFrame {
        ValueProperty<I>::value from { it }
    }
    return frame.cast()
}

/**
 * Converts an [Iterable] of [Long] (or nullable [Long]) elements into a [DataFrame]
 * whose only declared column, [ValueProperty.value], holds the elements themselves.
 */
@JvmName("toDataFrameLong")
public inline fun <reified L : Long?> Iterable<L>.toDataFrame(): DataFrame<ValueProperty<L>> {
    // Map every element to itself under the `value` property of the schema.
    val frame = toDataFrame {
        ValueProperty<L>::value from { it }
    }
    return frame.cast()
}

/**
 * Converts an [Iterable] of [String] (or nullable [String]) elements into a [DataFrame]
 * whose only declared column, [ValueProperty.value], holds the strings themselves.
 */
@JvmName("toDataFrameString")
public inline fun <reified S : String?> Iterable<S>.toDataFrame(): DataFrame<ValueProperty<S>> {
    // Map every element to itself under the `value` property of the schema.
    val frame = toDataFrame {
        ValueProperty<S>::value from { it }
    }
    return frame.cast()
}

/**
 * Converts an [Iterable] of [Char] (or nullable [Char]) elements into a [DataFrame]
 * whose only declared column, [ValueProperty.value], holds the elements themselves.
 */
@JvmName("toDataFrameChar")
public inline fun <reified C : Char?> Iterable<C>.toDataFrame(): DataFrame<ValueProperty<C>> {
    // Map every element to itself under the `value` property of the schema.
    val frame = toDataFrame {
        ValueProperty<C>::value from { it }
    }
    return frame.cast()
}

/**
 * Converts an [Iterable] of [Boolean] (or nullable [Boolean]) elements into a [DataFrame]
 * whose only declared column, [ValueProperty.value], holds the elements themselves.
 */
@JvmName("toDataFrameBoolean")
public inline fun <reified B : Boolean?> Iterable<B>.toDataFrame(): DataFrame<ValueProperty<B>> {
    // Map every element to itself under the `value` property of the schema.
    val frame = toDataFrame {
        ValueProperty<B>::value from { it }
    }
    return frame.cast()
}

/**
 * Converts an [Iterable] of [UByte] (or nullable [UByte]) elements into a [DataFrame]
 * whose only declared column, [ValueProperty.value], holds the elements themselves.
 */
@JvmName("toDataFrameUByte")
public inline fun <reified U : UByte?> Iterable<U>.toDataFrame(): DataFrame<ValueProperty<U>> {
    // Map every element to itself under the `value` property of the schema.
    val frame = toDataFrame {
        ValueProperty<U>::value from { it }
    }
    return frame.cast()
}

/**
 * Converts an [Iterable] of [UShort] (or nullable [UShort]) elements into a [DataFrame]
 * whose only declared column, [ValueProperty.value], holds the elements themselves.
 */
@JvmName("toDataFrameUShort")
public inline fun <reified U : UShort?> Iterable<U>.toDataFrame(): DataFrame<ValueProperty<U>> {
    // Map every element to itself under the `value` property of the schema.
    val frame = toDataFrame {
        ValueProperty<U>::value from { it }
    }
    return frame.cast()
}

/**
 * Converts an [Iterable] of [UInt] (or nullable [UInt]) elements into a [DataFrame]
 * whose only declared column, [ValueProperty.value], holds the elements themselves.
 */
@JvmName("toDataFrameUInt")
public inline fun <reified U : UInt?> Iterable<U>.toDataFrame(): DataFrame<ValueProperty<U>> {
    // Map every element to itself under the `value` property of the schema.
    val frame = toDataFrame {
        ValueProperty<U>::value from { it }
    }
    return frame.cast()
}

/**
 * Converts an [Iterable] of [ULong] (or nullable [ULong]) elements into a [DataFrame]
 * whose only declared column, [ValueProperty.value], holds the elements themselves.
 */
@JvmName("toDataFrameULong")
public inline fun <reified U : ULong?> Iterable<U>.toDataFrame(): DataFrame<ValueProperty<U>> {
    // Map every element to itself under the `value` property of the schema.
    val frame = toDataFrame {
        ValueProperty<U>::value from { it }
    }
    return frame.cast()
}

/**
 * Data schema with a single [value] property of type [T].
 *
 * It is the row type of the frames returned by the `Iterable<...>.toDataFrame()` overloads
 * for built-in types declared in this file, which fill [value] with the iterable's elements.
 */
@DataSchema
public interface ValueProperty<T> {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could maybe be moved to a separate file, similar to KeyValueProperty :)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer it here, doesn't have much value on its own i think

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thought the same about KeyValueProperty, but who knows, we cannot know everything a user might want :) And a value-only DataSchema seems to me exactly the sort of thing people might want to use (or create themselves)

/** The element stored in this row. */
public val value: T
}

// region Create DataFrame from Map

public fun Map<String, Iterable<Any?>>.toDataFrame(): AnyFrame {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@ package org.jetbrains.kotlinx.dataframe.api
import io.kotest.matchers.shouldBe
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.alsoDebug
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
import org.jetbrains.kotlinx.dataframe.kind
import org.jetbrains.kotlinx.dataframe.type
import org.junit.Ignore
import org.junit.Test
import kotlin.reflect.typeOf

Expand Down Expand Up @@ -197,4 +199,29 @@ class CreateDataFrameTests {

df["name"][0] shouldBe name
}

/**
 * Checks that `toDataFrame()` on iterables of built-in types produces a frame equal to
 * one with a single `value` column holding the elements.
 */
@Test
fun builtInTypes() {
    // Nullable strings: the expected frame is built from the same elements, including the null.
    val string = listOf("aaa", "aa", null)
    string.toDataFrame() shouldBe dataFrameOf("value")(*string.toTypedArray())

    // Non-nullable ints.
    val int = listOf(1, 2, 3)
    int.toDataFrame() shouldBe dataFrameOf("value")(*int.toTypedArray())
}

/**
 * Disabled helper that prints, for each listed built-in type name, the source text of a
 * `toDataFrame` overload for that type, followed by an empty line.
 */
@Ignore
@Test
fun generateBuiltInsOverrides() {
    val builtIns = listOf(
        "Byte", "Short", "Int", "Long", "String", "Char", "Boolean", "UByte", "UShort", "UInt", "ULong",
    )
    for (type in builtIns) {
        // The generic parameter is named after the first letter of the type name.
        val typeParameter = type.first()
        // All template lines share the same indent on purpose: trimIndent() strips it completely.
        val func = """
            @JvmName("toDataFrame$type")
            public inline fun <reified $typeParameter : $type?> Iterable<$typeParameter>.toDataFrame(): DataFrame<ValueProperty<$typeParameter>> = toDataFrame {
            ValueProperty<$typeParameter>::value from { it }
            }.cast()
        """.trimIndent()
        println(func)
        println()
    }
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package org.jetbrains.kotlinx.dataframe.samples.api

import io.kotest.matchers.shouldBe
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.Infer
import org.jetbrains.kotlinx.dataframe.api.ValueProperty
import org.jetbrains.kotlinx.dataframe.api.add
import org.jetbrains.kotlinx.dataframe.api.column
import org.jetbrains.kotlinx.dataframe.api.columnGroup
Expand All @@ -18,6 +20,7 @@ import org.jetbrains.kotlinx.dataframe.api.sortBy
import org.jetbrains.kotlinx.dataframe.api.toColumn
import org.jetbrains.kotlinx.dataframe.api.toColumnOf
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
import org.jetbrains.kotlinx.dataframe.api.value
import org.jetbrains.kotlinx.dataframe.api.withValues
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
import org.jetbrains.kotlinx.dataframe.kind
Expand Down Expand Up @@ -314,6 +317,16 @@ class Create : TestBase() {
df["age"].type() shouldBe typeOf<Int>()
}

/**
 * Documentation sample: converts a list of strings into a typed [DataFrame] and uses the
 * schema's `value` column in a follow-up `add` call.
 *
 * NOTE(review): the statements between the SampleStart/SampleEnd markers appear to be mirrored
 * into createDataFrame.md (FUN readDataFrameFromValues); keep them unchanged or update both.
 */
@Test
fun readDataFrameFromValues() {
// SampleStart
val names = listOf("Alice", "Bob", "Charlie")
val df: DataFrame<ValueProperty<String>> = names.toDataFrame()
df.add("length") { value.length }
// SampleEnd
// The `value` column must contain the source elements in their original order.
df.value.toList() shouldBe names
}

@Test
fun readDataFrameFromObject() {
// SampleStart
Expand Down
14 changes: 14 additions & 0 deletions docs/StardustDocs/topics/createDataFrame.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,20 @@ map.toDataFrame()

<!---END-->

[`DataFrame`](DataFrame.md) from [`Iterable`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.collections/-iterable/) of [basic types](https://kotlinlang.org/docs/basic-types.html) (except arrays):

The return type of these overloads is a typed DataFrame. Its data schema defines a `value` column that can be used right after conversion for additional computations.

<!---FUN readDataFrameFromValues-->

```kotlin
val names = listOf("Alice", "Bob", "Charlie")
val df: DataFrame<ValueProperty<String>> = names.toDataFrame()
df.add("length") { value.length }
```

<!---END-->

[`DataFrame`](DataFrame.md) from [`Iterable`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.collections/-iterable/) of objects:

<!---FUN readDataFrameFromObject-->
Expand Down