Skip to content

explode docs and tests #1291

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 162 additions & 6 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources
import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns
import org.jetbrains.kotlinx.dataframe.impl.api.explodeImpl
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
import kotlin.reflect.KProperty
Expand All @@ -19,13 +22,82 @@ private val defaultExplodeColumns: ColumnsSelector<*, *> = {
}

// region explode DataFrame

/**
* Splits list-like values in the specified [\columns] and spreads them vertically —
* that is, it adds a separate row for each element (one value per row).
* Values in all other columns are duplicated to preserve row context.
*
* If no [\columns] are specified, all columns (at any depth) containing
* [List] or [DataFrame] values will be exploded.
*
* If [dropEmpty] is `true`, rows with empty lists or [DataFrame]s will be removed.
* If `false`, such rows will be exploded into `null` values.
*
* Returns a new [DataFrame] with exploded columns.
*
* Each exploded column will have a new type (`List<T>` → `T`).
* When several columns are exploded in one operation, lists in different columns will be aligned.
*
* This operation is the reverse of [implode].
*
* @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
*
* For more information, see: {@include [DocumentationUrls.Explode]}
*
* ### This `explode` overload
*/
@ExcludeFromSources
internal interface ExplodeDocs
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

very clear :)


/**
* {@include [ExplodeDocs]}
* {@include [SelectingColumns.Dsl]}
*
* #### Examples
*
* ```kotlin
* // Explodes all `List` and `DataFrame` columns at any depth
* df.explode()
*
* // Explodes the "tags" column of type `List<String>`
* df.explode { tags }
*
* // Explodes all columns of type `List<Double>`
* df.explode { colsOf<List<Double>>() }
* ```
*
* @param dropEmpty If `true`, removes rows with empty lists or DataFrames.
* If `false`, such rows will be exploded into `null` values.
* @param columns The [ColumnsSelector] used to select columns to explode.
* If not specified, all applicable columns will be exploded.
* @return A new [DataFrame] with exploded columns.
*/
@Refine
@Interpretable("Explode0")
public fun <T> DataFrame<T>.explode(
dropEmpty: Boolean = true,
selector: ColumnsSelector<T, *> = defaultExplodeColumns,
): DataFrame<T> = explodeImpl(dropEmpty, selector)

columns: ColumnsSelector<T, *> = defaultExplodeColumns,
): DataFrame<T> = explodeImpl(dropEmpty, columns)

/**
 * {@include [ExplodeDocs]}
 * {@include [SelectingColumns.ColumnNames]}
 *
 * #### Example
 *
 * ```kotlin
 * // "tags" holds `List<String>` values and "scores" holds `List<Int>` values;
 * // both columns are exploded in a single call
 * val exploded = df.explode("tags", "scores")
 * ```
 *
 * @param dropEmpty When `true`, rows with empty lists or DataFrames are removed.
 * When `false`, such rows are exploded into `null` values.
 * @param columns The [column names][String] that select the columns to explode.
 * If not specified, all applicable columns will be exploded.
 * @return A new [DataFrame] with exploded columns.
 */
public fun <T> DataFrame<T>.explode(vararg columns: String, dropEmpty: Boolean = true): DataFrame<T> {
    // Turn the names into a column set and delegate to the selector-based overload.
    return explode(dropEmpty = dropEmpty) { columns.toColumnSet() }
}

Expand All @@ -43,11 +115,73 @@ public fun <T, C> DataFrame<T>.explode(vararg columns: KProperty<C>, dropEmpty:

// region explode DataRow

/**
* Splits list-like values in the specified [\columns] of this [DataRow] and spreads them vertically —
* that is, it adds a separate row for each element (one value per row)
* and combines them into a new [DataFrame].
* Values in all other columns are duplicated to preserve row context.
*
* If no [\columns] are specified, all columns (at any depth) containing
* [List] or [DataFrame] values will be exploded.
*
* If [dropEmpty] is `true`, the result will exclude rows with empty lists or DataFrames.
* If `false`, such values will be exploded into `null`.
*
* Returns a new [DataFrame] expanded into multiple rows based on the exploded columns.
*
* Each exploded column will have a new type (`List<T>` → `T`).
* When several columns are exploded in one operation, lists in different columns will be aligned.
*
* @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
*
* For more information, see: {@include [DocumentationUrls.Explode]}
*
* ### This `explode` overload
*/
@ExcludeFromSources
internal interface ExplodeDataRowDocs

/**
* {@include [ExplodeDataRowDocs]}
* {@include [SelectingColumns.Dsl]}
*
* #### Example
*
* ```kotlin
* // Explodes the `hobbies` and `scores` values of the row,
* // of types `List<String>` and `List<Int>`, respectively
* row.explode { hobbies and scores }
* ```
*
* @param dropEmpty If `true`, removes rows with empty lists or DataFrames.
* If `false`, such rows will be exploded into `null` values.
* @param columns The [ColumnsSelector] used to select columns to explode.
* If not specified, all applicable columns will be exploded.
* @return A new [DataFrame] with exploded columns from this [DataRow].
*/
public fun <T> DataRow<T>.explode(
dropEmpty: Boolean = true,
selector: ColumnsSelector<T, *> = defaultExplodeColumns,
): DataFrame<T> = toDataFrame().explode(dropEmpty, selector)

columns: ColumnsSelector<T, *> = defaultExplodeColumns,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While renaming an argument might not break binary compatibility, it will break source compatibility in some cases. However, as it's almost never named explicitly I think we can let it slide :) This Kotlinconf talk mentions something about compatibility levels if you're interested in them https://youtu.be/JKLqQiYh8GQ?si=gF5LkFq5vAPAxSIG&t=358

): DataFrame<T> = toDataFrame().explode(dropEmpty, columns)

/**
 * {@include [ExplodeDataRowDocs]}
 * {@include [SelectingColumns.ColumnNames]}
 *
 * #### Example
 *
 * ```kotlin
 * // `hobbies` is a `List<String>` value and `scores` is a `List<Int>` value of the row;
 * // both are exploded in a single call
 * row.explode("hobbies", "scores")
 * ```
 *
 * @param dropEmpty When `true`, rows with empty lists or DataFrames are removed.
 * When `false`, such rows are exploded into `null` values.
 * @param columns The [column names][String] that select the columns to explode.
 * If not specified, all applicable columns will be exploded.
 * @return A new [DataFrame] with exploded columns from this [DataRow].
 */
public fun <T> DataRow<T>.explode(vararg columns: String, dropEmpty: Boolean = true): DataFrame<T> {
    // Turn the names into a column set and delegate to the selector-based overload.
    return explode(dropEmpty = dropEmpty) { columns.toColumnSet() }
}

Expand All @@ -65,9 +199,31 @@ public fun <T, C> DataRow<T>.explode(vararg columns: KProperty<C>, dropEmpty: Bo

// region explode DataColumn

/**
 * Spreads the list-like values of this [DataColumn] vertically:
 * every element of every collection ends up in a row of its own (one value per row).
 *
 * The result is a new [DataColumn] whose type is the element type (`List<T>` → `T`).
 *
 * For more information, see: {@include [DocumentationUrls.Explode]}
 *
 * @return A new [DataColumn] with exploded values.
 */
@JvmName("explodeList")
public fun <T> DataColumn<Collection<T>>.explode(): DataColumn<T> {
    val flattened = explodeImpl()
    return flattened as DataColumn<T>
}

/**
 * Turns a [DataColumn] holding [DataFrame] values into one [ColumnGroup].
 *
 * The nested [DataFrame]s are concatenated, and the combined columns are wrapped
 * in a column group that carries the name of the original column.
 * The result has as many rows as all nested DataFrames have in total.
 *
 * For more information, see: {@include [DocumentationUrls.Explode]}
 *
 * @return A [ColumnGroup] containing the concatenated contents of all nested DataFrames.
 */
@JvmName("explodeFrames")
public fun <T> DataColumn<DataFrame<T>>.explode(): ColumnGroup<T> {
    val merged = concat()
    return merged.asColumnGroup(name())
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,8 @@ public fun <T, C, K, R> Gather<T, C?, K, R>.notNull(): Gather<T, C, K, R> = wher
* .mapValues { (it + 0.5).toFloat() }
* .into("series", "value")
* ```
*
* @see [explode]
* @return A new [Gather] instance with exploded list values.
*/
@Interpretable("GatherExplodeLists")
Expand Down Expand Up @@ -296,6 +298,8 @@ public fun <T, C, K, R> Gather<T, C, K, R>.explodeLists(): Gather<T, C, K, R> =
* .mapValues { (it + 0.5).toFloat() }
* .into("series", "value")
* ```
*
* @see [explode]
* @return A new [Gather] instance with exploded list values.
*/
@JvmName("explodeListsTyped")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,4 +113,7 @@ internal interface DocumentationUrls {

/** [See `filter` on the documentation website.]({@include [Url]}/filter.html) */
interface Filter

/** [See `explode` on the documentation website.]({@include [Url]}/explode.html) */
interface Explode
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,92 @@ class ExplodeTests {
val df = dataFrameOf("a", "b")(1, 2)
df.explode() shouldBe df
}

@Test
fun `explode multiple aligned columns`() {
    // The delegate names ("a", "b") become the column names, so they must stay as they are.
    val a by columnOf(listOf(1, 2), listOf(3, 4, 5))
    val b by columnOf(listOf(1, 2, 3), listOf(4, 5))

    // Lists of different length in the same row are aligned; the shorter one is padded with null.
    val expected = dataFrameOf("a", "b")(
        1, 1,
        2, 2,
        null, 3,
        3, 4,
        4, 5,
        5, null,
    )

    val actual = dataFrameOf(a, b).explode { a and b }

    actual shouldBe expected
}

@Test
fun `explode with empty list and dropEmpty true`() {
    val source = dataFrameOf("a", "b")(
        1, listOf(1, 2),
        2, emptyList<Int>(),
        3, listOf(3),
    )

    // The row with a = 2 holds an empty list, so it is expected to disappear.
    val expected = dataFrameOf("a", "b")(
        1, 1,
        1, 2,
        3, 3,
    )

    source.explode(dropEmpty = true) shouldBe expected
}

@Test
fun `explode with empty list and dropEmpty false`() {
    val source = dataFrameOf("a", "b")(
        1, listOf(1, 2),
        2, emptyList<Int>(),
        3, listOf(3),
    )

    // The row with a = 2 holds an empty list, so it is expected to survive with a null value.
    val expected = dataFrameOf("a", "b")(
        1, 1,
        1, 2,
        2, null,
        3, 3,
    )

    source.explode(dropEmpty = false) shouldBe expected
}

@Test
fun `explode DataColumn of lists`() {
    // The delegate name ("col") becomes the column name, so it must stay as it is.
    val col by columnOf(listOf(1, 2), listOf(3, 4))
    val expected = columnOf(1, 2, 3, 4) named "col"

    col.explode() shouldBe expected
}

@Test
fun `explode FrameColumn into ColumnGroup`() {
    val firstFrame = dataFrameOf("x", "y")(1, 2, 3, 4)
    val secondFrame = dataFrameOf("x", "y")(5, 6, 7, 8)

    // The delegate name ("col") becomes the column name, so it must stay as it is.
    val col by columnOf(firstFrame, secondFrame)

    // All rows of both nested frames, in order, grouped under the original column name.
    val expected = dataFrameOf("x", "y")(
        1, 2,
        3, 4,
        5, 6,
        7, 8,
    ).asColumnGroup("col")

    col.explode() shouldBe expected
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ import org.jetbrains.kotlinx.dataframe.plugin.impl.simpleColumnOf
internal class Explode0 : AbstractInterpreter<PluginDataFrameSchema>() {
val Arguments.dropEmpty: Boolean by arg(defaultValue = Present(true))
val Arguments.receiver: PluginDataFrameSchema by dataFrame()
val Arguments.selector: ColumnsResolver? by arg(defaultValue = Present(null))
val Arguments.columns: ColumnsResolver? by arg(defaultValue = Present(null))
override val Arguments.startingSchema get() = receiver

override fun Arguments.interpret(): PluginDataFrameSchema {
val columns = selector ?: object : ColumnsResolver {
val columns = columns ?: object : ColumnsResolver {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@AndreiKingsley @koperagen make sure to sync this with the actual compiler plugin!

override fun resolve(df: PluginDataFrameSchema): List<ColumnWithPathApproximation> {
return df.flatten(includeFrames = false).filter {
val column = it.column
Expand Down