Skip to content

Public empty(DataFrameSchema) API #452

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ import org.jetbrains.kotlinx.dataframe.impl.DataFrameSize
import org.jetbrains.kotlinx.dataframe.impl.getColumnsImpl
import org.jetbrains.kotlinx.dataframe.impl.headPlusArray
import org.jetbrains.kotlinx.dataframe.impl.headPlusIterable
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrame
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrameOf
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import kotlin.reflect.KType

/**
Expand All @@ -32,7 +34,18 @@ public interface DataFrame<out T> : Aggregatable<T>, ColumnsContainer<T> {
/**
 * Shared DataFrame instance built from an empty column list and a row count of 0.
 * Returned by [empty] when it is called with `nrow == 0`.
 */
public val Empty: AnyFrame = DataFrameImpl<Unit>(emptyList(), 0)

/**
 * Returns a DataFrame built from an empty column list and the given number of rows.
 *
 * @param nrow row count of the resulting frame; for `0` (the default) the shared [Empty] instance is returned
 * instead of a new object.
 */
public fun empty(nrow: Int = 0): AnyFrame = when (nrow) {
    0 -> Empty
    else -> DataFrameImpl<Unit>(emptyList(), nrow)
}

/**
 * Creates a DataFrame with empty columns (rows = 0), typed as [DataFrame] of [T].
 *
 * The frame is obtained by passing `T::class` to `createEmptyDataFrameOf` and casting the result.
 * Can be used as a "null object" in aggregation operations and in operations that work on columns
 * (select, reorder, ...).
 */
public inline fun <reified T> emptyOf(): DataFrame<T> {
    return createEmptyDataFrameOf(T::class).cast()
}

/**
 * Creates a DataFrame with empty columns (rows = 0) for the given [schema].
 *
 * Can be used as a "null object" in aggregation operations and in operations that work on columns
 * (select, reorder, ...).
 *
 * @param schema the schema whose `createEmptyDataFrame()` produces the result.
 * @return the frame returned by `schema.createEmptyDataFrame()`.
 */
public fun empty(schema: DataFrameSchema): AnyFrame {
    return schema.createEmptyDataFrame()
}
}

// region columns
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ import org.jetbrains.kotlinx.dataframe.ncol
import org.jetbrains.kotlinx.dataframe.nrow
import org.jetbrains.kotlinx.dataframe.values

/**
* @property df DataFrame containing [groups] column and key columns. Represents GroupBy.
*/
internal class GroupByImpl<T, G>(
val df: DataFrame<T>,
override val groups: FrameColumn<G>,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2383,4 +2383,28 @@ class DataFrameTests : BaseTest() {
val newName by column<String>()
typed.select { name into newName and age }.columnNames() shouldBe listOf("newName", "age")
}

@Test
fun `api for creating GroupBy with empty groups which can be aggregated using statistics`() {
    // Two groups: one holding a single row with columns "a" and "b", one created by DataFrame.empty().
    val filledGroup = dataFrameOf("a", "b")(1, "c")
    val columnlessGroup = DataFrame.empty()
    val original = dataFrameOf(
        columnOf("group1", "group2") named "group",
        columnOf(filledGroup, columnlessGroup)
    ).asGroupBy()

    // Aggregating the original GroupBy by column "a" is expected to fail with this exact message.
    shouldThrow<IllegalStateException> {
        original.aggregate {
            sum("a")
        }
    }.message shouldBe "Column [a] not found"

    // Swap every empty group for a frame created from the schema of the groups column.
    val patched = original.updateGroups { group ->
        if (group.isEmpty()) DataFrame.empty(original.groups.schema.value) else group
    }

    val aggregated = patched.aggregate {
        sum("a")
    }

    // After the swap, the sum is 1 for the filled group and 0 for the replaced one.
    aggregated["aggregated"].values() shouldBe listOf(1, 0)
}
}
13 changes: 13 additions & 0 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ import org.jetbrains.kotlinx.dataframe.impl.DataFrameSize
import org.jetbrains.kotlinx.dataframe.impl.getColumnsImpl
import org.jetbrains.kotlinx.dataframe.impl.headPlusArray
import org.jetbrains.kotlinx.dataframe.impl.headPlusIterable
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrame
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrameOf
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import kotlin.reflect.KType

/**
Expand All @@ -32,7 +34,18 @@ public interface DataFrame<out T> : Aggregatable<T>, ColumnsContainer<T> {
/**
 * Shared DataFrame instance built from an empty column list and a row count of 0.
 * Returned by [empty] when it is called with `nrow == 0`.
 */
public val Empty: AnyFrame = DataFrameImpl<Unit>(emptyList(), 0)

/**
 * Returns a DataFrame built from an empty column list and the given number of rows.
 *
 * @param nrow row count of the resulting frame; for `0` (the default) the shared [Empty] instance is returned
 * instead of a new object.
 */
public fun empty(nrow: Int = 0): AnyFrame = when (nrow) {
    0 -> Empty
    else -> DataFrameImpl<Unit>(emptyList(), nrow)
}

/**
 * Creates a DataFrame with empty columns (rows = 0), typed as [DataFrame] of [T].
 *
 * The frame is obtained by passing `T::class` to `createEmptyDataFrameOf` and casting the result.
 * Can be used as a "null object" in aggregation operations and in operations that work on columns
 * (select, reorder, ...).
 */
public inline fun <reified T> emptyOf(): DataFrame<T> {
    return createEmptyDataFrameOf(T::class).cast()
}

/**
 * Creates a DataFrame with empty columns (rows = 0) for the given [schema].
 *
 * Can be used as a "null object" in aggregation operations and in operations that work on columns
 * (select, reorder, ...).
 *
 * @param schema the schema whose `createEmptyDataFrame()` produces the result.
 * @return the frame returned by `schema.createEmptyDataFrame()`.
 */
public fun empty(schema: DataFrameSchema): AnyFrame {
    return schema.createEmptyDataFrame()
}
}

// region columns
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ import org.jetbrains.kotlinx.dataframe.ncol
import org.jetbrains.kotlinx.dataframe.nrow
import org.jetbrains.kotlinx.dataframe.values

/**
* @property df DataFrame containing [groups] column and key columns. Represents GroupBy.
*/
internal class GroupByImpl<T, G>(
val df: DataFrame<T>,
override val groups: FrameColumn<G>,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2383,4 +2383,28 @@ class DataFrameTests : BaseTest() {
val newName by column<String>()
typed.select { name into newName and age }.columnNames() shouldBe listOf("newName", "age")
}

@Test
fun `api for creating GroupBy with empty groups which can be aggregated using statistics`() {
    // Two groups: one holding a single row with columns "a" and "b", one created by DataFrame.empty().
    val filledGroup = dataFrameOf("a", "b")(1, "c")
    val columnlessGroup = DataFrame.empty()
    val original = dataFrameOf(
        columnOf("group1", "group2") named "group",
        columnOf(filledGroup, columnlessGroup)
    ).asGroupBy()

    // Aggregating the original GroupBy by column "a" is expected to fail with this exact message.
    shouldThrow<IllegalStateException> {
        original.aggregate {
            sum("a")
        }
    }.message shouldBe "Column [a] not found"

    // Swap every empty group for a frame created from the schema of the groups column.
    val patched = original.updateGroups { group ->
        if (group.isEmpty()) DataFrame.empty(original.groups.schema.value) else group
    }

    val aggregated = patched.aggregate {
        sum("a")
    }

    // After the swap, the sum is 1 for the filled group and 0 for the replaced one.
    aggregated["aggregated"].values() shouldBe listOf(1, 0)
}
}