Skip to content

Commit a4490b2

Browse files
committed
Add a public API to create DataFrames with empty columns from a schema
It's needed in Kandy to create a GroupBy with empty groups that can still be aggregated, as shown by the added test
1 parent 1461d27 commit a4490b2

File tree

4 files changed

+74
-0
lines changed
  • core

4 files changed

+74
-0
lines changed

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ import org.jetbrains.kotlinx.dataframe.impl.DataFrameSize
1616
import org.jetbrains.kotlinx.dataframe.impl.getColumnsImpl
1717
import org.jetbrains.kotlinx.dataframe.impl.headPlusArray
1818
import org.jetbrains.kotlinx.dataframe.impl.headPlusIterable
19+
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrame
1920
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrameOf
21+
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
2022
import kotlin.reflect.KType
2123

2224
/**
@@ -32,7 +34,18 @@ public interface DataFrame<out T> : Aggregatable<T>, ColumnsContainer<T> {
3234
/** Shared [AnyFrame] instance constructed from an empty column list and `0` (no columns, presumably zero rows). */
public val Empty: AnyFrame = DataFrameImpl<Unit>(emptyList(), 0)
3335
/**
 * Returns a DataFrame built from an empty column list.
 * When [nrow] is 0 the shared [Empty] instance is returned; otherwise a new instance is created with [nrow].
 */
public fun empty(nrow: Int = 0): AnyFrame = if (nrow == 0) Empty else DataFrameImpl<Unit>(emptyList(), nrow)
3436

37+
/**
38+
* Creates a [DataFrame] typed as [T] with empty columns (rows = 0), built via `createEmptyDataFrameOf(T::class)`.
39+
* Can be used as a "null object" in aggregation operations and in operations that work on columns (select, reorder, ...).
40+
* See [empty] for the overload that takes a [DataFrameSchema] instead of a reified type.
41+
*/
3542
public inline fun <reified T> emptyOf(): DataFrame<T> = createEmptyDataFrameOf(T::class).cast()
43+
44+
/**
45+
* Creates a DataFrame with empty columns (rows = 0) from the given [schema].
46+
* Can be used as a "null object" in aggregation operations and in operations that work on columns (select, reorder, ...),
* e.g. to replace column-less empty groups of a GroupBy so that they can still be aggregated.
*
* @param schema the schema the empty DataFrame is created from (via `schema.createEmptyDataFrame()`).
47+
*/
48+
public fun empty(schema: DataFrameSchema): AnyFrame = schema.createEmptyDataFrame()
3649
}
3750

3851
// region columns

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2383,4 +2383,28 @@ class DataFrameTests : BaseTest() {
23832383
val newName by column<String>()
23842384
typed.select { name into newName and age }.columnNames() shouldBe listOf("newName", "age")
23852385
}
2386+
2387+
// Verifies that a GroupBy containing an empty group can be aggregated once the empty group
// is replaced by `DataFrame.empty(schema)`.
@Test
2388+
fun `api for creating GroupBy with empty groups which can be aggregated using statistics`() {
2389+
val df1 = dataFrameOf("a", "b")(1, "c")
2390+
// DataFrame.empty() is created from an empty column list, so this group has no column "a".
val df2 = DataFrame.empty()
2391+
val groupBy = dataFrameOf(columnOf("group1", "group2") named "group", columnOf(df1, df2)).asGroupBy()
2392+
// Phase 1: aggregating as-is fails, presumably because "a" cannot be resolved in the column-less group.
2393+
val exception = shouldThrow<IllegalStateException> {
2394+
groupBy.aggregate {
2395+
sum("a")
2396+
}
2397+
}
2398+
2399+
exception.message shouldBe "Column [a] not found"
2400+
// Phase 2: swap every empty group for an empty DataFrame created from the schema reported by the groups column.
2401+
val groupBy1 = groupBy
2402+
.updateGroups { if (it.isEmpty()) DataFrame.empty(groupBy.groups.schema.value) else it }
2403+
2404+
val res = groupBy1.aggregate {
2405+
sum("a")
2406+
}
2407+
// The non-empty group sums to 1; the replaced empty group now aggregates to 0 instead of throwing.
2408+
res["aggregated"].values() shouldBe listOf(1, 0)
2409+
}
23862410
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ import org.jetbrains.kotlinx.dataframe.impl.DataFrameSize
1616
import org.jetbrains.kotlinx.dataframe.impl.getColumnsImpl
1717
import org.jetbrains.kotlinx.dataframe.impl.headPlusArray
1818
import org.jetbrains.kotlinx.dataframe.impl.headPlusIterable
19+
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrame
1920
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrameOf
21+
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
2022
import kotlin.reflect.KType
2123

2224
/**
@@ -32,7 +34,18 @@ public interface DataFrame<out T> : Aggregatable<T>, ColumnsContainer<T> {
3234
/** Shared [AnyFrame] instance constructed from an empty column list and `0` (no columns, presumably zero rows). */
public val Empty: AnyFrame = DataFrameImpl<Unit>(emptyList(), 0)
3335
/**
 * Returns a DataFrame built from an empty column list.
 * When [nrow] is 0 the shared [Empty] instance is returned; otherwise a new instance is created with [nrow].
 */
public fun empty(nrow: Int = 0): AnyFrame = if (nrow == 0) Empty else DataFrameImpl<Unit>(emptyList(), nrow)
3436

37+
/**
38+
* Creates a [DataFrame] typed as [T] with empty columns (rows = 0), built via `createEmptyDataFrameOf(T::class)`.
39+
* Can be used as a "null object" in aggregation operations and in operations that work on columns (select, reorder, ...).
40+
* See [empty] for the overload that takes a [DataFrameSchema] instead of a reified type.
41+
*/
3542
public inline fun <reified T> emptyOf(): DataFrame<T> = createEmptyDataFrameOf(T::class).cast()
43+
44+
/**
45+
* Creates a DataFrame with empty columns (rows = 0) from the given [schema].
46+
* Can be used as a "null object" in aggregation operations and in operations that work on columns (select, reorder, ...),
* e.g. to replace column-less empty groups of a GroupBy so that they can still be aggregated.
*
* @param schema the schema the empty DataFrame is created from (via `schema.createEmptyDataFrame()`).
47+
*/
48+
public fun empty(schema: DataFrameSchema): AnyFrame = schema.createEmptyDataFrame()
3649
}
3750

3851
// region columns

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2383,4 +2383,28 @@ class DataFrameTests : BaseTest() {
23832383
val newName by column<String>()
23842384
typed.select { name into newName and age }.columnNames() shouldBe listOf("newName", "age")
23852385
}
2386+
2387+
// Verifies that a GroupBy containing an empty group can be aggregated once the empty group
// is replaced by `DataFrame.empty(schema)`.
@Test
2388+
fun `api for creating GroupBy with empty groups which can be aggregated using statistics`() {
2389+
val df1 = dataFrameOf("a", "b")(1, "c")
2390+
// DataFrame.empty() is created from an empty column list, so this group has no column "a".
val df2 = DataFrame.empty()
2391+
val groupBy = dataFrameOf(columnOf("group1", "group2") named "group", columnOf(df1, df2)).asGroupBy()
2392+
// Phase 1: aggregating as-is fails, presumably because "a" cannot be resolved in the column-less group.
2393+
val exception = shouldThrow<IllegalStateException> {
2394+
groupBy.aggregate {
2395+
sum("a")
2396+
}
2397+
}
2398+
2399+
exception.message shouldBe "Column [a] not found"
2400+
// Phase 2: swap every empty group for an empty DataFrame created from the schema reported by the groups column.
2401+
val groupBy1 = groupBy
2402+
.updateGroups { if (it.isEmpty()) DataFrame.empty(groupBy.groups.schema.value) else it }
2403+
2404+
val res = groupBy1.aggregate {
2405+
sum("a")
2406+
}
2407+
// The non-empty group sums to 1; the replaced empty group now aggregates to 0 instead of throwing.
2408+
res["aggregated"].values() shouldBe listOf(1, 0)
2409+
}
23862410
}

0 commit comments

Comments
 (0)