Skip to content

update groupBy documentation #465

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,16 @@ import kotlin.reflect.KProperty

// region DataFrame

/**
* Groups this [DataFrame] by the key columns selected in [cols] and returns the resulting [GroupBy].
*
* Thin wrapper: both arguments are forwarded unchanged to `groupByImpl`.
*
* @param moveToTop passed through to `groupByImpl`; defaults to `true`.
* NOTE(review): presumably controls whether key columns taken from inside column groups are moved
* to the top level — confirm against `groupByImpl`.
* @param cols key columns; a column for grouping can be created in place
*
* `df.groupBy { expr("columnName") { "someColumn"<Int>() + 15 } }`
*
* is equivalent to
*
* `df.add("columnName") { "someColumn"<Int>() + 15 }.groupBy("columnName")`
* @return the [GroupBy] produced by `groupByImpl`
*/
public fun <T> DataFrame<T>.groupBy(moveToTop: Boolean = true, cols: ColumnsSelector<T, *>): GroupBy<T, T> =
groupByImpl(moveToTop, cols)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -254,17 +254,15 @@ public fun <T> DataFrame<T>.html(): String = toStandaloneHTML().toString()
public fun <T> DataFrame<T>.toStandaloneHTML(
configuration: DisplayConfiguration = DisplayConfiguration.DEFAULT,
cellRenderer: CellRenderer = DefaultCellRenderer,
includeStatic: Boolean = true,
getFooter: (DataFrame<T>) -> String? = { "DataFrame [${it.size}]" },
): DataFrameHtmlData = toHTML(configuration, cellRenderer, includeStatic, getFooter).withTableDefinitions()
): DataFrameHtmlData = toHTML(configuration, cellRenderer, getFooter).withTableDefinitions()

/**
* @return DataFrameHtmlData without additional definitions. Can be rendered in Jupyter kernel environments
*/
public fun <T> DataFrame<T>.toHTML(
configuration: DisplayConfiguration = DisplayConfiguration.DEFAULT,
cellRenderer: CellRenderer = DefaultCellRenderer,
includeStatic: Boolean = true,
getFooter: (DataFrame<T>) -> String? = { "DataFrame [${it.size}]" },
): DataFrameHtmlData {
val limit = configuration.rowsLimit ?: Int.MAX_VALUE
Expand All @@ -285,7 +283,7 @@ public fun <T> DataFrame<T>.toHTML(

var tableHtml = toHtmlData(configuration, cellRenderer)

if (includeStatic) {
if (configuration.enableFallbackStaticTables) {
tableHtml += toStaticHtml(configuration, DefaultCellRenderer)
}

Expand Down Expand Up @@ -368,6 +366,8 @@ public data class DataFrameHtmlData(
/**
* @param rowsLimit null to disable rows limit
* @param cellContentLimit -1 to disable content trimming
* @param enableFallbackStaticTables true to add an additional pure-HTML table that is visible only when JS is disabled,
* for example when *.ipynb files with outputs are hosted on GitHub
*/
public data class DisplayConfiguration(
var rowsLimit: Int? = 20,
Expand All @@ -378,6 +378,7 @@ public data class DisplayConfiguration(
var isolatedOutputs: Boolean = flagFromEnv("LETS_PLOT_HTML_ISOLATED_FRAME"),
internal val localTesting: Boolean = flagFromEnv("KOTLIN_DATAFRAME_LOCAL_TESTING"),
var useDarkColorScheme: Boolean = false,
var enableFallbackStaticTables: Boolean = true,
) {
public companion object {
public val DEFAULT: DisplayConfiguration = DisplayConfiguration()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ import org.jetbrains.kotlinx.dataframe.api.where
import org.jetbrains.kotlinx.dataframe.api.with
import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
import org.jetbrains.kotlinx.dataframe.io.DataFrameHtmlData
import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration
import org.jetbrains.kotlinx.dataframe.io.sessionId
import org.jetbrains.kotlinx.dataframe.io.tableInSessionId
import org.jetbrains.kotlinx.dataframe.io.toHTML
Expand Down Expand Up @@ -244,8 +243,8 @@ object PluginCallbackProxy : PluginCallback {
}

private fun convertToHTML(dataframeLike: Any): DataFrameHtmlData {
fun DataFrame<*>.toHTML() = toHTML(DisplayConfiguration(), getFooter = { "" })
fun FormattedFrame<*>.toHTML1() = toHTML(DisplayConfiguration())
fun DataFrame<*>.toHTML() = toHTML(SamplesDisplayConfiguration, getFooter = { "" })
fun FormattedFrame<*>.toHTML1() = toHTML(SamplesDisplayConfiguration)

return when (dataframeLike) {
is Pivot<*> -> dataframeLike.frames().toDataFrame().toHTML()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package org.jetbrains.kotlinx.dataframe.explainer

import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration

/**
 * A [DisplayConfiguration] with `enableFallbackStaticTables` set to `false`;
 * every other setting keeps its constructor default.
 *
 * NOTE(review): presumably the shared configuration for HTML rendered from documentation samples — confirm against callers.
 */
val SamplesDisplayConfiguration = DisplayConfiguration(enableFallbackStaticTables = false)
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,6 @@ class Analyze : TestBase() {
df.groupBy { name }
df.groupBy { city and name.lastName }
df.groupBy { age / 10 named "ageDecade" }
df.groupBy { expr { name.firstName.length + name.lastName.length } named "nameLength" }
// SampleEnd
}

Expand All @@ -601,7 +600,6 @@ class Analyze : TestBase() {

df.groupBy { age / 10 named "ageDecade" }

df.groupBy { expr { firstName().length + lastName().length } named "nameLength" }
// SampleEnd
}

Expand All @@ -612,10 +610,53 @@ class Analyze : TestBase() {
df.groupBy("name")
df.groupBy { "city" and "name"["lastName"] }
df.groupBy { "age"<Int>() / 10 named "ageDecade" }
// SampleEnd
}

// Sample: groups df by a key column computed in place with expr (first-name length plus
// last-name length) and named "nameLength"; columns are reached via property-style access.
// NOTE(review): the SampleStart/SampleEnd region is presumably what gets published — keep it verbatim.
@Test
@TransformDataFrameExpressions
fun groupByExpr_properties() {
// SampleStart
df.groupBy { expr { name.firstName.length + name.lastName.length } named "nameLength" }
// SampleEnd
}

// Sample: same "nameLength" grouping as the properties variant, but with column accessors:
// a `name` column group plus `firstName`/`lastName` String columns declared from it by delegation.
// NOTE(review): the SampleStart/SampleEnd region is presumably what gets published — keep it verbatim.
@Test
@TransformDataFrameExpressions
fun groupByExpr_accessors() {
// SampleStart
val name by columnGroup()
val lastName by name.column<String>()
val firstName by name.column<String>()

df.groupBy { expr { firstName().length + lastName().length } named "nameLength" }
// SampleEnd
}

// Sample: same "nameLength" grouping, addressing the nested columns by string names
// ("name"["firstName"], "name"["lastName"]) and reading them as String.
// NOTE(review): the SampleStart/SampleEnd region is presumably what gets published — keep it verbatim.
@Test
@TransformDataFrameExpressions
fun groupByExpr_strings() {
// SampleStart
df.groupBy { expr { "name"["firstName"]<String>().length + "name"["lastName"]<String>().length } named "nameLength" }
// SampleEnd
}

// Sample: groups df by the nested column name.lastName, passing moveToTop = true explicitly.
// NOTE(review): the SampleStart/SampleEnd region is presumably what gets published — keep it verbatim.
@Test
@TransformDataFrameExpressions
fun groupByMoveToTop() {
// SampleStart
df.groupBy(moveToTop = true) { name.lastName }
// SampleEnd
}

// Sample: groups df by the nested column name.lastName with moveToTop = false,
// the counterpart of the groupByMoveToTop sample.
// NOTE(review): the SampleStart/SampleEnd region is presumably what gets published — keep it verbatim.
@Test
@TransformDataFrameExpressions
fun groupByMoveToTopFalse() {
// SampleStart
df.groupBy(moveToTop = false) { name.lastName }
// SampleEnd
}

@Test
@TransformDataFrameExpressions
fun dataFrameToGroupBy() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import org.jetbrains.kotlinx.dataframe.api.rightJoin
import org.jetbrains.kotlinx.dataframe.api.rightJoinWith
import org.jetbrains.kotlinx.dataframe.api.with
import org.jetbrains.kotlinx.dataframe.explainer.PluginCallbackProxy
import org.jetbrains.kotlinx.dataframe.explainer.SamplesDisplayConfiguration
import org.jetbrains.kotlinx.dataframe.explainer.TransformDataFrameExpressions
import org.jetbrains.kotlinx.dataframe.io.DataFrameHtmlData
import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration
Expand Down Expand Up @@ -131,7 +132,7 @@ class JoinWith : TestBase() {
private fun AnyFrame.toColoredHTML() = toHTML(
getFooter = { null },
cellRenderer = renderer,
configuration = DisplayConfiguration.DEFAULT.copy(
configuration = SamplesDisplayConfiguration.copy(
cellFormatter = { row, col ->
val value = row[col]
if (value is ColoredValue<*>) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,16 @@ import kotlin.reflect.KProperty

// region DataFrame

/**
* Groups this [DataFrame] by the key columns selected in [cols] and returns the resulting [GroupBy].
*
* Thin wrapper: both arguments are forwarded unchanged to `groupByImpl`.
*
* @param moveToTop passed through to `groupByImpl`; defaults to `true`.
* NOTE(review): presumably controls whether key columns taken from inside column groups are moved
* to the top level — confirm against `groupByImpl`.
* @param cols key columns; a column for grouping can be created in place
*
* `df.groupBy { expr("columnName") { "someColumn"<Int>() + 15 } }`
*
* is equivalent to
*
* `df.add("columnName") { "someColumn"<Int>() + 15 }.groupBy("columnName")`
* @return the [GroupBy] produced by `groupByImpl`
*/
public fun <T> DataFrame<T>.groupBy(moveToTop: Boolean = true, cols: ColumnsSelector<T, *>): GroupBy<T, T> =
groupByImpl(moveToTop, cols)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -254,17 +254,15 @@ public fun <T> DataFrame<T>.html(): String = toStandaloneHTML().toString()
public fun <T> DataFrame<T>.toStandaloneHTML(
configuration: DisplayConfiguration = DisplayConfiguration.DEFAULT,
cellRenderer: CellRenderer = DefaultCellRenderer,
includeStatic: Boolean = true,
getFooter: (DataFrame<T>) -> String? = { "DataFrame [${it.size}]" },
): DataFrameHtmlData = toHTML(configuration, cellRenderer, includeStatic, getFooter).withTableDefinitions()
): DataFrameHtmlData = toHTML(configuration, cellRenderer, getFooter).withTableDefinitions()

/**
* @return DataFrameHtmlData without additional definitions. Can be rendered in Jupyter kernel environments
*/
public fun <T> DataFrame<T>.toHTML(
configuration: DisplayConfiguration = DisplayConfiguration.DEFAULT,
cellRenderer: CellRenderer = DefaultCellRenderer,
includeStatic: Boolean = true,
getFooter: (DataFrame<T>) -> String? = { "DataFrame [${it.size}]" },
): DataFrameHtmlData {
val limit = configuration.rowsLimit ?: Int.MAX_VALUE
Expand All @@ -285,7 +283,7 @@ public fun <T> DataFrame<T>.toHTML(

var tableHtml = toHtmlData(configuration, cellRenderer)

if (includeStatic) {
if (configuration.enableFallbackStaticTables) {
tableHtml += toStaticHtml(configuration, DefaultCellRenderer)
}

Expand Down Expand Up @@ -368,6 +366,8 @@ public data class DataFrameHtmlData(
/**
* @param rowsLimit null to disable rows limit
* @param cellContentLimit -1 to disable content trimming
* @param enableFallbackStaticTables true to add an additional pure-HTML table that is visible only when JS is disabled,
* for example when *.ipynb files with outputs are hosted on GitHub
*/
public data class DisplayConfiguration(
var rowsLimit: Int? = 20,
Expand All @@ -378,6 +378,7 @@ public data class DisplayConfiguration(
var isolatedOutputs: Boolean = flagFromEnv("LETS_PLOT_HTML_ISOLATED_FRAME"),
internal val localTesting: Boolean = flagFromEnv("KOTLIN_DATAFRAME_LOCAL_TESTING"),
var useDarkColorScheme: Boolean = false,
var enableFallbackStaticTables: Boolean = true,
) {
public companion object {
public val DEFAULT: DisplayConfiguration = DisplayConfiguration()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@ internal inline fun <reified T : Any> JupyterHtmlRenderer.render(
includeCss = true,
).plus(
df.toHTML(
configuration = reifiedDisplayConfiguration,
// is added later to make sure it's put outside of potential iFrames
configuration = reifiedDisplayConfiguration.copy(enableFallbackStaticTables = false),
cellRenderer = contextRenderer,
includeStatic = false, // is added later to make sure it's put outside of potential iFrames
) { footer }
).toJupyterHtmlData()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ import org.jetbrains.kotlinx.dataframe.api.where
import org.jetbrains.kotlinx.dataframe.api.with
import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
import org.jetbrains.kotlinx.dataframe.io.DataFrameHtmlData
import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration
import org.jetbrains.kotlinx.dataframe.io.sessionId
import org.jetbrains.kotlinx.dataframe.io.tableInSessionId
import org.jetbrains.kotlinx.dataframe.io.toHTML
Expand Down Expand Up @@ -244,8 +243,8 @@ object PluginCallbackProxy : PluginCallback {
}

private fun convertToHTML(dataframeLike: Any): DataFrameHtmlData {
fun DataFrame<*>.toHTML() = toHTML(DisplayConfiguration(), getFooter = { "" })
fun FormattedFrame<*>.toHTML1() = toHTML(DisplayConfiguration())
fun DataFrame<*>.toHTML() = toHTML(SamplesDisplayConfiguration, getFooter = { "" })
fun FormattedFrame<*>.toHTML1() = toHTML(SamplesDisplayConfiguration)

return when (dataframeLike) {
is Pivot<*> -> dataframeLike.frames().toDataFrame().toHTML()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package org.jetbrains.kotlinx.dataframe.explainer

import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration

/**
 * A [DisplayConfiguration] with `enableFallbackStaticTables` set to `false`;
 * every other setting keeps its constructor default.
 *
 * NOTE(review): presumably the shared configuration for HTML rendered from documentation samples — confirm against callers.
 */
val SamplesDisplayConfiguration = DisplayConfiguration(enableFallbackStaticTables = false)
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,6 @@ class Analyze : TestBase() {
df.groupBy { name }
df.groupBy { city and name.lastName }
df.groupBy { age / 10 named "ageDecade" }
df.groupBy { expr { name.firstName.length + name.lastName.length } named "nameLength" }
// SampleEnd
}

Expand All @@ -601,7 +600,6 @@ class Analyze : TestBase() {

df.groupBy { age / 10 named "ageDecade" }

df.groupBy { expr { firstName().length + lastName().length } named "nameLength" }
// SampleEnd
}

Expand All @@ -612,10 +610,53 @@ class Analyze : TestBase() {
df.groupBy("name")
df.groupBy { "city" and "name"["lastName"] }
df.groupBy { "age"<Int>() / 10 named "ageDecade" }
// SampleEnd
}

// Sample: groups df by a key column computed in place with expr (first-name length plus
// last-name length) and named "nameLength"; columns are reached via property-style access.
// NOTE(review): the SampleStart/SampleEnd region is presumably what gets published — keep it verbatim.
@Test
@TransformDataFrameExpressions
fun groupByExpr_properties() {
// SampleStart
df.groupBy { expr { name.firstName.length + name.lastName.length } named "nameLength" }
// SampleEnd
}

// Sample: same "nameLength" grouping as the properties variant, but with column accessors:
// a `name` column group plus `firstName`/`lastName` String columns declared from it by delegation.
// NOTE(review): the SampleStart/SampleEnd region is presumably what gets published — keep it verbatim.
@Test
@TransformDataFrameExpressions
fun groupByExpr_accessors() {
// SampleStart
val name by columnGroup()
val lastName by name.column<String>()
val firstName by name.column<String>()

df.groupBy { expr { firstName().length + lastName().length } named "nameLength" }
// SampleEnd
}

// Sample: same "nameLength" grouping, addressing the nested columns by string names
// ("name"["firstName"], "name"["lastName"]) and reading them as String.
// NOTE(review): the SampleStart/SampleEnd region is presumably what gets published — keep it verbatim.
@Test
@TransformDataFrameExpressions
fun groupByExpr_strings() {
// SampleStart
df.groupBy { expr { "name"["firstName"]<String>().length + "name"["lastName"]<String>().length } named "nameLength" }
// SampleEnd
}

// Sample: groups df by the nested column name.lastName, passing moveToTop = true explicitly.
// NOTE(review): the SampleStart/SampleEnd region is presumably what gets published — keep it verbatim.
@Test
@TransformDataFrameExpressions
fun groupByMoveToTop() {
// SampleStart
df.groupBy(moveToTop = true) { name.lastName }
// SampleEnd
}

// Sample: groups df by the nested column name.lastName with moveToTop = false,
// the counterpart of the groupByMoveToTop sample.
// NOTE(review): the SampleStart/SampleEnd region is presumably what gets published — keep it verbatim.
@Test
@TransformDataFrameExpressions
fun groupByMoveToTopFalse() {
// SampleStart
df.groupBy(moveToTop = false) { name.lastName }
// SampleEnd
}

@Test
@TransformDataFrameExpressions
fun dataFrameToGroupBy() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import org.jetbrains.kotlinx.dataframe.api.rightJoin
import org.jetbrains.kotlinx.dataframe.api.rightJoinWith
import org.jetbrains.kotlinx.dataframe.api.with
import org.jetbrains.kotlinx.dataframe.explainer.PluginCallbackProxy
import org.jetbrains.kotlinx.dataframe.explainer.SamplesDisplayConfiguration
import org.jetbrains.kotlinx.dataframe.explainer.TransformDataFrameExpressions
import org.jetbrains.kotlinx.dataframe.io.DataFrameHtmlData
import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration
Expand Down Expand Up @@ -131,7 +132,7 @@ class JoinWith : TestBase() {
private fun AnyFrame.toColoredHTML() = toHTML(
getFooter = { null },
cellRenderer = renderer,
configuration = DisplayConfiguration.DEFAULT.copy(
configuration = SamplesDisplayConfiguration.copy(
cellFormatter = { row, col ->
val value = row[col]
if (value is ColoredValue<*>) {
Expand Down
Loading