Skip to content

Update diff function #339 #410

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ import org.jetbrains.kotlinx.dataframe.index
import org.jetbrains.kotlinx.dataframe.indices
import org.jetbrains.kotlinx.dataframe.ncol
import org.jetbrains.kotlinx.dataframe.nrow
import org.jetbrains.kotlinx.dataframe.util.DIFF_DEPRECATION_MESSAGE
import org.jetbrains.kotlinx.dataframe.util.DIFF_OR_NULL_IMPORT
import org.jetbrains.kotlinx.dataframe.util.DIFF_REPLACE_MESSAGE
import kotlin.experimental.ExperimentalTypeInference
import kotlin.reflect.KProperty
import kotlin.reflect.KType
Expand Down Expand Up @@ -74,17 +77,122 @@ public operator fun AnyRow.contains(column: KProperty<*>): Boolean = containsKey

// endregion

// Documentation holder with no members; its KDoc mirrors the text on the `diff(firstRowResult, expression)` overloads.
/**
 * Calculates the difference between the results of a row expression computed on the current and previous [DataRow].
 *
 * @return [firstRowResult] for the first row; difference between expression computed for current and previous row
 * for the following rows
 */
internal interface DiffDocs

// Documentation holder with no members; its KDoc mirrors the text on the `diffOrNull(expression)` overloads.
/**
 * Calculates the difference between the results of a row expression computed on the current and previous [DataRow].
 *
 * @return `null` for the first row; difference between expression computed for current and previous row
 * for the following rows
 */
internal interface DiffOrNullDocs

/**
 * Calculates the difference between the results of a row expression computed on the current and previous [DataRow].
 *
 * @param firstRowResult value returned when there is no previous row, i.e. for the first row
 * @param expression row expression evaluated once on the current row and once on the previous row
 * @return [firstRowResult] for the first row; difference between expression computed for current and previous row
 * for the following rows
 */
@OptIn(ExperimentalTypeInference::class)
@OverloadResolutionByLambdaReturnType
public fun <T> DataRow<T>.diff(firstRowResult: Double, expression: RowExpression<T, Double>): Double =
    // `prev()` is null when there is no previous row; the elvis branch then supplies the caller's fallback.
    prev()?.let { p -> expression(this, this) - expression(p, p) } ?: firstRowResult

// required to resolve `diff(0) { intValue }`
/**
 * Calculates the difference between the results of a row expression computed on the current and previous [DataRow].
 *
 * @param firstRowResult value returned when there is no previous row, i.e. for the first row
 * @param expression row expression evaluated once on the current row and once on the previous row
 * @return [firstRowResult] for the first row; difference between expression computed for current and previous row
 * for the following rows
 */
@OptIn(ExperimentalTypeInference::class)
@OverloadResolutionByLambdaReturnType
public fun <T> DataRow<T>.diff(firstRowResult: Int, expression: RowExpression<T, Int>): Int =
    // `prev()` is null when there is no previous row; the elvis branch then supplies the caller's fallback.
    prev()?.let { p -> expression(this, this) - expression(p, p) } ?: firstRowResult

/**
 * Calculates the difference between the results of a row expression computed on the current and previous [DataRow].
 *
 * @param firstRowResult value returned when there is no previous row, i.e. for the first row
 * @param expression row expression evaluated once on the current row and once on the previous row
 * @return [firstRowResult] for the first row; difference between expression computed for current and previous row
 * for the following rows
 */
public fun <T> DataRow<T>.diff(firstRowResult: Long, expression: RowExpression<T, Long>): Long =
    // `prev()` is null when there is no previous row; the elvis branch then supplies the caller's fallback.
    prev()?.let { p -> expression(this, this) - expression(p, p) } ?: firstRowResult

/**
 * Calculates the difference between the results of a row expression computed on the current and previous [DataRow].
 *
 * @param firstRowResult value returned when there is no previous row, i.e. for the first row
 * @param expression row expression evaluated once on the current row and once on the previous row
 * @return [firstRowResult] for the first row; difference between expression computed for current and previous row
 * for the following rows
 */
public fun <T> DataRow<T>.diff(firstRowResult: Float, expression: RowExpression<T, Float>): Float =
    // `prev()` is null when there is no previous row; the elvis branch then supplies the caller's fallback.
    prev()?.let { p -> expression(this, this) - expression(p, p) } ?: firstRowResult

/**
 * Subtracts the value of [expression] on the previous [DataRow] from its value on the current one.
 *
 * @param expression row expression evaluated once on the current row and once on the previous row
 * @return `null` for the first row; the current-minus-previous difference for every following row
 */
@OptIn(ExperimentalTypeInference::class)
@OverloadResolutionByLambdaReturnType
public fun <T> DataRow<T>.diffOrNull(expression: RowExpression<T, Double>): Double? {
    // Without a previous row there is nothing to subtract.
    val previous = prev() ?: return null
    return expression(this, this) - expression(previous, previous)
}

/**
 * Subtracts the value of [expression] on the previous [DataRow] from its value on the current one.
 *
 * @param expression row expression evaluated once on the current row and once on the previous row
 * @return `null` for the first row; the current-minus-previous difference for every following row
 */
public fun <T> DataRow<T>.diffOrNull(expression: RowExpression<T, Int>): Int? {
    // Without a previous row there is nothing to subtract.
    val previous = prev() ?: return null
    return expression(this, this) - expression(previous, previous)
}

/**
 * Subtracts the value of [expression] on the previous [DataRow] from its value on the current one.
 *
 * @param expression row expression evaluated once on the current row and once on the previous row
 * @return `null` for the first row; the current-minus-previous difference for every following row
 */
public fun <T> DataRow<T>.diffOrNull(expression: RowExpression<T, Long>): Long? {
    // Without a previous row there is nothing to subtract.
    val previous = prev() ?: return null
    return expression(this, this) - expression(previous, previous)
}

/**
 * Subtracts the value of [expression] on the previous [DataRow] from its value on the current one.
 *
 * @param expression row expression evaluated once on the current row and once on the previous row
 * @return `null` for the first row; the current-minus-previous difference for every following row
 */
public fun <T> DataRow<T>.diffOrNull(expression: RowExpression<T, Float>): Float? {
    // Without a previous row there is nothing to subtract.
    val previous = prev() ?: return null
    return expression(this, this) - expression(previous, previous)
}

/**
 * Deprecated nullable-returning form of `diff`; the suggested replacement is `diffOrNull(expression)`.
 *
 * @param expression row expression evaluated once on the current row and once on the previous row
 * @return `null` when there is no previous row; otherwise the current-minus-previous difference
 */
@OptIn(ExperimentalTypeInference::class)
@OverloadResolutionByLambdaReturnType
@Deprecated(
    message = DIFF_DEPRECATION_MESSAGE,
    replaceWith = ReplaceWith(DIFF_REPLACE_MESSAGE, DIFF_OR_NULL_IMPORT),
    level = DeprecationLevel.WARNING,
)
public fun <T> DataRow<T>.diff(expression: RowExpression<T, Double>): Double? {
    val previous = prev() ?: return null
    return expression(this, this) - expression(previous, previous)
}

/**
 * Deprecated nullable-returning form of `diff`; the suggested replacement is `diffOrNull(expression)`.
 *
 * @param expression row expression evaluated once on the current row and once on the previous row
 * @return `null` when there is no previous row; otherwise the current-minus-previous difference
 */
@Deprecated(
    message = DIFF_DEPRECATION_MESSAGE,
    replaceWith = ReplaceWith(DIFF_REPLACE_MESSAGE, DIFF_OR_NULL_IMPORT),
    level = DeprecationLevel.WARNING,
)
public fun <T> DataRow<T>.diff(expression: RowExpression<T, Int>): Int? {
    val previous = prev() ?: return null
    return expression(this, this) - expression(previous, previous)
}

/**
 * Deprecated nullable-returning form of `diff`; the suggested replacement is `diffOrNull(expression)`.
 *
 * @param expression row expression evaluated once on the current row and once on the previous row
 * @return `null` when there is no previous row; otherwise the current-minus-previous difference
 */
@Deprecated(
    message = DIFF_DEPRECATION_MESSAGE,
    replaceWith = ReplaceWith(DIFF_REPLACE_MESSAGE, DIFF_OR_NULL_IMPORT),
    level = DeprecationLevel.WARNING,
)
public fun <T> DataRow<T>.diff(expression: RowExpression<T, Long>): Long? {
    val previous = prev() ?: return null
    return expression(this, this) - expression(previous, previous)
}

/**
 * Deprecated nullable-returning form of `diff`; the suggested replacement is `diffOrNull(expression)`.
 *
 * @param expression row expression evaluated once on the current row and once on the previous row
 * @return `null` when there is no previous row; otherwise the current-minus-previous difference
 */
@Deprecated(
    message = DIFF_DEPRECATION_MESSAGE,
    replaceWith = ReplaceWith(DIFF_REPLACE_MESSAGE, DIFF_OR_NULL_IMPORT),
    level = DeprecationLevel.WARNING,
)
public fun <T> DataRow<T>.diff(expression: RowExpression<T, Float>): Float? {
    val previous = prev() ?: return null
    return expression(this, this) - expression(previous, previous)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public inline fun <reified T> Iterable<T>.toDataFrame(vararg props: KProperty<*>
properties(roots = props, maxDepth = maxDepth)
}

@Deprecated(DF_READ_DEPRECATION_MESSAGE, ReplaceWith(DF_READ_REPLACE_MESSAGE), DeprecationLevel.ERROR)
@Deprecated(DF_READ_DEPRECATION_MESSAGE, ReplaceWith("this.unfold(columns)"), DeprecationLevel.ERROR)
public fun <T> DataFrame<T>.read(columns: ColumnsSelector<T, *>): DataFrame<T> = unfold(columns)

@Deprecated(DF_READ_DEPRECATION_MESSAGE, ReplaceWith(DF_READ_REPLACE_MESSAGE), DeprecationLevel.ERROR)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,9 @@ internal const val DF_READ_DEPRECATION_MESSAGE = "Replaced with `unfold` operati
// `ReplaceWith` expression attached to deprecated `read` declarations.
internal const val DF_READ_REPLACE_MESSAGE = "this.unfold(*columns)"

// Deprecation message that points users to `toColumnSet()`.
internal const val ITERABLE_COLUMNS_DEPRECATION_MESSAGE = "Replaced with `toColumnSet()` operation."

// Deprecation message for the nullable-returning `diff(expression)` overloads.
internal const val DIFF_DEPRECATION_MESSAGE = "Replaced to explicitly indicate nullable return value; added a new non-null overload."

// `ReplaceWith` expression offered as the replacement for the deprecated `diff(expression)` overloads.
internal const val DIFF_REPLACE_MESSAGE = "this.diffOrNull(expression)"

// Fully qualified name passed to `ReplaceWith` as the import needed by the replacement expression.
internal const val DIFF_OR_NULL_IMPORT = "org.jetbrains.kotlinx.dataframe.api.diffOrNull"
Original file line number Diff line number Diff line change
@@ -1,10 +1,32 @@
package org.jetbrains.kotlinx.dataframe.puzzles

import io.kotest.matchers.shouldBe
import org.jetbrains.kotlinx.dataframe.api.*
import org.jetbrains.kotlinx.dataframe.api.colsOf
import org.jetbrains.kotlinx.dataframe.api.column
import org.jetbrains.kotlinx.dataframe.api.columnOf
import org.jetbrains.kotlinx.dataframe.api.convert
import org.jetbrains.kotlinx.dataframe.api.countDistinct
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.api.diffOrNull
import org.jetbrains.kotlinx.dataframe.api.filter
import org.jetbrains.kotlinx.dataframe.api.groupBy
import org.jetbrains.kotlinx.dataframe.api.isNaN
import org.jetbrains.kotlinx.dataframe.api.map
import org.jetbrains.kotlinx.dataframe.api.mapToColumn
import org.jetbrains.kotlinx.dataframe.api.minBy
import org.jetbrains.kotlinx.dataframe.api.name
import org.jetbrains.kotlinx.dataframe.api.named
import org.jetbrains.kotlinx.dataframe.api.namedValuesOf
import org.jetbrains.kotlinx.dataframe.api.prev
import org.jetbrains.kotlinx.dataframe.api.rowMean
import org.jetbrains.kotlinx.dataframe.api.sortBy
import org.jetbrains.kotlinx.dataframe.api.sortDesc
import org.jetbrains.kotlinx.dataframe.api.sum
import org.jetbrains.kotlinx.dataframe.api.take
import org.jetbrains.kotlinx.dataframe.api.transpose
import org.jetbrains.kotlinx.dataframe.api.transposeTo
import org.jetbrains.kotlinx.dataframe.api.value
import org.jetbrains.kotlinx.dataframe.api.with
import org.junit.Test
import kotlin.math.roundToInt
import kotlin.random.Random
Expand All @@ -24,8 +46,8 @@ class MediumTests {
df.filter { prev()?.get(a) != a() } shouldBe expected
df.filter { prev()?.get("A") != "A"() } shouldBe expected

df.filter { diff { a() } != 0 } shouldBe expected
df.filter { diff { "A"<Int>() } != 0 } shouldBe expected
df.filter { diffOrNull { a() } != 0 } shouldBe expected
df.filter { diffOrNull { "A"<Int>() } != 0 } shouldBe expected
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package org.jetbrains.kotlinx.dataframe.samples.api

import org.jetbrains.kotlinx.dataframe.api.add
import org.jetbrains.kotlinx.dataframe.api.at
import org.jetbrains.kotlinx.dataframe.api.diff
import org.jetbrains.kotlinx.dataframe.api.diffOrNull
import org.jetbrains.kotlinx.dataframe.api.drop
import org.jetbrains.kotlinx.dataframe.api.filter
import org.jetbrains.kotlinx.dataframe.api.pivot
Expand Down Expand Up @@ -39,7 +39,7 @@ class DataRowApi : TestBase() {
df.filter { index() % 5 == 0 }

// Row condition is used to drop rows where `age` is the same as in previous row
df.drop { diff { age } == 0 }
df.drop { diffOrNull { age } == 0 }

// Row condition is used to filter rows for value update
df.update { weight }.where { index() > 4 && city != "Paris" }.withValue(50)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import org.jetbrains.kotlinx.dataframe.api.by
import org.jetbrains.kotlinx.dataframe.api.columnNames
import org.jetbrains.kotlinx.dataframe.api.convert
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.api.diff
import org.jetbrains.kotlinx.dataframe.api.diffOrNull
import org.jetbrains.kotlinx.dataframe.api.drop
import org.jetbrains.kotlinx.dataframe.api.dropLast
import org.jetbrains.kotlinx.dataframe.api.first
Expand Down Expand Up @@ -40,7 +40,7 @@ class DataRowTests : BaseTest() {

@Test
fun diff() {
typed.update { age }.with { diff { age } }.age.drop(1).values() shouldBe typed.age.values()
typed.update { age }.with { diffOrNull { age } }.age.drop(1).values() shouldBe typed.age.values()
.zipWithNext { curr, next -> next - curr }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class SeriesTests {
val withDiff = typed
.sortBy { city and day }
.groupBy { city }
.add("diff") { diff { temp } ?: 0 }
.add("diff") { diff(0) { temp } }
.concat()

val srcData = typed.rows().map { (it.city to it.day) to it.temp }.toMap()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ import org.jetbrains.kotlinx.dataframe.index
import org.jetbrains.kotlinx.dataframe.indices
import org.jetbrains.kotlinx.dataframe.ncol
import org.jetbrains.kotlinx.dataframe.nrow
import org.jetbrains.kotlinx.dataframe.util.DIFF_DEPRECATION_MESSAGE
import org.jetbrains.kotlinx.dataframe.util.DIFF_OR_NULL_IMPORT
import org.jetbrains.kotlinx.dataframe.util.DIFF_REPLACE_MESSAGE
import kotlin.experimental.ExperimentalTypeInference
import kotlin.reflect.KProperty
import kotlin.reflect.KType
Expand Down Expand Up @@ -74,17 +77,106 @@ public operator fun AnyRow.contains(column: KProperty<*>): Boolean = containsKey

// endregion

// Documentation holder with no members; the `diff(firstRowResult, expression)` overloads pull in this KDoc
// through `@include [DiffDocs]`.
/**
 * Calculates the difference between the results of a row expression computed on the current and previous [DataRow].
 *
 * @return [firstRowResult] for the first row; difference between expression computed for current and previous row
 * for the following rows
 */
internal interface DiffDocs

// Documentation holder with no members; the `diffOrNull(expression)` overloads pull in this KDoc
// through `@include [DiffOrNullDocs]`.
/**
 * Calculates the difference between the results of a row expression computed on the current and previous [DataRow].
 *
 * @return `null` for the first row; difference between expression computed for current and previous row
 * for the following rows
 */
internal interface DiffOrNullDocs

/**
 * @include [DiffDocs]
 */
@OptIn(ExperimentalTypeInference::class)
@OverloadResolutionByLambdaReturnType
public fun <T> DataRow<T>.diff(firstRowResult: Double, expression: RowExpression<T, Double>): Double {
    // No previous row: hand back the caller-supplied fallback.
    val previous = prev() ?: return firstRowResult
    return expression(this, this) - expression(previous, previous)
}

// required to resolve `diff(0) { intValue }`
/**
 * @include [DiffDocs]
 */
@OptIn(ExperimentalTypeInference::class)
@OverloadResolutionByLambdaReturnType
public fun <T> DataRow<T>.diff(firstRowResult: Int, expression: RowExpression<T, Int>): Int {
    // No previous row: hand back the caller-supplied fallback.
    val previous = prev() ?: return firstRowResult
    return expression(this, this) - expression(previous, previous)
}

/**
 * @include [DiffDocs]
 */
public fun <T> DataRow<T>.diff(firstRowResult: Long, expression: RowExpression<T, Long>): Long {
    // No previous row: hand back the caller-supplied fallback.
    val previous = prev() ?: return firstRowResult
    return expression(this, this) - expression(previous, previous)
}

/**
 * @include [DiffDocs]
 */
public fun <T> DataRow<T>.diff(firstRowResult: Float, expression: RowExpression<T, Float>): Float {
    // No previous row: hand back the caller-supplied fallback.
    val previous = prev() ?: return firstRowResult
    return expression(this, this) - expression(previous, previous)
}

/**
 * @include [DiffOrNullDocs]
 */
@OptIn(ExperimentalTypeInference::class)
@OverloadResolutionByLambdaReturnType
public fun <T> DataRow<T>.diffOrNull(expression: RowExpression<T, Double>): Double? {
    // Without a previous row there is nothing to subtract.
    val previous = prev() ?: return null
    return expression(this, this) - expression(previous, previous)
}

/**
 * @include [DiffOrNullDocs]
 */
public fun <T> DataRow<T>.diffOrNull(expression: RowExpression<T, Int>): Int? {
    // Without a previous row there is nothing to subtract.
    val previous = prev() ?: return null
    return expression(this, this) - expression(previous, previous)
}

/**
 * @include [DiffOrNullDocs]
 */
public fun <T> DataRow<T>.diffOrNull(expression: RowExpression<T, Long>): Long? {
    // Without a previous row there is nothing to subtract.
    val previous = prev() ?: return null
    return expression(this, this) - expression(previous, previous)
}

/**
 * @include [DiffOrNullDocs]
 */
public fun <T> DataRow<T>.diffOrNull(expression: RowExpression<T, Float>): Float? {
    // Without a previous row there is nothing to subtract.
    val previous = prev() ?: return null
    return expression(this, this) - expression(previous, previous)
}

/**
 * Deprecated nullable-returning form of `diff`; the suggested replacement is `diffOrNull(expression)`.
 *
 * @param expression row expression evaluated once on the current row and once on the previous row
 * @return `null` when there is no previous row; otherwise the current-minus-previous difference
 */
@OptIn(ExperimentalTypeInference::class)
@OverloadResolutionByLambdaReturnType
@Deprecated(
    message = DIFF_DEPRECATION_MESSAGE,
    replaceWith = ReplaceWith(DIFF_REPLACE_MESSAGE, DIFF_OR_NULL_IMPORT),
    level = DeprecationLevel.WARNING,
)
public fun <T> DataRow<T>.diff(expression: RowExpression<T, Double>): Double? {
    val previous = prev() ?: return null
    return expression(this, this) - expression(previous, previous)
}

/**
 * Deprecated nullable-returning form of `diff`; the suggested replacement is `diffOrNull(expression)`.
 *
 * @param expression row expression evaluated once on the current row and once on the previous row
 * @return `null` when there is no previous row; otherwise the current-minus-previous difference
 */
@Deprecated(
    message = DIFF_DEPRECATION_MESSAGE,
    replaceWith = ReplaceWith(DIFF_REPLACE_MESSAGE, DIFF_OR_NULL_IMPORT),
    level = DeprecationLevel.WARNING,
)
public fun <T> DataRow<T>.diff(expression: RowExpression<T, Int>): Int? {
    val previous = prev() ?: return null
    return expression(this, this) - expression(previous, previous)
}

/**
 * Deprecated nullable-returning form of `diff`; the suggested replacement is `diffOrNull(expression)`.
 *
 * @param expression row expression evaluated once on the current row and once on the previous row
 * @return `null` when there is no previous row; otherwise the current-minus-previous difference
 */
@Deprecated(
    message = DIFF_DEPRECATION_MESSAGE,
    replaceWith = ReplaceWith(DIFF_REPLACE_MESSAGE, DIFF_OR_NULL_IMPORT),
    level = DeprecationLevel.WARNING,
)
public fun <T> DataRow<T>.diff(expression: RowExpression<T, Long>): Long? {
    val previous = prev() ?: return null
    return expression(this, this) - expression(previous, previous)
}

/**
 * Deprecated nullable-returning form of `diff`; the suggested replacement is `diffOrNull(expression)`.
 *
 * @param expression row expression evaluated once on the current row and once on the previous row
 * @return `null` when there is no previous row; otherwise the current-minus-previous difference
 */
@Deprecated(
    message = DIFF_DEPRECATION_MESSAGE,
    replaceWith = ReplaceWith(DIFF_REPLACE_MESSAGE, DIFF_OR_NULL_IMPORT),
    level = DeprecationLevel.WARNING,
)
public fun <T> DataRow<T>.diff(expression: RowExpression<T, Float>): Float? {
    val previous = prev() ?: return null
    return expression(this, this) - expression(previous, previous)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public inline fun <reified T> Iterable<T>.toDataFrame(vararg props: KProperty<*>
properties(roots = props, maxDepth = maxDepth)
}

@Deprecated(DF_READ_DEPRECATION_MESSAGE, ReplaceWith(DF_READ_REPLACE_MESSAGE), DeprecationLevel.ERROR)
@Deprecated(DF_READ_DEPRECATION_MESSAGE, ReplaceWith("this.unfold(columns)"), DeprecationLevel.ERROR)
public fun <T> DataFrame<T>.read(columns: ColumnsSelector<T, *>): DataFrame<T> = unfold(columns)

@Deprecated(DF_READ_DEPRECATION_MESSAGE, ReplaceWith(DF_READ_REPLACE_MESSAGE), DeprecationLevel.ERROR)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,9 @@ internal const val DF_READ_DEPRECATION_MESSAGE = "Replaced with `unfold` operati
// `ReplaceWith` expression attached to deprecated `read` declarations.
internal const val DF_READ_REPLACE_MESSAGE = "this.unfold(*columns)"

// Deprecation message that points users to `toColumnSet()`.
internal const val ITERABLE_COLUMNS_DEPRECATION_MESSAGE = "Replaced with `toColumnSet()` operation."

// Deprecation message for the nullable-returning `diff(expression)` overloads.
internal const val DIFF_DEPRECATION_MESSAGE = "Replaced to explicitly indicate nullable return value; added a new non-null overload."

// `ReplaceWith` expression offered as the replacement for the deprecated `diff(expression)` overloads.
internal const val DIFF_REPLACE_MESSAGE = "this.diffOrNull(expression)"

// Fully qualified name passed to `ReplaceWith` as the import needed by the replacement expression.
internal const val DIFF_OR_NULL_IMPORT = "org.jetbrains.kotlinx.dataframe.api.diffOrNull"
Loading