Skip to content

CumSum #1152

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 26, 2025
Merged

CumSum #1152

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions core/api/core.api
Original file line number Diff line number Diff line change
Expand Up @@ -1740,6 +1740,18 @@ public final class org/jetbrains/kotlinx/dataframe/api/CumSumKt {
public static synthetic fun cumSum$default (Lorg/jetbrains/kotlinx/dataframe/api/GroupBy;[Ljava/lang/String;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/api/GroupBy;
public static synthetic fun cumSum$default (Lorg/jetbrains/kotlinx/dataframe/api/GroupBy;[Lkotlin/reflect/KProperty;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/api/GroupBy;
public static synthetic fun cumSum$default (Lorg/jetbrains/kotlinx/dataframe/api/GroupBy;[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/api/GroupBy;
public static final fun cumSumByte (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Z)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static synthetic fun cumSumByte$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static final fun cumSumDouble (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Z)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static synthetic fun cumSumDouble$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static final fun cumSumFloat (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Z)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static synthetic fun cumSumFloat$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static final fun cumSumNullableByte (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Z)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static synthetic fun cumSumNullableByte$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static final fun cumSumNullableShort (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Z)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static synthetic fun cumSumNullableShort$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static final fun cumSumShort (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Z)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static synthetic fun cumSumShort$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
}

public final class org/jetbrains/kotlinx/dataframe/api/DataColumnArithmeticsKt {
Expand Down Expand Up @@ -1943,6 +1955,7 @@ public abstract interface class org/jetbrains/kotlinx/dataframe/api/DataSchemaEn

public final class org/jetbrains/kotlinx/dataframe/api/DefaultsKt {
public static final fun getDdofDefault ()I
public static final fun getDefaultCumSumSkipNA ()Z
public static final fun getSkipNaNDefault ()Z
}

Expand Down Expand Up @@ -6541,6 +6554,10 @@ public final class org/jetbrains/kotlinx/dataframe/keywords/SoftKeywords$Compani
public final fun getVALUES ()Ljava/util/List;
}

public final class org/jetbrains/kotlinx/dataframe/math/CumsumKt {
public static final fun getCumSumTypeConversion ()Lkotlin/jvm/functions/Function2;
}

public final class org/jetbrains/kotlinx/dataframe/math/MedianKt {
public static final fun medianOrNull (Lkotlin/sequences/Sequence;Lkotlin/reflect/KType;Z)Ljava/lang/Object;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,9 @@ internal val skipNaNDefault: Boolean = false
*/
@PublishedApi
internal val ddofDefault: Int = 1

/**
* Default value of the `skipNA` parameter of the `cumSum` operations:
* whether nulls and NaNs are skipped while accumulating (`true` means they are skipped).
*/
@PublishedApi
internal val defaultCumSumSkipNA: Boolean = true
147 changes: 72 additions & 75 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt
Original file line number Diff line number Diff line change
@@ -1,23 +1,16 @@
package org.jetbrains.kotlinx.dataframe.api

import org.jetbrains.kotlinx.dataframe.AnyColumnReference
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
import org.jetbrains.kotlinx.dataframe.api.Select.SelectSelectingOptions
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources
import org.jetbrains.kotlinx.dataframe.impl.nothingType
import org.jetbrains.kotlinx.dataframe.impl.nullableNothingType
import org.jetbrains.kotlinx.dataframe.math.cumSum
import org.jetbrains.kotlinx.dataframe.math.defaultCumSumSkipNA
import org.jetbrains.kotlinx.dataframe.typeClass
import java.math.BigDecimal
import java.math.BigInteger
import org.jetbrains.kotlinx.dataframe.math.cumSumImpl
import kotlin.reflect.KProperty
import kotlin.reflect.typeOf

// region DataColumn

Expand All @@ -28,13 +21,14 @@ import kotlin.reflect.typeOf
* from the first cell to the last cell.
*
* __NOTE:__ If the column contains nullable values and [skipNA\] is set to `true`,
* null values are skipped when computing the cumulative sum.
* Otherwise, any null value encountered will propagate null values in the output from that point onward.
* null and NaN values are skipped when computing the cumulative sum.
* When [skipNA\] is `false`, every value after the first null or NaN becomes NaN
* (for Double and Float columns) or null (for integer columns).
*
* {@get [CumSumDocs.CUMSUM_PARAM] @param [columns\]
* The names of the columns to apply cumSum operation.}
*
* @param [skipNA\] Whether to skip null values (default: `true`).
* @param [skipNA\] Whether to skip null and NaN values (default: `true`).
*
* @return A new {@get [CumSumDocs.DATA_TYPE]} with the cumulative sums (the `Byte` and `Short` [DataColumn] overloads return `Int` sums).
*
Expand All @@ -54,62 +48,62 @@ private interface CumSumDocs {
* {@set [CumSumDocs.DATA_TYPE] [DataColumn]}
* {@set [CumSumDocs.CUMSUM_PARAM]}
*/
public fun <T : Number?> DataColumn<T>.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataColumn<T> =
when (type()) {
typeOf<Double>() -> cast<Double>().cumSum(skipNA).cast()

typeOf<Double?>() -> cast<Double?>().cumSum(skipNA).cast()

typeOf<Float>() -> cast<Float>().cumSum(skipNA).cast()

typeOf<Float?>() -> cast<Float?>().cumSum(skipNA).cast()

typeOf<Int>() -> cast<Int>().cumSum().cast()

// TODO cumSum for Byte returns Int but is converted back to T: Byte, Issue #558
typeOf<Byte>() -> cast<Byte>().cumSum().map { it.toByte() }.cast()

// TODO cumSum for Short returns Int but is converted back to T: Short, Issue #558
typeOf<Short>() -> cast<Short>().cumSum().map { it.toShort() }.cast()

typeOf<Int?>() -> cast<Int?>().cumSum(skipNA).cast()

// TODO cumSum for Byte? returns Int? but is converted back to T: Byte?, Issue #558
typeOf<Byte?>() -> cast<Byte?>().cumSum(skipNA).map { it?.toByte() }.cast()

// TODO cumSum for Short? returns Int? but is converted back to T: Short?, Issue #558
typeOf<Short?>() -> cast<Short?>().cumSum(skipNA).map { it?.toShort() }.cast()

typeOf<Long>() -> cast<Long>().cumSum().cast()

typeOf<Long?>() -> cast<Long?>().cumSum(skipNA).cast()

typeOf<BigInteger>() -> cast<BigInteger>().cumSum().cast()
// Explicit JVM name: the DataColumn.cumSum overloads differ only in the receiver's type argument,
// which is erased on the JVM to (DataColumn, boolean) -> DataColumn, so each needs a distinct name.
// The result column is typed DataColumn<Int>, not DataColumn<Short>.
@JvmName("cumSumShort")
public fun DataColumn<Short>.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataColumn<Int> =
cumSumImpl(type(), skipNA).cast()

typeOf<BigInteger?>() -> cast<BigInteger?>().cumSum(skipNA).cast()

typeOf<BigDecimal>() -> cast<BigDecimal>().cumSum().cast()
/**
* {@include [CumSumDocs]}
* {@set [CumSumDocs.DATA_TYPE] [DataColumn]}
* {@set [CumSumDocs.CUMSUM_PARAM]}
*/
// Explicit JVM name: the DataColumn.cumSum overloads differ only in the receiver's type argument,
// which is erased on the JVM to (DataColumn, boolean) -> DataColumn, so each needs a distinct name.
// The result column is typed DataColumn<Int?>, not DataColumn<Short?>.
@JvmName("cumSumNullableShort")
public fun DataColumn<Short?>.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataColumn<Int?> =
cumSumImpl(type(), skipNA).cast()

typeOf<BigDecimal?>() -> cast<BigDecimal?>().cumSum(skipNA).cast()
/**
* {@include [CumSumDocs]}
* {@set [CumSumDocs.DATA_TYPE] [DataColumn]}
* {@set [CumSumDocs.CUMSUM_PARAM]}
*/
// Explicit JVM name: the DataColumn.cumSum overloads differ only in the receiver's type argument,
// which is erased on the JVM to (DataColumn, boolean) -> DataColumn, so each needs a distinct name.
// The result column is typed DataColumn<Int>, not DataColumn<Byte>.
@JvmName("cumSumByte")
public fun DataColumn<Byte>.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataColumn<Int> =
cumSumImpl(type(), skipNA).cast()

typeOf<Number?>(), typeOf<Number>() -> convertToDouble().cumSum(skipNA).cast()
/**
* {@include [CumSumDocs]}
* {@set [CumSumDocs.DATA_TYPE] [DataColumn]}
* {@set [CumSumDocs.CUMSUM_PARAM]}
*/
// Explicit JVM name: the DataColumn.cumSum overloads differ only in the receiver's type argument,
// which is erased on the JVM to (DataColumn, boolean) -> DataColumn, so each needs a distinct name.
// The result column is typed DataColumn<Int?>, not DataColumn<Byte?>.
@JvmName("cumSumNullableByte")
public fun DataColumn<Byte?>.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataColumn<Int?> =
cumSumImpl(type(), skipNA).cast()

// Cumsum for empty column or column with just null is itself
nothingType, nullableNothingType -> this
/**
* {@include [CumSumDocs]}
* {@set [CumSumDocs.DATA_TYPE] [DataColumn]}
* {@set [CumSumDocs.CUMSUM_PARAM]}
*/
// Explicit JVM name: the DataColumn.cumSum overloads differ only in the receiver's type argument,
// which is erased on the JVM to (DataColumn, boolean) -> DataColumn, so each needs a distinct name.
// The receiver accepts nullable Double values, but the result is declared as non-nullable Double.
// NOTE(review): presumably nulls surface as NaN in the output - confirm against cumSumImpl.
@JvmName("cumSumDouble")
public fun DataColumn<Double?>.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataColumn<Double> =
cumSumImpl(type(), skipNA).cast()

else -> error("Cumsum for type ${type()} is not supported")
}
/**
* {@include [CumSumDocs]}
* {@set [CumSumDocs.DATA_TYPE] [DataColumn]}
* {@set [CumSumDocs.CUMSUM_PARAM]}
*/
// Explicit JVM name: the DataColumn.cumSum overloads differ only in the receiver's type argument,
// which is erased on the JVM to (DataColumn, boolean) -> DataColumn, so each needs a distinct name.
// The receiver accepts nullable Float values, but the result is declared as non-nullable Float.
// NOTE(review): presumably nulls surface as NaN in the output - confirm against cumSumImpl.
@JvmName("cumSumFloat")
public fun DataColumn<Float?>.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataColumn<Float> =
cumSumImpl(type(), skipNA).cast()

private val supportedClasses = setOf(
Double::class,
Float::class,
Int::class,
Byte::class,
Short::class,
Long::class,
BigInteger::class,
BigDecimal::class,
)
/**
* {@include [CumSumDocs]}
* {@set [CumSumDocs.DATA_TYPE] [DataColumn]}
* {@set [CumSumDocs.CUMSUM_PARAM]}
*/
// Generic overload for any Number? column: passes the column's type() to cumSumImpl
// and casts the result back to the declared element type T.
// NOTE(review): whether the produced values really are of type T depends on cumSumImpl - confirm for Byte/Short inputs.
public fun <T : Number?> DataColumn<T>.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataColumn<T> =
cumSumImpl(type(), skipNA).cast()

// endregion

Expand All @@ -119,26 +113,25 @@ private val supportedClasses = setOf(
* {@include [CumSumDocs]}
* {@set [CumSumDocs.DATA_TYPE] [DataFrame]}
*/
public fun <T, C> DataFrame<T>.cumSum(
public fun <T, C : Number?> DataFrame<T>.cumSum(
skipNA: Boolean = defaultCumSumSkipNA,
columns: ColumnsSelector<T, C>,
): DataFrame<T> =
convert(columns).to { if (it.typeClass in supportedClasses) it.cast<Number?>().cumSum(skipNA) else it }
): DataFrame<T> = convert(columns).to { it.cumSum(skipNA) }

/**
* {@include [CumSumDocs]}
* {@set [CumSumDocs.DATA_TYPE] [DataFrame]}
*/
public fun <T> DataFrame<T>.cumSum(vararg columns: String, skipNA: Boolean = defaultCumSumSkipNA): DataFrame<T> =
cumSum(skipNA) { columns.toColumnSet() }
cumSum(skipNA) { columns.toColumnSet().cast() }

/**
* {@include [CumSumDocs]}
* {@set [CumSumDocs.DATA_TYPE] [DataFrame]}
*/
@AccessApiOverload
public fun <T> DataFrame<T>.cumSum(
vararg columns: AnyColumnReference,
vararg columns: ColumnReference<Number?>,
skipNA: Boolean = defaultCumSumSkipNA,
): DataFrame<T> = cumSum(skipNA) { columns.toColumnSet() }

Expand All @@ -147,8 +140,10 @@ public fun <T> DataFrame<T>.cumSum(
* {@set [CumSumDocs.DATA_TYPE] [DataFrame]}
*/
@AccessApiOverload
public fun <T> DataFrame<T>.cumSum(vararg columns: KProperty<*>, skipNA: Boolean = defaultCumSumSkipNA): DataFrame<T> =
cumSum(skipNA) { columns.toColumnSet() }
// KProperty-based overload: converts the given properties to a column set and
// delegates to the cumSum overload that takes a columns selector.
public fun <T> DataFrame<T>.cumSum(
vararg columns: KProperty<Number?>,
skipNA: Boolean = defaultCumSumSkipNA,
): DataFrame<T> = cumSum(skipNA) { columns.toColumnSet() }

/**
* {@include [CumSumDocs]}
Expand All @@ -157,7 +152,8 @@ public fun <T> DataFrame<T>.cumSum(vararg columns: KProperty<*>, skipNA: Boolean
*/
public fun <T> DataFrame<T>.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataFrame<T> =
cumSum(skipNA) {
colsAtAnyDepth { !it.isColumnGroup() }
// TODO keep at any depth?
colsAtAnyDepth { it.isNumber() }.cast()
}

// endregion
Expand All @@ -168,7 +164,7 @@ public fun <T> DataFrame<T>.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataF
* {@include [CumSumDocs]}
* {@set [CumSumDocs.DATA_TYPE] [GroupBy]}
*/
public fun <T, G, C> GroupBy<T, G>.cumSum(
public fun <T, G, C : Number?> GroupBy<T, G>.cumSum(
skipNA: Boolean = defaultCumSumSkipNA,
columns: ColumnsSelector<G, C>,
): GroupBy<T, G> = updateGroups { cumSum(skipNA, columns) }
Expand All @@ -178,15 +174,15 @@ public fun <T, G, C> GroupBy<T, G>.cumSum(
* {@set [CumSumDocs.DATA_TYPE] [GroupBy]}
*/
public fun <T, G> GroupBy<T, G>.cumSum(vararg columns: String, skipNA: Boolean = defaultCumSumSkipNA): GroupBy<T, G> =
cumSum(skipNA) { columns.toColumnSet() }
cumSum(skipNA) { columns.toColumnSet().cast() }

/**
* {@include [CumSumDocs]}
* {@set [CumSumDocs.DATA_TYPE] [GroupBy]}
*/
@AccessApiOverload
public fun <T, G> GroupBy<T, G>.cumSum(
vararg columns: AnyColumnReference,
vararg columns: ColumnReference<Number?>,
skipNA: Boolean = defaultCumSumSkipNA,
): GroupBy<T, G> = cumSum(skipNA) { columns.toColumnSet() }

Expand All @@ -196,7 +192,7 @@ public fun <T, G> GroupBy<T, G>.cumSum(
*/
@AccessApiOverload
public fun <T, G> GroupBy<T, G>.cumSum(
vararg columns: KProperty<*>,
vararg columns: KProperty<Number?>,
skipNA: Boolean = defaultCumSumSkipNA,
): GroupBy<T, G> = cumSum(skipNA) { columns.toColumnSet() }

Expand All @@ -207,7 +203,8 @@ public fun <T, G> GroupBy<T, G>.cumSum(
*/
public fun <T, G> GroupBy<T, G>.cumSum(skipNA: Boolean = defaultCumSumSkipNA): GroupBy<T, G> =
cumSum(skipNA) {
colsAtAnyDepth { !it.isColumnGroup() }
// TODO keep at any depth?
colsAtAnyDepth { it.isNumber() }.cast()
}

// endregion
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,9 @@ internal object Aggregators {
}

// T: Number -> T
// Byte -> Int
// Short -> Int
// Nothing -> Double
val sum by withOneOption { skipNaN: Boolean ->
twoStepReducingForNumbers(sumTypeConversion) { type ->
sum(type, skipNaN)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ internal class NumberInputHandler<out Return : Any?> : AggregatorInputHandler<Nu
* If no valid unification can be found or the input is solely [Number]`(?)`, the type [Number]`(?)` is returned.
*/
override fun calculateValueType(valueTypes: Set<KType>): ValueType {
val unifiedType = valueTypes.unifiedNumberTypeOrNull(UnifiedNumberTypeOptions.Companion.PRIMITIVES_ONLY)
val unifiedType = valueTypes.unifiedNumberTypeOrNull(UnifiedNumberTypeOptions.PRIMITIVES_ONLY)
?: typeOf<Number>().withNullability(valueTypes.any { it.isMarkedNullable })

if (unifiedType.isSubtypeOf(typeOf<Double?>()) &&
Expand Down
Loading