Skip to content

Commit

Permalink
Merge branch 'master' into statistics-fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Jolanrensen committed Nov 6, 2024
2 parents c97a973 + 439f65d commit 6e028f1
Show file tree
Hide file tree
Showing 93 changed files with 14,990 additions and 643 deletions.
10,892 changes: 10,892 additions & 0 deletions core/api/core.api

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions core/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ plugins {
alias(docProcessor)
alias(simpleGit)
alias(buildconfig)
alias(binary.compatibility.validator)

// generates keywords using the :generator module
alias(keywordGenerator)
Expand Down Expand Up @@ -71,6 +72,7 @@ dependencies {
implementation(libs.commonsIo)
implementation(libs.serialization.core)
implementation(libs.serialization.json)
implementation(libs.fastDoubleParser)

implementation(libs.fuel)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.concat
import org.jetbrains.kotlinx.dataframe.api.filter
import org.jetbrains.kotlinx.dataframe.api.map
import org.jetbrains.kotlinx.dataframe.api.schema
import org.jetbrains.kotlinx.dataframe.api.take
import org.jetbrains.kotlinx.dataframe.columns.BaseColumn
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
Expand All @@ -15,16 +14,28 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
import org.jetbrains.kotlinx.dataframe.impl.api.chunkedImpl
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupImpl
import org.jetbrains.kotlinx.dataframe.impl.columns.FrameColumnImpl
import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnImpl
import org.jetbrains.kotlinx.dataframe.impl.columns.addPath
import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType
import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnKind
import org.jetbrains.kotlinx.dataframe.impl.getValuesType
import org.jetbrains.kotlinx.dataframe.impl.splitByIndices
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import org.jetbrains.kotlinx.dataframe.util.CHUNKED_IMPL_IMPORT
import org.jetbrains.kotlinx.dataframe.util.CREATE
import org.jetbrains.kotlinx.dataframe.util.CREATE_BY_INFERENCE_IMPORT
import org.jetbrains.kotlinx.dataframe.util.CREATE_BY_TYPE_IMPORT
import org.jetbrains.kotlinx.dataframe.util.CREATE_FRAME_COLUMN
import org.jetbrains.kotlinx.dataframe.util.CREATE_FRAME_COLUMN_REPLACE
import org.jetbrains.kotlinx.dataframe.util.CREATE_INLINE_REPLACE
import org.jetbrains.kotlinx.dataframe.util.CREATE_REPLACE
import org.jetbrains.kotlinx.dataframe.util.CREATE_WITH_TYPE_INFERENCE
import org.jetbrains.kotlinx.dataframe.util.CREATE_WITH_TYPE_INFERENCE_REPLACE
import org.jetbrains.kotlinx.dataframe.util.TYPE_SUGGESTION_IMPORT
import kotlin.reflect.KClass
import kotlin.reflect.KProperty
import kotlin.reflect.KType
Expand All @@ -45,6 +56,9 @@ public interface DataColumn<out T> : BaseColumn<T> {
/**
* Creates [ValueColumn] using given [name], [values] and [type].
*
* Be careful; values are NOT checked to adhere to [type] for efficiency,
* unless you specify [infer].
*
* @param name name of the column
* @param values list of column values
* @param type type of the column
Expand All @@ -56,12 +70,20 @@ public interface DataColumn<out T> : BaseColumn<T> {
type: KType,
infer: Infer = Infer.None,
defaultValue: T? = null,
): ValueColumn<T> = ValueColumnImpl(values, name, getValuesType(values, type, infer), defaultValue)
): ValueColumn<T> =
ValueColumnImpl(
values = values,
name = name,
type = getValuesType(values, type, infer),
defaultValue = defaultValue,
)

/**
* Creates [ValueColumn] using given [name], [values] and reified column [type].
*
* Note, that column [type] will be defined at compile-time using [T] argument
* The column [type] will be defined at compile-time using [T] argument.
* Be careful with casting; values are NOT checked to adhere to `reified` type [T] for efficiency,
* unless you specify [infer].
*
* @param T type of the column
* @param name name of the column
Expand All @@ -74,48 +96,187 @@ public interface DataColumn<out T> : BaseColumn<T> {
infer: Infer = Infer.None,
): ValueColumn<T> =
createValueColumn(
name,
values,
getValuesType(
values,
typeOf<T>(),
infer,
),
name = name,
values = values,
type = typeOf<T>(),
infer = infer,
)

/**
* Creates [ColumnGroup] using the given [name] and [df] representing the group of columns.
*
* @param name name of the column group
* @param df the collection of columns representing the column group
*/
public fun <T> createColumnGroup(name: String, df: DataFrame<T>): ColumnGroup<T> = ColumnGroupImpl(name, df)

public fun <T> createFrameColumn(name: String, df: DataFrame<T>, startIndices: Iterable<Int>): FrameColumn<T> =
FrameColumnImpl(name, df.splitByIndices(startIndices.asSequence()).toList(), lazy { df.schema() })

/**
* Creates [FrameColumn] using the given [name] and list of dataframes [groups].
*
* [groups] must be a non-null list of [DataFrames][DataFrame], as [FrameColumn] does
* not allow `null` values.
* This is NOT checked at runtime for efficiency, nor is the validity of given [schema].
*
* @param name name of the frame column
* @param groups the dataframes to be put in the column
* @param schema an optional (lazily calculated) [DataFrameSchema] representing
* the intersecting schema of [groups]
*/
public fun <T> createFrameColumn(
name: String,
groups: List<DataFrame<T>>,
schema: Lazy<DataFrameSchema>? = null,
): FrameColumn<T> = FrameColumnImpl(name, groups, schema)

public fun <T> createWithTypeInference(
/**
* Creates either a [FrameColumn], [ColumnGroup], or [ValueColumn] by analyzing each value in
* [values].
*
* This is safer but slower than the other functions.
*
* Some conversions are done automatically to attempt to unify the values.
*
* For instance, when there are other [DataFrames][DataFrame] present in [values], we'll convert:
* - `null` -> [DataFrame.empty]`()`
* - [DataRow] -> single-row [DataFrame]
* - [List][List]`<`[DataRow][DataRow]`<*>>` -> multi-row [DataFrame]
*
* to be able to create a [FrameColumn].
* There are more conversions for other types as well.
*
* @param name name of the column
* @param values the values to represent each row in the column
* @param suggestedType optional suggested type for values. Default is [TypeSuggestion.Infer].
* See [TypeSuggestion] for more information.
* @param nullable optionally you can specify whether [values] contains nulls, if `null` it is inferred.
*/
public fun <T> createByInference(
name: String,
values: List<T>,
suggestedType: TypeSuggestion = TypeSuggestion.Infer,
nullable: Boolean? = null,
): DataColumn<T> = guessColumnType(name, values, nullable = nullable)
): DataColumn<T> =
createColumnGuessingType(
name = name,
values = values,
suggestedType = suggestedType,
nullable = nullable,
)

public fun <T> create(
/**
* Calls [createColumnGroup], [createFrameColumn], or [createValueColumn] based on
* [type].
*
* This may be unsafe but is more efficient than [createByInference].
*
* Be careful; Values in [values] are NOT checked to adhere to the given [type], nor
* do we check whether there are unexpected nulls among the values.
*
* It's recommended to use [createValueColumn], [createColumnGroup], and [createFrameColumn] instead.
*
* @param name the name of the column
* @param values the values to represent each row in the column
* @param type the (unchecked) common type of [values]
* @param infer in case a [ValueColumn] is created, this controls how/whether types need to be inferred
*/
public fun <T> createByType(
name: String,
values: List<T>,
type: KType,
infer: Infer = Infer.None,
): DataColumn<T> =
when (type.toColumnKind()) {
when (type.toColumnKind()) { // AnyFrame -> Frame, AnyRow? -> Group, else -> Value
ColumnKind.Value -> createValueColumn(name, values, type, infer)

ColumnKind.Group -> createColumnGroup(name, (values as List<AnyRow?>).concat()).asDataColumn().cast()

ColumnKind.Frame -> createFrameColumn(name, values as List<AnyFrame>).asDataColumn().cast()
}

public inline fun <reified T> create(name: String, values: List<T>, infer: Infer = Infer.None): DataColumn<T> =
create(name, values, typeOf<T>(), infer)
/**
* Calls [createColumnGroup], [createFrameColumn], or [createValueColumn] based on
* type [T].
*
* This is generally safe, as [T] can be inferred by the compiler,
* and more efficient than [createByInference].
*
* Be careful when casting occurs; Values in [values] are NOT checked to adhere to the given/inferred type [T],
* nor do we check whether there are unexpected nulls among the values.
*
* It's recommended to use [createValueColumn], [createColumnGroup], and [createFrameColumn] instead.
*
* @param T the (unchecked) common type of [values]
* @param name the name of the column
* @param values the values to represent each row in the column
* @param infer in case a [ValueColumn] is created, this controls how/whether types need to be inferred
*/
public inline fun <reified T> createByType(
name: String,
values: List<T>,
infer: Infer = Infer.None,
): DataColumn<T> = createByType(name, values, typeOf<T>(), infer)

/** Creates an empty [DataColumn] with given [name]. */
public fun empty(name: String = ""): AnyCol = createValueColumn(name, emptyList<Unit>(), typeOf<Unit>())

// region deprecated

@Deprecated(
message = CREATE_FRAME_COLUMN,
replaceWith = ReplaceWith(CREATE_FRAME_COLUMN_REPLACE, CHUNKED_IMPL_IMPORT),
level = DeprecationLevel.WARNING,
)
public fun <T> createFrameColumn(name: String, df: DataFrame<T>, startIndices: Iterable<Int>): FrameColumn<T> =
df.chunkedImpl(startIndices = startIndices, name = name)

@Deprecated(
message = CREATE_WITH_TYPE_INFERENCE,
replaceWith = ReplaceWith(
CREATE_WITH_TYPE_INFERENCE_REPLACE,
CREATE_BY_INFERENCE_IMPORT,
TYPE_SUGGESTION_IMPORT,
),
level = DeprecationLevel.WARNING,
)
public fun <T> createWithTypeInference(
name: String,
values: List<T>,
nullable: Boolean? = null,
): DataColumn<T> =
createByInference(
name = name,
values = values,
suggestedType = TypeSuggestion.Infer,
nullable = nullable,
)

@Deprecated(
message = CREATE,
replaceWith = ReplaceWith(CREATE_REPLACE, CREATE_BY_TYPE_IMPORT),
level = DeprecationLevel.WARNING,
)
public fun <T> create(
name: String,
values: List<T>,
type: KType,
infer: Infer = Infer.None,
): DataColumn<T> =
createByType(
name = name,
values = values,
type = type,
infer = infer,
)

@Deprecated(
message = CREATE,
replaceWith = ReplaceWith(CREATE_INLINE_REPLACE, CREATE_BY_TYPE_IMPORT),
level = DeprecationLevel.WARNING,
)
public inline fun <reified T> create(name: String, values: List<T>, infer: Infer = Infer.None): DataColumn<T> =
createByType(name = name, values = values, type = typeOf<T>(), infer = infer)

// endregion
}

public fun hasNulls(): Boolean = type().isMarkedNullable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,18 @@ import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
import org.jetbrains.kotlinx.dataframe.impl.api.chunkedImpl
import org.jetbrains.kotlinx.dataframe.impl.getListType
import org.jetbrains.kotlinx.dataframe.nrow
import org.jetbrains.kotlinx.dataframe.type

/**
* Creates a [FrameColumn] from [this] by splitting the dataframe into
* smaller ones, with their number of rows at most [size].
*/
public fun <T> DataFrame<T>.chunked(size: Int, name: String = "groups"): FrameColumn<T> {
val startIndices = (0 until nrow step size)
return DataColumn.createFrameColumn(name, this, startIndices)
return this.chunkedImpl(startIndices, name)
}

public fun <T> DataColumn<T>.chunked(size: Int): ValueColumn<List<T>> {
Expand Down
Loading

0 comments on commit 6e028f1

Please sign in to comment.