Skip to content

Add documentation metadata and overloads for distinct API #1023

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@ import org.jetbrains.kotlinx.dataframe.AnyColumnReference
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
import org.jetbrains.kotlinx.dataframe.api.Select.SelectSelectingOptions
import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
import org.jetbrains.kotlinx.dataframe.columns.SingleColumn
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarTemplateColumnsSelectionDsl.DslGrammarTemplate
import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources
import org.jetbrains.kotlinx.dataframe.documentation.Indent
import org.jetbrains.kotlinx.dataframe.exceptions.DuplicateColumnNamesException
import org.jetbrains.kotlinx.dataframe.impl.columns.DistinctColumnSet
Expand All @@ -16,33 +19,92 @@ import kotlin.reflect.KProperty

// region DataFrame


// Shared documentation template: the `distinct`/`distinctBy` overloads pull this KDoc in with
// `{@include [DistinctDocs]}` and supply PHRASE_ENDING through `{@set PHRASE_ENDING ...}`.
/**
 * ## The Distinct Operation
 *
 * It removes duplicate rows based on {@get PHRASE_ENDING}.
 *
 * __NOTE:__ The rows in the resulting [DataFrame] are in the same order as they were in the original [DataFrame].
 *
 * {@get [DISTINCT_PARAM] @param [columns]
 * The names of the columns to consider for evaluating distinct rows.}
 *
 * @return A new [DataFrame] containing only distinct rows.
 *
 * @see [Selecting Columns][SelectSelectingOptions]
 * @see {@include [DocumentationUrls.Distinct]}
 */
@ExcludeFromSources
private interface DistinctDocs {
// Key for the `@param` entry in the template above. An overload without a `columns` parameter
// sets this key to an empty value so that the default `@param` text is not emitted.
interface DISTINCT_PARAM
}

/**
 * {@include [DistinctDocs]}
 * {@set PHRASE_ENDING all columns}
 * {@set [DistinctDocs.DISTINCT_PARAM]}
 */
public fun <T> DataFrame<T>.distinct(): DataFrame<T> = distinctBy { all() }

/**
 * {@include [DistinctDocs]}
 * {@set PHRASE_ENDING the specified columns, keeping only those columns in the result}
 * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns]
 * The [ColumnsSelector] that selects the columns to consider for evaluating distinct rows.}
 */
public fun <T, C> DataFrame<T>.distinct(columns: ColumnsSelector<T, C>): DataFrame<T> = select(columns).distinct()

/**
 * {@include [DistinctDocs]}
 * {@set PHRASE_ENDING the specified columns, keeping only those columns in the result}
 * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns]
 * The property references of the columns to consider for evaluating distinct rows.}
 */
@AccessApiOverload
public fun <T> DataFrame<T>.distinct(vararg columns: KProperty<*>): DataFrame<T> =
    distinct { columns.toColumnSet() }

/**
 * {@include [DistinctDocs]}
 * {@set PHRASE_ENDING the specified columns, keeping only those columns in the result}
 */
public fun <T> DataFrame<T>.distinct(vararg columns: String): DataFrame<T> = distinct { columns.toColumnSet() }

/**
 * {@include [DistinctDocs]}
 * {@set PHRASE_ENDING the specified columns, keeping only those columns in the result}
 * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns]
 * The column references of the columns to consider for evaluating distinct rows.}
 */
@AccessApiOverload
public fun <T> DataFrame<T>.distinct(vararg columns: AnyColumnReference): DataFrame<T> =
    distinct { columns.toColumnSet() }

/**
 * {@include [DistinctDocs]}
 * {@set PHRASE_ENDING the specified columns}
 * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns]
 * The property references of the columns to consider for evaluating distinct rows.}
 */
@AccessApiOverload
public fun <T> DataFrame<T>.distinctBy(vararg columns: KProperty<*>): DataFrame<T> =
    distinctBy { columns.toColumnSet() }

/**
 * {@include [DistinctDocs]}
 * {@set PHRASE_ENDING the specified columns}
 */
public fun <T> DataFrame<T>.distinctBy(vararg columns: String): DataFrame<T> = distinctBy { columns.toColumnSet() }

/**
 * {@include [DistinctDocs]}
 * {@set PHRASE_ENDING the specified columns}
 * {@set [DistinctDocs.DISTINCT_PARAM] @param [columns]
 * The column references of the columns to consider for evaluating distinct rows.}
 */
@AccessApiOverload
public fun <T> DataFrame<T>.distinctBy(vararg columns: AnyColumnReference): DataFrame<T> =
    distinctBy { columns.toColumnSet() }


/**
* {@include [DistinctDocs]}
* {@set PHRASE_ENDING the specified columns}.
*/
public fun <T, C> DataFrame<T>.distinctBy(columns: ColumnsSelector<T, C>): DataFrame<T> {
val cols = get(columns)
val distinctIndices = indices.distinctBy { i -> cols.map { it[i] } }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,7 @@ internal interface DocumentationUrls {

/** [See `remove` on the documentation website.]({@include [Url]}/remove.html) */
interface Remove

// NOTE(review): unlike `Remove`, this entry is an HTML anchor rather than a Markdown link.
// It is included directly after an `@see` tag in the `distinct` documentation template, which is
// presumably why the anchor form was chosen — confirm before normalizing it to the Markdown style.
/** <a href="{@include [Url]}/distinct.html">See `distinct` on the documentation website.</a> */
interface Distinct
}