Skip to content

All except fix: Option 2 #1038

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Feb 12, 2025
Merged
185 changes: 185 additions & 0 deletions core/api/core.api

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,10 @@ public interface ColumnsSelectionDsl<out T> : // SingleColumn<DataRow<T>>
*
* {@include [Indent]}`| `{@include [DropColumnsSelectionDsl.Grammar.ColumnGroupWhileName]}**` { `**{@include [DslGrammarTemplate.ConditionRef]}**` \}`**
*
* {@include [Indent]}`| `{@include [AllExceptColumnsSelectionDsl.Grammar.ColumnGroupExceptName]}**` { `**{@include [DslGrammarTemplate.ColumnsSelectorRef]}**` \} `**
*
* {@include [Indent]}`| `{@include [AllExceptColumnsSelectionDsl.Grammar.ColumnGroupExceptName]}**`(`**{@include [DslGrammarTemplate.ColumnNoAccessorRef]}**`,`**` ..`**`)`**
*
* {@include [Indent]}`| (`{@include [FirstColumnsSelectionDsl.Grammar.ColumnGroupName]}`| `{@include [LastColumnsSelectionDsl.Grammar.ColumnGroupName]}`| `{@include [SingleColumnsSelectionDsl.Grammar.ColumnGroupName]}`) [ `**`{ `**{@include [DslGrammarTemplate.ConditionRef]}**` \}`**` ]`
*
* {@include [Indent]}`| `{@include [SelectColumnsSelectionDsl.Grammar.ColumnGroupName]}**` { `**{@include [DslGrammarTemplate.ColumnsSelectorRef]}**` \}`**
Expand Down
385 changes: 348 additions & 37 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/allExcept.kt

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -416,59 +416,66 @@ internal fun List<ColumnWithPath<*>>.allColumnsExceptAndUnpack(
* Empty groups will be removed if [removeEmptyGroups]` == true`
*/
internal fun List<ColumnWithPath<*>>.allColumnsExceptKeepingStructure(
columns: Iterable<ColumnWithPath<*>>,
columns: Set<ColumnWithPath<*>>,
removeEmptyGroups: Boolean = true,
): List<ColumnWithPath<*>> {
if (isEmpty()) return emptyList()
val fullTree = collectTree()
for (columnToExcept in columns) {
// grab the node representing the column from the tree
val nodeToExcept = fullTree.getOrPut(columnToExcept.path).asNullable()
if (nodeToExcept != null) {
// remove the children from the node (if it's a column group) and remove its data (the column itself)
nodeToExcept.allChildren().forEach { it.data = null }
nodeToExcept.data = null

// we need to update the data of the parent node(s) to reflect the removal of the column
if (nodeToExcept.parent != null) {
// we grab the data of the parent node, which should be a column group
// treat it as a DF to remove the column to except from it and
// convert it back to a column group
val current = nodeToExcept.parent.data as ColumnGroup<*>? ?: continue
val adjustedCurrent = current
.remove(nodeToExcept.name)
.asColumnGroup(current.name)
.addPath(current.path())

// remove the group if it's empty and removeEmptyGroups is true
// else, simply update the parent's data with the adjusted column group
nodeToExcept.parent.data =
if (adjustedCurrent.cols().isEmpty() && removeEmptyGroups) {
null
} else {
adjustedCurrent
return flatMap {
val fullTree = listOf(it).collectTree()
for (columnToExcept in columns.sortedByDescending { it.path.size }) {
// grab the node representing the column from the tree
val nodeToExcept = fullTree.getOrPut(columnToExcept.path).asNullable()
if (nodeToExcept != null) {
// remove the children from the node (if it's a column group) and remove its data (the column itself)
nodeToExcept.allChildren().forEach { it.data = null }
nodeToExcept.data = null

// we need to update the data of the parent node(s) to reflect the removal of the column
if (nodeToExcept.parent != null) {
// we grab the data of the parent node, which should be a column group
// treat it as a DF to remove the column to except from it and
// convert it back to a column group
val current = nodeToExcept.parent.data as ColumnGroup<*>? ?: continue
val adjustedCurrent = current
.remove(nodeToExcept.name)
.asColumnGroup(current.name)
.addPath(current.path())

// remove the group if it's empty and removeEmptyGroups is true
// else, simply update the parent's data with the adjusted column group
nodeToExcept.parent.data =
if (removeEmptyGroups && adjustedCurrent.cols().isEmpty()) {
null
} else {
adjustedCurrent
}

// now we update the parent's parents recursively with new column group instances
var parent = nodeToExcept.parent.parent

@Suppress("UNNECESSARY_NOT_NULL_ASSERTION")
var currentNode = nodeToExcept.parent!!
while (parent != null) {
val parentData = parent.data as ColumnGroup<*>? ?: break
val currentData = currentNode.data
val modifiedParentData =
if (currentData == null) {
parentData.remove(currentNode.name)
} else {
parentData.replace(currentNode.name).with { currentData }
}
parent.data = modifiedParentData
.asColumnGroup(parentData.name)
.addPath(parentData.path())
currentNode = parent
parent = parent.parent
}

// now we update the parent's parents recursively with new column group instances
var parent = nodeToExcept.parent.parent

@Suppress("UNNECESSARY_NOT_NULL_ASSERTION")
var currentNode = nodeToExcept.parent!!
while (parent != null) {
val parentData = parent.data as ColumnGroup<*>? ?: break
parent.data = parentData
.replace(currentNode.name).with { currentNode.data!! }
.asColumnGroup(parentData.name)
.addPath(parentData.path())

currentNode = parent
parent = parent.parent
}
}
}
val subtrees = fullTree.topmostChildren { it.data != null }
subtrees.map { it.data!!.addPath(it.pathFromRoot()) }
}
val subtrees = fullTree.topmostChildren { it.data != null }
return subtrees.map { it.data!!.addPath(it.pathFromRoot()) }
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ internal const val COL_REPLACE = "col"

internal const val ALL_COLS_EXCEPT =
"This overload is blocked to prevent issues with column accessors. Use the `{}` overload instead."
internal const val ALL_COLS_REPLACE = "allColsExcept { other }"
internal const val ALL_COLS_REPLACE_VARARG = "allColsExcept { others.toColumnSet() }"

internal const val ALL_COLS_EXCEPT_REPLACE = "this.allColsExcept { other }"
internal const val ALL_COLS_EXCEPT_REPLACE_VARARG = "this.allColsExcept { others.toColumnSet() }"
internal const val EXCEPT_REPLACE = "this.except { other }"
internal const val EXCEPT_REPLACE_VARARG = "this.except { others.toColumnSet() }"
// endregion
112 changes: 108 additions & 4 deletions core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/allExcept.kt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,32 @@ import org.junit.Test

class AllExceptTests : ColumnsSelectionDslTests() {

@Test
fun `issue 761`() {
val renamed = df.rename { colsAtAnyDepth() except name.firstName }.into { it.name.uppercase() }
renamed.columnNames() shouldBe listOf("NAME", "AGE", "CITY", "WEIGHT", "ISHAPPY")
renamed.getColumnGroup("NAME").columnNames() shouldBe listOf("firstName", "LASTNAME")

val df2 = dataFrameOf("a.b", "a.c.d", "d.e", "d.f")(1, 3.0, 2, "b")
.move { all() }.into { it.name.split(".").toPath() }
df2.select { cols("a") except "a"["b"] }.let {
it.getColumnGroup("a").getColumnOrNull("b") shouldBe null
it[pathOf("a", "c", "d")].single() shouldBe 3.0
}
df2.select { cols("a") except "a"["c"]["d"] }.let {
it.getColumnGroup("a").getColumnOrNull("c") shouldBe null
it[pathOf("a", "b")].single() shouldBe 1
}
df2.select { "a".except("b") }.let {
it.getColumnGroup("a").getColumnOrNull("b") shouldBe null
it[pathOf("a", "c", "d")].single() shouldBe 3.0
}
df2.select { "a".except { "c"["d"] } }.let {
it.getColumnGroup("a").getColumnOrNull("c") shouldBe null
it[pathOf("a", "b")].single() shouldBe 1
}
}

@Test
fun `exceptions`() {
shouldThrow<IllegalStateException> {
Expand Down Expand Up @@ -70,12 +96,15 @@ class AllExceptTests : ColumnsSelectionDslTests() {
).shouldAllBeEqual()

listOf(
df.select { name and name.firstName }.alsoDebug(),
df.select { cols(name) except name.firstName },
df.select { (name and name.firstName and name.firstName) except name.firstName },
df.select { (name and name and name.firstName).except(name.firstName).simplify() },
).shouldAllBeEqual()

df.select { (name and name.firstName and name.firstName) except name.firstName }.alsoDebug()

df.select { (name and name and name.firstName) except name.firstName }.alsoDebug()
df.getColumns { (name and name and name.firstName).except(name.firstName) }.forEach {
it.isColumnGroup() shouldBe true
it.asColumnGroup().columnNames() shouldBe listOf("lastName")
}
}

@Test
Expand Down Expand Up @@ -282,4 +311,79 @@ class AllExceptTests : ColumnsSelectionDslTests() {
},
).shouldAllBeEqual()
}

@Test
fun `except on column group`() {
val firstNameAccessor = column<String>("firstName")
listOf(
df.select { name }.remove { name.firstName }.alsoDebug(),
df.select { cols(name) except name.firstName },
df.select { name.except { cols { "first" in it.name } } },
df.select { name.except { cols { "first" in it.name } and cols { "first" in it.name } } },
df.select { name.except { firstName } },
df.select { name.except { firstNameAccessor } },
df.select { name.except { firstName and firstName } },
df.select { name.except { firstNameAccessor and firstNameAccessor } },
// df.select { name.except(name.firstName and name.firstName) }, // not allowed
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like we need a test for this, but it's not possible in that manner and should be kind of tests for compiler plugin, when it's a String which should be compiled or not

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or could we make assert on that?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes indeed, that's a good idea for later :)

// df.select { name.except(firstNameAccessor and firstNameAccessor) }, // not allowed
df.select { name.except("firstName") },
df.select { name.except("firstName", "firstName") },
df.select { name.except(Name::firstName) },
df.select { name.except(Name::firstName, Name::firstName) },
df.select { name.except(pathOf("firstName")) },
df.select { name.except(pathOf("firstName"), pathOf("firstName")) },
df.select { "name".except { cols { "first" in it.name } } },
df.select { "name".except { cols { "first" in it.name } and cols { "first" in it.name } } },
df.select { "name".except { firstNameAccessor } },
df.select { "name".except { firstNameAccessor and firstNameAccessor } },
// df.select { "name".except(name.firstName and name.firstName) }, // not allowed
// df.select { "name".except(firstNameAccessor and firstNameAccessor) }, // not allowed
df.select { "name".except("firstName") },
df.select { "name".except("firstName", "firstName") },
df.select { "name".except(Name::firstName) },
df.select { "name".except(Name::firstName, Name::firstName) },
df.select { "name".except(pathOf("firstName")) },
df.select { "name".except(pathOf("firstName"), pathOf("firstName")) },
// df.select { Person::name.except(name.firstName and name.firstName) }, // not allowed
// df.select { Person::name.except(firstNameAccessor and firstNameAccessor) }, // not allowed
df.select { Person::name.except("firstName") },
df.select { Person::name.except("firstName", "firstName") },
df.select { Person::name.except(Name::firstName) },
df.select { Person::name.except(Name::firstName, Name::firstName) },
df.select { Person::name.except(pathOf("firstName")) },
df.select { Person::name.except(pathOf("firstName"), pathOf("firstName")) },
df.select { NonDataSchemaPerson::name.except { cols { "first" in it.name } } },
df.select {
NonDataSchemaPerson::name.except {
cols { "first" in it.name } and
cols { "first" in it.name }
}
},
df.select { NonDataSchemaPerson::name.except { firstName } },
df.select { NonDataSchemaPerson::name.except { firstNameAccessor } },
df.select { NonDataSchemaPerson::name.except { firstName and firstName } },
df.select { NonDataSchemaPerson::name.except { firstNameAccessor and firstNameAccessor } },
// df.select { NonDataSchemaPerson::name.except(name.firstName and name.firstName) }, // not allowed
// df.select { NonDataSchemaPerson::name.except(firstNameAccessor and firstNameAccessor) }, // not allowed
df.select { NonDataSchemaPerson::name.except("firstName") },
df.select { NonDataSchemaPerson::name.except("firstName", "firstName") },
df.select { NonDataSchemaPerson::name.except(Name::firstName) },
df.select { NonDataSchemaPerson::name.except(Name::firstName, Name::firstName) },
df.select { NonDataSchemaPerson::name.except(pathOf("firstName")) },
df.select { NonDataSchemaPerson::name.except(pathOf("firstName"), pathOf("firstName")) },
df.select { pathOf("name").except { cols { "first" in it.name } } },
df.select { pathOf("name").except { cols { "first" in it.name } and cols { "first" in it.name } } },
df.select { pathOf("name").except { firstNameAccessor } },
df.select { pathOf("name").except { firstNameAccessor and firstNameAccessor } },
// df.select { pathOf("name").except(name.firstName and name.firstName) }, // not allowed
// df.select { pathOf("name").except(firstNameAccessor and firstNameAccessor) }, // not allowed
df.select { pathOf("name").except("firstName") },
df.select { pathOf("name").except("firstName", "firstName") },
df.select { pathOf("name").except(Name::firstName) },
df.select { pathOf("name").except(Name::firstName) },
df.select { pathOf("name").except(Name::firstName, Name::firstName) },
df.select { pathOf("name").except(pathOf("firstName")) },
df.select { pathOf("name").except(pathOf("firstName"), pathOf("firstName")) },
).shouldAllBeEqual()
}
}
Loading
Loading