Skip to content

Commit 90d8153

Browse files
authored
Merge pull request #1038 from Kotlin/allexcept-npe-fix
All except fix: Option 2
2 parents 9848ae3 + b101243 commit 90d8153

File tree

8 files changed

+721
-93
lines changed

8 files changed

+721
-93
lines changed

core/api/core.api

Lines changed: 185 additions & 0 deletions
Large diffs are not rendered by default.

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,10 @@ public interface ColumnsSelectionDsl<out T> : // SingleColumn<DataRow<T>>
333333
*
334334
* {@include [Indent]}`| `{@include [DropColumnsSelectionDsl.Grammar.ColumnGroupWhileName]}**` { `**{@include [DslGrammarTemplate.ConditionRef]}**` \}`**
335335
*
336+
* {@include [Indent]}`| `{@include [AllExceptColumnsSelectionDsl.Grammar.ColumnGroupExceptName]}**` { `**{@include [DslGrammarTemplate.ColumnsSelectorRef]}**` \} `**
337+
*
338+
* {@include [Indent]}`| `{@include [AllExceptColumnsSelectionDsl.Grammar.ColumnGroupExceptName]}**`(`**{@include [DslGrammarTemplate.ColumnNoAccessorRef]}**`,`**` ..`**`)`**
339+
*
336340
* {@include [Indent]}`| (`{@include [FirstColumnsSelectionDsl.Grammar.ColumnGroupName]}`| `{@include [LastColumnsSelectionDsl.Grammar.ColumnGroupName]}`| `{@include [SingleColumnsSelectionDsl.Grammar.ColumnGroupName]}`) [ `**`{ `**{@include [DslGrammarTemplate.ConditionRef]}**` \}`**` ]`
337341
*
338342
* {@include [Indent]}`| `{@include [SelectColumnsSelectionDsl.Grammar.ColumnGroupName]}**` { `**{@include [DslGrammarTemplate.ColumnsSelectorRef]}**` \}`**

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/allExcept.kt

Lines changed: 348 additions & 37 deletions
Large diffs are not rendered by default.

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/Utils.kt

Lines changed: 52 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -416,59 +416,66 @@ internal fun List<ColumnWithPath<*>>.allColumnsExceptAndUnpack(
416416
* Empty groups will be removed if [removeEmptyGroups]` == true`
417417
*/
418418
internal fun List<ColumnWithPath<*>>.allColumnsExceptKeepingStructure(
419-
columns: Iterable<ColumnWithPath<*>>,
419+
columns: Set<ColumnWithPath<*>>,
420420
removeEmptyGroups: Boolean = true,
421421
): List<ColumnWithPath<*>> {
422422
if (isEmpty()) return emptyList()
423-
val fullTree = collectTree()
424-
for (columnToExcept in columns) {
425-
// grab the node representing the column from the tree
426-
val nodeToExcept = fullTree.getOrPut(columnToExcept.path).asNullable()
427-
if (nodeToExcept != null) {
428-
// remove the children from the node (if it's a column group) and remove its data (the column itself)
429-
nodeToExcept.allChildren().forEach { it.data = null }
430-
nodeToExcept.data = null
431-
432-
// we need to update the data of the parent node(s) to reflect the removal of the column
433-
if (nodeToExcept.parent != null) {
434-
// we grab the data of the parent node, which should be a column group
435-
// treat it as a DF to remove the column to except from it and
436-
// convert it back to a column group
437-
val current = nodeToExcept.parent.data as ColumnGroup<*>? ?: continue
438-
val adjustedCurrent = current
439-
.remove(nodeToExcept.name)
440-
.asColumnGroup(current.name)
441-
.addPath(current.path())
442-
443-
// remove the group if it's empty and removeEmptyGroups is true
444-
// else, simply update the parent's data with the adjusted column group
445-
nodeToExcept.parent.data =
446-
if (adjustedCurrent.cols().isEmpty() && removeEmptyGroups) {
447-
null
448-
} else {
449-
adjustedCurrent
423+
return flatMap {
424+
val fullTree = listOf(it).collectTree()
425+
for (columnToExcept in columns.sortedByDescending { it.path.size }) {
426+
// grab the node representing the column from the tree
427+
val nodeToExcept = fullTree.getOrPut(columnToExcept.path).asNullable()
428+
if (nodeToExcept != null) {
429+
// remove the children from the node (if it's a column group) and remove its data (the column itself)
430+
nodeToExcept.allChildren().forEach { it.data = null }
431+
nodeToExcept.data = null
432+
433+
// we need to update the data of the parent node(s) to reflect the removal of the column
434+
if (nodeToExcept.parent != null) {
435+
// we grab the data of the parent node, which should be a column group
436+
// treat it as a DF to remove the column to except from it and
437+
// convert it back to a column group
438+
val current = nodeToExcept.parent.data as ColumnGroup<*>? ?: continue
439+
val adjustedCurrent = current
440+
.remove(nodeToExcept.name)
441+
.asColumnGroup(current.name)
442+
.addPath(current.path())
443+
444+
// remove the group if it's empty and removeEmptyGroups is true
445+
// else, simply update the parent's data with the adjusted column group
446+
nodeToExcept.parent.data =
447+
if (removeEmptyGroups && adjustedCurrent.cols().isEmpty()) {
448+
null
449+
} else {
450+
adjustedCurrent
451+
}
452+
453+
// now we update the parent's parents recursively with new column group instances
454+
var parent = nodeToExcept.parent.parent
455+
456+
@Suppress("UNNECESSARY_NOT_NULL_ASSERTION")
457+
var currentNode = nodeToExcept.parent!!
458+
while (parent != null) {
459+
val parentData = parent.data as ColumnGroup<*>? ?: break
460+
val currentData = currentNode.data
461+
val modifiedParentData =
462+
if (currentData == null) {
463+
parentData.remove(currentNode.name)
464+
} else {
465+
parentData.replace(currentNode.name).with { currentData }
466+
}
467+
parent.data = modifiedParentData
468+
.asColumnGroup(parentData.name)
469+
.addPath(parentData.path())
470+
currentNode = parent
471+
parent = parent.parent
450472
}
451-
452-
// now we update the parent's parents recursively with new column group instances
453-
var parent = nodeToExcept.parent.parent
454-
455-
@Suppress("UNNECESSARY_NOT_NULL_ASSERTION")
456-
var currentNode = nodeToExcept.parent!!
457-
while (parent != null) {
458-
val parentData = parent.data as ColumnGroup<*>? ?: break
459-
parent.data = parentData
460-
.replace(currentNode.name).with { currentNode.data!! }
461-
.asColumnGroup(parentData.name)
462-
.addPath(parentData.path())
463-
464-
currentNode = parent
465-
parent = parent.parent
466473
}
467474
}
468475
}
476+
val subtrees = fullTree.topmostChildren { it.data != null }
477+
subtrees.map { it.data!!.addPath(it.pathFromRoot()) }
469478
}
470-
val subtrees = fullTree.topmostChildren { it.data != null }
471-
return subtrees.map { it.data!!.addPath(it.pathFromRoot()) }
472479
}
473480

474481
/**

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,8 @@ internal const val COL_REPLACE = "col"
7676

7777
internal const val ALL_COLS_EXCEPT =
7878
"This overload is blocked to prevent issues with column accessors. Use the `{}` overload instead."
79-
internal const val ALL_COLS_REPLACE = "allColsExcept { other }"
80-
internal const val ALL_COLS_REPLACE_VARARG = "allColsExcept { others.toColumnSet() }"
81-
79+
internal const val ALL_COLS_EXCEPT_REPLACE = "this.allColsExcept { other }"
80+
internal const val ALL_COLS_EXCEPT_REPLACE_VARARG = "this.allColsExcept { others.toColumnSet() }"
81+
internal const val EXCEPT_REPLACE = "this.except { other }"
82+
internal const val EXCEPT_REPLACE_VARARG = "this.except { others.toColumnSet() }"
8283
// endregion

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/allExcept.kt

Lines changed: 108 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,32 @@ import org.junit.Test
1616

1717
class AllExceptTests : ColumnsSelectionDslTests() {
1818

19+
@Test
20+
fun `issue 761`() {
21+
val renamed = df.rename { colsAtAnyDepth() except name.firstName }.into { it.name.uppercase() }
22+
renamed.columnNames() shouldBe listOf("NAME", "AGE", "CITY", "WEIGHT", "ISHAPPY")
23+
renamed.getColumnGroup("NAME").columnNames() shouldBe listOf("firstName", "LASTNAME")
24+
25+
val df2 = dataFrameOf("a.b", "a.c.d", "d.e", "d.f")(1, 3.0, 2, "b")
26+
.move { all() }.into { it.name.split(".").toPath() }
27+
df2.select { cols("a") except "a"["b"] }.let {
28+
it.getColumnGroup("a").getColumnOrNull("b") shouldBe null
29+
it[pathOf("a", "c", "d")].single() shouldBe 3.0
30+
}
31+
df2.select { cols("a") except "a"["c"]["d"] }.let {
32+
it.getColumnGroup("a").getColumnOrNull("c") shouldBe null
33+
it[pathOf("a", "b")].single() shouldBe 1
34+
}
35+
df2.select { "a".except("b") }.let {
36+
it.getColumnGroup("a").getColumnOrNull("b") shouldBe null
37+
it[pathOf("a", "c", "d")].single() shouldBe 3.0
38+
}
39+
df2.select { "a".except { "c"["d"] } }.let {
40+
it.getColumnGroup("a").getColumnOrNull("c") shouldBe null
41+
it[pathOf("a", "b")].single() shouldBe 1
42+
}
43+
}
44+
1945
@Test
2046
fun `exceptions`() {
2147
shouldThrow<IllegalStateException> {
@@ -70,12 +96,15 @@ class AllExceptTests : ColumnsSelectionDslTests() {
7096
).shouldAllBeEqual()
7197

7298
listOf(
73-
df.select { name and name.firstName }.alsoDebug(),
99+
df.select { cols(name) except name.firstName },
100+
df.select { (name and name.firstName and name.firstName) except name.firstName },
101+
df.select { (name and name and name.firstName).except(name.firstName).simplify() },
74102
).shouldAllBeEqual()
75103

76-
df.select { (name and name.firstName and name.firstName) except name.firstName }.alsoDebug()
77-
78-
df.select { (name and name and name.firstName) except name.firstName }.alsoDebug()
104+
df.getColumns { (name and name and name.firstName).except(name.firstName) }.forEach {
105+
it.isColumnGroup() shouldBe true
106+
it.asColumnGroup().columnNames() shouldBe listOf("lastName")
107+
}
79108
}
80109

81110
@Test
@@ -282,4 +311,79 @@ class AllExceptTests : ColumnsSelectionDslTests() {
282311
},
283312
).shouldAllBeEqual()
284313
}
314+
315+
@Test
316+
fun `except on column group`() {
317+
val firstNameAccessor = column<String>("firstName")
318+
listOf(
319+
df.select { name }.remove { name.firstName }.alsoDebug(),
320+
df.select { cols(name) except name.firstName },
321+
df.select { name.except { cols { "first" in it.name } } },
322+
df.select { name.except { cols { "first" in it.name } and cols { "first" in it.name } } },
323+
df.select { name.except { firstName } },
324+
df.select { name.except { firstNameAccessor } },
325+
df.select { name.except { firstName and firstName } },
326+
df.select { name.except { firstNameAccessor and firstNameAccessor } },
327+
// df.select { name.except(name.firstName and name.firstName) }, // not allowed
328+
// df.select { name.except(firstNameAccessor and firstNameAccessor) }, // not allowed
329+
df.select { name.except("firstName") },
330+
df.select { name.except("firstName", "firstName") },
331+
df.select { name.except(Name::firstName) },
332+
df.select { name.except(Name::firstName, Name::firstName) },
333+
df.select { name.except(pathOf("firstName")) },
334+
df.select { name.except(pathOf("firstName"), pathOf("firstName")) },
335+
df.select { "name".except { cols { "first" in it.name } } },
336+
df.select { "name".except { cols { "first" in it.name } and cols { "first" in it.name } } },
337+
df.select { "name".except { firstNameAccessor } },
338+
df.select { "name".except { firstNameAccessor and firstNameAccessor } },
339+
// df.select { "name".except(name.firstName and name.firstName) }, // not allowed
340+
// df.select { "name".except(firstNameAccessor and firstNameAccessor) }, // not allowed
341+
df.select { "name".except("firstName") },
342+
df.select { "name".except("firstName", "firstName") },
343+
df.select { "name".except(Name::firstName) },
344+
df.select { "name".except(Name::firstName, Name::firstName) },
345+
df.select { "name".except(pathOf("firstName")) },
346+
df.select { "name".except(pathOf("firstName"), pathOf("firstName")) },
347+
// df.select { Person::name.except(name.firstName and name.firstName) }, // not allowed
348+
// df.select { Person::name.except(firstNameAccessor and firstNameAccessor) }, // not allowed
349+
df.select { Person::name.except("firstName") },
350+
df.select { Person::name.except("firstName", "firstName") },
351+
df.select { Person::name.except(Name::firstName) },
352+
df.select { Person::name.except(Name::firstName, Name::firstName) },
353+
df.select { Person::name.except(pathOf("firstName")) },
354+
df.select { Person::name.except(pathOf("firstName"), pathOf("firstName")) },
355+
df.select { NonDataSchemaPerson::name.except { cols { "first" in it.name } } },
356+
df.select {
357+
NonDataSchemaPerson::name.except {
358+
cols { "first" in it.name } and
359+
cols { "first" in it.name }
360+
}
361+
},
362+
df.select { NonDataSchemaPerson::name.except { firstName } },
363+
df.select { NonDataSchemaPerson::name.except { firstNameAccessor } },
364+
df.select { NonDataSchemaPerson::name.except { firstName and firstName } },
365+
df.select { NonDataSchemaPerson::name.except { firstNameAccessor and firstNameAccessor } },
366+
// df.select { NonDataSchemaPerson::name.except(name.firstName and name.firstName) }, // not allowed
367+
// df.select { NonDataSchemaPerson::name.except(firstNameAccessor and firstNameAccessor) }, // not allowed
368+
df.select { NonDataSchemaPerson::name.except("firstName") },
369+
df.select { NonDataSchemaPerson::name.except("firstName", "firstName") },
370+
df.select { NonDataSchemaPerson::name.except(Name::firstName) },
371+
df.select { NonDataSchemaPerson::name.except(Name::firstName, Name::firstName) },
372+
df.select { NonDataSchemaPerson::name.except(pathOf("firstName")) },
373+
df.select { NonDataSchemaPerson::name.except(pathOf("firstName"), pathOf("firstName")) },
374+
df.select { pathOf("name").except { cols { "first" in it.name } } },
375+
df.select { pathOf("name").except { cols { "first" in it.name } and cols { "first" in it.name } } },
376+
df.select { pathOf("name").except { firstNameAccessor } },
377+
df.select { pathOf("name").except { firstNameAccessor and firstNameAccessor } },
378+
// df.select { pathOf("name").except(name.firstName and name.firstName) }, // not allowed
379+
// df.select { pathOf("name").except(firstNameAccessor and firstNameAccessor) }, // not allowed
380+
df.select { pathOf("name").except("firstName") },
381+
df.select { pathOf("name").except("firstName", "firstName") },
382+
df.select { pathOf("name").except(Name::firstName) },
383+
df.select { pathOf("name").except(Name::firstName) },
384+
df.select { pathOf("name").except(Name::firstName, Name::firstName) },
385+
df.select { pathOf("name").except(pathOf("firstName")) },
386+
df.select { pathOf("name").except(pathOf("firstName"), pathOf("firstName")) },
387+
).shouldAllBeEqual()
388+
}
285389
}

0 commit comments

Comments
 (0)