@@ -4,12 +4,16 @@ import io.kotest.matchers.shouldBe
4
4
import org.jetbrains.kotlinx.dataframe.AnyFrame
5
5
import org.jetbrains.kotlinx.dataframe.DataFrame
6
6
import org.jetbrains.kotlinx.dataframe.hasNulls
7
+ import org.jetbrains.kotlinx.dataframe.impl.DataRowImpl
7
8
import org.jetbrains.kotlinx.dataframe.type
8
9
import org.junit.Test
9
10
import kotlin.reflect.typeOf
10
11
11
12
class SplitTests {
12
13
14
// Shared fixture: two columns whose single values are dash-separated string pairs.
// NOTE(review): the leading space inside every literal looks like an extraction artifact — confirm against the original file.
val stringPairDf = dataFrameOf(" first", " second")(
    " 22-65",
    " 22-66",
)

// Shared fixture: the same two columns, but each value is already a two-element list of strings.
val listPairDf = dataFrameOf(" first", " second")(
    listOf(" 22", " 65"),
    listOf(" 22", " 66"),
)
16
+
13
17
@Test
14
18
fun `split with default` () {
15
19
val recentDelays = listOf (listOf (23 , 47 ), listOf (), listOf (24 , 43 , 87 ), listOf (13 ), listOf (67 , 32 )).toColumn(" RecentDelays" )
@@ -60,4 +64,105 @@ class SplitTests {
60
64
3 , emptyList<Int >(), emptyList<Int >()
61
65
)
62
66
}
67
+
68
/**
 * Splits both string columns of [stringPairDf] by a delimiter and nests the two
 * resulting parts inside a group that keeps the source column's name.
 */
@Test
fun `split string by delimiter inward`() {
    val actual = stringPairDf
        .split(" first", " second")
        .by(" -")
        .inward(" left", " right")

    // Each source column is expected to become a group with one child column per part.
    // NOTE(review): the leading space in the literals (including the delimiter) looks like an extraction artifact — confirm.
    val expected = dataFrameOf(
        columnOf(columnOf(" 22") named " left", columnOf(" 65") named " right") named " first",
        columnOf(columnOf(" 22") named " left", columnOf(" 66") named " right") named " second",
    )

    actual shouldBe expected
}
77
+
78
/**
 * Splits both string columns of [stringPairDf] by a delimiter into flat top-level
 * columns, reusing the same two target names for each source column.
 */
@Test
fun `split string by delimiter into columns with suffixes`() {
    val actual = stringPairDf
        .split(" first", " second")
        .by(" -")
        .into(" left", " right")

    // The parts of the second source column are expected under suffixed names,
    // because the plain names are already taken by the parts of the first one.
    val expected = dataFrameOf(
        columnOf(" 22") named " left",
        columnOf(" 65") named " right",
        columnOf(" 22") named " left1",
        columnOf(" 66") named " right1",
    )

    actual shouldBe expected
}
89
+
90
/**
 * Splits both list columns of [listPairDf] inward without passing any target names,
 * so the child column names come from the library.
 */
@Test
fun `split list inward with autogenerated names`() {
    val actual = listPairDf
        .split { " first"<List<String>>() and " second"<List<String>>() }
        .inward()

    // Numbering is expected to restart inside every group: both groups hold split1 / split2.
    val expected = dataFrameOf(
        columnOf(columnOf(" 22") named " split1", columnOf(" 65") named " split2") named " first",
        columnOf(columnOf(" 22") named " split1", columnOf(" 66") named " split2") named " second",
    )

    actual shouldBe expected
}
99
+
100
/**
 * Splits both list columns of [listPairDf] into flat top-level columns without
 * passing any target names, so the column names come from the library.
 */
@Test
fun `split list into with autogenerated names`() {
    val actual = listPairDf
        .split { " first"<List<String>>() and " second"<List<String>>() }
        .into()

    // Numbering is expected to continue across source columns: split1..split4, no duplicates.
    val expected = dataFrameOf(
        columnOf(" 22") named " split1",
        columnOf(" 65") named " split2",
        columnOf(" 22") named " split3",
        columnOf(" 66") named " split4",
    )

    actual shouldBe expected
}
111
+
112
/**
 * Applies two separate, name-less splits one after the other: first on the
 * `first` column of [listPairDf], then on the `second` column of that result.
 */
@Test
fun `sequence of splits with autogenerated names`() {
    val afterFirstSplit = listPairDf.split { " first"<List<String>>() }.into()
    val afterSecondSplit = afterFirstSplit.split { " second"<List<String>>() }.into()

    // The second split is expected to continue numbering after the names already present (split3, split4).
    val expected = dataFrameOf(
        columnOf(" 22") named " split1",
        columnOf(" 65") named " split2",
        columnOf(" 22") named " split3",
        columnOf(" 66") named " split4",
    )

    afterSecondSplit shouldBe expected
}
124
+
125
/**
 * Groups both columns of [stringPairDf] under a single column group, then splits that
 * group inward with a custom splitter that returns the two cell values in reverse order.
 */
@Test
fun `split column group inward`() {
    val grouped = stringPairDf.group(" first", " second").into(" group")

    // The inward split replaces the group's original children, so reusing
    // their names as target names does not produce a name conflict.
    val actual = grouped
        .split { " group"<DataRowImpl<*>>() }
        .by { row -> listOf(row[1], row[0]) } // swap the two values
        .inward(" first", " second")

    // After the swap, `first` is expected to hold the old `second` value and vice versa.
    val expected = dataFrameOf(
        columnOf(columnOf(" 22-66") named " first", columnOf(" 22-65") named " second") named " group",
    )

    actual shouldBe expected
}
138
+
139
/**
 * Splits a nested column group into sibling columns of its parent group, where the
 * parent already contains a column with one of the requested target names.
 */
@Test
fun `split column group into hierarchy with correct names`() {
    val source = dataFrameOf(
        columnOf(
            columnOf(" a") named " first",
            columnOf(
                columnOf(" b") named " first",
                columnOf(" c") named " second",
            ) named " nestedGroup",
        ) named " topLevelGroup",
        columnOf(" d") named " first",
    )

    // NOTE(review): presumably these delegated accessors take their column names from the
    // property names; if so, the leading space in the string literals above would prevent a
    // match — it looks like an extraction artifact, confirm against the original file.
    val topLevelGroup by columnGroup()
    val nestedGroup by topLevelGroup.columnGroup()

    val actual = source
        .split { nestedGroup }
        .by { row -> listOf(row[0], row[1]) }
        .into(" first", " second") // `first` clashes with the existing sibling inside the parent group

    // The clash is expected to be resolved inside the parent group only (`first1`);
    // the unrelated top-level `first` column is expected to stay as it is.
    val expected = dataFrameOf(
        columnOf(
            columnOf(" a") named " first",
            columnOf(" b") named " first1",
            columnOf(" c") named " second",
        ) named " topLevelGroup",
        columnOf(" d") named " first",
    )

    actual shouldBe expected
}
63
168
}
0 commit comments