@@ -35,6 +35,7 @@ import org.jetbrains.kotlinx.dataframe.api.mean
35
35
import org.jetbrains.kotlinx.dataframe.api.meanFor
36
36
import org.jetbrains.kotlinx.dataframe.api.meanOf
37
37
import org.jetbrains.kotlinx.dataframe.api.median
38
+ import org.jetbrains.kotlinx.dataframe.api.medianBy
38
39
import org.jetbrains.kotlinx.dataframe.api.medianFor
39
40
import org.jetbrains.kotlinx.dataframe.api.medianOf
40
41
import org.jetbrains.kotlinx.dataframe.api.min
@@ -43,6 +44,7 @@ import org.jetbrains.kotlinx.dataframe.api.minFor
43
44
import org.jetbrains.kotlinx.dataframe.api.minOf
44
45
import org.jetbrains.kotlinx.dataframe.api.minOrNull
45
46
import org.jetbrains.kotlinx.dataframe.api.percentile
47
+ import org.jetbrains.kotlinx.dataframe.api.percentileBy
46
48
import org.jetbrains.kotlinx.dataframe.api.percentileFor
47
49
import org.jetbrains.kotlinx.dataframe.api.percentileOf
48
50
import org.jetbrains.kotlinx.dataframe.api.pivot
@@ -179,7 +181,7 @@ class Analyze : TestBase() {
179
181
// SampleStart
180
182
df.sum() // sum of values per every numeric column
181
183
df.sum { age and weight } // sum of all values in `age` and `weight`
182
- df.sumFor { age and weight } // sum of values per `age` and `weight` separately
184
+ df.sumFor(skipNaN = true ) { age and weight } // sum of values per `age` and `weight` separately, skips NaN
183
185
df.sumOf { (weight ? : 0 ) / age } // sum of expression evaluated for every row
184
186
// SampleEnd
185
187
}
@@ -190,7 +192,7 @@ class Analyze : TestBase() {
190
192
// SampleStart
191
193
df.min() // min of values per every comparable column
192
194
df.min { age and weight } // min of all values in `age` and `weight`
193
- df.minFor { age and weight } // min of values per `age` and `weight` separately
195
+ df.minFor(skipNaN = true ) { age and weight } // min of values per `age` and `weight` separately, skips NaN
194
196
df.minOf { (weight ? : 0 ) / age } // min of expression evaluated for every row
195
197
df.minBy { age } // DataRow with minimal `age`
196
198
// SampleEnd
@@ -214,8 +216,9 @@ class Analyze : TestBase() {
214
216
// SampleStart
215
217
df.median() // median of values per every comparable column
216
218
df.median { age and weight } // median of all values in `age` and `weight`
217
- df.medianFor { age and weight } // median of values per `age` and `weight` separately
219
+ df.medianFor(skipNaN = true ) { age and weight } // median of values per `age` and `weight` separately, skips NaN
218
220
df.medianOf { (weight ? : 0 ) / age } // median of expression evaluated for every row
221
+ df.medianBy { age } // DataRow where the median age lies (lower-median for an even number of values)
219
222
// SampleEnd
220
223
}
221
224
@@ -235,10 +238,11 @@ class Analyze : TestBase() {
235
238
@TransformDataFrameExpressions
236
239
fun percentileModes () {
237
240
// SampleStart
238
- df.percentile(25.0 ) // percentile of values per every comparable column
239
- df.percentile(25.0 ) { age and weight } // percentile of all values in `age` and `weight`
240
- df.percentileFor(25.0 ) { age and weight } // percentile of values per `age` and `weight` separately
241
- df.percentileOf(25.0 ) { (weight ? : 0 ) / age } // percentile of expression evaluated for every row
241
+ df.percentile(25.0 ) // 25th percentile of values per every comparable column
242
+ df.percentile(75.0 ) { age and weight } // 75th percentile of all values in `age` and `weight`
243
+ df.percentileFor(50.0 , skipNaN = true ) { age and weight } // 50th percentile of values per `age` and `weight` separately, skips NaN
244
+ df.percentileOf(75.0 ) { (weight ? : 0 ) / age } // 75th percentile of expression evaluated for every row
245
+ df.percentileBy(25.0 ) { age } // DataRow where the 25th percentile of `age` lies (index rounded using R3)
242
246
// SampleEnd
243
247
}
244
248
@@ -247,9 +251,9 @@ class Analyze : TestBase() {
247
251
fun percentileAggregations () {
248
252
// SampleStart
249
253
df.percentile(25.0 )
250
- df.age.percentile(25 .0 )
251
- df.groupBy { city }.percentile(25 .0 )
252
- df.pivot { city }.percentile(25 .0 )
254
+ df.age.percentile(75 .0 )
255
+ df.groupBy { city }.percentile(50 .0 )
256
+ df.pivot { city }.percentile(75 .0 )
253
257
df.pivot { city }.groupBy { name.lastName }.percentile(25.0 )
254
258
// SampleEnd
255
259
}
@@ -259,8 +263,8 @@ class Analyze : TestBase() {
259
263
fun meanModes () {
260
264
// SampleStart
261
265
df.mean() // mean of values per every numeric column
262
- df.mean(skipNaN = true ) { age and weight } // mean of all values in `age` and `weight`, skips NA
263
- df.meanFor(skipNaN = true ) { age and weight } // mean of values per `age` and `weight` separately, skips NA
266
+ df.mean { age and weight } // mean of all values in `age` and `weight`
267
+ df.meanFor(skipNaN = true ) { age and weight } // mean of values per `age` and `weight` separately, skips NaN
264
268
df.meanOf { (weight ? : 0 ) / age } // mean of expression evaluated for every row
265
269
// SampleEnd
266
270
}
@@ -283,7 +287,7 @@ class Analyze : TestBase() {
283
287
// SampleStart
284
288
df.std() // std of values per every numeric column
285
289
df.std { age and weight } // std of all values in `age` and `weight`
286
- df.stdFor { age and weight } // std of values per `age` and `weight` separately, skips NA
290
+ df.stdFor(skipNaN = true ) { age and weight } // std of values per `age` and `weight` separately, skips NaN
287
291
df.stdOf { (weight ? : 0 ) / age } // std of expression evaluated for every row
288
292
// SampleEnd
289
293
}
0 commit comments