Skip to content

Commit c3cc770

Browse files
committed
Add more unit tests for binary search
1 parent 3a16cc2 commit c3cc770

File tree

1 file changed

+33
-3
lines changed

1 file changed

+33
-3
lines changed

mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala

Lines changed: 33 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -47,15 +47,45 @@ class BucketizerSuite extends FunSuite with MLlibTestSparkContext {
4747
}
4848
}
4949

50-
test("Binary search for finding buckets") {
51-
val data = Array.fill[Double](100)(Random.nextDouble())
52-
val splits = Array.fill[Double](10)(Random.nextDouble()).sorted
50+
test("Binary search correctness in contrast with linear search") {
  // Random features and random, ascending split points.
  val features = Array.fill(100)(Random.nextDouble())
  val innerSplits = Array.fill(10)(Random.nextDouble()).sorted

  // The binary search receives the splits padded with Double.MinValue and Double.MaxValue
  // sentinels; the linear-search reference receives the unpadded splits.
  val paddedSplits = Array.concat(Array(Double.MinValue), innerSplits, Array(Double.MaxValue))
  def viaBinarySearch(feature: Double) =
    Bucketizer.binarySearchForBuckets(paddedSplits, feature, true, true)
  def viaLinearSearch(feature: Double) =
    BucketizerSuite.linearSearchForBuckets(innerSplits, feature)

  // Both searches must assign every feature to the same bucket.
  val bsResult = Vectors.dense(features.map(viaBinarySearch))
  val lsResult = Vectors.dense(features.map(viaLinearSearch))
  assert(bsResult ~== lsResult absTol 1e-5)
}
59+
60+
test("Binary search of features at splits") {
  // Ten random split points in ascending order; each one is also used as a feature value.
  val splits = Array.fill(10)(Random.nextDouble()).sorted
  val boundaries = Array.concat(Array(Double.MinValue), splits, Array(Double.MaxValue))
  def bucketOf(feature: Double) =
    Bucketizer.binarySearchForBuckets(boundaries, feature, true, true)

  // A feature equal to the i-th split (0-based) is expected in bucket i + 1,
  // i.e. 1.0, 2.0, ..., 10.0.
  val expected = Vectors.dense(Array.tabulate(splits.length)(i => i + 1.0))
  val result = Vectors.dense(splits.map(bucketOf))
  assert(result ~== expected absTol 1e-5)
}
69+
70+
test("Binary search of features between splits") {
  // Both inner splits lie outside [0, 1), the range Random.nextDouble() draws from,
  // so every generated feature falls strictly between them.
  val splits = Array(-0.1, 1.1)
  val boundaries = Array.concat(Array(Double.MinValue), splits, Array(Double.MaxValue))
  def bucketOf(feature: Double) =
    Bucketizer.binarySearchForBuckets(boundaries, feature, true, true)

  val features = Array.fill(10)(Random.nextDouble())

  // Every feature is expected in bucket 1.
  val expected = Vectors.dense(Array.fill(features.length)(1.0))
  val result = Vectors.dense(features.map(bucketOf))
  assert(result ~== expected absTol 1e-5)
}
79+
80+
test("Binary search of features outside splits") {
  val splits = Array(0.0, 1.1)
  val boundaries = Array.concat(Array(Double.MinValue), splits, Array(Double.MaxValue))
  def bucketOf(feature: Double) =
    Bucketizer.binarySearchForBuckets(boundaries, feature, true, true)

  // Five features at or above the upper split (>= 1.1), followed by five features
  // below the lower split (< -0.1).
  val aboveSplits = Array.fill(5)(Random.nextDouble() + 1.1)
  val belowSplits = Array.fill(5)(Random.nextDouble() - 1.1)

  // The high group is expected in bucket 2 and the low group in bucket 0.
  val expected = Vectors.dense(Array.fill(5)(2.0) ++ Array.fill(5)(0.0))
  val result = Vectors.dense((aboveSplits ++ belowSplits).map(bucketOf))
  assert(result ~== expected absTol 1e-5)
}
5989
}
6090

6191
private object BucketizerSuite {

0 commit comments

Comments
 (0)