@@ -47,15 +47,45 @@ class BucketizerSuite extends FunSuite with MLlibTestSparkContext {
47
47
}
48
48
}
49
49
50
// Cross-checks Bucketizer.binarySearchForBuckets against the suite's own
// BucketizerSuite.linearSearchForBuckets on random features and random sorted splits.
test(" Binary search correctness in contrast with linear search ") {
  val features = Array.fill(100)(Random.nextDouble())
  val innerSplits = Array.fill(10)(Random.nextDouble()).sorted
  // The binary search receives the splits wrapped in Double.MinValue / Double.MaxValue
  // sentinels; the linear search receives the raw splits.
  val paddedSplits = Double.MinValue +: innerSplits :+ Double.MaxValue
  val binaryBuckets = Vectors.dense(
    features.map(f => Bucketizer.binarySearchForBuckets(paddedSplits, f, true, true)))
  val linearBuckets = Vectors.dense(
    features.map(f => BucketizerSuite.linearSearchForBuckets(innerSplits, f)))
  assert(binaryBuckets ~== linearBuckets absTol 1e-5)
}
59
+
60
// Uses the sorted splits themselves as the features: the i-th split (0-based)
// is expected to be assigned to bucket i + 1.
test(" Binary search of features at splits") {
  val innerSplits = Array.fill(10)(Random.nextDouble()).sorted
  val paddedSplits = Double.MinValue +: innerSplits :+ Double.MaxValue
  val buckets = Vectors.dense(
    innerSplits.map(s => Bucketizer.binarySearchForBuckets(paddedSplits, s, true, true)))
  // 1.0, 2.0, ..., 10.0
  val expected = Vectors.dense(Array.tabulate(10)(i => i + 1.0))
  assert(buckets ~== expected absTol 1e-5)
}
69
+
70
// Random.nextDouble() yields values in [0.0, 1.0), which all lie strictly between
// the two splits -0.1 and 1.1, so every feature is expected in bucket 1.
test(" Binary search of features between splits") {
  val features = Array.fill(10)(Random.nextDouble())
  // Splits (-0.1, 1.1) wrapped in the Double.MinValue / Double.MaxValue sentinels.
  val paddedSplits = Array(Double.MinValue, -0.1, 1.1, Double.MaxValue)
  val buckets = Vectors.dense(
    features.map(f => Bucketizer.binarySearchForBuckets(paddedSplits, f, true, true)))
  val expected = Vectors.dense(Array.fill(10)(1.0))
  assert(buckets ~== expected absTol 1e-5)
}
79
+
80
// Five features at or above the upper split (1.1) followed by five features below
// the lower split (0.0); expected buckets are 2 for the former and 0 for the latter.
test(" Binary search of features outside splits") {
  // Values in [1.1, 2.1).
  val upperFeatures = Array.fill(5)(Random.nextDouble() + 1.1)
  // Values in [-1.1, -0.1).
  val lowerFeatures = Array.fill(5)(Random.nextDouble() - 1.1)
  val features = upperFeatures ++ lowerFeatures
  // Splits (0.0, 1.1) wrapped in the Double.MinValue / Double.MaxValue sentinels.
  val paddedSplits = Array(Double.MinValue, 0.0, 1.1, Double.MaxValue)
  val buckets = Vectors.dense(
    features.map(f => Bucketizer.binarySearchForBuckets(paddedSplits, f, true, true)))
  val expected = Vectors.dense(Array.fill(5)(2.0) ++ Array.fill(5)(0.0))
  assert(buckets ~== expected absTol 1e-5)
}
59
89
}
60
90
61
91
private object BucketizerSuite {
0 commit comments