Skip to content

Commit d50a91d

Browse files
committed
[SPARK-5803][MLLIB] use ArrayBuilder to build primitive arrays
because ArrayBuffer is not specialized. Author: Xiangrui Meng <meng@databricks.com> Closes #4594 from mengxr/SPARK-5803 and squashes the following commits: 1261bd5 [Xiangrui Meng] merge master a4ea872 [Xiangrui Meng] use ArrayBuilder to build primitive arrays
1 parent cc56c87 commit d50a91d

File tree

3 files changed

+13
-13
lines changed

3 files changed

+13
-13
lines changed

mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ import java.lang.{Iterable => JavaIterable}
2121

2222
import scala.collection.JavaConverters._
2323
import scala.collection.mutable
24-
import scala.collection.mutable.ArrayBuffer
24+
import scala.collection.mutable.ArrayBuilder
2525

2626
import com.github.fommil.netlib.BLAS.{getInstance => blas}
2727

@@ -272,7 +272,7 @@ class Word2Vec extends Serializable with Logging {
272272
def hasNext: Boolean = iter.hasNext
273273

274274
def next(): Array[Int] = {
275-
var sentence = new ArrayBuffer[Int]
275+
val sentence = ArrayBuilder.make[Int]
276276
var sentenceLength = 0
277277
while (iter.hasNext && sentenceLength < MAX_SENTENCE_LENGTH) {
278278
val word = bcVocabHash.value.get(iter.next())
@@ -283,7 +283,7 @@ class Word2Vec extends Serializable with Logging {
283283
case None =>
284284
}
285285
}
286-
sentence.toArray
286+
sentence.result()
287287
}
288288
}
289289
}

mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,9 @@
1717

1818
package org.apache.spark.mllib.tree
1919

20-
import scala.collection.mutable
2120
import scala.collection.JavaConverters._
22-
import scala.collection.mutable.ArrayBuffer
21+
import scala.collection.mutable
22+
import scala.collection.mutable.ArrayBuilder
2323

2424
import org.apache.spark.Logging
2525
import org.apache.spark.annotation.Experimental
@@ -1136,7 +1136,7 @@ object DecisionTree extends Serializable with Logging {
11361136
logDebug("stride = " + stride)
11371137

11381138
// iterate `valueCount` to find splits
1139-
val splits = new ArrayBuffer[Double]
1139+
val splitsBuilder = ArrayBuilder.make[Double]
11401140
var index = 1
11411141
// currentCount: sum of counts of values that have been visited
11421142
var currentCount = valueCounts(0)._2
@@ -1154,13 +1154,13 @@ object DecisionTree extends Serializable with Logging {
11541154
// makes the gap between currentCount and targetCount smaller,
11551155
// previous value is a split threshold.
11561156
if (previousGap < currentGap) {
1157-
splits.append(valueCounts(index - 1)._1)
1157+
splitsBuilder += valueCounts(index - 1)._1
11581158
targetCount += stride
11591159
}
11601160
index += 1
11611161
}
11621162

1163-
splits.toArray
1163+
splitsBuilder.result()
11641164
}
11651165
}
11661166

mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.util
1919

2020
import java.util.StringTokenizer
2121

22-
import scala.collection.mutable.{ArrayBuffer, ListBuffer}
22+
import scala.collection.mutable.{ArrayBuilder, ListBuffer}
2323

2424
import org.apache.spark.SparkException
2525

@@ -51,7 +51,7 @@ private[mllib] object NumericParser {
5151
}
5252

5353
private def parseArray(tokenizer: StringTokenizer): Array[Double] = {
54-
val values = ArrayBuffer.empty[Double]
54+
val values = ArrayBuilder.make[Double]
5555
var parsing = true
5656
var allowComma = false
5757
var token: String = null
@@ -67,14 +67,14 @@ private[mllib] object NumericParser {
6767
}
6868
} else {
6969
// expecting a number
70-
values.append(parseDouble(token))
70+
values += parseDouble(token)
7171
allowComma = true
7272
}
7373
}
7474
if (parsing) {
7575
throw new SparkException(s"An array must end with ']'.")
7676
}
77-
values.toArray
77+
values.result()
7878
}
7979

8080
private def parseTuple(tokenizer: StringTokenizer): Seq[_] = {
@@ -114,7 +114,7 @@ private[mllib] object NumericParser {
114114
try {
115115
java.lang.Double.parseDouble(s)
116116
} catch {
117-
case e: Throwable =>
117+
case e: NumberFormatException =>
118118
throw new SparkException(s"Cannot parse a double from: $s", e)
119119
}
120120
}

0 commit comments

Comments (0)