[SPARK-5803][MLLIB] use ArrayBuilder to build primitive arrays
because ArrayBuffer is not specialized.

Author: Xiangrui Meng <meng@databricks.com>

Closes apache#4594 from mengxr/SPARK-5803 and squashes the following commits:

1261bd5 [Xiangrui Meng] merge master
a4ea872 [Xiangrui Meng] use ArrayBuilder to build primitive arrays
mengxr committed Feb 14, 2015
1 parent cc56c87 commit d50a91d
Showing 3 changed files with 13 additions and 13 deletions.
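
The one-line rationale in the commit message is the whole story: scala.collection.mutable.ArrayBuffer has no @specialized type parameter, so an ArrayBuffer[Int] boxes every element as a java.lang.Integer and toArray has to unbox them all again, whereas ArrayBuilder.make[T] uses the implicit ClassTag to pick a primitive-specialized builder (e.g. ArrayBuilder.ofInt) that appends straight into a primitive array. A minimal sketch of the difference (illustrative, not part of the diff):

import scala.collection.mutable.{ArrayBuffer, ArrayBuilder}

// Generic buffer: each Int is boxed to java.lang.Integer on append,
// and unboxed again when toArray copies into an Array[Int].
val buffer = new ArrayBuffer[Int]
buffer += 1
buffer += 2
val fromBuffer: Array[Int] = buffer.toArray

// Specialized builder: make[Int] resolves (via ClassTag) to
// ArrayBuilder.ofInt, which writes into an Array[Int] directly.
val builder = ArrayBuilder.make[Int]
builder += 1
builder += 2
val fromBuilder: Array[Int] = builder.result()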
mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala

@@ -21,7 +21,7 @@ import java.lang.{Iterable => JavaIterable}
 
 import scala.collection.JavaConverters._
 import scala.collection.mutable
-import scala.collection.mutable.ArrayBuffer
+import scala.collection.mutable.ArrayBuilder
 
 import com.github.fommil.netlib.BLAS.{getInstance => blas}
 
@@ -272,7 +272,7 @@ class Word2Vec extends Serializable with Logging {
         def hasNext: Boolean = iter.hasNext
 
         def next(): Array[Int] = {
-          var sentence = new ArrayBuffer[Int]
+          val sentence = ArrayBuilder.make[Int]
           var sentenceLength = 0
           while (iter.hasNext && sentenceLength < MAX_SENTENCE_LENGTH) {
             val word = bcVocabHash.value.get(iter.next())
@@ -283,7 +283,7 @@ class Word2Vec extends Serializable with Logging {
               case None =>
             }
           }
-          sentence.toArray
+          sentence.result()
         }
       }
     }
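
The pattern in next() above: accumulate one sentence's vocabulary indices in a specialized builder, then materialize a primitive Array[Int] with result(). A self-contained sketch of the same loop (chunk, vocab, and maxLen are illustrative names, not Spark API):

import scala.collection.mutable.ArrayBuilder

// Illustrative standalone version of the sentence-chunking iterator:
// map words to vocabulary indices, cutting each sentence at maxLen words.
def chunk(words: Iterator[String], vocab: Map[String, Int], maxLen: Int): Iterator[Array[Int]] =
  new Iterator[Array[Int]] {
    def hasNext: Boolean = words.hasNext

    def next(): Array[Int] = {
      val sentence = ArrayBuilder.make[Int]
      var sentenceLength = 0
      while (words.hasNext && sentenceLength < maxLen) {
        vocab.get(words.next()) match {
          case Some(index) =>
            sentence += index
            sentenceLength += 1
          case None => // out-of-vocabulary words are skipped
        }
      }
      sentence.result() // primitive Array[Int], built without boxing
    }
  }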
mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala

@@ -17,9 +17,9 @@
 
 package org.apache.spark.mllib.tree
 
-import scala.collection.mutable
 import scala.collection.JavaConverters._
-import scala.collection.mutable.ArrayBuffer
+import scala.collection.mutable
+import scala.collection.mutable.ArrayBuilder
 
 import org.apache.spark.Logging
 import org.apache.spark.annotation.Experimental
@@ -1136,7 +1136,7 @@ object DecisionTree extends Serializable with Logging {
       logDebug("stride = " + stride)
 
       // iterate `valueCount` to find splits
-      val splits = new ArrayBuffer[Double]
+      val splitsBuilder = ArrayBuilder.make[Double]
       var index = 1
       // currentCount: sum of counts of values that have been visited
      var currentCount = valueCounts(0)._2
@@ -1154,13 +1154,13 @@
         // makes the gap between currentCount and targetCount smaller,
         // previous value is a split threshold.
         if (previousGap < currentGap) {
-          splits.append(valueCounts(index - 1)._1)
+          splitsBuilder += valueCounts(index - 1)._1
           targetCount += stride
         }
         index += 1
       }
 
-      splits.toArray
+      splitsBuilder.result()
     }
   }
 
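For orientation, the loop above walks (value, count) pairs in value order and emits a split whenever the running count crosses the current stride target. A hedged standalone reconstruction (the previousGap/currentGap bookkeeping before the visible lines is inferred from the comments, not shown in the diff):

import scala.collection.mutable.ArrayBuilder

// Illustrative reconstruction of the stride-based split search:
// valueCounts is sorted by value; stride is the target count per bin.
def findSplits(valueCounts: Array[(Double, Long)], stride: Double): Array[Double] = {
  val splitsBuilder = ArrayBuilder.make[Double]
  var index = 1
  // currentCount: sum of counts of values that have been visited
  var currentCount = valueCounts(0)._2.toDouble
  var targetCount = stride
  while (index < valueCounts.length) {
    val previousGap = math.abs(currentCount - targetCount)
    currentCount += valueCounts(index)._2
    val currentGap = math.abs(currentCount - targetCount)
    // If visiting this value widened the gap to the target count,
    // the previous value was the closest: record it as a threshold.
    if (previousGap < currentGap) {
      splitsBuilder += valueCounts(index - 1)._1
      targetCount += stride
    }
    index += 1
  }
  splitsBuilder.result()
}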
mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala

@@ -19,7 +19,7 @@ package org.apache.spark.mllib.util
 
 import java.util.StringTokenizer
 
-import scala.collection.mutable.{ArrayBuffer, ListBuffer}
+import scala.collection.mutable.{ArrayBuilder, ListBuffer}
 
 import org.apache.spark.SparkException
 
@@ -51,7 +51,7 @@ private[mllib] object NumericParser {
   }
 
   private def parseArray(tokenizer: StringTokenizer): Array[Double] = {
-    val values = ArrayBuffer.empty[Double]
+    val values = ArrayBuilder.make[Double]
     var parsing = true
     var allowComma = false
     var token: String = null
@@ -67,14 +67,14 @@
         }
       } else {
         // expecting a number
-        values.append(parseDouble(token))
+        values += parseDouble(token)
         allowComma = true
       }
     }
     if (parsing) {
       throw new SparkException(s"An array must end with ']'.")
     }
-    values.toArray
+    values.result()
   }
 
   private def parseTuple(tokenizer: StringTokenizer): Seq[_] = {
@@ -114,7 +114,7 @@ private[mllib] object NumericParser {
     try {
       java.lang.Double.parseDouble(s)
     } catch {
-      case e: Throwable =>
+      case e: NumberFormatException =>
         throw new SparkException(s"Cannot parse a double from: $s", e)
     }
   }
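
The second fix in this file narrows parseDouble's catch: a case e: Throwable would also intercept fatal JVM errors such as OutOfMemoryError and Scala's control-flow throwables, rewrapping them as misleading parse failures. Where a net broader than one exception type is genuinely wanted, scala.util.control.NonFatal is the idiomatic middle ground; a short sketch (illustrative, not part of this commit):

import scala.util.control.NonFatal

// NonFatal matches ordinary exceptions (including NumberFormatException)
// but lets fatal throwables (OutOfMemoryError, ControlThrowable, ...)
// propagate instead of being swallowed.
def parseOrElse(s: String, default: Double): Double =
  try java.lang.Double.parseDouble(s)
  catch { case NonFatal(_) => default }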