[SPARK-5803][MLLIB] use ArrayBuilder to build primitive arrays #4594

Closed · wants to merge 2 commits
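
For context on the change itself: `ArrayBuffer[T]` stores its elements in an `Array[AnyRef]`, so every appended `Int` or `Double` is boxed, whereas `ArrayBuilder.make` picks a primitive-specialized builder (`ArrayBuilder.ofInt`, `ArrayBuilder.ofDouble`, ...) that writes into a raw primitive array. A minimal standalone sketch of the builder API this patch switches to (not code from the patch):

import scala.collection.mutable.ArrayBuilder

// ArrayBuilder.make[Int] resolves to the specialized ArrayBuilder.ofInt,
// which grows a raw Array[Int] -- no per-element boxing, unlike
// ArrayBuffer[Int], which stores java.lang.Integer values internally.
val builder = ArrayBuilder.make[Int]
builder.sizeHint(4)        // optional: pre-size the backing array
builder += 1
builder += 2
builder ++= Array(3, 4)
val arr: Array[Int] = builder.result()   // Array(1, 2, 3, 4)

`result()` hands back an exactly-sized primitive array, so the accumulate-then-`result()` pattern replaces ArrayBuffer's append-then-`toArray` in all three files below.
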
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
@@ -21,7 +21,7 @@ import java.lang.{Iterable => JavaIterable}
 
 import scala.collection.JavaConverters._
 import scala.collection.mutable
-import scala.collection.mutable.ArrayBuffer
+import scala.collection.mutable.ArrayBuilder
 
 import com.github.fommil.netlib.BLAS.{getInstance => blas}
 
@@ -272,7 +272,7 @@ class Word2Vec extends Serializable with Logging {
       def hasNext: Boolean = iter.hasNext
 
       def next(): Array[Int] = {
-        var sentence = new ArrayBuffer[Int]
+        val sentence = ArrayBuilder.make[Int]
         var sentenceLength = 0
         while (iter.hasNext && sentenceLength < MAX_SENTENCE_LENGTH) {
           val word = bcVocabHash.value.get(iter.next())
@@ -283,7 +283,7 @@ class Word2Vec extends Serializable with Logging {
             case None =>
           }
         }
-        sentence.toArray
+        sentence.result()
       }
     }
   }
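
The `next()` change above is that same accumulate-then-`result()` pattern inside an iterator. A standalone sketch of the logic, with `vocab` and `maxLen` as stand-ins for the broadcast `bcVocabHash` and `MAX_SENTENCE_LENGTH` (hypothetical names, for illustration only):

import scala.collection.mutable.ArrayBuilder

// Chunk a token stream into Array[Int] "sentences" of bounded length,
// skipping out-of-vocabulary words, as next() above does.
def nextSentence(iter: Iterator[String], vocab: Map[String, Int], maxLen: Int): Array[Int] = {
  val sentence = ArrayBuilder.make[Int]
  var sentenceLength = 0
  while (iter.hasNext && sentenceLength < maxLen) {
    vocab.get(iter.next()) match {
      case Some(w) =>
        sentence += w          // specialized: appends into a raw Array[Int]
        sentenceLength += 1
      case None =>             // unknown word: skip it
    }
  }
  sentence.result()
}

// nextSentence(Iterator("a", "b", "zz", "c"), Map("a" -> 0, "b" -> 1, "c" -> 2), 1000)
// => Array(0, 1, 2)
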
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
@@ -17,9 +17,9 @@
 
 package org.apache.spark.mllib.tree
 
-import scala.collection.mutable
 import scala.collection.JavaConverters._
-import scala.collection.mutable.ArrayBuffer
+import scala.collection.mutable
+import scala.collection.mutable.ArrayBuilder
 
 import org.apache.spark.Logging
 import org.apache.spark.annotation.Experimental
@@ -1136,7 +1136,7 @@ object DecisionTree extends Serializable with Logging {
       logDebug("stride = " + stride)
 
       // iterate `valueCount` to find splits
-      val splits = new ArrayBuffer[Double]
+      val splitsBuilder = ArrayBuilder.make[Double]
       var index = 1
       // currentCount: sum of counts of values that have been visited
       var currentCount = valueCounts(0)._2
@@ -1154,13 +1154,13 @@ object DecisionTree extends Serializable with Logging {
        // If adding count of current value to currentCount
        // makes the gap between currentCount and targetCount smaller,
        // previous value is a split threshold.
        if (previousGap < currentGap) {
-          splits.append(valueCounts(index - 1)._1)
+          splitsBuilder += valueCounts(index - 1)._1
          targetCount += stride
        }
        index += 1
      }
 
-      splits.toArray
+      splitsBuilder.result()
    }
  }
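
The DecisionTree hunks sit inside the stride-based split search for continuous features. A self-contained sketch of that loop under toy inputs: `valueCounts` here is a pre-sorted in-memory `(value, count)` array and `stride` is computed locally, whereas the real method derives both from sampled feature values.

import scala.collection.mutable.ArrayBuilder

// Walk (value, count) pairs in order and emit a split threshold wherever
// the running count crosses a stride boundary between quantile targets.
def findSplits(valueCounts: Array[(Double, Long)], numSplits: Int): Array[Double] = {
  val totalCount = valueCounts.map(_._2).sum
  val stride = totalCount.toDouble / (numSplits + 1)
  val splitsBuilder = ArrayBuilder.make[Double]
  var index = 1
  var currentCount = valueCounts(0)._2
  var targetCount = stride
  while (index < valueCounts.length) {
    val previousCount = currentCount
    currentCount += valueCounts(index)._2
    val previousGap = math.abs(previousCount - targetCount)
    val currentGap = math.abs(currentCount - targetCount)
    // If adding the current value's count widens the gap to the target,
    // the previous value was the closer threshold: record it, move the
    // target to the next stride boundary.
    if (previousGap < currentGap) {
      splitsBuilder += valueCounts(index - 1)._1
      targetCount += stride
    }
    index += 1
  }
  splitsBuilder.result()
}

// findSplits(Array((1.0, 5L), (2.0, 3L), (3.0, 2L)), numSplits = 1)
// => Array(1.0)   (the running count crosses the 5.0 target after 1.0)
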
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.util
 
 import java.util.StringTokenizer
 
-import scala.collection.mutable.{ArrayBuffer, ListBuffer}
+import scala.collection.mutable.{ArrayBuilder, ListBuffer}
 
 import org.apache.spark.SparkException
 
@@ -51,7 +51,7 @@ private[mllib] object NumericParser {
   }
 
   private def parseArray(tokenizer: StringTokenizer): Array[Double] = {
-    val values = ArrayBuffer.empty[Double]
+    val values = ArrayBuilder.make[Double]
     var parsing = true
     var allowComma = false
     var token: String = null
@@ -67,14 +67,14 @@ private[mllib] object NumericParser {
         }
       } else {
         // expecting a number
-        values.append(parseDouble(token))
+        values += parseDouble(token)
         allowComma = true
       }
     }
     if (parsing) {
       throw new SparkException(s"An array must end with ']'.")
     }
-    values.toArray
+    values.result()
   }
 
   private def parseTuple(tokenizer: StringTokenizer): Seq[_] = {
@@ -114,7 +114,7 @@ private[mllib] object NumericParser {
     try {
       java.lang.Double.parseDouble(s)
     } catch {
-      case e: Throwable =>
+      case e: NumberFormatException =>
         throw new SparkException(s"Cannot parse a double from: $s", e)
     }
   }
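
The last hunk is a behavior fix on top of the builder swap: `case e: Throwable` also traps fatal JVM errors (e.g. `OutOfMemoryError`) and Scala's control-flow throwables, while `java.lang.Double.parseDouble` signals malformed input only via `NumberFormatException`. A runnable sketch of the narrowed catch, with `IllegalArgumentException` standing in for `SparkException` so it compiles without Spark on the classpath:

// Translate only the parse failure; anything fatal keeps propagating.
def parseDouble(s: String): Double =
  try {
    java.lang.Double.parseDouble(s)
  } catch {
    case e: NumberFormatException =>
      throw new IllegalArgumentException(s"Cannot parse a double from: $s", e)
  }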