Skip to content

Commit dd0b0b2

Browse files
committed
Preallocate wordVectors
1 parent ffc9240 commit dd0b0b2

File tree

1 file changed

+7
-5
lines changed

1 file changed

+7
-5
lines changed

mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -432,12 +432,13 @@ class Word2VecModel private[mllib] (
432432
model: Map[String, Array[Float]]) extends Serializable with Saveable {
433433

434434
// wordList: Ordered list of words obtained from model.
435-
// wordIndex: Maps each word to an index, which can retrieve the corresponding
436-
// vector from wordVectors (see below)
437-
// vectorSize: Dimension of each vector.
438-
// numWords: Number of words.
439435
private val wordList: Array[String] = model.keys.toArray
436+
437+
// wordIndex: Maps each word to an index, which can retrieve the corresponding
438+
// vector from wordVectors (see below).
440439
private val wordIndex: Map[String, Int] = wordList.zip(0 until model.size).toMap
440+
441+
// vectorSize: Dimension of each word's vector.
441442
private val vectorSize = model.head._2.size
442443
private val numWords = wordIndex.size
443444

@@ -447,11 +448,12 @@ class Word2VecModel private[mllib] (
447448
// wordVecNorms: Array of length numWords, each value being the Euclidean norm
448449
// of the corresponding word's vector.
449450
private val (wordVectors: Array[Float], wordVecNorms: Array[Double]) = {
450-
val wordVectors = wordList.flatMap(word => model.get(word).get).toArray
451+
val wordVectors = new Array[Float](vectorSize * numWords)
451452
val wordVecNorms = new Array[Double](numWords)
452453
var i = 0
453454
while (i < numWords) {
454455
val vec = model.get(wordList(i)).get
456+
Array.copy(vec, 0, wordVectors, i * vectorSize, vectorSize)
455457
wordVecNorms(i) = blas.snrm2(vectorSize, vec, 1)
456458
i += 1
457459
}

0 commit comments

Comments
 (0)