@@ -432,12 +432,13 @@ class Word2VecModel private[mllib] (
432
432
model : Map [String , Array [Float ]]) extends Serializable with Saveable {
433
433
434
434
// wordList: Ordered list of words obtained from model.
435
- // wordIndex: Maps each word to an index, which can retrieve the corresponding
436
- // vector from wordVectors (see below)
437
- // vectorSize: Dimension of each vector.
438
- // numWords: Number of words.
439
435
// Snapshot of the model's keys in the map's iteration order; this order fixes each word's index.
private val wordList: Array[String] = model.keysIterator.toArray
436
+
437
+ // wordIndex: Maps each word to an index, which can retrieve the corresponding
438
+ // vector from wordVectors (see below).
440
439
// Pairs every word with its position in wordList (wordList holds exactly model.size entries).
private val wordIndex: Map[String, Int] = wordList.zipWithIndex.toMap
440
+
441
+ // vectorSize: Dimension of each word's vector.
441
442
// Read from the first entry only; the code below applies this length to every vector.
// NOTE(review): presumably all vectors in model share one length -- confirm. Throws on an empty model.
private val vectorSize = model.head._2.length
442
443
// Number of distinct words; wordList is built from the map's keys, so it has one entry per word.
private val numWords = wordList.length
443
444
@@ -447,11 +448,12 @@ class Word2VecModel private[mllib] (
447
448
// wordVecNorms: Array of length numWords, each value being the Euclidean norm
448
449
// of the corresponding word's vector.
449
450
private val (wordVectors : Array [Float ], wordVecNorms : Array [Double ]) = {
450
- val wordVectors = wordList.flatMap(word => model.get(word).get).toArray
451
+ val wordVectors = new Array [ Float ](vectorSize * numWords)
451
452
val wordVecNorms = new Array [Double ](numWords)
452
453
var i = 0
453
454
while (i < numWords) {
454
455
val vec = model.get(wordList(i)).get
456
+ Array .copy(vec, 0 , wordVectors, i * vectorSize, vectorSize)
455
457
wordVecNorms(i) = blas.snrm2(vectorSize, vec, 1 )
456
458
i += 1
457
459
}
0 commit comments