Skip to content

Commit 4c7aec3

Browse files
author
Li Pu
committed
improve comments
1 parent e7850ed commit 4c7aec3

File tree

2 files changed

+34
-8
lines changed

2 files changed

+34
-8
lines changed

mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,16 +50,26 @@ object EigenValueDecomposition {
5050

5151
val arpack = ARPACK.getInstance()
5252

53+
// tolerance used in stopping criterion
5354
val tolW = new doubleW(tol)
55+
// number of desired eigenvalues, 0 < nev < n
5456
val nev = new intW(k)
57+
// nev Lanczos vectors are generated are generated in the first iteration
58+
// ncv-nev Lanczos vectors are generated are generated in each subsequent iteration
59+
// ncv must be smaller than n
5560
val ncv = scala.math.min(2 * k, n)
5661

62+
// "I" for standard eigenvalue problem, "G" for generalized eigenvalue problem
5763
val bmat = "I"
64+
// "LM" : compute the NEV largest (in magnitude) eigenvalues
5865
val which = "LM"
5966

6067
var iparam = new Array[Int](11)
68+
// use exact shift in each iteration
6169
iparam(0) = 1
70+
// maximum number of Arnoldi update iterations, or the actual number of iterations on output
6271
iparam(2) = 300
72+
// Mode 1: A*x = lambda*x, A symmetric
6373
iparam(6) = 1
6474

6575
var ido = new intW(0)
@@ -70,44 +80,58 @@ object EigenValueDecomposition {
7080
var workl = new Array[Double](ncv * (ncv + 8))
7181
var ipntr = new Array[Int](11)
7282

73-
// first call to ARPACK
83+
// call ARPACK's reverse communication, first iteration with ido = 0
7484
arpack.dsaupd(ido, bmat, n, which, nev.`val`, tolW, resid, ncv, v, n, iparam, ipntr, workd,
7585
workl, workl.length, info)
7686

7787
val w = BDV(workd)
7888

79-
while(ido.`val` != 99) {
89+
// ido = 99 : done flag in reverse communication
90+
while (ido.`val` != 99) {
8091
if (ido.`val` != -1 && ido.`val` != 1) {
81-
throw new IllegalStateException("ARPACK returns ido = " + ido.`val`)
92+
throw new IllegalStateException("ARPACK returns ido = " + ido.`val` +
93+
" This flag is not compatible with Mode 1: A*x = lambda*x, A symmetric.")
8294
}
8395
// multiply working vector with the matrix
8496
val inputOffset = ipntr(0) - 1
8597
val outputOffset = ipntr(1) - 1
8698
val x = w(inputOffset until inputOffset + n)
8799
val y = w(outputOffset until outputOffset + n)
88100
y := BDV(mul(Vectors.fromBreeze(x).asInstanceOf[DenseVector]).toArray)
89-
// call ARPACK
101+
// call ARPACK's reverse communication
90102
arpack.dsaupd(ido, bmat, n, which, nev.`val`, tolW, resid, ncv, v, n, iparam, ipntr,
91103
workd, workl, workl.length, info)
92104
}
93105

94106
if (info.`val` != 0) {
95-
throw new IllegalStateException("ARPACK returns non-zero info = " + info.`val`)
107+
info.`val` match {
108+
case 1 => throw new IllegalStateException("ARPACK returns non-zero info = " + info.`val` +
109+
" Maximum number of iterations taken. (Refer ARPACK user guide for details)")
110+
case 2 => throw new IllegalStateException("ARPACK returns non-zero info = " + info.`val` +
111+
" No shifts could be applied. Try to increase NCV. " +
112+
"(Refer ARPACK user guide for details)")
113+
case _ => throw new IllegalStateException("ARPACK returns non-zero info = " + info.`val` +
114+
" Please refer ARPACK user guide for error message.")
115+
}
96116
}
97117

98118
val d = new Array[Double](nev.`val`)
99119
val select = new Array[Boolean](ncv)
120+
// copy the Ritz vectors
100121
val z = java.util.Arrays.copyOfRange(v, 0, nev.`val` * n)
101122

123+
// call ARPACK's post-processing for eigenvectors
102124
arpack.dseupd(true, "A", select, d, z, n, 0.0, bmat, n, which, nev, tol, resid, ncv, v, n,
103125
iparam, ipntr, workd, workl, workl.length, info)
104126

127+
// number of computed eigenvalues, might be smaller than k
105128
val computed = iparam(4)
106129

107130
val eigenPairs = java.util.Arrays.copyOfRange(d, 0, computed).zipWithIndex.map{
108131
r => (r._1, java.util.Arrays.copyOfRange(z, r._2 * n, r._2 * n + n))
109132
}
110133

134+
// sort the eigen-pairs in descending order
111135
val sortedEigenPairs = eigenPairs.sortBy(-1 * _._1)
112136

113137
// copy eigenvectors in descending order of eigenvalues

mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -259,8 +259,9 @@ class RowMatrix(
259259
* Then we compute U via easy matrix multiplication as U = A * (V * S^{-1}).
260260
* Note that this approach requires `O(nnz(A))` time.
261261
*
262-
* When the requested eigenvalues k = n, a non-sparse implementation will be used, which requires
263-
* `n^2` doubles to fit in memory and `O(n^3)` time on the master node.
262+
* ARPACK requires k to be strictly less than n. Thus when the requested eigenvalues k = n, a
263+
* non-sparse implementation will be used, which requires `n^2` doubles to fit in memory and
264+
* `O(n^3)` time on the master node.
264265
*
265266
* At most k largest non-zero singular values and associated vectors are returned.
266267
* If there are k such values, then the dimensions of the return will be:
@@ -274,7 +275,8 @@ class RowMatrix(
274275
* @param computeU whether to compute U
275276
* @param rCond the reciprocal condition number. All singular values smaller than rCond * sigma(0)
276277
* are treated as zero, where sigma(0) is the largest singular value.
277-
* @param tol the tolerance of the svd computation.
278+
* @param tol the numerical tolerance of svd computation. Larger tolerance means fewer iterations,
279+
* but less accurate result.
278280
* @return SingularValueDecomposition(U, s, V)
279281
*/
280282
def computeSVD(

0 commit comments

Comments
 (0)