|
66 | 66 | function transpose( N, nrhs, DL, strideDL, offsetDL, D, strideD, offsetD, DU, strideDU, offsetDU, DU2, strideDU2, offsetDU2, IPIV, strideIPIV, offsetIPIV, B, strideB1, strideB2, offsetB ) {
|
67 | 67 | var idu2;
|
68 | 68 | var temp;
|
69 |
| - var ipi; |
70 | 69 | var idu;
|
71 | 70 | var idl;
|
72 | 71 | var ib1;
|
73 | 72 | var ib2;
|
| 73 | + var ib3; |
74 | 74 | var id;
|
75 | 75 | var ip;
|
76 | 76 | var i;
|
77 | 77 | var j;
|
78 | 78 |
|
79 | 79 | if ( nrhs <= 1 ) {
|
80 |
| - ib1 = offsetB; // ib1 = offsetB + (j*strideB2); |
| 80 | + ib1 = offsetB; |
81 | 81 | for ( j = 0; j < nrhs; j++) {
|
82 | 82 | ip = offsetIPIV + ( (N-2) * strideIPIV );
|
83 | 83 | idl = offsetDL + ( (N-2) * strideDL );
|
@@ -105,10 +105,10 @@ function transpose( N, nrhs, DL, strideDL, offsetDL, D, strideD, offsetD, DU, st
|
105 | 105 |
|
106 | 106 | ib2 = strideB1 * ( N-2 );
|
107 | 107 | for ( i = N - 2; i >= 0; i-- ) {
|
108 |
| - ipi = IPIV[ ip ]; |
| 108 | + ib3 = offsetB + ( strideB1 * IPIV[ ip ] ) + ( strideB2 * j ); |
109 | 109 | temp = B[ ib1 + ib2 ] - ( DL[ idl ] * B[ ib1 + ib2 + strideB1 ] );
|
110 |
| - B[ ib1 + ib2 ] = B[ offsetB + (strideB1*ipi) + (strideB2*j) ]; |
111 |
| - B[ offsetB + (strideB1*ipi) + (strideB2*j) ] = temp; |
| 110 | + B[ ib1 + ib2 ] = B[ ib3 ]; |
| 111 | + B[ ib3 ] = temp; |
112 | 112 |
|
113 | 113 | ip -= strideIPIV;
|
114 | 114 | idl -= strideDL;
|
@@ -211,79 +211,93 @@ function noTranspose( N, nrhs, DL, strideDL, offsetDL, D, strideD, offsetD, DU,
|
211 | 211 | var ipi;
|
212 | 212 | var idl;
|
213 | 213 | var idu;
|
| 214 | + var ib3; |
| 215 | + var ib1; |
| 216 | + var ib2; |
214 | 217 | var ip;
|
215 | 218 | var id;
|
216 | 219 | var j;
|
217 | 220 | var i;
|
218 | 221 |
|
219 | 222 | if ( nrhs <= 1 ) {
|
| 223 | + ib1 = offsetB; |
220 | 224 | for ( j = 0; j < nrhs; j++ ) {
|
221 | 225 | ip = offsetIPIV;
|
222 | 226 | idl = offsetDL;
|
223 | 227 | id = offsetD;
|
224 |
| - idu = offsetDU; |
225 | 228 | idu2 = offsetDU2;
|
226 | 229 |
|
| 230 | + ib2 = 0; |
227 | 231 | for ( i = 0; i < N - 1; i++ ) {
|
228 | 232 | ipi = IPIV[ ip ];
|
229 |
| - temp = B[ offsetB + (strideB1*(i+1-ipi+i)) + (strideB2*j) ] - ( DL[ idl ] * B[ offsetB + (strideB1*ipi) + (strideB2*j) ] ); |
230 |
| - B[ offsetB + (strideB1*i) + (strideB2*j) ] = B[ offsetB + (strideB1*ipi) + (strideB2*j) ]; |
231 |
| - B[ offsetB + (strideB1*(i+1)) + (strideB2*j) ] = temp; |
| 233 | + ib3 = offsetB + (strideB1*ipi) + (strideB2*j); |
| 234 | + temp = B[ offsetB + (strideB1*(i+1-ipi+i)) + (strideB2*j) ] - ( DL[ idl ] * B[ ib3 ] ); |
| 235 | + B[ ib1 + ib2 ] = B[ ib3 ]; |
| 236 | + B[ ib1 + ib2 + strideB1 ] = temp; |
232 | 237 |
|
233 | 238 | ip += strideIPIV;
|
234 | 239 | idl += strideDL;
|
| 240 | + ib2 += strideB1; |
235 | 241 | }
|
236 | 242 |
|
237 |
| - B[ offsetB + (strideB1*(N-1)) + (strideB2*j) ] /= D[ offsetD + (strideD*(N-1)) ]; |
| 243 | + B[ ib1 + ib2 ] /= D[ offsetD + (strideD*(N-1)) ]; |
238 | 244 |
|
| 245 | + idu = offsetDU + ( (N-3) * strideDU ); |
| 246 | + id = offsetD + ( (N-3) * strideD ); |
239 | 247 | if ( N > 1 ) {
|
240 |
| - B[ offsetB + (strideB1*(N-2)) + (strideB2*j) ] = ( B[ offsetB + (strideB1*(N-2)) + (strideB2*j) ] - ( DU[ offsetDU + (strideDU*(N-2)) ] * B[ offsetB + (strideB1*(N-1)) + (strideB2*j) ] ) ) / D[ offsetD + (strideD*(N-2)) ]; |
| 248 | + B[ ib1 + ib2 - strideB1 ] = ( B[ ib1 + ib2 - strideB1 ] - ( DU[ idu + strideDU ] * B[ ib1 + ib2 ] ) ) / D[ id + strideD ]; |
241 | 249 | }
|
242 | 250 |
|
243 |
| - idu = offsetDU + ( (N-3) * strideDU ); |
244 | 251 | idu2 = offsetDU2 + ( (N-3) * strideDU2 );
|
245 |
| - id = offsetD + ( (N-3) * strideD ); |
| 252 | + ib2 = ( N - 3 ) * strideB1; |
246 | 253 | for ( i = N - 3; i >= 0; i-- ) {
|
247 |
| - B[ offsetB + (strideB1*i) + (strideB2*j) ] = ( B[ offsetB + (strideB1*i) + (strideB2*j) ] - ( DU[ idu ] * B[ offsetB + (strideB1*(i+1)) + (strideB2*j) ] ) - ( DU2[ idu2 ] * B[ offsetB + (strideB1*(i+2)) + (strideB2*j) ] ) ) / D[ id ]; |
| 254 | + B[ ib1 + ib2 ] = ( B[ ib1 + ib2 ] - ( DU[ idu ] * B[ ib1 + ib2 + strideB1 ] ) - ( DU2[ idu2 ] * B[ ib1 + ib2 + ( 2 * strideB1 ) ] ) ) / D[ id ]; |
248 | 255 |
|
249 | 256 | idu -= strideDU;
|
250 | 257 | idu2 -= strideDU2;
|
251 | 258 | id -= strideD;
|
| 259 | + ib2 -= strideB1; |
252 | 260 | }
|
| 261 | + ib1 += strideB2; |
253 | 262 | }
|
254 | 263 | } else {
|
| 264 | + ib1 = offsetB; |
255 | 265 | for ( j = 0; j < nrhs; j++ ) {
|
256 | 266 | ip = offsetIPIV;
|
257 | 267 | idl = offsetDL;
|
258 |
| - |
| 268 | + ib2 = 0; |
259 | 269 | for ( i = 0; i < N - 1; i++ ) {
|
260 | 270 | if ( IPIV[ ip ] === i ) {
|
261 |
| - B[ offsetB + (strideB1*(i+1)) + (strideB2*j) ] -= DL[ idl ] * B[ offsetB + (strideB1*i) + (strideB2*j) ]; |
| 271 | + B[ ib1 + ib2 + strideB1 ] -= DL[ idl ] * B[ ib1 + ib2 ]; |
262 | 272 | } else {
|
263 |
| - temp = B[ offsetB + (strideB1*i) + (strideB2*j) ]; |
264 |
| - B[ offsetB + (strideB1*i) + (strideB2*j) ] = B[ offsetB + (strideB1*(i+1)) + (strideB2*j) ]; |
265 |
| - B[ offsetB + (strideB1*(i+1)) + (strideB2*j) ] = temp - ( DL[ idl ] * B[ offsetB + (strideB1*i) + (strideB2*j) ] ); |
| 273 | + temp = B[ ib1 + ib2 ]; |
| 274 | + B[ ib1 + ib2 ] = B[ ib1 + ib2 + strideB1 ]; |
| 275 | + B[ ib1 + ib2 + strideB1 ] = temp - ( DL[ idl ] * B[ ib1 + ib2 ] ); |
266 | 276 | }
|
267 | 277 |
|
268 | 278 | ip += strideIPIV;
|
269 | 279 | idl += strideDL;
|
| 280 | + ib2 += strideB1; |
270 | 281 | }
|
271 |
| - B[ offsetB + (strideB1*(N-1)) + (strideB2*j) ] /= D[ offsetD + (strideD*(N-1)) ]; |
| 282 | + B[ ib1 + ib2 ] /= D[ offsetD + (strideD*(N-1)) ]; |
272 | 283 |
|
| 284 | + idu = offsetDU + ( (N-3) * strideDU ); |
| 285 | + id = offsetD + ( (N-3) * strideD ); |
273 | 286 | if ( N > 1 ) {
|
274 |
| - B[ offsetB + (strideB1*(N-2)) + (strideB2 * j) ] = ( B[ offsetB + (strideB1*(N-2)) + (strideB2*j) ] - ( DU[ offsetDU + (strideDU*(N-2)) ] * B[ offsetB + (strideB1*(N-1)) + (strideB2*j) ] ) ) / D[ offsetD + (strideD*(N-2)) ]; |
| 287 | + B[ ib1 + ib2 - strideB1 ] = ( B[ ib1 + ib2 - strideB1 ] - ( DU[ idu + strideDU ] * B[ ib1 + ib2 ] ) ) / D[ id + strideD ]; |
275 | 288 | }
|
276 | 289 |
|
277 |
| - idu = offsetDU + ( (N-3) * strideDU ); |
278 | 290 | idu2 = offsetDU2 + ( (N-3) * strideDU2 );
|
279 |
| - id = offsetD + ( (N-3) * strideD ); |
| 291 | + ib2 = ( N - 3 ) * strideB1; |
280 | 292 | for ( i = N - 3; i >= 0; i-- ) {
|
281 |
| - B[ offsetB + (strideB1*i) + (strideB2*j) ] = ( B[ offsetB + (strideB1*i) + (strideB2*j) ] - ( DU[ idu ] * B[ offsetB + (strideB1*(i+1)) + (strideB2*j) ] ) - ( DU2[ idu2 ] * B[ offsetB + (strideB1*(i+2)) + (strideB2*j) ] ) ) / D[ id ]; |
| 293 | + B[ ib1 + ib2 ] = ( B[ ib1 + ib2 ] - ( DU[ idu ] * B[ ib1 + ib2 + strideB1 ] ) - ( DU2[ idu2 ] * B[ ib1 + ib2 + ( 2 * strideB1 ) ] ) ) / D[ id ]; |
282 | 294 |
|
283 | 295 | idu -= strideDU;
|
284 | 296 | idu2 -= strideDU2;
|
285 | 297 | id -= strideD;
|
286 | 298 | }
|
| 299 | + |
| 300 | + ib1 += strideB2; |
287 | 301 | }
|
288 | 302 | }
|
289 | 303 |
|
|
0 commit comments