@@ -69,76 +69,95 @@ function transpose( N, nrhs, DL, strideDL, offsetDL, D, strideD, offsetD, DU, st
69
69
var ipi ;
70
70
var idu ;
71
71
var idl ;
72
+ var ib1 ;
73
+ var ib2 ;
72
74
var id ;
73
75
var ip ;
74
76
var i ;
75
77
var j ;
76
78
77
79
if ( nrhs <= 1 ) {
80
+ ib1 = offsetB ; // ib1 = offsetB + (j*strideB2);
78
81
for ( j = 0 ; j < nrhs ; j ++ ) {
79
82
ip = offsetIPIV + ( ( N - 2 ) * strideIPIV ) ;
80
83
idl = offsetDL + ( ( N - 2 ) * strideDL ) ;
81
84
id = offsetD ;
82
85
idu = offsetDU ;
83
86
idu2 = offsetDU2 ;
84
87
85
- B [ offsetB + ( strideB2 * j ) ] = B [ offsetB + ( strideB2 * j ) ] / D [ id ] ;
88
+ B [ ib1 ] /= D [ id ] ;
86
89
if ( N > 1 ) {
87
- B [ offsetB + strideB1 + ( strideB2 * j ) ] = ( B [ offsetB + strideB1 + ( strideB2 * j ) ] - ( DU [ idu ] * B [ offsetB + ( strideB2 * j ) ] ) ) / D [ id + strideD ] ;
90
+ B [ ib1 + strideB1 ] = ( B [ ib1 + strideB1 ] - ( DU [ idu ] * B [ ib1 ] ) ) / D [ id + strideD ] ;
88
91
}
89
92
90
93
idu = offsetDU + strideDU ;
91
94
id = offsetD + ( 2 * strideD ) ;
95
+
96
+ ib2 = strideB1 * 2 ;
92
97
for ( i = 2 ; i < N ; i ++ ) {
93
- B [ offsetB + ( strideB1 * i ) + ( strideB2 * j ) ] = ( B [ offsetB + ( strideB1 * i ) + ( strideB2 * j ) ] - ( DU [ idu ] * B [ offsetB + ( strideB1 * ( i - 1 ) ) + ( strideB2 * j ) ] ) - ( DU2 [ idu2 ] * B [ offsetB + ( strideB1 * ( i - 2 ) ) + ( strideB2 * j ) ] ) ) / D [ id ] ;
98
+ B [ ib1 + ib2 ] = ( B [ ib1 + ib2 ] - ( DU [ idu ] * B [ ib1 + ib2 - strideB1 ] ) - ( DU2 [ idu2 ] * B [ ib1 + ib2 - ( 2 * strideB1 ) ] ) ) / D [ id ] ;
94
99
95
100
idu += strideDU ;
96
101
idu2 += strideDU2 ;
97
102
id += strideD ;
103
+ ib2 += strideB1 ;
98
104
}
105
+
106
+ ib2 = strideB1 * ( N - 2 ) ;
99
107
for ( i = N - 2 ; i >= 0 ; i -- ) {
100
108
ipi = IPIV [ ip ] ;
101
- temp = B [ offsetB + ( strideB1 * i ) + ( strideB2 * j ) ] - ( DL [ idl ] * B [ offsetB + ( strideB1 * ( i + 1 ) ) + ( strideB2 * j ) ] ) ;
102
- B [ offsetB + ( strideB1 * i ) + ( strideB2 * j ) ] = B [ offsetB + ( strideB1 * ipi ) + ( strideB2 * j ) ] ;
109
+ temp = B [ ib1 + ib2 ] - ( DL [ idl ] * B [ ib1 + ib2 + strideB1 ] ) ;
110
+ B [ ib1 + ib2 ] = B [ offsetB + ( strideB1 * ipi ) + ( strideB2 * j ) ] ;
103
111
B [ offsetB + ( strideB1 * ipi ) + ( strideB2 * j ) ] = temp ;
104
112
105
113
ip -= strideIPIV ;
106
114
idl -= strideDL ;
115
+ ib2 -= strideB1 ;
107
116
}
117
+
118
+ ib1 += strideB2 ;
108
119
}
109
120
} else {
121
+ ib1 = offsetB ;
110
122
for ( j = 0 ; j < nrhs ; j ++ ) {
111
123
idu = offsetDU ;
112
124
id = offsetD ;
113
125
idu2 = offsetDU2 ;
114
126
ip = offsetIPIV + ( ( N - 2 ) * strideIPIV ) ;
115
- idl = offsetDL + ( ( N - 2 ) * strideDL ) ;
116
127
117
- B [ offsetB + ( strideB2 * j ) ] /= D [ id ] ;
128
+ B [ ib1 ] /= D [ id ] ;
118
129
if ( N > 1 ) {
119
- B [ offsetB + strideB1 + ( strideB2 * j ) ] = ( B [ offsetB + strideB1 + ( strideB2 * j ) ] - ( DU [ idu ] * B [ offsetB + ( strideB2 * j ) ] ) ) / D [ id + strideD ] ;
130
+ B [ ib1 + strideB1 ] = ( B [ ib1 + strideB1 ] - ( DU [ idu ] * B [ ib1 ] ) ) / D [ id + strideD ] ;
120
131
}
121
132
idu = offsetDU + strideDU ;
122
133
id = offsetD + ( 2 * strideD ) ;
134
+ ib2 = 2 * strideB1 ;
123
135
for ( i = 2 ; i < N ; i ++ ) {
124
- B [ offsetB + ( strideB1 * i ) + ( strideB2 * j ) ] = ( B [ offsetB + ( strideB1 * i ) + ( strideB2 * j ) ] - ( DU [ idu ] * B [ offsetB + ( strideB1 * ( i - 1 ) ) + ( strideB2 * j ) ] ) - ( DU2 [ idu2 ] * B [ offsetB + ( strideB1 * ( i - 2 ) ) + ( strideB2 * j ) ] ) ) / D [ id ] ;
136
+ B [ ib1 + ib2 ] = ( B [ ib1 + ib2 ] - ( DU [ idu ] * B [ ib1 + ib2 - strideB1 ] ) - ( DU2 [ idu2 ] * B [ ib1 + ib2 - ( 2 * strideB1 ) ] ) ) / D [ id ] ;
125
137
126
138
idu += strideDU ;
127
139
id += strideD ;
128
140
idu2 += strideDU2 ;
141
+ ib2 += strideB1 ;
129
142
}
143
+
144
+ ib2 = ( N - 2 ) * strideB1 ;
145
+ idl = offsetDL + ( ( N - 2 ) * strideDL ) ;
130
146
for ( i = N - 2 ; i >= 0 ; i -- ) {
131
147
if ( IPIV [ ip ] === i ) {
132
- B [ offsetB + ( strideB1 * i ) + ( strideB2 * j ) ] -= DL [ offsetDL + ( strideDL * i ) ] * B [ offsetB + ( strideB1 * ( i + 1 ) ) + ( strideB2 * j ) ] ;
148
+ B [ ib1 + ib2 ] -= DL [ idl ] * B [ ib1 + ib2 + strideB1 ] ;
133
149
} else {
134
- temp = B [ offsetB + ( strideB1 * ( i + 1 ) ) + ( strideB2 * j ) ] ;
135
- B [ offsetB + ( strideB1 * ( i + 1 ) ) + ( strideB2 * j ) ] = B [ offsetB + ( strideB1 * i ) + ( strideB2 * j ) ] - ( DL [ idl ] * temp ) ;
136
- B [ offsetB + ( strideB1 * i ) + ( strideB2 * j ) ] = temp ;
150
+ temp = B [ ib1 + ib2 + strideB1 ] ;
151
+ B [ ib1 + ib2 + strideB1 ] = B [ ib1 + ib2 ] - ( DL [ idl ] * temp ) ;
152
+ B [ ib1 + ib2 ] = temp ;
137
153
}
138
154
139
155
ip -= strideIPIV ;
140
156
idl -= strideDL ;
157
+ ib2 -= strideB1 ;
141
158
}
159
+
160
+ ib1 += strideB2 ;
142
161
}
143
162
}
144
163
0 commit comments