|
29 | 29 | #endif
|
30 | 30 | /* -------------------- */
|
31 | 31 |
|
| 32 | +/* |
| 33 | + * build_ppo_triples: build (p0, p1, o) triples for dyson evaluation. |
| 34 | + */ |
| 35 | +int build_ppo_triples(int **ppo, struct occstr *str0, int nstr0, |
| 36 | + struct occstr *str1, int nstr1, int ninto) |
| 37 | +{ |
| 38 | + int nppo = 0; |
| 39 | + int oindex = 0; |
| 40 | + int i, j; |
| 41 | + for (i = 0; i < nstr0; i++) { |
| 42 | + for (j = 0; j < nstr1; j++) { |
| 43 | + |
| 44 | + oindex = comparestrings_dyson(str0[i], str1[j], ninto); |
| 45 | + if (oindex == 0) continue; |
| 46 | + ppo[nppo][0] = i; |
| 47 | + ppo[nppo][1] = j; |
| 48 | + ppo[nppo][2] = oindex; |
| 49 | + nppo++; |
| 50 | + } |
| 51 | + } |
| 52 | + return nppo; |
| 53 | +} |
| 54 | + |
32 | 55 | /*
|
33 | 56 | * comparestrings_dyson: compare the strings of N and N+1 determinants
|
34 | 57 | * to get the index of orbital contribution.
|
@@ -100,118 +123,135 @@ void compute_dyson_orbital(int w0_hndl, int dlen0, int w1_hndl, int dlen1,
|
100 | 123 | int norbs, int *dysnst0, int ndyst0, int *dysnst1,
|
101 | 124 | int ndyst1, int ndyorbs, double **dyorb)
|
102 | 125 | {
|
103 |
| -#define BUFSIZE 1000 |
104 |
| - int buflen = 0; /* Buffer length */ |
105 |
| - |
106 |
| - double **v0buff = NULL; /* N+1 electron ci buffer */ |
107 |
| - double *v0data = NULL; |
108 |
| - double **v1buff = NULL; /* N electron ci buffer */ |
109 |
| - double *v1data = NULL; |
110 |
| - int **w0buff = NULL; /* N+1 electron wf buffer */ |
111 |
| - int *w0data = NULL; |
112 |
| - int **w1buff = NULL; /* N electron wf buffer */ |
113 |
| - int *w1data = NULL; |
| 126 | +#define BUFSIZE 1000000 |
| 127 | +#define MAXSIZE 1000000 |
| 128 | + |
| 129 | + int buflen = 0; /* Buffer length */ |
| 130 | + |
| 131 | + double **v0buff = NULL; /* N+1 electron ci buffer */ |
| 132 | + double *v0data = NULL; |
| 133 | + double **v1buff = NULL; /* N electron ci buffer */ |
| 134 | + double *v1data = NULL; |
| 135 | + int **w0buff = NULL; /* N+1 electron wf buffer */ |
| 136 | + int *w0data = NULL; |
| 137 | + int **w1buff = NULL; /* N electron wf buffer */ |
| 138 | + int *w1data = NULL; |
| 139 | + |
| 140 | + int **ppo = NULL, *ppodata = NULL; /* (p0, p1, o-index) */ |
| 141 | + int nppo = 0; /* Number of ppo triples */ |
| 142 | + |
| 143 | + int v0_lo[2] = {0, 0}; |
| 144 | + int v0_hi[2] = {0, 0}; |
| 145 | + int v0_ld[1] = {0}; |
| 146 | + int v0_rows = 0; |
| 147 | + int v0_cols = 0; /* Rows and columns of v0 buffer */ |
| 148 | + int v1_lo[2] = {0, 0}; |
| 149 | + int v1_hi[2] = {0, 0}; |
| 150 | + int v1_ld[1] = {0}; |
| 151 | + int v1_rows = 0; |
| 152 | + int v1_cols = 0; |
| 153 | + |
| 154 | + int w0_lo[2] = {0, 0}; |
| 155 | + int w0_hi[2] = {0, 0}; |
| 156 | + int w0_ld[1] = {0}; |
| 157 | + int w1_lo[2] = {0, 0}; |
| 158 | + int w1_hi[2] = {0, 0}; |
| 159 | + int w1_ld[1] = {0}; |
| 160 | + int w1len = 0; |
| 161 | + |
| 162 | + int i0 = 0, i1 = 0; |
| 163 | + int i1max = 0; |
| 164 | + |
| 165 | + int ninto = 0; /* Number of internal orbitals */ |
114 | 166 |
|
115 |
| - int v0_lo[2] = {0, 0}; |
116 |
| - int v0_hi[2] = {0, 0}; |
117 |
| - int v0_ld[1] = {0}; |
118 |
| - int v0_rows = 0; |
119 |
| - int v0_cols = 0; /* Rows and columns of v0 buffer */ |
120 |
| - int v1_lo[2] = {0, 0}; |
121 |
| - int v1_hi[2] = {0, 0}; |
122 |
| - int v1_ld[1] = {0}; |
123 |
| - int v1_rows = 0; |
124 |
| - int v1_cols = 0; |
125 |
| - |
126 |
| - int w0_lo[2] = {0, 0}; |
127 |
| - int w0_hi[2] = {0, 0}; |
128 |
| - int w0_ld[1] = {0}; |
129 |
| - int w1_lo[2] = {0, 0}; |
130 |
| - int w1_hi[2] = {0, 0}; |
131 |
| - int w1_ld[1] = {0}; |
132 |
| - int w1len = 0; |
| 167 | + int nstr0 = 106420; // HARDCODE |
| 168 | + int nstr1 = 98218; // HARDCODE |
| 169 | + |
| 170 | + ninto = int_max(ninto0, ninto1); |
| 171 | + if (mpi_proc_rank == mpi_root) { |
| 172 | + printf(" Internal orbitals: %d\n", ninto); |
| 173 | + fflush(stdout); |
| 174 | + } |
| 175 | + |
| 176 | + /* Set up (p0, p1, o-index) triples */ |
| 177 | + if (mpi_proc_rank == mpi_rot) { |
| 178 | + printf(" Building ppo triples...\n"); |
| 179 | + } |
| 180 | + ppodata = allocate_mem_int_cont(&ppo, 3, MAXSIZE); |
| 181 | + nppo = build_ppo_triples(ppo, str0, nstr0, str1, nstr1, ninto); |
| 182 | + |
| 183 | + |
| 184 | + /* Determine which blocks of data are locally owned for the N+1 |
| 185 | + * electron vector. This will be the "static" buffer for which |
| 186 | + * we compute the dyson orbital contributions. */ |
| 187 | + NGA_Distribution(v0_hndl, mpi_proc_rank, v0_lo, v0_hi); |
| 188 | + v0_cols = v0_hi[0] - v0_lo[0] + 1; |
| 189 | + v0_rows = v0_hi[1] - v0_lo[1] + 1; |
| 190 | + v0_ld[0]= v0_rows; |
| 191 | + /* Allocate local array */ |
| 192 | + v0data = allocate_mem_double_cont(&v0buff, v0_rows, v0_cols); |
| 193 | + NGA_Get(v0_hndl, v0_lo, v0_hi, v0data, v0_ld); |
| 194 | + /* Allocate corresponding W0 array and get W0 data */ |
| 195 | + w0data = allocate_mem_int_cont(&w0buff, 3, v0_rows); |
| 196 | + w0_lo[0] = v0_lo[1]; |
| 197 | + w0_lo[1] = 0; |
| 198 | + w0_hi[0] = v0_hi[1]; |
| 199 | + w0_hi[1] = 2; |
| 200 | + w0_ld[0] = 3; |
| 201 | + NGA_Get(w0_hndl, w0_lo, w0_hi, w0data, w0_ld); |
| 202 | + |
| 203 | + buflen = BUFSIZE; /* Set buffer length */ |
| 204 | + |
| 205 | + /* Get state number for v1 by getting number of columns |
| 206 | + * on this process. (Array is distributed by row.) */ |
| 207 | + NGA_Distribution(v1_hndl, mpi_proc_rank, v1_lo, v1_hi); |
| 208 | + v1_cols = v1_hi[0] - v1_lo[0] + 1; |
| 209 | + /* Set values for v1_lo and v1_hi */ |
| 210 | + v1_lo[0] = 0; |
| 211 | + v1_lo[1] = 0; |
| 212 | + v1_hi[0] = v1_cols - 1; |
| 213 | + v1_hi[1] = dlen1 - 1; |
| 214 | + v1_rows = buflen; |
| 215 | + v1_ld[0] = v1_rows; |
| 216 | + /* Allocate local array buffer (buflen x v1_cols) */ |
| 217 | + v1data = allocate_mem_double_cont(&v1buff, v1_rows, v1_cols); |
| 218 | + |
| 219 | + /* Allocate W1 array */ |
| 220 | + w1data = allocate_mem_int_cont(&w1buff, 3, v1_rows); |
| 221 | + w1_lo[1] = 0; |
| 222 | + w1_hi[1] = 2; |
| 223 | + w1_ld[0] = 3; |
| 224 | + |
| 225 | + GA_Sync(); |
| 226 | + |
| 227 | + for (i1 = 0; i1 < dlen1; i1 += buflen) { |
| 228 | + |
| 229 | + i1max = int_min((i1 + buflen - 1), (dlen1 - 1)); |
133 | 230 |
|
134 |
| - int i0 = 0, i1 = 0; |
135 |
| - int i1max = 0; |
| 231 | + /* Get patch of V1 vectors and corresponding W1 info */ |
| 232 | + v1_lo[1] = i1; |
| 233 | + v1_hi[1] = i1max; |
| 234 | + v1_rows = v1_hi[1] - v1_lo[1] + 1; |
| 235 | + NGA_Get(v1_hndl, v1_lo, v1_hi, v1data, v1_ld); |
| 236 | + w1_lo[0] = i1; |
| 237 | + w1_hi[0] = i1max; |
| 238 | + //w1len = i1max - i1 + 1; |
| 239 | + NGA_Get(w1_hndl, w1_lo, w1_hi, w1data, w1_ld); |
136 | 240 |
|
137 |
| - int ninto = 0; /* Number of internal orbitals */ |
138 |
| - |
139 |
| - ninto = int_max(ninto0, ninto1); |
140 |
| - if (mpi_proc_rank == mpi_root) { |
141 |
| - printf(" Internal orbitals: %d\n", ninto); |
142 |
| - } |
143 |
| - |
144 |
| - /* Determine which blocks of data are locally owned for the N+1 |
145 |
| - * electron vector. This will be the "static" buffer for which |
146 |
| - * we compute the dyson orbital contributions. */ |
147 |
| - NGA_Distribution(v0_hndl, mpi_proc_rank, v0_lo, v0_hi); |
148 |
| - v0_cols = v0_hi[0] - v0_lo[0] + 1; |
149 |
| - v0_rows = v0_hi[1] - v0_lo[1] + 1; |
150 |
| - v0_ld[0]= v0_rows; |
151 |
| - /* Allocate local array */ |
152 |
| - v0data = allocate_mem_double_cont(&v0buff, v0_rows, v0_cols); |
153 |
| - NGA_Get(v0_hndl, v0_lo, v0_hi, v0data, v0_ld); |
154 |
| - /* Allocate corresponding W0 array and get W0 data */ |
155 |
| - w0data = allocate_mem_int_cont(&w0buff, 3, v0_rows); |
156 |
| - w0_lo[0] = v0_lo[1]; |
157 |
| - w0_lo[1] = 0; |
158 |
| - w0_hi[0] = v0_hi[1]; |
159 |
| - w0_hi[1] = 2; |
160 |
| - w0_ld[0] = 3; |
161 |
| - NGA_Get(w0_hndl, w0_lo, w0_hi, w0data, w0_ld); |
162 |
| - |
163 |
| - buflen = BUFSIZE; /* Set buffer length */ |
164 |
| - |
165 |
| - /* Get state number for v1 by getting number of columns |
166 |
| - * on this process. (Array is distributed by row.) */ |
167 |
| - NGA_Distribution(v1_hndl, mpi_proc_rank, v1_lo, v1_hi); |
168 |
| - v1_cols = v1_hi[0] - v1_lo[0] + 1; |
169 |
| - /* Set values for v1_lo and v1_hi */ |
170 |
| - v1_lo[0] = 0; |
171 |
| - v1_lo[1] = 0; |
172 |
| - v1_hi[0] = v1_cols - 1; |
173 |
| - v1_hi[1] = dlen1 - 1; |
174 |
| - v1_rows = buflen; |
175 |
| - v1_ld[0] = v1_rows; |
176 |
| - /* Allocate local array buffer (buflen x v1_cols) */ |
177 |
| - v1data = allocate_mem_double_cont(&v1buff, v1_rows, v1_cols); |
178 |
| - |
179 |
| - /* Allocate W1 array */ |
180 |
| - w1data = allocate_mem_int_cont(&w1buff, 3, v1_rows); |
181 |
| - w1_lo[1] = 0; |
182 |
| - w1_hi[1] = 2; |
183 |
| - w1_ld[0] = 3; |
184 |
| - |
185 |
| - GA_Sync(); |
186 |
| - |
187 |
| - for (i1 = 0; i1 < dlen1; i1 += buflen) { |
188 |
| - |
189 |
| - i1max = int_min((i1 + buflen - 1), (dlen1 - 1)); |
190 |
| - |
191 |
| - /* Get patch of V1 vectors and corresponding W1 info */ |
192 |
| - v1_lo[1] = i1; |
193 |
| - v1_hi[1] = i1max; |
194 |
| - v1_rows = v1_hi[1] - v1_lo[1] + 1; |
195 |
| - NGA_Get(v1_hndl, v1_lo, v1_hi, v1data, v1_ld); |
196 |
| - w1_lo[0] = i1; |
197 |
| - w1_hi[0] = i1max; |
198 |
| - //w1len = i1max - i1 + 1; |
199 |
| - NGA_Get(w1_hndl, w1_lo, w1_hi, w1data, w1_ld); |
200 |
| - |
201 |
| - compute_det_contributions(w0buff, v0buff, v0_rows, |
202 |
| - v0_cols, w1buff, v1buff, |
203 |
| - v1_rows, v1_cols, spindx1, spindx2, |
204 |
| - str0, str1, dysnst0, ndyst0, |
205 |
| - dysnst1, ndyst1, dyorb, ninto); |
206 |
| - |
207 |
| - } |
208 |
| - |
209 |
| - GA_Sync(); |
210 |
| - deallocate_mem_cont_int(&w0buff, w0data); |
211 |
| - deallocate_mem_cont_int(&w1buff, w1data); |
212 |
| - deallocate_mem_cont(&v0buff, v0data); |
213 |
| - deallocate_mem_cont(&v1buff, v1data); |
214 |
| - return; |
| 241 | + compute_det_contributions(w0buff, v0buff, v0_rows, |
| 242 | + v0_cols, w1buff, v1buff, |
| 243 | + v1_rows, v1_cols, spindx1, spindx2, |
| 244 | + str0, str1, dysnst0, ndyst0, |
| 245 | + dysnst1, ndyst1, dyorb, ninto); |
| 246 | + |
| 247 | + } |
| 248 | + |
| 249 | + GA_Sync(); |
| 250 | + deallocate_mem_cont_int(&w0buff, w0data); |
| 251 | + deallocate_mem_cont_int(&w1buff, w1data); |
| 252 | + deallocate_mem_cont(&v0buff, v0data); |
| 253 | + deallocate_mem_cont(&v1buff, v1data); |
| 254 | + return; |
215 | 255 | }
|
216 | 256 |
|
217 | 257 | /*
|
@@ -245,37 +285,47 @@ void compute_det_contributions(int **w0, double **v0, int v0_rows, int v0_cols,
|
245 | 285 | int *dyst0, int ndyst0, int *dyst1, int ndyst1,
|
246 | 286 | double **dyorb, int ninto)
|
247 | 287 | {
|
248 |
| - int i = 0, j = 0; |
249 |
| - int k = 0, l = 0; |
250 |
| - int dyind = 0; /* dyson orbital index */ |
251 |
| - int ndiff = 0; /* number of differences */ |
252 |
| - int orbindx = 0; /* orbital index of contribution */ |
253 |
| - |
254 |
| - /* Loop over determinants. N electrons must be in the same slots for |
255 |
| - * both determinants. The N+1 electron, in a unique slot, is the MO |
256 |
| - * index to which the matrix element contributes to the dyson orbital. */ |
257 |
| - for (i = 0; i < v0_rows; i++) { |
258 |
| - for (j = 0; j < v1_rows; j++) { |
259 |
| - /* If the other spin strings are different, skip. */ |
260 |
| - if (w0[i][spx] != w1[j][spx]) continue; |
261 |
| - orbindx = comparestrings_dyson(str0[w0[i][sp]], |
262 |
| - str1[w1[j][sp]], ninto); |
263 |
| - if (orbindx == 0) continue; |
264 |
| - |
265 |
| - /* Compute contributions to orbital (orbindx) for all |
266 |
| - * CI vectors involved in dyson orbitals */ |
267 |
| - dyind = 0; |
268 |
| - for (k = 0; k < ndyst0; k++) { |
269 |
| - for (l = 0; l < ndyst1; l++) { |
270 |
| - dyorb[dyind][(orbindx - 1)] = |
271 |
| - dyorb[dyind][(orbindx - 1)] + |
272 |
| - v0[dyst0[k]][i]* |
273 |
| - v1[dyst1[l]][j]; |
274 |
| - dyind++; |
275 |
| - } |
276 |
| - } |
277 |
| - } |
| 288 | + int i = 0, j = 0; |
| 289 | + int k = 0, l = 0; |
| 290 | + int dyind = 0; /* dyson orbital index */ |
| 291 | + int ndiff = 0; /* number of differences */ |
| 292 | + int orbindx = 0; /* orbital index of contribution */ |
| 293 | + |
| 294 | + /* Loop over determinants. N electrons must be in the same slots for |
| 295 | + * both determinants. The N+1 electron, in a unique slot, is the MO |
| 296 | + * index to which the matrix element contributes to the dyson orbital. */ |
| 297 | + |
| 298 | +#pragma omp parallel \ |
| 299 | + default(none) \ |
| 300 | + shared(w0,v0,v0_rows,v0_cols, \ |
| 301 | + w1,v1,v1_rows,v1_cols, \ |
| 302 | + sp, spx, str0, str1, dyst0, ndyst0, dyst1, ndyst1, \ |
| 303 | + dyorb,ninto) \ |
| 304 | + private(i, j, orbindx, ndiff, dyind, k, l) |
| 305 | + { |
| 306 | +#pragma omp for schedule(runtime) |
| 307 | + for (i = 0; i < v0_rows; i++) { |
| 308 | + for (j = 0; j < v1_rows; j++) { |
| 309 | + /* If the other spin strings are different, skip. */ |
| 310 | + if (w0[i][spx] != w1[j][spx]) continue; |
| 311 | + orbindx = comparestrings_dyson(str0[w0[i][sp]], |
| 312 | + str1[w1[j][sp]], ninto); |
| 313 | + if (orbindx == 0) continue; |
| 314 | + |
| 315 | + /* Compute contributions to orbital (orbindx) for all |
| 316 | + * CI vectors involved in dyson orbitals */ |
| 317 | + dyind = 0; |
| 318 | + for (k = 0; k < ndyst0; k++) { |
| 319 | + for (l = 0; l < ndyst1; l++) { |
| 320 | + dyorb[dyind][(orbindx - 1)] = |
| 321 | + dyorb[dyind][(orbindx - 1)] + |
| 322 | + v0[dyst0[k]][i]* |
| 323 | + v1[dyst1[l]][j]; |
| 324 | + dyind++; |
| 325 | + } |
| 326 | + } |
| 327 | + } |
278 | 328 | }
|
279 |
| - |
280 |
| - return; |
281 |
| -}; |
| 329 | + } |
| 330 | + return; |
| 331 | +} |
0 commit comments