Skip to content

Commit a6d9b75

Browse files
committed
Changing dyson orbital algo
1 parent 6bd6bbd commit a6d9b75

File tree

3 files changed

+210
-145
lines changed

3 files changed

+210
-145
lines changed

mpi_source/dysoncomp.c

+192-142
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,29 @@
2929
#endif
3030
/* -------------------- */
3131

32+
/*
33+
* build_ppo_triples: build (p0, p1, o) triples for dyson evaluation.
34+
*/
35+
int build_ppo_triples(int **ppo, struct occstr *str0, int nstr0,
36+
struct occstr *str1, int nstr1, int ninto)
37+
{
38+
int nppo = 0;
39+
int oindex = 0;
40+
int i, j;
41+
for (i = 0; i < nstr0; i++) {
42+
for (j = 0; j < nstr1; j++) {
43+
44+
oindex = comparestrings_dyson(str0[i], str1[j], ninto);
45+
if (oindex == 0) continue;
46+
ppo[nppo][0] = i;
47+
ppo[nppo][1] = j;
48+
ppo[nppo][2] = oindex;
49+
nppo++;
50+
}
51+
}
52+
return nppo;
53+
}
54+
3255
/*
3356
* comparestrings_dyson: compare the strings of N and N+1 determinants
3457
* to get the index of orbital contribution.
@@ -100,118 +123,135 @@ void compute_dyson_orbital(int w0_hndl, int dlen0, int w1_hndl, int dlen1,
100123
int norbs, int *dysnst0, int ndyst0, int *dysnst1,
101124
int ndyst1, int ndyorbs, double **dyorb)
102125
{
103-
#define BUFSIZE 1000
104-
int buflen = 0; /* Buffer length */
105-
106-
double **v0buff = NULL; /* N+1 electron ci buffer */
107-
double *v0data = NULL;
108-
double **v1buff = NULL; /* N electron ci buffer */
109-
double *v1data = NULL;
110-
int **w0buff = NULL; /* N+1 electron wf buffer */
111-
int *w0data = NULL;
112-
int **w1buff = NULL; /* N electron wf buffer */
113-
int *w1data = NULL;
126+
#define BUFSIZE 1000000
127+
#define MAXSIZE 1000000
128+
129+
int buflen = 0; /* Buffer length */
130+
131+
double **v0buff = NULL; /* N+1 electron ci buffer */
132+
double *v0data = NULL;
133+
double **v1buff = NULL; /* N electron ci buffer */
134+
double *v1data = NULL;
135+
int **w0buff = NULL; /* N+1 electron wf buffer */
136+
int *w0data = NULL;
137+
int **w1buff = NULL; /* N electron wf buffer */
138+
int *w1data = NULL;
139+
140+
int **ppo = NULL, *ppodata = NULL; /* (p0, p1, o-index) */
141+
int nppo = 0; /* Number of ppo triples */
142+
143+
int v0_lo[2] = {0, 0};
144+
int v0_hi[2] = {0, 0};
145+
int v0_ld[1] = {0};
146+
int v0_rows = 0;
147+
int v0_cols = 0; /* Rows and columns of v0 buffer */
148+
int v1_lo[2] = {0, 0};
149+
int v1_hi[2] = {0, 0};
150+
int v1_ld[1] = {0};
151+
int v1_rows = 0;
152+
int v1_cols = 0;
153+
154+
int w0_lo[2] = {0, 0};
155+
int w0_hi[2] = {0, 0};
156+
int w0_ld[1] = {0};
157+
int w1_lo[2] = {0, 0};
158+
int w1_hi[2] = {0, 0};
159+
int w1_ld[1] = {0};
160+
int w1len = 0;
161+
162+
int i0 = 0, i1 = 0;
163+
int i1max = 0;
164+
165+
int ninto = 0; /* Number of internal orbitals */
114166

115-
int v0_lo[2] = {0, 0};
116-
int v0_hi[2] = {0, 0};
117-
int v0_ld[1] = {0};
118-
int v0_rows = 0;
119-
int v0_cols = 0; /* Rows and columns of v0 buffer */
120-
int v1_lo[2] = {0, 0};
121-
int v1_hi[2] = {0, 0};
122-
int v1_ld[1] = {0};
123-
int v1_rows = 0;
124-
int v1_cols = 0;
125-
126-
int w0_lo[2] = {0, 0};
127-
int w0_hi[2] = {0, 0};
128-
int w0_ld[1] = {0};
129-
int w1_lo[2] = {0, 0};
130-
int w1_hi[2] = {0, 0};
131-
int w1_ld[1] = {0};
132-
int w1len = 0;
167+
int nstr0 = 106420; // HARDCODE
168+
int nstr1 = 98218; // HARDCODE
169+
170+
ninto = int_max(ninto0, ninto1);
171+
if (mpi_proc_rank == mpi_root) {
172+
printf(" Internal orbitals: %d\n", ninto);
173+
fflush(stdout);
174+
}
175+
176+
/* Set up (p0, p1, o-index) triples */
177+
if (mpi_proc_rank == mpi_rot) {
178+
printf(" Building ppo triples...\n");
179+
}
180+
ppodata = allocate_mem_int_cont(&ppo, 3, MAXSIZE);
181+
nppo = build_ppo_triples(ppo, str0, nstr0, str1, nstr1, ninto);
182+
183+
184+
/* Determine which blocks of data are locally owned for the N+1
185+
* electron vector. This will be the "static" buffer for which
186+
* we compute the dyson orbital contributions. */
187+
NGA_Distribution(v0_hndl, mpi_proc_rank, v0_lo, v0_hi);
188+
v0_cols = v0_hi[0] - v0_lo[0] + 1;
189+
v0_rows = v0_hi[1] - v0_lo[1] + 1;
190+
v0_ld[0]= v0_rows;
191+
/* Allocate local array */
192+
v0data = allocate_mem_double_cont(&v0buff, v0_rows, v0_cols);
193+
NGA_Get(v0_hndl, v0_lo, v0_hi, v0data, v0_ld);
194+
/* Allocate corresponding W0 array and get W0 data */
195+
w0data = allocate_mem_int_cont(&w0buff, 3, v0_rows);
196+
w0_lo[0] = v0_lo[1];
197+
w0_lo[1] = 0;
198+
w0_hi[0] = v0_hi[1];
199+
w0_hi[1] = 2;
200+
w0_ld[0] = 3;
201+
NGA_Get(w0_hndl, w0_lo, w0_hi, w0data, w0_ld);
202+
203+
buflen = BUFSIZE; /* Set buffer length */
204+
205+
/* Get state number for v1 by getting number of columns
206+
* on this process. (Array is distributed by row.) */
207+
NGA_Distribution(v1_hndl, mpi_proc_rank, v1_lo, v1_hi);
208+
v1_cols = v1_hi[0] - v1_lo[0] + 1;
209+
/* Set values for v1_lo and v1_hi */
210+
v1_lo[0] = 0;
211+
v1_lo[1] = 0;
212+
v1_hi[0] = v1_cols - 1;
213+
v1_hi[1] = dlen1 - 1;
214+
v1_rows = buflen;
215+
v1_ld[0] = v1_rows;
216+
/* Allocate local array buffer (buflen x v1_cols) */
217+
v1data = allocate_mem_double_cont(&v1buff, v1_rows, v1_cols);
218+
219+
/* Allocate W1 array */
220+
w1data = allocate_mem_int_cont(&w1buff, 3, v1_rows);
221+
w1_lo[1] = 0;
222+
w1_hi[1] = 2;
223+
w1_ld[0] = 3;
224+
225+
GA_Sync();
226+
227+
for (i1 = 0; i1 < dlen1; i1 += buflen) {
228+
229+
i1max = int_min((i1 + buflen - 1), (dlen1 - 1));
133230

134-
int i0 = 0, i1 = 0;
135-
int i1max = 0;
231+
/* Get patch of V1 vectors and corresponding W1 info */
232+
v1_lo[1] = i1;
233+
v1_hi[1] = i1max;
234+
v1_rows = v1_hi[1] - v1_lo[1] + 1;
235+
NGA_Get(v1_hndl, v1_lo, v1_hi, v1data, v1_ld);
236+
w1_lo[0] = i1;
237+
w1_hi[0] = i1max;
238+
//w1len = i1max - i1 + 1;
239+
NGA_Get(w1_hndl, w1_lo, w1_hi, w1data, w1_ld);
136240

137-
int ninto = 0; /* Number of internal orbitals */
138-
139-
ninto = int_max(ninto0, ninto1);
140-
if (mpi_proc_rank == mpi_root) {
141-
printf(" Internal orbitals: %d\n", ninto);
142-
}
143-
144-
/* Determine which blocks of data are locally owned for the N+1
145-
* electron vector. This will be the "static" buffer for which
146-
* we compute the dyson orbital contributions. */
147-
NGA_Distribution(v0_hndl, mpi_proc_rank, v0_lo, v0_hi);
148-
v0_cols = v0_hi[0] - v0_lo[0] + 1;
149-
v0_rows = v0_hi[1] - v0_lo[1] + 1;
150-
v0_ld[0]= v0_rows;
151-
/* Allocate local array */
152-
v0data = allocate_mem_double_cont(&v0buff, v0_rows, v0_cols);
153-
NGA_Get(v0_hndl, v0_lo, v0_hi, v0data, v0_ld);
154-
/* Allocate corresponding W0 array and get W0 data */
155-
w0data = allocate_mem_int_cont(&w0buff, 3, v0_rows);
156-
w0_lo[0] = v0_lo[1];
157-
w0_lo[1] = 0;
158-
w0_hi[0] = v0_hi[1];
159-
w0_hi[1] = 2;
160-
w0_ld[0] = 3;
161-
NGA_Get(w0_hndl, w0_lo, w0_hi, w0data, w0_ld);
162-
163-
buflen = BUFSIZE; /* Set buffer length */
164-
165-
/* Get state number for v1 by getting number of columns
166-
* on this process. (Array is distributed by row.) */
167-
NGA_Distribution(v1_hndl, mpi_proc_rank, v1_lo, v1_hi);
168-
v1_cols = v1_hi[0] - v1_lo[0] + 1;
169-
/* Set values for v1_lo and v1_hi */
170-
v1_lo[0] = 0;
171-
v1_lo[1] = 0;
172-
v1_hi[0] = v1_cols - 1;
173-
v1_hi[1] = dlen1 - 1;
174-
v1_rows = buflen;
175-
v1_ld[0] = v1_rows;
176-
/* Allocate local array buffer (buflen x v1_cols) */
177-
v1data = allocate_mem_double_cont(&v1buff, v1_rows, v1_cols);
178-
179-
/* Allocate W1 array */
180-
w1data = allocate_mem_int_cont(&w1buff, 3, v1_rows);
181-
w1_lo[1] = 0;
182-
w1_hi[1] = 2;
183-
w1_ld[0] = 3;
184-
185-
GA_Sync();
186-
187-
for (i1 = 0; i1 < dlen1; i1 += buflen) {
188-
189-
i1max = int_min((i1 + buflen - 1), (dlen1 - 1));
190-
191-
/* Get patch of V1 vectors and corresponding W1 info */
192-
v1_lo[1] = i1;
193-
v1_hi[1] = i1max;
194-
v1_rows = v1_hi[1] - v1_lo[1] + 1;
195-
NGA_Get(v1_hndl, v1_lo, v1_hi, v1data, v1_ld);
196-
w1_lo[0] = i1;
197-
w1_hi[0] = i1max;
198-
//w1len = i1max - i1 + 1;
199-
NGA_Get(w1_hndl, w1_lo, w1_hi, w1data, w1_ld);
200-
201-
compute_det_contributions(w0buff, v0buff, v0_rows,
202-
v0_cols, w1buff, v1buff,
203-
v1_rows, v1_cols, spindx1, spindx2,
204-
str0, str1, dysnst0, ndyst0,
205-
dysnst1, ndyst1, dyorb, ninto);
206-
207-
}
208-
209-
GA_Sync();
210-
deallocate_mem_cont_int(&w0buff, w0data);
211-
deallocate_mem_cont_int(&w1buff, w1data);
212-
deallocate_mem_cont(&v0buff, v0data);
213-
deallocate_mem_cont(&v1buff, v1data);
214-
return;
241+
compute_det_contributions(w0buff, v0buff, v0_rows,
242+
v0_cols, w1buff, v1buff,
243+
v1_rows, v1_cols, spindx1, spindx2,
244+
str0, str1, dysnst0, ndyst0,
245+
dysnst1, ndyst1, dyorb, ninto);
246+
247+
}
248+
249+
GA_Sync();
250+
deallocate_mem_cont_int(&w0buff, w0data);
251+
deallocate_mem_cont_int(&w1buff, w1data);
252+
deallocate_mem_cont(&v0buff, v0data);
253+
deallocate_mem_cont(&v1buff, v1data);
254+
return;
215255
}
216256

217257
/*
@@ -245,37 +285,47 @@ void compute_det_contributions(int **w0, double **v0, int v0_rows, int v0_cols,
245285
int *dyst0, int ndyst0, int *dyst1, int ndyst1,
246286
double **dyorb, int ninto)
247287
{
248-
int i = 0, j = 0;
249-
int k = 0, l = 0;
250-
int dyind = 0; /* dyson orbital index */
251-
int ndiff = 0; /* number of differences */
252-
int orbindx = 0; /* orbital index of contribution */
253-
254-
/* Loop over determinants. N electrons must be in the same slots for
255-
* both determinants. The N+1 electron, in a unique slot, is the MO
256-
* index to which the matrix element contributes to the dyson orbital. */
257-
for (i = 0; i < v0_rows; i++) {
258-
for (j = 0; j < v1_rows; j++) {
259-
/* If the other spin strings are different, skip. */
260-
if (w0[i][spx] != w1[j][spx]) continue;
261-
orbindx = comparestrings_dyson(str0[w0[i][sp]],
262-
str1[w1[j][sp]], ninto);
263-
if (orbindx == 0) continue;
264-
265-
/* Compute contributions to orbital (orbindx) for all
266-
* CI vectors involved in dyson orbitals */
267-
dyind = 0;
268-
for (k = 0; k < ndyst0; k++) {
269-
for (l = 0; l < ndyst1; l++) {
270-
dyorb[dyind][(orbindx - 1)] =
271-
dyorb[dyind][(orbindx - 1)] +
272-
v0[dyst0[k]][i]*
273-
v1[dyst1[l]][j];
274-
dyind++;
275-
}
276-
}
277-
}
288+
int i = 0, j = 0;
289+
int k = 0, l = 0;
290+
int dyind = 0; /* dyson orbital index */
291+
int ndiff = 0; /* number of differences */
292+
int orbindx = 0; /* orbital index of contribution */
293+
294+
/* Loop over determinants. N electrons must be in the same slots for
295+
* both determinants. The N+1 electron, in a unique slot, is the MO
296+
* index to which the matrix element contributes to the dyson orbital. */
297+
298+
#pragma omp parallel \
299+
default(none) \
300+
shared(w0,v0,v0_rows,v0_cols, \
301+
w1,v1,v1_rows,v1_cols, \
302+
sp, spx, str0, str1, dyst0, ndyst0, dyst1, ndyst1, \
303+
dyorb,ninto) \
304+
private(i, j, orbindx, ndiff, dyind, k, l)
305+
{
306+
#pragma omp for schedule(runtime)
307+
for (i = 0; i < v0_rows; i++) {
308+
for (j = 0; j < v1_rows; j++) {
309+
/* If the other spin strings are different, skip. */
310+
if (w0[i][spx] != w1[j][spx]) continue;
311+
orbindx = comparestrings_dyson(str0[w0[i][sp]],
312+
str1[w1[j][sp]], ninto);
313+
if (orbindx == 0) continue;
314+
315+
/* Compute contributions to orbital (orbindx) for all
316+
* CI vectors involved in dyson orbitals */
317+
dyind = 0;
318+
for (k = 0; k < ndyst0; k++) {
319+
for (l = 0; l < ndyst1; l++) {
320+
dyorb[dyind][(orbindx - 1)] =
321+
dyorb[dyind][(orbindx - 1)] +
322+
v0[dyst0[k]][i]*
323+
v1[dyst1[l]][j];
324+
dyind++;
325+
}
326+
}
327+
}
278328
}
279-
280-
return;
281-
};
329+
}
330+
return;
331+
}

mpi_source/include/dysoncomp.h

+12
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,18 @@
33
#ifndef dysoncomp_h
44
#define dysoncomp_h
55

6+
/*
7+
* build_ppo_triples: build (p0, p1, o) triples for dyson evaluation.
8+
*/
9+
int build_ppo_triples(int **ppo, struct occstr *str0, int nstr0,
10+
struct occstr *str1, int nstr1, int ninto);
11+
12+
/*
13+
* comparestrings_dyson: compare the strings of N and N+1 determinants
14+
* to get the index of orbital contribution.
15+
*/
16+
int comparestrings_dyson(struct occstr str0, struct occstr str1, int ninto);
17+
618
/*
719
* compute_dyson_orbital: compute the dyson orbital between electronic
820
* states of N+1 and N electron wavefuntions by comparing alpha/beta strings.

0 commit comments

Comments
 (0)