Changing dyson orbital algo

cthree-40 · cthree-40 · commit a6d9b75f57c2 · 2020-09-04T11:31:25.000-07:00
diff --git a/mpi_source/dysoncomp.c b/mpi_source/dysoncomp.c
@@ -29,6 +29,29 @@
 #endif
 /* -------------------- */
 
+/*
+ * build_ppo_triples: build (p0, p1, o) triples for dyson evaluation.
+ * Every pair (str0[p0], str1[p1]) is compared; a nonzero result o from
+ * comparestrings_dyson() stores {p0, p1, o} in the next row of ppo.
+ * Returns the row count. ppo is not bounds-checked; caller sizes it.
+ */
+int build_ppo_triples(int **ppo, struct occstr *str0, int nstr0,
+		      struct occstr *str1, int nstr1, int ninto)
+{
+    int count = 0, p0, p1, orb;
+    for (p0 = 0; p0 < nstr0; p0++) {
+	for (p1 = 0; p1 < nstr1; p1++) {
+	    orb = comparestrings_dyson(str0[p0], str1[p1], ninto);
+	    if (orb == 0) continue;
+	    int *row = ppo[count++];
+	    row[0] = p0;
+	    row[1] = p1;
+	    row[2] = orb;
+	}
+    }
+    return count;
+}
+
 /*
  * comparestrings_dyson: compare the strings of N and N+1 determinants
  * to get the index of orbital contribution.
@@ -100,118 +123,138 @@ void compute_dyson_orbital(int w0_hndl, int dlen0, int w1_hndl, int dlen1,
                            int norbs,  int *dysnst0, int ndyst0, int *dysnst1,
                            int ndyst1, int ndyorbs, double **dyorb)
 {
-#define BUFSIZE 1000
-        int buflen = 0;         /* Buffer length */
-        
-        double **v0buff = NULL; /* N+1 electron ci buffer */
-        double *v0data  = NULL;
-        double **v1buff = NULL; /* N   electron ci buffer */
-        double *v1data  = NULL;
-        int **w0buff    = NULL; /* N+1 electron wf buffer */
-        int  *w0data    = NULL;
-        int **w1buff    = NULL; /* N   electron wf buffer */
-        int  *w1data    = NULL;
+#define BUFSIZE 1000000
+#define MAXSIZE 1000000
+    
+    int buflen = 0;         /* Buffer length */
+    
+    double **v0buff = NULL; /* N+1 electron ci buffer */
+    double *v0data  = NULL;
+    double **v1buff = NULL; /* N   electron ci buffer */
+    double *v1data  = NULL;
+    int **w0buff    = NULL; /* N+1 electron wf buffer */
+    int  *w0data    = NULL;
+    int **w1buff    = NULL; /* N   electron wf buffer */
+    int  *w1data    = NULL;
+    
+    int **ppo = NULL, *ppodata = NULL; /* (p0, p1, o-index) */
+    int nppo = 0; /* Number of ppo triples */
+    
+    int v0_lo[2] = {0, 0};
+    int v0_hi[2] = {0, 0};
+    int v0_ld[1] = {0};
+    int v0_rows = 0;
+    int v0_cols = 0;        /* Rows and columns of v0 buffer */
+    int v1_lo[2] = {0, 0};
+    int v1_hi[2] = {0, 0};
+    int v1_ld[1] = {0};
+    int v1_rows = 0;
+    int v1_cols = 0;
+    
+    int w0_lo[2] = {0, 0};
+    int w0_hi[2] = {0, 0};
+    int w0_ld[1] = {0};
+    int w1_lo[2] = {0, 0};
+    int w1_hi[2] = {0, 0};
+    int w1_ld[1] = {0};
+    int w1len = 0;
+    
+    int i0 = 0, i1 = 0;
+    int i1max = 0;
+    
+    int ninto = 0; /* Number of internal orbitals */
 
-        int v0_lo[2] = {0, 0};
-        int v0_hi[2] = {0, 0};
-        int v0_ld[1] = {0};
-        int v0_rows = 0;
-        int v0_cols = 0;        /* Rows and columns of v0 buffer */
-        int v1_lo[2] = {0, 0};
-        int v1_hi[2] = {0, 0};
-        int v1_ld[1] = {0};
-        int v1_rows = 0;
-        int v1_cols = 0;
-        
-        int w0_lo[2] = {0, 0};
-        int w0_hi[2] = {0, 0};
-        int w0_ld[1] = {0};
-        int w1_lo[2] = {0, 0};
-        int w1_hi[2] = {0, 0};
-        int w1_ld[1] = {0};
-        int w1len = 0;
+    int nstr0 = 106420; // HARDCODE
+    int nstr1 =  98218; // HARDCODE
+    
+    ninto = int_max(ninto0, ninto1);
+    if (mpi_proc_rank == mpi_root) {
+	printf(" Internal orbitals: %d\n", ninto);
+	fflush(stdout);
+    }
+    
+    /* Set up (p0, p1, o-index) triples */
+    if (mpi_proc_rank == mpi_root) {
+	printf(" Building ppo triples...\n");
+    }
+    /* NOTE(review): build_ppo_triples() does not bound its writes, so
+     * MAXSIZE must cover every triple the hardcoded nstr0/nstr1 can yield. */
+    ppodata = allocate_mem_int_cont(&ppo, 3, MAXSIZE);
+    nppo = build_ppo_triples(ppo, str0, nstr0, str1, nstr1, ninto);
+    
+    
+    /* Determine which blocks of data are locally owned for the N+1
+     * electron vector. This will be the "static" buffer for which
+     * we compute the dyson orbital contributions. */
+    NGA_Distribution(v0_hndl, mpi_proc_rank, v0_lo, v0_hi);
+    v0_cols = v0_hi[0] - v0_lo[0] + 1;
+    v0_rows = v0_hi[1] - v0_lo[1] + 1;
+    v0_ld[0]= v0_rows;
+    /* Allocate local array */
+    v0data = allocate_mem_double_cont(&v0buff, v0_rows, v0_cols);
+    NGA_Get(v0_hndl, v0_lo, v0_hi, v0data, v0_ld);
+    /* Allocate corresponding W0 array and get W0 data */
+    w0data = allocate_mem_int_cont(&w0buff, 3, v0_rows);
+    w0_lo[0] = v0_lo[1];
+    w0_lo[1] = 0;
+    w0_hi[0] = v0_hi[1];
+    w0_hi[1] = 2;
+    w0_ld[0] = 3;
+    NGA_Get(w0_hndl, w0_lo, w0_hi, w0data, w0_ld);
+    
+    buflen = BUFSIZE; /* Set buffer length */
+    
+    /* Get state number for v1 by getting number of columns
+     * on this process. (Array is distributed by row.) */
+    NGA_Distribution(v1_hndl, mpi_proc_rank, v1_lo, v1_hi);
+    v1_cols = v1_hi[0] - v1_lo[0] + 1;
+    /* Set values for v1_lo and v1_hi */
+    v1_lo[0] = 0;
+    v1_lo[1] = 0;
+    v1_hi[0] = v1_cols - 1;
+    v1_hi[1] = dlen1 - 1;
+    v1_rows = buflen;
+    v1_ld[0] = v1_rows;
+    /* Allocate local array buffer (buflen x v1_cols) */
+    v1data = allocate_mem_double_cont(&v1buff, v1_rows, v1_cols);
+    
+    /* Allocate W1 array */
+    w1data = allocate_mem_int_cont(&w1buff, 3, v1_rows);
+    w1_lo[1] = 0;
+    w1_hi[1] = 2;
+    w1_ld[0] = 3;
+    
+    GA_Sync();
+    
+    for (i1 = 0; i1 < dlen1; i1 += buflen) {
+	
+	i1max = int_min((i1 + buflen - 1), (dlen1 - 1));
         
-        int i0 = 0, i1 = 0;
-        int i1max = 0;
+	/* Get patch of V1 vectors and corresponding W1 info */
+	v1_lo[1] = i1;
+	v1_hi[1] = i1max;
+	v1_rows = v1_hi[1] - v1_lo[1] + 1;
+	NGA_Get(v1_hndl, v1_lo, v1_hi, v1data, v1_ld);
+	w1_lo[0] = i1;
+	w1_hi[0] = i1max;
+	//w1len = i1max - i1 + 1;
+	NGA_Get(w1_hndl, w1_lo, w1_hi, w1data, w1_ld);
         
-        int ninto = 0; /* Number of internal orbitals */
-
-        ninto = int_max(ninto0, ninto1);
-        if (mpi_proc_rank == mpi_root) {
-                printf(" Internal orbitals: %d\n", ninto);
-        }
-
-        /* Determine which blocks of data are locally owned for the N+1
-         * electron vector. This will be the "static" buffer for which
-         * we compute the dyson orbital contributions. */
-        NGA_Distribution(v0_hndl, mpi_proc_rank, v0_lo, v0_hi);
-        v0_cols = v0_hi[0] - v0_lo[0] + 1;
-        v0_rows = v0_hi[1] - v0_lo[1] + 1;
-        v0_ld[0]= v0_rows;
-        /* Allocate local array */
-        v0data = allocate_mem_double_cont(&v0buff, v0_rows, v0_cols);
-        NGA_Get(v0_hndl, v0_lo, v0_hi, v0data, v0_ld);
-        /* Allocate corresponding W0 array and get W0 data */
-        w0data = allocate_mem_int_cont(&w0buff, 3, v0_rows);
-        w0_lo[0] = v0_lo[1];
-        w0_lo[1] = 0;
-        w0_hi[0] = v0_hi[1];
-        w0_hi[1] = 2;
-        w0_ld[0] = 3;
-        NGA_Get(w0_hndl, w0_lo, w0_hi, w0data, w0_ld);
-        
-        buflen = BUFSIZE; /* Set buffer length */
-
-        /* Get state number for v1 by getting number of columns
-         * on this process. (Array is distributed by row.) */
-        NGA_Distribution(v1_hndl, mpi_proc_rank, v1_lo, v1_hi);
-        v1_cols = v1_hi[0] - v1_lo[0] + 1;
-        /* Set values for v1_lo and v1_hi */
-        v1_lo[0] = 0;
-        v1_lo[1] = 0;
-        v1_hi[0] = v1_cols - 1;
-        v1_hi[1] = dlen1 - 1;
-        v1_rows = buflen;
-        v1_ld[0] = v1_rows;
-        /* Allocate local array buffer (buflen x v1_cols) */
-        v1data = allocate_mem_double_cont(&v1buff, v1_rows, v1_cols);
-
-        /* Allocate W1 array */
-        w1data = allocate_mem_int_cont(&w1buff, 3, v1_rows);
-        w1_lo[1] = 0;
-        w1_hi[1] = 2;
-        w1_ld[0] = 3;
-
-        GA_Sync();
-
-        for (i1 = 0; i1 < dlen1; i1 += buflen) {
-
-                i1max = int_min((i1 + buflen - 1), (dlen1 - 1));
-                
-                /* Get patch of V1 vectors and corresponding W1 info */
-                v1_lo[1] = i1;
-                v1_hi[1] = i1max;
-                v1_rows = v1_hi[1] - v1_lo[1] + 1;
-                NGA_Get(v1_hndl, v1_lo, v1_hi, v1data, v1_ld);
-                w1_lo[0] = i1;
-                w1_hi[0] = i1max;
-                //w1len = i1max - i1 + 1;
-                NGA_Get(w1_hndl, w1_lo, w1_hi, w1data, w1_ld);
-                
-                compute_det_contributions(w0buff,  v0buff, v0_rows,
-                                          v0_cols, w1buff, v1buff,
-                                          v1_rows, v1_cols, spindx1, spindx2,
-                                          str0, str1, dysnst0, ndyst0,
-                                          dysnst1, ndyst1, dyorb, ninto);
-                
-        }
-        
-        GA_Sync();
-        deallocate_mem_cont_int(&w0buff, w0data);
-        deallocate_mem_cont_int(&w1buff, w1data);
-        deallocate_mem_cont(&v0buff, v0data);
-        deallocate_mem_cont(&v1buff, v1data);
-        return;
+	compute_det_contributions(w0buff,  v0buff, v0_rows,
+				  v0_cols, w1buff, v1buff,
+				  v1_rows, v1_cols, spindx1, spindx2,
+				  str0, str1, dysnst0, ndyst0,
+				  dysnst1, ndyst1, dyorb, ninto);
+	
+    }
+    
+    GA_Sync();
+    deallocate_mem_cont_int(&w0buff, w0data);
+    deallocate_mem_cont_int(&w1buff, w1data);
+    deallocate_mem_cont_int(&ppo, ppodata);
+    deallocate_mem_cont(&v0buff, v0data);
+    deallocate_mem_cont(&v1buff, v1data);
+    return;
 }
 
 /*
@@ -245,37 +285,49 @@ void compute_det_contributions(int **w0, double **v0, int v0_rows, int v0_cols,
                                int *dyst0, int ndyst0, int *dyst1, int ndyst1,
                                double **dyorb, int ninto)
 {
-        int i = 0, j = 0;
-        int k = 0, l = 0;
-        int dyind = 0;      /* dyson orbital index */
-        int ndiff = 0;      /* number of differences */
-        int orbindx = 0;    /* orbital index of contribution */
-
-        /* Loop over determinants. N electrons must be in the same slots for
-         * both determinants. The N+1 electron, in a unique slot, is the MO
-         * index to which the matrix element contributes to the dyson orbital. */
-        for (i = 0; i < v0_rows; i++) {
-                for (j = 0; j < v1_rows; j++) {
-                        /* If the other spin strings are different, skip. */
-                        if (w0[i][spx] != w1[j][spx]) continue;
-                        orbindx = comparestrings_dyson(str0[w0[i][sp]],
-                                                       str1[w1[j][sp]], ninto);
-                        if (orbindx == 0) continue;
-
-                        /* Compute contributions to orbital (orbindx) for all
-                         * CI vectors involved in dyson orbitals */
-                        dyind = 0;
-                        for (k = 0; k < ndyst0; k++) {
-                                for (l = 0; l < ndyst1; l++) {
-                                        dyorb[dyind][(orbindx - 1)] =
-                                                dyorb[dyind][(orbindx - 1)] +
-                                                v0[dyst0[k]][i]*
-                                                v1[dyst1[l]][j];
-                                        dyind++;
-                                }
-                        }
-                }
+    int i = 0, j = 0;
+    int k = 0, l = 0;
+    int dyind = 0;      /* dyson orbital index */
+    int ndiff = 0;      /* number of differences */
+    int orbindx = 0;    /* orbital index of contribution */
+    
+    /* Loop over determinants. N electrons must be in the same slots for
+     * both determinants. The N+1 electron, in a unique slot, is the MO
+     * index to which the matrix element contributes to the dyson orbital. */
+    
+#pragma omp parallel					      \
+    default(none)					      \
+    shared(w0,v0,v0_rows,v0_cols,			      \
+	   w1,v1,v1_rows,v1_cols,			      \
+	   sp, spx, str0, str1, dyst0, ndyst0, dyst1, ndyst1, \
+	   dyorb,ninto)					      \
+    private(i, j, orbindx, ndiff, dyind, k, l)
+    {
+#pragma omp for schedule(runtime)
+	for (i = 0; i < v0_rows; i++) {
+	    for (j = 0; j < v1_rows; j++) {
+		/* If the other spin strings are different, skip. */
+		if (w0[i][spx] != w1[j][spx]) continue;
+		orbindx = comparestrings_dyson(str0[w0[i][sp]],
+					       str1[w1[j][sp]], ninto);
+		if (orbindx == 0) continue;
+		
+		/* Compute contributions to orbital (orbindx) for all
+		 * CI vectors involved in dyson orbitals */
+		dyind = 0;
+		for (k = 0; k < ndyst0; k++) {
+		    for (l = 0; l < ndyst1; l++) {
+			/* dyorb is shared and different i iterations can
+			 * reach the same element, so update it atomically. */
+#pragma omp atomic
+			dyorb[dyind][(orbindx - 1)] +=
+			    v0[dyst0[k]][i]*
+			    v1[dyst1[l]][j];
+			dyind++;
+		    }
+		}
+	    }
         }
-
-        return;
-};
+    }
+    return;
+}
diff --git a/mpi_source/include/dysoncomp.h b/mpi_source/include/dysoncomp.h
@@ -3,6 +3,18 @@
 #ifndef dysoncomp_h
 #define dysoncomp_h
 
+/*
+ * build_ppo_triples: build (p0, p1, o) dyson triples in ppo; returns count.
+ */
+int build_ppo_triples(int **ppo, struct occstr *str0, int nstr0,
+		      struct occstr *str1, int nstr1, int ninto);
+
+/*
+ * comparestrings_dyson: compare the strings of N and N+1 determinants
+ * to get the index of orbital contribution (callers skip a result of 0).
+ */
+int comparestrings_dyson(struct occstr str0, struct occstr str1, int ninto);
+
 /*
  * compute_dyson_orbital: compute the dyson orbital between electronic
  * states of N+1 and N electron wavefuntions by comparing alpha/beta strings.
diff --git a/mpi_source/run_pdycicalc.c b/mpi_source/run_pdycicalc.c