backsort.cc

Go to the documentation of this file.
00001 
00005 #include <stdio.h>
00006 #include <stdlib.h>
00007 #include <psifiles.h>
00008 #include <libciomr/libciomr.h>
00009 #include <libiwl/iwl.h>
00010 #include <psifiles.h>
00011 #include "MOInfo.h"
00012 #include "Params.h"
00013 #include "globals.h"
00014 
00015 #define INDEX(i,j) ((i>j) ? (ioff[(i)]+(j)) : (ioff[(j)]+(i)))
00016 #define MIN0(a,b) (((a)<(b)) ? (a) : (b))
00017 
00018 namespace psi { namespace transqt {
00019 
00020 int get_p(int i);
00021 
00022 /* backsort.c: This code prepares for and carries out a Yoshimine sort
00023 ** of the backtransformed (AO-basis) two-particle density into
00024 ** canonical shell ordering.  The input AO-basis density, G(ij,kl), is
00025 ** currently assumed to be eight-fold packed, i.e., i>=j, k>=l, and
00026 ** ij>=kl, where i, j, k, and l are AO indices.
00027 **
00028 ** The detailed steps of the sort are as follows:
00029 **
00030 ** (1) Sorting buffers are prepared by backsort_prep().  Each buffer
00031 ** will contain a group of shell quartets of density elements.  Since
00032 ** shell quartets have different sizes, e.g., (pp|pp) is smaller than
00033 ** (dp|dp), the number of quartets that can be stored in each sorting
00034 ** buffer must be computed explicitly.
00035 **
00036 ** (2) The AO-basis density generated by transform_two() (RHF/ROHF) or
00037 ** transform_two_bactr_uhf() (UHF) is dumped to the sorting buffers by
00038 ** backsort_write().
00039 **
00040 ** (3) The backsort() routine orders all the density elements within
00041 ** each sorting buffer according to 
00042 **
00043 */
00044 
00045 /* Globals needed for the post-backtransform sort */
00046 int nbuckets;        /* number of sorting buckets   */
00047 int *shell;          /* AO -> shell map             */
00048 int *shell_size;     /* AOs in shell                */
00049 int *pair_size;      /* Length of each shell pair   */
00050 int *bucket_map;     /* shell-pair -> sort bucket   */
00051 int *bucket_offset;  /* bucket -> quartet offset    */
00052 int *bucket_quarts;  /* no. of quartets in a bucket */
00053 int *bucket_firstpq; /* First pq in bucket          */
00054 int *bucket_lastpq;  /* Last pq in bucket           */
00055 
00056 void backsort_prep(int uhf)
00057 {
00058   int i, j, k, l, lend, size_i, size_j, size_k, size_l, size_ij;
00059   int this_pair_size, this_pair_sizeb, this_pair_quarts;
00060   long int core_left;  /* this must be *signed* */
00061   int nshell, *snuc, num_pairs;
00062   int ii, ij, ijkl, imin, imax;
00063 
00064   snuc = moinfo.snuc;
00065   nshell = moinfo.nshell;
00066   num_pairs = ioff[nshell];
00067 
00068   shell_size = init_int_array(nshell);
00069   for(i=0; i < nshell; i++) shell_size[i] = ioff[moinfo.stype[i]];
00070 
00071   pair_size = init_int_array(num_pairs);
00072   for(i=0,ij=0; i < nshell; i++) {
00073     size_i = shell_size[i];
00074     for(j=0; j <= i; j++,ij++) {
00075       size_j = shell_size[j];
00076 
00077       pair_size[ij] = size_i * size_j;
00078     }
00079   }
00080 
00081   bucket_map = init_int_array(nshell*(nshell+1)/2);
00082   shell = init_int_array(moinfo.nao);
00083 
00084   /* Make room for one bucket to begin with */
00085   bucket_offset = init_int_array(1);
00086   bucket_quarts = init_int_array(1);
00087   bucket_firstpq = init_int_array(1);
00088   bucket_lastpq = init_int_array(1);
00089   
00090   /* Figure out how many buckets we need and where each */
00091   /* shell-pair (in canonical shell-ordering) goes.     */
00092   core_left = params.maxcor;
00093   nbuckets = 1;
00094   for(i=0,ij=0,ijkl=0; i < nshell; i++) {
00095     size_i = shell_size[i];
00096 
00097     /* Generate the orbital -> shell lookup while we're here */
00098     imin = moinfo.sloc[i] - 1;
00099     imax = imin + shell_size[i];
00100     for(ii=imin; ii < imax; ii++) shell[ii] = i;
00101       
00102     for(j=0; j <= i; j++,ij++) {
00103       size_j = shell_size[j];
00104       size_ij = pair_size[ij];
00105 
00106       /* Number of density elements in this pair */
00107       this_pair_size = 0;
00108 
00109       /* Number of quartets in this pair */
00110       this_pair_quarts = 0;
00111                    
00112       for(k=0; k <= i; k++) {
00113         size_k = shell_size[k];
00114         lend = (k==i) ? j : k;
00115         for(l=0; l <= lend; l++,ijkl++) {
00116           size_l = shell_size[l];
00117 
00118           this_pair_size += size_ij * size_k * size_l;
00119           this_pair_quarts++;
00120 
00121         } /* l loop */
00122       } /* k loop */
00123 
00124       if(uhf) this_pair_size *= 2;
00125 
00126       /* Add 4 indices to size and convert to bytes */
00127       this_pair_sizeb = this_pair_size*(4*sizeof(int) + sizeof(double));
00128           
00129       if((core_left - (long int) this_pair_sizeb) >= 0) {
00130         core_left -= (long int) this_pair_sizeb;
00131         bucket_quarts[nbuckets-1] += this_pair_quarts;
00132         bucket_lastpq[nbuckets-1] = ij;
00133       }
00134       else {
00135         nbuckets++;
00136         core_left = params.maxcor - (long int) this_pair_sizeb;
00137         bucket_offset = (int *) realloc((void *) bucket_offset,
00138                                         nbuckets * sizeof(int));
00139         bucket_offset[nbuckets-1] = bucket_offset[nbuckets-2] +
00140           bucket_quarts[nbuckets-2];
00141 
00142         bucket_firstpq = (int *) realloc((void *) bucket_firstpq,
00143                                          nbuckets * sizeof(int));
00144         bucket_firstpq[nbuckets-1] = ij;
00145               
00146         bucket_lastpq = (int *) realloc((void *) bucket_lastpq,
00147                                         nbuckets * sizeof(int));
00148         bucket_lastpq[nbuckets-1] = ij;
00149               
00150         bucket_quarts = (int *) realloc((void *) bucket_quarts,
00151                                         nbuckets * sizeof(int));
00152         bucket_quarts[nbuckets-1] = this_pair_quarts;
00153       }
00154 
00155       bucket_map[ij] = nbuckets - 1;
00156 
00157     } /* j loop */
00158   } /* i loop */
00159 
00160 }
00161 
00162 /*
00163 ** backsort(): After all AO-basis twopdm elements have been
00164 ** distributed to sorting buckets by backsort_write(), sort each
00165 ** bucket into canonical shell ordering.  This involves X algorithm:
00166 **
00167 ** (1) Loop over the buckets.
00168 **
00169 ** (2) Allocate sufficient memory to store the values and indices for
00170 ** the current bucket.
00171 **
00172 ** (3) For each twopdm element in this bucket, assign it to the
00173 ** correct shell quartet.  (The order within the quartet does not
00174 ** matter.)
00175 **
00176 ** (4) After all elements for the current bucket have been placed,
00177 ** write them out in canonical shell-quartet order.  Mark the end of
00178 ** each quartet with all four AO indices "-1".
00179 **
00180 */
00181 
00182 void backsort(int first_tmp_file, double tolerance, int uhf)
00183 {
00184   int i, n, lastbuf, idx, nquarts, this_quartet, this_counter, *counter;
00185   int *snuc, sp, sq, sr, ss, send, size_r, size_s, size_pq;
00186   int p, q, r, s, pshell, qshell, rshell, sshell, pqshell, rsshell, pqrs;
00187   int pq, rs;
00188   int **pidx, **qidx, **ridx, **sidx;
00189   double **gamma, value;
00190   struct iwlbuf InBuf, OutBuf;
00191   int num_tpdm, quartet_size, lastq;
00192 
00193   snuc = moinfo.snuc;
00194 
00195   iwl_buf_init(&OutBuf, params.mfile, tolerance, 0, 0);
00196 
00197   num_tpdm = 0;
00198   for(n=0; n < nbuckets; n++) {
00199     iwl_buf_init(&InBuf, first_tmp_file+n, tolerance, 1, 0);
00200     lastbuf = 0;
00201 
00202     nquarts = bucket_quarts[n];
00203       
00204     pidx = (int **) malloc(nquarts * sizeof(int *));
00205     qidx = (int **) malloc(nquarts * sizeof(int *));
00206     ridx = (int **) malloc(nquarts * sizeof(int *));
00207     sidx = (int **) malloc(nquarts * sizeof(int *));
00208     gamma = (double **) malloc(nquarts * sizeof(double *));
00209     counter = init_int_array(nquarts);
00210 
00211     /* Compute quartet sizes for this bucket and allocate space */
00212     for(pq=bucket_firstpq[n],nquarts=0; pq <= bucket_lastpq[n]; pq++) {
00213 
00214       size_pq = pair_size[pq];
00215 
00216       for(r=0,rs=0; r < moinfo.nshell; r++) {
00217         size_r = shell_size[r];
00218         for(s=0; s <= r; s++,rs++) {
00219           size_s = shell_size[s];
00220 
00221           if(rs > pq) break;
00222 
00223           quartet_size = size_pq * size_r * size_s;
00224 
00225           if(uhf) quartet_size *= 2;
00226 
00227           pidx[nquarts] = init_int_array(quartet_size);
00228           qidx[nquarts] = init_int_array(quartet_size);
00229           ridx[nquarts] = init_int_array(quartet_size);
00230           sidx[nquarts] = init_int_array(quartet_size);
00231           gamma[nquarts] = init_array(quartet_size);
00232           nquarts++;
00233         }
00234       }
00235     }
00236 
00237     if(nquarts != bucket_quarts[n]) {
00238       printf("Quartet error: nquarts = %d; bucket_quarts[%d] = %d\n",
00239              nquarts, n, bucket_quarts[n]);
00240       exit(PSI_RETURN_FAILURE);
00241     }
00242 
00243     while(!lastbuf) {
00244       iwl_buf_fetch(&InBuf);
00245       lastbuf = InBuf.lastbuf;
00246 
00247       for(idx=4*InBuf.idx; InBuf.idx < InBuf.inbuf; InBuf.idx++) {
00248         p = (int) InBuf.labels[idx++];
00249         q = (int) InBuf.labels[idx++];
00250         r = (int) InBuf.labels[idx++];
00251         s = (int) InBuf.labels[idx++];
00252 
00253         value = (double) InBuf.values[InBuf.idx];
00254 
00255         pshell = shell[p]; qshell = shell[q];
00256         rshell = shell[r]; sshell = shell[s];
00257 
00258         /* Skip this quartet if on one center */
00259         if(snuc[pshell] == snuc[qshell] &&
00260            snuc[pshell] == snuc[rshell] &&
00261            snuc[pshell] == snuc[sshell])
00262           continue;
00263 
00264         pqshell = INDEX(pshell,qshell); rsshell = INDEX(rshell,sshell);
00265         pqrs = INDEX(pqshell,rsshell);
00266 
00267         pq = INDEX(p,q);  rs = INDEX(r,s);
00268 
00269         this_quartet = pqrs - bucket_offset[n];
00270         this_counter = counter[this_quartet];
00271 
00272         pidx[this_quartet][this_counter] = p;
00273         qidx[this_quartet][this_counter] = q;
00274         ridx[this_quartet][this_counter] = r;
00275         sidx[this_quartet][this_counter] = s;
00276         gamma[this_quartet][this_counter] = value;
00277 
00278         counter[this_quartet]++;
00279 
00280         /* Now run through the appropriate permutations */
00281         if(pqshell != rsshell) {
00282           if(pshell != qshell) {
00283             if(rshell == sshell) {
00284               if(qshell != rshell) { /* (pq|rr) */
00285                 if(r!=s) {
00286                   this_counter = counter[this_quartet];
00287                   pidx[this_quartet][this_counter] = p;
00288                   qidx[this_quartet][this_counter] = q;
00289                   ridx[this_quartet][this_counter] = s;
00290                   sidx[this_quartet][this_counter] = r;
00291                   gamma[this_quartet][this_counter] = value;
00292                   counter[this_quartet]++;
00293                 }
00294               }
00295               else {  /* (pq|qq) */
00296                 if(r!=s) {
00297                   this_counter = counter[this_quartet];
00298                   pidx[this_quartet][this_counter] = p;
00299                   qidx[this_quartet][this_counter] = q;
00300                   ridx[this_quartet][this_counter] = s;
00301                   sidx[this_quartet][this_counter] = r;
00302                   gamma[this_quartet][this_counter] = value;
00303                   counter[this_quartet]++;
00304                 }
00305               }
00306             }
00307           }
00308           else {
00309             if(rshell != sshell) {
00310               if(qshell != rshell) { /* (pp|rs) */
00311                 if(p!=q) {
00312                   this_counter = counter[this_quartet];
00313                   pidx[this_quartet][this_counter] = q;
00314                   qidx[this_quartet][this_counter] = p;
00315                   ridx[this_quartet][this_counter] = r;
00316                   sidx[this_quartet][this_counter] = s;
00317                   gamma[this_quartet][this_counter] = value;
00318                   counter[this_quartet]++;
00319                 }
00320               }
00321               else { /* (pp|ps) */
00322                 if(p!=q) {
00323                   this_counter = counter[this_quartet];
00324                   pidx[this_quartet][this_counter] = q;
00325                   qidx[this_quartet][this_counter] = p;
00326                   ridx[this_quartet][this_counter] = r;
00327                   sidx[this_quartet][this_counter] = s;
00328                   gamma[this_quartet][this_counter] = value;
00329                   counter[this_quartet]++;
00330                 }
00331               }
00332             }
00333             else {
00334               if(qshell != rshell) { /* (pp|rr) */
00335                 if(p!=q && r!=s) {
00336                   this_counter = counter[this_quartet];
00337                   pidx[this_quartet][this_counter] = q;
00338                   qidx[this_quartet][this_counter] = p;
00339                   ridx[this_quartet][this_counter] = r;
00340                   sidx[this_quartet][this_counter] = s;
00341                   gamma[this_quartet][this_counter] = value;
00342                   counter[this_quartet]++;
00343                                   
00344                   this_counter = counter[this_quartet];
00345                   pidx[this_quartet][this_counter] = p;
00346                   qidx[this_quartet][this_counter] = q;
00347                   ridx[this_quartet][this_counter] = s;
00348                   sidx[this_quartet][this_counter] = r;
00349                   gamma[this_quartet][this_counter] = value;
00350                   counter[this_quartet]++;
00351                                   
00352                   this_counter = counter[this_quartet];
00353                   pidx[this_quartet][this_counter] = q;
00354                   qidx[this_quartet][this_counter] = p;
00355                   ridx[this_quartet][this_counter] = s;
00356                   sidx[this_quartet][this_counter] = r;
00357                   gamma[this_quartet][this_counter] = value;
00358                   counter[this_quartet]++;
00359                 }
00360                 else if(p!=q && r==s) {
00361                   this_counter = counter[this_quartet];
00362                   pidx[this_quartet][this_counter] = q;
00363                   qidx[this_quartet][this_counter] = p;
00364                   ridx[this_quartet][this_counter] = r;
00365                   sidx[this_quartet][this_counter] = s;
00366                   gamma[this_quartet][this_counter] = value;
00367                   counter[this_quartet]++;
00368                 }
00369                 else if(p==q && r!=s) {
00370                   this_counter = counter[this_quartet];
00371                   pidx[this_quartet][this_counter] = p;
00372                   qidx[this_quartet][this_counter] = q;
00373                   ridx[this_quartet][this_counter] = s;
00374                   sidx[this_quartet][this_counter] = r;
00375                   gamma[this_quartet][this_counter] = value;
00376                   counter[this_quartet]++;
00377                 }
00378               }
00379               else {
00380                 exit(PSI_RETURN_FAILURE);
00381               }
00382             }
00383           }
00384         }
00385         else { /* pqshell == rsshell */
00386           if(pshell != qshell) { /* (pq|pq) */
00387             if(pq != rs) {
00388               this_counter = counter[this_quartet];
00389               pidx[this_quartet][this_counter] = r;
00390               qidx[this_quartet][this_counter] = s;
00391               ridx[this_quartet][this_counter] = p;
00392               sidx[this_quartet][this_counter] = q;
00393               gamma[this_quartet][this_counter] = value;
00394               counter[this_quartet]++;
00395             }
00396           }
00397           else { /* (pp|pp) */
00398             /* This shouldn't actually occur because of the
00399                snuc[] filter above */
00400             exit(PSI_RETURN_FAILURE);
00401           }
00402         }
00403       }
00404     }
00405 
00406     iwl_buf_close(&InBuf, 0);
00407 
00408     /* Now flush each quartet in order */
00409     for(pq=bucket_firstpq[n],nquarts=0; pq <= bucket_lastpq[n]; pq++) {
00410 
00411       /* compute pshell and qshell from pq */
00412       p = get_p(pq);
00413       q = pq-ioff[p];
00414 
00415       for(r=0,rs=0; r < moinfo.nshell; r++) {
00416         for(s=0; s <= r; s++,rs++) {
00417 
00418           if(rs > pq) break;
00419 
00420           iwl_buf_wrt_arr(&OutBuf, gamma[nquarts],
00421                           pidx[nquarts], qidx[nquarts],
00422                           ridx[nquarts], sidx[nquarts],
00423                           counter[nquarts]);
00424 
00425           /*
00426           fprintf(outfile, "%d %d %d %d  count = %d\n", p, q, r, s, counter[nquarts]);
00427           for(i=0; i < counter[nquarts]; i++) {
00428             fprintf(outfile, "%d %d %d %d gamma = %20.12f\n", 
00429                     pidx[nquarts][i], qidx[nquarts][i],
00430                     ridx[nquarts][i], sidx[nquarts][i],
00431                     gamma[nquarts][i]);
00432           }
00433           */
00434 
00435           /* Mark the end of the shell quartet */
00436           if(!(snuc[p] == snuc[q] && snuc[r] == snuc[s] && snuc[p] == snuc[r])) {
00437             value = 9.9999999999;
00438             iwl_buf_wrt_val(&OutBuf, -1, -1, -1, -1, value, 0, outfile, 0);
00439             /*      fprintf(outfile, "-1 -1 -1 -1 gamma = %20.12f\n", value); */
00440           }
00441 
00442           num_tpdm += counter[nquarts]+1;
00443 
00444           nquarts++;
00445         }
00446       }
00447     }
00448 
00449     /* Free the sort arrays */
00450     for(i=0; i < bucket_quarts[n]; i++) {
00451       free(pidx[i]);  free(qidx[i]); free(ridx[i]);  free(sidx[i]);
00452       free(gamma[i]);
00453     }
00454     free(pidx); free(qidx); free(ridx); free(sidx);
00455     free(gamma); free(counter);
00456       
00457   } /* end of bucket loop */
00458 
00459   iwl_buf_flush(&OutBuf, 1);
00460   iwl_buf_close(&OutBuf, 1);
00461 
00462   /*  fprintf(outfile, "num_tpdm = %d\n", num_tpdm); */
00463 
00464   /* Free up the global data */
00465   free(shell);
00466   free(shell_size);
00467   free(pair_size);
00468   free(bucket_map);
00469   free(bucket_offset);
00470   free(bucket_quarts);
00471   free(bucket_firstpq);
00472   free(bucket_lastpq);
00473 }
00474 
00475 void backsort_write(int i, int j, double **A, int kfirst, int klast,
00476                     int lfirst, int llast, int printflag,FILE *outfile,
00477                     struct iwlbuf *twopdm_out, int uhf)
00478 {
00479 
00480   int k,l,K,L,ij,kl;
00481   int shell_i, shell_j, shell_ij, ij_bucket;
00482   int shell_k, shell_l, shell_kl, kl_bucket;
00483   double value;
00484 
00485   ij = INDEX(i,j);
00486 
00487   shell_i = shell[i];
00488   shell_j = shell[j];
00489   shell_ij = INDEX(shell_i, shell_j);
00490   ij_bucket = bucket_map[shell_ij];
00491 
00492   for (k=kfirst,K=0; k <= klast; k++,K++) {
00493     for (l=lfirst,L=0; l <= llast && l<=k ; l++,L++) {
00494       value = A[K][L];
00495 
00496       kl = INDEX(k,l);
00497       if (kl > ij && !uhf) continue;
00498       else if(kl != ij && uhf) value *= 0.5;
00499 
00500       if (printflag) {
00501         fprintf(outfile, ">%d %d %d %d [%d] [%d] = %20.10lf\n", 
00502                 i, j, k, l, ij, kl, value);
00503       }
00504 
00505       shell_k = shell[k];
00506       shell_l = shell[l];
00507       shell_kl = INDEX(shell_k, shell_l);
00508       kl_bucket = bucket_map[shell_kl];
00509 
00510       if(shell_ij >= shell_kl) {
00511         if(shell_i >= shell_j) {
00512           if(shell_k >= shell_l) {
00513             iwl_buf_wrt_val(&twopdm_out[ij_bucket], i, j, k, l,
00514                             value,0,outfile,0);
00515           }
00516           else
00517             iwl_buf_wrt_val(&twopdm_out[ij_bucket], i, j, l, k,
00518                             value,0,outfile,0);
00519         }
00520         else {
00521           if(shell_k >= shell_l) {
00522             iwl_buf_wrt_val(&twopdm_out[ij_bucket], j, i, k, l,
00523                             value,0,outfile,0);
00524           }
00525           else {
00526             iwl_buf_wrt_val(&twopdm_out[ij_bucket], j, i, l, k,
00527                             value,0,outfile,0);
00528           }
00529         }
00530       }
00531       else {
00532         if(shell_k >= shell_l) {
00533           if(shell_i >= shell_j) {
00534             iwl_buf_wrt_val(&twopdm_out[kl_bucket], k, l, i, j,
00535                             value,0,outfile,0);
00536           }
00537           else {
00538             iwl_buf_wrt_val(&twopdm_out[kl_bucket], k, l, j, i,
00539                             value,0,outfile,0);
00540           }
00541         }
00542         else {
00543           if(shell_i >= shell_j) {
00544             iwl_buf_wrt_val(&twopdm_out[kl_bucket], l, k, i, j,
00545                             value,0,outfile,0);
00546           }
00547           else {
00548             iwl_buf_wrt_val(&twopdm_out[kl_bucket], l, k, j, i,
00549                             value,0,outfile,0);
00550           }
00551         }
00552       }
00553     }
00554   }
00555 }
00556 
00557 
00558 /* int get_p(): For a given canonical index pair, pq, computed using
00559 ** the standard ioff array, compute the first index p of the pair.
00560 ** The second index, q, may be subsequently computed using 
00561 ** q = pq - ioff[p].
00562 */
00563 
00564 int get_p(int pq)
00565 {
00566   int i, p;
00567 
00568   for(i=0; i < 32641; i++) 
00569     if(ioff[i] > pq) { p = i-1; break; }
00570 
00571   return p;
00572 }
00573 
00574 }} // end namespace psi::transqt
00575 

Generated on Wed Feb 13 16:35:39 2008 for PSI by  doxygen 1.5.4