blas.cc

00001 /***************************************************************************
00002  *  PSIMRCC : Copyright (C) 2007 by Francesco Evangelista and Andrew Simmonett
00003  *  frank@ccc.uga.edu   andysim@ccc.uga.edu
00004  *  A multireference coupled cluster code
00005  ***************************************************************************/
00006 
00007 #include "calculation_options.h"
00008 #include "blas.h"
00009 #include "memory_manager.h"
00010 #include "debugging.h"
00011 #include "moinfo.h"
00012 #include "utilities.h"
00013 #include <algorithm>
00014 
00015 #include <libciomr/libciomr.h>
00016 #include <libqt/qt.h>
00017 
00018 extern FILE *infile, *outfile;
00019 
00020 namespace psi{ namespace psimrcc{
00021 
00022 using namespace std;
00023 
00024 CCBLAS::CCBLAS():
00025 full_in_core(false),work_size(0)
00026 {
00027   init();
00028 }
00029 
00030 
00031 CCBLAS::~CCBLAS()
00032 {
00033   cleanup();
00034 }
00035 
00036 void CCBLAS::init()
00037 {
00038   add_indices();
00039   allocate_work();
00040   allocate_buffer();
00041 }
00042 
00043 void CCBLAS::cleanup()
00044 {
00045   free_sortmap();
00046   free_buffer();
00047   free_work();
00048   free_matrices();
00049   free_indices();
00050 }
00051 
00052 void CCBLAS::allocate_work()
00053 {
00054   // Make sure work is empty
00055   if(!work.empty())
00056     for(int n=0;n<work.size();n++)
00057       if(work[n]!=NULL)
00058         delete[] work[n];
00059 
00060   for(int n=0;n<options->get_int_option("NUM_THREADS");n++)
00061     work.push_back(NULL);
00062   // Compute the temporary work space size
00063   CCIndex* oo_pair = get_index("[oo]");
00064   CCIndex* vv_pair = get_index("[vv]");
00065   work_size = 0;
00066   for(int h=0;h<moinfo->get_nirreps();h++){
00067     if(oo_pair->get_pairpi(h)<=vv_pair->get_pairpi(h))
00068       work_size += oo_pair->get_pairpi(h)*vv_pair->get_pairpi(h);
00069     else
00070       work_size += oo_pair->get_pairpi(h)*oo_pair->get_pairpi(h);
00071   }
00072   // Allocate the temporary work space
00073   for(int n=0;n<options->get_int_option("NUM_THREADS");n++){
00074     allocate1(double,work[n],work_size);
00075 //     work[n]=new double[work_size]; MEM_
00076     zero_arr(work[n],work_size);
00077   }
00078   mem->add_allocated_memory(to_MB(work_size));
00079 }
00080 
00081 void CCBLAS::allocate_buffer()
00082 {
00083   // Make sure buffer is empty
00084   if(!buffer.empty())
00085     for(int n=0;n<buffer.size();n++)
00086       if(buffer[n]!=NULL)
00087         delete[] buffer[n];
00088 
00089   for(int n=0;n<options->get_int_option("NUM_THREADS");n++)
00090     buffer.push_back(NULL);
00091   // Compute the temporary buffer space size, 101% of the actual strip size
00092   buffer_size = 1.01*mem->get_integral_strip_size() / to_MB(1);
00093   // Allocate the temporary buffer space
00094   for(int n=0;n<options->get_int_option("NUM_THREADS");n++){
00095     buffer[n]=new double[buffer_size];
00096     zero_arr(buffer[n],buffer_size);
00097   }
00098   mem->add_allocated_memory(to_MB(buffer_size));
00099 }
00100 
00101 void CCBLAS::free_sortmap()
00102 {
00103   for(SortMap::iterator iter=sortmap.begin();iter!=sortmap.end();++iter){
00104     for(int irrep=0;irrep<moinfo->get_nirreps();irrep++)
00105       delete[] iter->second[irrep];
00106     delete[] iter->second;
00107   }
00108 }
00109 
00110 void CCBLAS::free_work()
00111 {
00112   // Delete the temporary work space
00113   for(int n=0;n<work.size();n++){
00114     if(work[n]!=NULL){
00115       release1(work[n]);
00116 //        delete[] work[n]; MEM_
00117     }
00118   }
00119 }
00120 
00121 void CCBLAS::free_buffer()
00122 {
00123   // Delete the temporary buffer space
00124   for(int n=0;n<buffer.size();n++){
00125     if(buffer[n]!=NULL){
00126       delete[] buffer[n];
00127     }
00128   }
00129 }
00130 
00131 void CCBLAS::free_matrices()
00132 {
00133   for(MatrixMap::iterator iter=matrices.begin();iter!=matrices.end();++iter){
00134     delete iter->second;
00135   }
00136 }
00137 
00138 void CCBLAS::free_indices()
00139 {
00140   for(IndexMap::iterator iter=indices.begin();iter!=indices.end();++iter){ 
00141     delete iter->second;
00142   }
00143 }
00144 
00145 void CCBLAS::add_indices()
00146 {
00147   add_index("[]");
00148   add_index("[o]");
00149   add_index("[v]");
00150   add_index("[a]");
00151   add_index("[o>o]");
00152   add_index("[v>v]");
00153   add_index("[v>=v]");
00154   add_index("[oo]");
00155   add_index("[ov]");
00156   add_index("[vo]");
00157   add_index("[vv]");
00158   add_index("[aa]");
00159   add_index("[aaa]");
00160   add_index("[ooo]");
00161   add_index("[oov]");
00162   add_index("[voo]");
00163   add_index("[ovv]");
00164   add_index("[vvo]");
00165   add_index("[ovo]");
00166   if(options->get_str_option("CORR_WFN")!="PT2"){
00167     add_index("[vvv]");
00168   }
00169 
00170   // Mk-MRPT2
00171   add_index("[ao]");
00172   add_index("[av]");
00173 
00174   // Not useful
00175   add_index("[oa]");
00176   add_index("[va]");
00177 
00178 
00179 }
00180 
00181 // void CCBLAS::allocate_matrices_in_core()
00182 // {
00183 //   for(MatrixMap::iterator iter=matrices.begin();iter!=matrices.end();++iter){
00184 //     CCMatrix* Matrix = iter->second;
00185 //     fprintf(outfile,"\n%s(analyzing)",Matrix->get_label().c_str());
00186 //     fflush(outfile);
00187 //     if(Matrix->get_out_of_core()){
00188 //       Matrix->load();
00189 //       fprintf(outfile,"\n%s <- reading from disk",Matrix->get_label().c_str());
00190 //       fflush(outfile);
00191 //     }else if(!Matrix->is_allocated())
00192 //       Matrix->allocate_memory();
00193 //   }
00194 // }
00195 
00196 void CCBLAS::print(char* cstr)
00197 {
00198   string str(cstr);
00199   vector<string> names = moinfo->get_matrix_names(str);
00200   for(int n=0;n<names.size();n++)
00201     print_ref(names[n]);
00202 }
00203 
00204 void CCBLAS::print_ref(string& str)
00205 {
00206   get_Matrix(str)->print();
00207 }
00208 
00209 void CCBLAS::print_memory()
00210 {
00211   double total_memory_required =0.0;
00212   fprintf(outfile,"\n\n\t-----------------------------------------------------------------------------");
00213   fprintf(outfile,"\n\tMatrix ID    Memory(MB)   Cumulative Memory(MB)  Accessed    Label");
00214   fprintf(outfile,"\n\t------------------------------------------------------------------------------");
00215 
00216   for(MatrixMap::iterator iter=matrices.begin();iter!=matrices.end();++iter){
00217     total_memory_required += iter->second->get_memory();
00218     fprintf(outfile,"\n\t%4d",distance(matrices.begin(),iter));
00219     fprintf(outfile,"     %10.2f",iter->second->get_memory());
00220     fprintf(outfile,"        %10.2f",total_memory_required);
00221     fprintf(outfile,"             %4d",iter->second->get_naccess());
00222     fprintf(outfile,"         %s",iter->second->get_label().c_str());
00223   }
00224   fprintf(outfile,"\n\t------------------------------------------------------------------------------");
00225   fprintf(outfile,"\n\n\tTotal memory required for matrices = %10.2f (Mb)\n",total_memory_required);
00226 
00227   total_memory_required =0.0;
00228 
00229   fprintf(outfile,"\n\n\t-------------------------------------------------------------");
00230   fprintf(outfile,"\n\tIndex ID    Memory(MB)   Cumulative Memory(MB)     Label");
00231   fprintf(outfile,"\n\t--------------------------------------------------------------");
00232 
00233   for(IndexMap::iterator iter=indices.begin();iter!=indices.end();++iter){
00234     total_memory_required += iter->second->get_memory();
00235     fprintf(outfile,"\n\t%4d",distance(indices.begin(),iter));
00236     fprintf(outfile,"     %10.2f",iter->second->get_memory());
00237     fprintf(outfile,"        %10.2f",total_memory_required);
00238     fprintf(outfile,"         %s",iter->second->get_label().c_str());
00239   }
00240   fprintf(outfile,"\n\t--------------------------------------------------------------");
00241 
00242   fprintf(outfile,"\n\n\tTotal memory required for indexing = %10.2f (Mb)\n",total_memory_required);
00243 }
00244 
00248 int CCBLAS::compute_storage_strategy()
00249 {
00250   fprintf(outfile,"\n#CC ----------------------------------");
00251   fprintf(outfile,"\n#CC    Computing Storage Strategy");
00252   fprintf(outfile,"\n#CC ----------------------------------");
00253 
00254   double available_memory     = mem->get_free_memory();
00255   double fraction_for_in_core = 0.97; // Fraction of the total available memory that may be used
00256   double storage_memory       = available_memory * fraction_for_in_core;
00257   double fully_in_core_memory = 0.0;
00258   double integrals_memory     = 0.0;
00259   double fock_memory          = 0.0;
00260   double others_memory        = 0.0;
00261 
00262   fprintf(outfile,"\n#CC Input memory                                = %10.2f Mb",mem->get_total_memory());
00263   fprintf(outfile,"\n#CC Free memory                                 = %10.2f Mb",available_memory);
00264   fprintf(outfile,"\n#CC Free memory available for matrices          = %10.2f Mb (%3.0f%s of free_memory)",storage_memory,fraction_for_in_core*100.0,"%");
00265 
00266   // Gather the memory requirements for all the CCMAtrix object
00267   // and divide the integrals from all the other matrices.
00268   // At the same time compute the memory requirements for
00269   // a fully in-core algorithm.
00270   vector<pair<double,pair<CCMatrix*,int> > > integrals;
00271   vector<pair<double,pair<CCMatrix*,int> > > fock;
00272   vector<pair<double,pair<CCMatrix*,int> > > others;
00273   for(MatrixMap::iterator it=matrices.begin();it!=matrices.end();++it){
00274     for(int h=0;h<moinfo->get_nirreps();++h){
00275       double block_memory = it->second->get_memorypi(h);
00276       if(it->second->is_integral()){
00277         integrals.push_back(make_pair(block_memory,make_pair(it->second,h)));
00278         integrals_memory += block_memory;
00279       }else if(it->second->is_fock()){
00280         fock.push_back(make_pair(block_memory,make_pair(it->second,h)));
00281         fock_memory += block_memory;
00282       }else{
00283         others.push_back(make_pair(block_memory,make_pair(it->second,h)));
00284         others_memory += block_memory;
00285       }
00286       fully_in_core_memory += block_memory;
00287     }
00288   }
00289   fprintf(outfile,"\n#CC Memory required by fock matrices            = %10.2f Mb",fock_memory);
00290   fprintf(outfile,"\n#CC Memory required by integrals                = %10.2f Mb",integrals_memory);
00291   fprintf(outfile,"\n#CC Memory required by other matrices           = %10.2f Mb",others_memory);
00292   fprintf(outfile,"\n#CC Memory required for full in-core algorithm  = %10.2f Mb",fully_in_core_memory);
00293 
00294   // Check if you may use a fully in core algorithm
00295   full_in_core = false;
00296   int strategy = 0;
00297   if(fully_in_core_memory < storage_memory ){
00298     full_in_core = true;
00299     fprintf(outfile,"\n#CC PSIMRCC will perform a full in-core computation");
00300     strategy = 0;
00301   }else{
00302     if(others_memory < storage_memory ){
00303       fprintf(outfile,"\n#CC PSIMRCC will store some integrals out-of-core");
00304       strategy = 1;
00305     }else{
00306       fprintf(outfile,"\n#CC PSIMRCC will store all integrals and some other matrices out-of-core");
00307       strategy = 2;
00308       fprintf(outfile,"\n#CC CCBLAS::compute_storage_strategy(): Strategy #2 is not implemented yet");
00309       fflush(outfile);
00310       exit(1);
00311     }
00312   }
00313   sort(integrals.begin(),integrals.end());
00314   sort(others.begin(),others.end());
00315   for(int i=0;i<fock.size();i++){
00316     // Store all the fock matrices in core and allocate them
00317     storage_memory -= fock[i].first;
00318     load_irrep(fock[i].second.first,fock[i].second.second);
00319   }
00320   // Let the CCBlas class worry about allocating matrices
00321   int number_of_others_on_disk = 0;
00322   for(int i=0;i<others.size();i++){
00323     // Check if this matrix can be stored in core
00324     if(others[i].first < storage_memory){
00325       storage_memory -= others[i].first;
00326       load_irrep(others[i].second.first,others[i].second.second);
00327     }else{
00328       number_of_others_on_disk++;
00329     }
00330   }
00331   int number_of_integrals_on_disk = 0;
00332   for(int i=0;i<integrals.size();i++){
00333     // Check if this matrix can be stored in core
00334     if(integrals[i].first < storage_memory){
00335       storage_memory -= integrals[i].first;
00336       load_irrep(integrals[i].second.first,integrals[i].second.second);
00337     }else{
00338       number_of_integrals_on_disk++;
00339     }
00340   }
00341 
00342   DEBUGGING(1,
00343     fprintf(outfile,"\n#CC -------------------- Fock matrices -------------------------");
00344     for(int i=0;i<fock.size();i++){
00345       if(fock[i].first > 1.0e-5){
00346         fprintf(outfile,"\n#CC %-32s irrep %d   %6.2f Mb --> ",fock[i].second.first->get_label().c_str(),
00347                                                       fock[i].second.second,
00348                                                       fock[i].first);
00349         fprintf(outfile,"%s",fock[i].second.first->is_block_allocated(fock[i].second.second) ? "in-core" : "out-of-core");
00350       }
00351     }
00352     fprintf(outfile,"\n#CC -------------------- Other matrices ------------------------");    
00353     for(int i=0;i<others.size();i++){
00354       if(others[i].first > 1.0e-5){
00355         fprintf(outfile,"\n#CC %-32s irrep %d   %6.2f Mb --> ",others[i].second.first->get_label().c_str(),
00356                                                       others[i].second.second,
00357                                                       others[i].first);
00358         fprintf(outfile,"%s",others[i].second.first->is_block_allocated(others[i].second.second) ? "in-core" : "out-of-core");
00359       }
00360     }
00361     fprintf(outfile,"\n#CC -------------------- Integrals -----------------------------");
00362     for(int i=0;i<integrals.size();i++){
00363       if(integrals[i].first > 1.0e-5){
00364         fprintf(outfile,"\n#CC %-32s irrep %d   %6.2f Mb --> ",integrals[i].second.first->get_label().c_str(),
00365                                                       integrals[i].second.second,
00366                                                       integrals[i].first);
00367         fprintf(outfile,"%s",integrals[i].second.first->is_block_allocated(integrals[i].second.second) ? "in-core" : "out-of-core");
00368       }
00369     }
00370     fprintf(outfile,"\n\n");
00371   );
00372 
00373   if(!full_in_core){
00374     fprintf(outfile,"\n#CC Out-of-core algorithm will store %d other matrices on disk",number_of_others_on_disk);
00375     fprintf(outfile,"\n#CC Out-of-core algorithm will store %d integrals on disk",number_of_integrals_on_disk);
00376   }
00377   return(strategy);
00378 }
00379 
00383 void CCBLAS::show_storage()
00384 {
00385   DEBUGGING(1,
00386     fprintf(outfile,"\n#CC ----------------------------------");
00387     fprintf(outfile,"\n#CC            Show Storage ");
00388     fprintf(outfile,"\n#CC ----------------------------------");
00389 
00390     for(MatrixMap::iterator it=matrices.begin();it!=matrices.end();++it){
00391       for(int h=0;h<moinfo->get_nirreps();++h){
00392         double block_memory = it->second->get_memorypi(h);
00393         fprintf(outfile,"\n#CC %-32s irrep %d   %6.2f Mb",it->second->get_label().c_str(),h,block_memory);
00394         fprintf(outfile," is %s",it->second->is_block_allocated(h) ? "allocated" : "not allocated");
00395         fprintf(outfile,"%s",it->second->is_out_of_core(h) ? "(out-of-core)" : "");
00396   
00397       }
00398     }
00399   )
00400 }
00401 
00402 }} /* End Namespaces */

Generated on Wed Feb 13 16:35:39 2008 for PSI by  doxygen 1.5.4