let3d_mpi.cpp

Go to the documentation of this file.
00001 /* Kernel Independent Fast Multipole Method
00002    Copyright (C) 2004 Lexing Ying, New York University
00003 
00004 This program is free software; you can redistribute it and/or modify
00005 it under the terms of the GNU General Public License as published by
00006 the Free Software Foundation; either version 2, or (at your option)
00007 any later version.
00008 
00009 This program is distributed in the hope that it will be useful, but WITHOUT
00010 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00011 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
00012 for more details.
00013 
00014 You should have received a copy of the GNU General Public License
00015 along with this program; see the file COPYING.  If not, write to the Free
00016 Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
00017 02111-1307, USA.  */
00018 #include "let3d_mpi.hpp"
00019 
00020 using std::min;
00021 using std::max;
00022 using std::set;
00023 using std::queue;
00024 using std::ofstream;
00025 using std::cerr;
00026 using std::endl;
00027 
00028 //-----------------------------------------------------------------------
00029 Let3d_MPI::Let3d_MPI(const string& p):
00030   ComObject_MPI(p), _srcPos(NULL), _trgPos(NULL), _ctr(0.0), _rootLevel(0),
00031   _ptsMax(150), _maxLevel(10)
00032 {
00033 }
00034 
00035 Let3d_MPI::~Let3d_MPI()
00036 {
00037 }
00038 
00039 // ----------------------------------------------------------------------
00040 /* Setup builds the basic structures needed for processing source data and building target data */
00041 #undef __FUNCT__
00042 #define __FUNCT__ "Let3d_MPI::setup"
00043 int Let3d_MPI::setup()
00044 {
00045   //begin
00046   //--------
00047   PetscTruth flg = PETSC_FALSE;
00048   /* Get the maximum number of points per leaf level box and max number of levels in the tree */
00049   pC( PetscOptionsGetInt( prefix().c_str(), "-ptsMax",   &_ptsMax,   &flg) ); pA(flg==true);
00050   pC( PetscOptionsGetInt( prefix().c_str(), "-maxLevel", &_maxLevel, &flg) ); pA(flg==true);
00051   //--------
00052   pC( srcData() );  
00053   pC( trgData() );  
00054   return(0);
00055 }
00056 
00057 // ----------------------------------------------------------------------
00058 /* This function computes variables for source data computation
00059  * as well as constructing and partitioning the local essential tree
00060  * among processors
00061  */
00062 #undef __FUNCT__
00063 #define __FUNCT__ "Let3d_MPI::srcData"
00064 int Let3d_MPI::srcData()
00065 {
00066   /* Set the sample DENSITY to 1/10 th of the original positions */
00067   double DENSITY = 1.0/10.0;
00068   
00069   /* sample data with density - local sample positions vector */
00070   vector<Point3> lclSamPosVec;
00071   {
00072          Vec pos = _srcPos;
00073     double* posArr; pC( VecGetArray(pos, &posArr) );
00074          /* number of local positions for this processor */
00075          int numLclPos = procLclNum(pos);
00076          /* number of local sample positions for this processor */
00077          int numLclSamPos = int(ceil(procLclNum(pos) * DENSITY));
00078          /* sample a fraction of the original sources and
00079           * store these points in lclSamPosVec - local sample positions vector */
00080          for(int k=0; k<numLclSamPos; k++) {
00081                 int tmpIdx = int(floor(drand48() * numLclPos));
00082                 Point3 tmppt(posArr + tmpIdx*dim());
00083                 lclSamPosVec.push_back( tmppt );
00084          }
00085          pA(lclSamPosVec.size()==numLclSamPos); /* Verify that the local sample positions vector is the size we want */
00086          pC( VecRestoreArray(pos, &posArr) ); /* Restore the full global vector of positions */
00087   } /* end of this scope - psArr, numLclcPos, numLclSamPos, pos no longer available */
00088 
00089   
00090   
00091   /* all procs send sample positions to proc 0 */
00092   vector<Point3> allSamPosVec; /* will store all sample positions for all processors */
00093   {
00094          int numLcl = lclSamPosVec.size(); /* number = numLclSamPos */
00095          int sizLcl = numLcl * dim(); /* local size = number of local sample positions times the dimension (3) */
00096 
00097          /* vector storing size of local vectors.  i.e., if mpisize()==2
00098           * sizLclVec is a vector of length 2, where sizLclVec[i] stores the
00099           * number of sample positions at the processor of rank i */
00100          vector<int> sizLclVec(mpiSize()); 
00101          /* MPI_Gather information:  http://www-unix.mcs.anl.gov/mpi/www/www3/MPI_Gather.html
00102          * Here, we're gathering all sizLcl values at the buffer located in memory at &(sizLclVec[0]) onr int at a time */
00103          pC( MPI_Gather(&sizLcl, 1, MPI_INT, &(sizLclVec[0]), 1, MPI_INT, 0, mpiComm()) );
00104          
00105          int sizGlb = 0; /* sizGlb will collect all sizLcl values */
00106          for(int i=0; i<sizLclVec.size(); i++){
00107                 sizGlb += sizLclVec[i];
00108          }
00109          /* number of global sample positions for all processors */
00110          int numGlb = sizGlb / dim(); 
00111          /* displacement global vector - tells how far into allSamPosVec need to go to get to processor i's samples */
00112          vector<int> disGlbVec(mpiSize()); 
00113          int cnt = 0;
00114          for(int i=0; i<sizLclVec.size(); i++) {
00115                 disGlbVec[i] = cnt;
00116                 cnt += sizLclVec[i];     }
00117          pA(cnt==sizGlb);
00118          
00119          if(mpiRank()==0)               allSamPosVec.resize(numGlb); /* allSamPosVec stores numGlb points of type Point3 */
00120          /* Information on MPI_Gatherv http://www-unix.mcs.anl.gov/mpi/www/www3/MPI_Gatherv.html
00121          * Like MPI_Gather - all lclSamPosVec points are sent to the allSamPosVec vector.  sizLclVec[i] and disGlbVec[i] will
00122          * indicate how to navigate allSamPosVec for processor i */
00123          pC( MPI_Gatherv(&(lclSamPosVec[0]), sizLcl, MPI_DOUBLE, &(allSamPosVec[0]), &(sizLclVec[0]), &(disGlbVec[0]), MPI_DOUBLE, 0, mpiComm()) );
00124          
00125          lclSamPosVec.clear();
00126   } /* end of this scope - sizLclcVec and disGlbVec no longer available */
00127   
00128   /* processor 0  builds the tree 
00129         * The tree is build based on sample positions since we will
00130         * have a max number of points per box.  Sampling positions for
00131         * LET building makes the build faster */
00132   vector<Node>& nodeVec = this->nodeVec(); nodeVec.clear();
00133   if(mpiRank()==0) {
00134          /* vector of vector of indices */
00135          vector< vector<int> > vecIdxs;
00136          /* local source number - number of source points in each box */
00137          vector<int> lclSrcNumVec;
00138 
00139          /* Push the root node.  No parent or child, path2Node = (0,0,0), depth 0 */
00140          nodeVec.push_back( Node(-1,-1,Index3(0,0,0), 0) );
00141          vecIdxs.push_back( vector<int>() );
00142          /* vecIdxs[0] will store all indices put into curVecIdxs */
00143          vector<int>& curVecIdxs = vecIdxs[0];
00144          Point3 bbmin(ctr()-Point3(radius()));
00145          Point3 bbmax(ctr()+Point3(radius()));
00146          /* For all sample points, make sure that it is within the root's radius distance from the center of the tree */
00147          for(int k=0; k<allSamPosVec.size(); k++) {
00148                 Point3 tmp(allSamPosVec[k]);
00149                 /* Assertion kills implementation at runtime of point not within prescribed box */
00150                 pA( tmp>=bbmin && tmp<=bbmax );
00151                 curVecIdxs.push_back(k);
00152          }
00153          /* the local number of sources vector stores the size of all sample points in the 0 position for the root */
00154          lclSrcNumVec.push_back( curVecIdxs.size() );
00155 
00156          /* Sample points max number of points per leaf box */
00157          int PTSMAX = (int)ceil(ptsMax() * DENSITY);
00158          
00159          int level = 0;
00160          int arrBeg = 0;
00161          int arrEnd = 1;
00162          while(arrBeg < arrEnd) {
00163                 int arrCnt = arrEnd;
00164                 /* In this for loop, we build each level as necessary
00165                 * arrBeg will be set to the number of points already parsed after this for loop*/
00166                 for(int k=arrBeg; k<arrEnd; k++) {
00167                   //--
00168                   /* lclSrcNumVec[0] is already set to the size of all of the sample positions within radius of the center
00169                   * So, lclSrcNumVec[k] at each level is set such that the maximum number of points is in the subsequent
00170                   * children boxes at a maximum level */
00171                   if( lclSrcNumVec[k]>PTSMAX && level<maxLevel()-1) {
00172                          /* the current node's child locator is set to arrCnt (the number of boxes already built)
00173                           * For example, when k = 0, arrCnt = 1.  The location of the root's lead child */
00174                          nodeVec[k].chd() = arrCnt;
00175                          /* Increment arrCnt by 8 */
00176                          arrCnt = arrCnt + pow2(dim());
00177                          /* Build all 8 of nodeVec[k]'s children */
00178                          for(int a=0; a<2; a++) {
00179                                 for(int b=0; b<2; b++) {
00180                                   for(int c=0; c<2; c++) {
00181                                          /* Create a new node with parent at location "k"
00182                                           * child initially set to -1 (changed when new node is looked at)
00183                                           * path set to 2*"k's path" + the binary index of the new node's location relative to k
00184                                           * depth is set to "k's" depth + 1
00185                                           */
00186                                          nodeVec.push_back( Node(k,-1, 2*nodeVec[k].path2Node()+Index3(a,b,c), nodeVec[k].depth()+1) );
00187                                          /* push a new vector of integers onto vecIdxs */
00188                                          vecIdxs.push_back( vector<int>() );
00189                                          /* Push an extra level onto lclSrcNumVec - set to zero for now and will be changed later */
00190                                          lclSrcNumVec.push_back( 0 );
00191                                   }
00192                                 }
00193                          }
00194                          /* children's vector of indices */ 
00195                          Point3 curctr( center(k) ); /* get center of current node */
00196                          /* vecIdxs[k] stores indices of sample points that node k has in it
00197                           * for vecIdxs[0], all of the sample points indices are stored */
00198                          for(vector<int>::iterator pi=vecIdxs[k].begin(); pi!=vecIdxs[k].end(); pi++) {
00199                                 /* Build a point out of the index *pi which is in the current node's vector of indices */
00200                                 Point3 tmp( allSamPosVec[*pi] );
00201                                 Index3 idx;
00202                                 /* Build an index based on the point tmp's location relative to the center of the node k */
00203                                 for(int j=0; j<dim(); j++)
00204                                   idx(j) = (tmp(j) >= curctr(j));
00205                                 /* return the index of the child of node k located at idx */
00206                                 int chdgNodeIdx = child(k, idx);
00207                                 /* put the point references by *pi into the index of points that the node at chgNodeIdx has access to */
00208                                 vecIdxs[chdgNodeIdx].push_back(*pi);
00209                          }
00210                          vecIdxs[k].clear(); //VERY IMPORTANT
00211                          /* children's lclSrcNum */
00212                          for(int a=0; a<2; a++) {
00213                                 for(int b=0; b<2; b++) {
00214                                   for(int c=0; c<2; c++) {
00215                                          int chdgNodeIdx = child( k, Index3(a,b,c) );
00216                                          /* the local number of sources for the node at chdgNodeIdx is the size of its vector of indices */
00217                                          lclSrcNumVec[chdgNodeIdx] = vecIdxs[chdgNodeIdx].size();
00218                                   }
00219                                 } 
00220                          }
00221                   } /* end of if */
00222                 } /* end of for */
00223                 /* the previous level has been built so increment the levels and update arr counters */
00224                 level++;
00225                 arrBeg = arrEnd;
00226                 arrEnd = arrCnt;
00227          } /* end of while */
00228          
00229          allSamPosVec.clear(); //SAVE SPACE
00230   } /* End of building the tree on processor zero */
00231   /* end of this scope.  everything in this scope, including lclSrcNumVec and vecIdxs no longer available */
00232   
00233   /* processor 0 sends the tree to all processors */
00234   {
00235          int size = nodeVec.size();
00236          /* Broadcast the size of NodeVec to all processes.  Then, brodcast
00237           * the address of the beginning of nodeVec to all processor.
00238           * More information at 
00239           * http://www-unix.mcs.anl.gov/mpi/www/www3/MPI_Bcast.html
00240           */
00241          pC( MPI_Bcast(&size, 1, MPI_INT, 0, mpiComm()) );
00242          /* Nust be resized for parallel operation, so that all nodeVec's of same size.  Does nothing for uniprocessor operation */
00243          nodeVec.resize(size, Node(0,0,Index3(0,0,0),0));
00244          pC( MPI_Bcast(&(nodeVec[0]), size*sizeof(Node), MPI_CHAR, 0, mpiComm()) );
00245   }
00246 
00247   /* Each processor puts its own srcdata into the tree  - new scope */
00248   /* local source number - number of source points in each box; different than above in differnent scope */
00249   vector<int> lclSrcNumVec;      lclSrcNumVec.resize(nodeVec.size(), 0);
00250   /* new vecIdxs - previous one lost in previous scope.  Here vecIdxs[i] stores all indices for source positions
00251         * which are available to node i.  For example, vecIdxs[i].begin() to vecIdxs[i].end() store the indices of points
00252         * in node i or its descendants.  So, vecIdxs[0] stores the vector of indices of all points in the LET */
00253   vector< vector<int> > vecIdxs;         vecIdxs.resize( nodeVec.size() );
00254   { 
00255          /* get all source positions */
00256          Vec pos = _srcPos;
00257          double* posArr;         pC( VecGetArray(pos, &posArr) );
00258          /* get the number of positions for local porocessor */
00259          int numLclPos = procLclNum(pos);
00260          /* get the range of indices that this processor is responsible for */
00261          int begLclPos, endLclPos;  procLclRan(pos, begLclPos, endLclPos);
00262 
00263          /* push all local indices from this processor's range into current indices list */
00264          vector<int>& curVecIdxs = vecIdxs[0];
00265          Point3 bbmin(ctr()-Point3(radius()));
00266          Point3 bbmax(ctr()+Point3(radius()));  
00267          for(int k=begLclPos; k<endLclPos; k++) {
00268                 Point3 tmp(posArr+(k-begLclPos)*dim());
00269                 pA(tmp>=bbmin && tmp<=bbmax);    /* Assert this point is within the desired radius of the center */
00270                 curVecIdxs.push_back(k);
00271          }
00272          /* lclSrcNumVec[0] stores number of all indices of points available to root of tree for this processor
00273           * Specifically, the number of points this processor is in charge of storing in LET */
00274          lclSrcNumVec[0] = curVecIdxs.size();
00275 
00276          /* ordVec - ordering of the boxes in down->up fashion */
00277          vector<int> ordVec;     pC( dwnOrderCollect(ordVec) );
00278          
00279          for(int i=0; i<ordVec.size(); i++) {
00280                 int curgNodeIdx = ordVec[i]; /* current node index */
00281                 /* store all indices put into curVecIdxs into the current Node's vector of indices */
00282                 vector<int>& curVecIdxs = vecIdxs[curgNodeIdx];
00283 
00284                 /* If the current node is NOT a leaf (i.e., has children in the LET)
00285                  * then go through current vector of indices at build its childrens'
00286                  * vector of indices */
00287                 if(terminal(curgNodeIdx)==false) {
00288                   Point3 curctr( center(curgNodeIdx) );
00289                   for(vector<int>::iterator pi=curVecIdxs.begin(); pi!=curVecIdxs.end(); pi++) {
00290                          Point3 tmp(posArr+(*pi-begLclPos)*dim());
00291                          Index3 idx;
00292                          for(int j=0; j<dim(); j++) {
00293                                 idx(j) = (tmp(j)>=curctr(j));
00294                          }
00295                          int chdgNodeIdx = child(curgNodeIdx, idx);
00296                          vector<int>& chdVecIdxs = vecIdxs[chdgNodeIdx];
00297                          chdVecIdxs.push_back(*pi);
00298                   }
00299                   curVecIdxs.clear(); //VERY IMPORTANT
00300                   /* For all of the current Node's children, put the vector of indices
00301                         * into the local source number vector */
00302                   for(int a=0; a<2; a++) {
00303                          for(int b=0; b<2; b++) {
00304                                 for(int c=0; c<2; c++) {
00305                                   int chdgNodeIdx = child(curgNodeIdx, Index3(a,b,c));
00306                                   lclSrcNumVec[chdgNodeIdx] = vecIdxs[chdgNodeIdx].size();
00307                                 }
00308                          }
00309                   }
00310                 } /* end if */
00311          } /* end for loop through downward ordering of the nodes in the LET */
00312          pC( VecRestoreArray(pos, &posArr) );
00313   } /* At the end of this scope, we've built lclSrcNumVec and VecIdxs for this processor */
00314   
00315   /* decide owner of each node in LET by mpi_operator max */
00316   vector<int> ownVec;  ownVec.resize(nodeVec.size(), -1);
00317   vector<int> gsmVec;  gsmVec.resize(nodeVec.size(), 0);
00318   {
00319          /* max to gsmVec */
00320          vector<double> tmpVec;  tmpVec.resize(nodeVec.size(), 0);
00321          double extra = 1.0 - double(mpiRank()+1) / double(mpiSize());
00322          /* If lclSrcNumVec for some node is zero, then set tmpVec[i] to zero
00323           * Otherwise, set it to lclSrcNumVec there and add extra based on the processor rank */
00324          for(int i=0; i<tmpVec.size(); i++) {
00325                 if(lclSrcNumVec[i]==0) {
00326                   tmpVec[i] = 0;
00327                 }
00328                 else {
00329                   tmpVec[i] = lclSrcNumVec[i] + extra; /* extra is used to ? */
00330                 }
00331          }
00332 
00333          /* maxVec will store the maximum value among processors for a specific node in the LET.
00334           * That is, among N processors, if processor k owns more sources that will be allocated to
00335           * Node m, then maxVec[m] = tempVec[m] = lclSrcNumVec[m] + extra for processor k */
00336          vector<double> maxVec;  maxVec.resize(nodeVec.size(), 0);
00337          /* Combine values from all processors and then broadcast it back out to all processors
00338           * Here, take the max value among all processors from each tmpVec[i] and store it in maxVec[i]
00339           * See http://www-unix.mcs.anl.gov/mpi/www/www3/MPI_Allreduce.html for more information */
00340          pC( MPI_Allreduce( &(tmpVec[0]), &(maxVec[0]), nodeVec.size(), MPI_DOUBLE, MPI_MAX, mpiComm() ) );
00341 
00342          /* Store in gsmVec the rank of the processor with the maximum number of local sources for a specific node */ 
00343          for(int i=0; i<maxVec.size(); i++)
00344                 if(maxVec[i]==tmpVec[i] && tmpVec[i]>0)
00345                   gsmVec[i] = mpiRank();
00346                 else
00347                   gsmVec[i] = -1; /* gsmVec contains part of the owner information */
00348 
00349          /* set owner - can be -1.  From gsmVec get which processors "owns" each node.  All processors have this information */
00350          pC( MPI_Allreduce( &(gsmVec[0]), &(ownVec[0]), nodeVec.size(), MPI_INT, MPI_MAX, mpiComm() ) );
00351          /* total number of each box.  Uses MPI_SUM as its operation handle.  All processors know the total number now */
00352          pC( MPI_Allreduce( &(lclSrcNumVec[0]), &(gsmVec[0]), nodeVec.size(), MPI_INT, MPI_SUM, mpiComm() ) );
00353 
00354          /* turn on the appropraite bits for each node */
00355          for(int i=0; i<nodeVec.size(); i++)
00356                 if(ownVec[i]==mpiRank()) /* This node is owned by this processor */
00357                   node(i).tag() |= LET_OWNRNODE;
00358          for(int i=0; i<nodeVec.size(); i++)
00359                 if(gsmVec[i]>0) /* This node has source points in it */
00360                   node(i).tag() |= LET_SRCENODE;
00361   } /* End of building gsmVec=number of sources for each node, ownVec=rank of node owner */
00362 
00363 
00364   
00365   /* based on the owner info, assign glbSrcNodeIdx, glbSrcExaBeg, glbSrcExaNum */
00366   {
00367          /* numNodeVec will store how many nodes are owned by each processor */
00368          vector<int> numNodeVec;         numNodeVec.resize(mpiSize(), 0);
00369          /* numNodeVec will store how many sources are owned by each processor */
00370          vector<int> numExaVec;  numExaVec.resize(mpiSize(), 0);
00371          for(int i=0; i<nodeVec.size(); i++) {
00372                 /* Assign owner to which rank owns nodeVec[i].  If none, then owner = -1 */
00373                 int owner = ownVec[i];
00374                 if(owner>=0) { /* Owner exists */
00375                   numNodeVec[owner] ++; /* Increment number of nodes processor "owner" has */
00376                   if(terminal(i)==true)
00377                          numExaVec[owner] += gsmVec[i]; /* Increment number of sources processor "owner" has */
00378                 }
00379          }       
00380 
00381          /* begNodeVec gives the beginning node index for each processor */
00382          vector<int> begNodeVec;         begNodeVec.resize(mpiSize(), 0);
00383          /* begNodeVec gives the beginning source index for each processor */
00384          vector<int> begExaVec;  begExaVec.resize(mpiSize(), 0);
00385          int disNode = 0; /* a displacement counter for nodes */
00386          int disExa = 0; /* a displacement counter for sources */
00387          /* For each processor set where that processor begins "ownership" in the indices */
00388          for(int i=0; i<mpiSize(); i++) { 
00389                 begNodeVec[i] = disNode;
00390                 disNode += numNodeVec[i];
00391                 begExaVec[i] = disExa;
00392                 disExa += numExaVec[i];
00393                  }
00394 
00395          /* For each node, the following sets a way to index the nodes and sources using parameters set in each node
00396           * and available globally */
00397          for(int i=0; i<nodeVec.size(); i++) {
00398                 /* Find the owner of node(i) */
00399                 int owner = ownVec[i];
00400                 if(owner>=0) {
00401                   /* Set the global source node index for node(i) to begNodeVec[owner] and increment begNodeVec[owner] */
00402                   node(i).glbSrcNodeIdx() = begNodeVec[owner];
00403                   begNodeVec[owner] ++;
00404                   /* set "exact" source position variables for terminal/leaf nodes */
00405                   if(terminal(i)==true) {
00406                          /* beginning index of source positions */
00407                          node(i).glbSrcExaBeg() = begExaVec[owner];
00408                          /* exact number of sources */
00409                          node(i).glbSrcExaNum() = gsmVec[i]; 
00410                          begExaVec[owner] += gsmVec[i];
00411                   }     
00412                 }
00413                 /* If this node has no owner, then there are no sources in it */
00414                 else { 
00415                   node(i).glbSrcNodeIdx() = -1; 
00416                 }
00417          }
00418 
00419          /* _glbSrcNodeCnt is the total number of nodes "owned" by a processor (i.e., nodes with sources in them where ownVec != -1 */
00420          _glbGlbSrcNodeCnt = 0;
00421          for(int i=0; i<ownVec.size(); i++){
00422                 if(ownVec[i]>=0)
00423                   _glbGlbSrcNodeCnt++;
00424          }
00425          /* global count of exact number of sources is just the size of _srcPos */
00426          _glbGlbSrcExaCnt = procGlbNum(_srcPos);
00427          /* local number of nodes from the global count is the the number of nodes for this processor */
00428          _lclGlbSrcNodeCnt = numNodeVec[mpiRank()];
00429          /* local number of sources from the global count is the the number of sources for this processor */
00430          _lclGlbSrcExaCnt = numExaVec[mpiRank()];
00431 
00432          /* ownVec and gsmVec no longer needed, so they are cleared */
00433          ownVec.clear();
00434          gsmVec.clear();
00435   }
00436   
00437   /* mpi_scan local number info to accumulative number info and write ctbSrcNodeIdx, ctbSrcExaBeg, ctbSrcExanum */
00438   {
00439          vector<int> accVec;     accVec.resize(nodeVec.size(), 0);
00440          /* The following MPI_Scan puts in accVec[i] the total sum of lclSrcNumVec[i] ( i is a node in the LET )
00441           * of all previous processors.  For example, if lclSrcNumVec[i] = 7 on processor 1,
00442           *                                              lclSrcNumVec[i] = 3 on processor 2,
00443           *                                              lclSrcNumVec[i] = 8 on processor 3,
00444           * then, MPI_Scan computes the following:       accVec[i] = 7 on processor 1,
00445           *                                              accVec[i] = 10 on processor 2,
00446           *                                              accVec[i] = 18 on processor 3
00447           * For more information on MPI_Scan:
00448           * http://www-unix.mcs.anl.gov/mpi/www/www3/MPI_Scan.html
00449           */
00450          pC( MPI_Scan(&(lclSrcNumVec[0]), &(accVec[0]), nodeVec.size(), MPI_INT, MPI_SUM, mpiComm()) );
00451          /* For each node on each processor, subtract lclSrcNumVec[i] from accVec[i], such that
00452           * accVec[i] contains the number of local sources for node i only on processors
00453           * of lower rank */
00454          for(int i=0; i<accVec.size(); i++){
00455                 accVec[i] -= lclSrcNumVec[i];
00456          }
00457 
00458          /* At end, will give total count of all nodes which have sources which contribute for this processor */
00459          _ctbSrcNodeCnt = 0;
00460          /* At end, will give total count of all source positions which contribute for this processor */
00461          _ctbSrcExaCnt = 0; 
00462          for(int i=0; i<nodeVec.size(); i++) {
00463                 if(lclSrcNumVec[i]>0) {
00464                   /* Turn on Contributor bit - this processor contributes to this node */
00465                   node(i).tag() |= LET_CBTRNODE;
00466                   /* Number of additional nodes (ancestors) thus far which contribute to this node with non-zero source numbers */
00467                   node(i).ctbSrcNodeIdx() = _ctbSrcNodeCnt; 
00468                   _ctbSrcNodeCnt ++;
00469                   /* push map for contributor node to global source node index */
00470                   _ctb2GlbSrcNodeMap.push_back( node(i).glbSrcNodeIdx() );
00471 
00472                   /* For leaves set beginning index and exact number for contributor source positions */
00473                   if(terminal(i)==true) { 
00474                          node(i).ctbSrcExaBeg() = _ctbSrcExaCnt;
00475                          node(i).ctbSrcExaNum() = lclSrcNumVec[i];
00476                          /* Accumulate sum of the exact number of positions that contribute for this processor */
00477                          _ctbSrcExaCnt += lclSrcNumVec[i];
00478                          /* Set node(i)'s list of contributing source positions for this processor */
00479                          node(i).ctbSrcOwnVecIdxs() = vecIdxs[i];
00480                          /* Create a mapping from contributor source positions to global source position indices */
00481                          for(int k=0; k < vecIdxs[i].size(); k++){
00482                                 _ctb2GlbSrcExaMap.push_back( node(i).glbSrcExaBeg()+accVec[i]+k );
00483                          }
00484                   }
00485                 }
00486          }
00487   }  
00488   return(0);
00489 }
00490 
00491 // ---------------------------------------------------------------------- 
00492 #undef __FUNCT__
00493 #define __FUNCT__ "Let3d_MPI::trgData"
00494 int Let3d_MPI::trgData()
00495 {
00496   /* each proc put its own target data into the tree */
00497   vector<Node>& nodeVec = this->nodeVec();
00498   vector<int> lclSrcNumVec;  lclSrcNumVec.resize(nodeVec.size(), 0);
00499   vector< vector<int> > vecIdxs;  vecIdxs.resize(nodeVec.size());
00500   {
00501          Vec pos = _trgPos;
00502          double* posArr;         pC( VecGetArray(pos, &posArr) );
00503          /* Get the number of positions for this processor */
00504          int numLclPos = procLclNum(pos);
00505          /* Get the beginning and ending range values this processor is responsible for */
00506          int begLclPos, endLclPos;  procLclRan(pos, begLclPos, endLclPos);
00507          
00508          /* Create vector of indices of points for root of the LET */
00509          vector<int>& curVecIdxs = vecIdxs[0];
00510          Point3 bbmin(ctr()-Point3(radius()));
00511          Point3 bbmax(ctr()+Point3(radius())); 
00512          for(int k=begLclPos; k<endLclPos; k++) {
00513                 Point3 tmp(posArr+(k-begLclPos)*dim());  
00514                 pA(tmp>=bbmin && tmp<=bbmax);    //LEXING: IMPORTANT : Asserts each point is within range of the center of the root
00515                 curVecIdxs.push_back(k);
00516          }
00517          /* local number of sources for the root */
00518          lclSrcNumVec[0] = curVecIdxs.size();
00519 
00520          /* ordVec - ordering of the boxes in down->up fashion */
00521          vector<int> ordVec;     pC( dwnOrderCollect(ordVec) );
00522          for(int i=0; i<ordVec.size(); i++) {
00523                 int curgNodeIdx = ordVec[i]; /* current node index */
00524                 vector<int>& curVecIdxs = vecIdxs[curgNodeIdx];
00525 
00526                 /* Do the following for non-leaf nodes */
00527                 if(terminal(curgNodeIdx)==false) {
00528                   Point3 curctr( center(curgNodeIdx) );
00529                   for(vector<int>::iterator pi=curVecIdxs.begin(); pi!=curVecIdxs.end(); pi++) {
00530                          /* Construct a point from the pointer as required for the current vector of indices
00531                           * Begins for root which is pre-built from above, and then all children indices of points will be built
00532                           * for use in as we traverse the ordVec list */
00533                          Point3 tmp(posArr+(*pi-begLclPos)*dim());
00534                          Index3 idx;
00535                          for(int j=0; j<dim(); j++) {
00536                                 idx(j) = (tmp(j)>=curctr(j));
00537                          }
00538                          /* Retrieve the child of the current node based on its location relative to the point tmp
00539                          * in order to find where the point *pi resides (which child node) and then push that point
00540                          * into the child's vector of indices
00541                          */
00542                          int chdgNodeIdx = child(curgNodeIdx, idx);
00543                          vector<int>& chdVecIdxs = vecIdxs[chdgNodeIdx];
00544                          chdVecIdxs.push_back(*pi);
00545                   } /* End of for loop for curVecIdxs */
00546                   curVecIdxs.clear(); /* VERY IMPORTANT */
00547                   /* For all 8 children of the current node, set the local number of sources based
00548                         * on the size of its vector of indices */
00549                   for(int a=0; a<2; a++) {
00550                          for(int b=0; b<2; b++) {
00551                                 for(int c=0; c<2; c++) {
00552                                   int chdgNodeIdx = child(curgNodeIdx, Index3(a,b,c));
00553                                   lclSrcNumVec[chdgNodeIdx] = vecIdxs[chdgNodeIdx].size();
00554                                 }
00555                          }
00556                   }
00557                 }
00558          } /* End of for loop for ordVec traversal */
00559          pC( VecRestoreArray(pos, &posArr) );
00560   } /* End of this scope:  vecIdxs and lclSrcNumVec built */
00561   
00562   /* write evaTrgNodeIdx, evaTrgExaBeg, evaTrgExaNum
00563         * Here, for all nodes, we look to see if the local processor actually stores
00564         * any local sources there such that we know if the local processor is responsible
00565         * for evaluations at these nodes for these points */
00566   {
00567          _evaTrgNodeCnt = 0;
00568          _evaTrgExaCnt = 0;
00569          for(int i=0; i<nodeVec.size(); i++) {
00570                 if(lclSrcNumVec[i]>0) {
00571                   node(i).tag() |= LET_EVTRNODE; //LEXING - Turn on evaluator built
00572                   node(i).evaTrgNodeIdx() = _evaTrgNodeCnt;
00573                   _evaTrgNodeCnt ++;
00574                   if(terminal(i)==true) {
00575                          node(i).evaTrgExaBeg() = _evaTrgExaCnt;
00576                          node(i).evaTrgExaNum() = lclSrcNumVec[i];
00577                          _evaTrgExaCnt += lclSrcNumVec[i];
00578                          node(i).evaTrgOwnVecIdxs() = vecIdxs[i];
00579                   }
00580                 }
00581          }
00582   }
00583   
00584   /* based on the owner info write usrSrcNodeIdx, usrSrcExaBeg, usrSrcExaNum
00585         * We will build the U,V,W, and X lists for each node in the LET, such that any box B
00586         * in these lists is used by the current processor.  That is, processor P is a "user" of B  */
00587   {
00588          for(int i=0; i<nodeVec.size(); i++){
00589                 if(lclSrcNumVec[i]>0) {
00590                   /* make sure that U,V,W, and X lists can be calculated/built properly.  See calGlbNodeLists(Node i) for more information */
00591                   pC( calGlbNodeLists(i) );
00592                   /* For all nodes in U,V,W, and X lists, turn on the bit to indicate that this processor
00593                         * uses these nodes for computation */
00594                   for(vector<int>::iterator vi=node(i).Unodes().begin(); vi!=node(i).Unodes().end(); vi++)
00595                          node(*vi).tag() |= LET_USERNODE;
00596                   for(vector<int>::iterator vi=node(i).Vnodes().begin(); vi!=node(i).Vnodes().end(); vi++)
00597                          node(*vi).tag() |= LET_USERNODE;
00598                   for(vector<int>::iterator vi=node(i).Wnodes().begin(); vi!=node(i).Wnodes().end(); vi++)
00599                          node(*vi).tag() |= LET_USERNODE;
00600                   for(vector<int>::iterator vi=node(i).Xnodes().begin(); vi!=node(i).Xnodes().end(); vi++)
00601                          node(*vi).tag() |= LET_USERNODE;
00602                 }
00603          }
00604 
00605          /* Count the number of nodes being used by the current processor
00606           * and build a map between how these nodes are number and the global node indices.
00607           * Do the same for the source positions available to this processor
00608           */
00609          _usrSrcNodeCnt = 0;
00610          _usrSrcExaCnt = 0;
00611          for(int i=0; i<nodeVec.size(); i++) {
00612                 if(node(i).tag() & LET_USERNODE) {
00613                   node(i).usrSrcNodeIdx() = _usrSrcNodeCnt;
00614                   _usrSrcNodeCnt ++;
00615                   _usr2GlbSrcNodeMap.push_back( node(i).glbSrcNodeIdx() );
00616                   if(terminal(i)==true) {
00617                          node(i).usrSrcExaBeg() = _usrSrcExaCnt;
00618                          node(i).usrSrcExaNum() = node(i).glbSrcExaNum();
00619                          _usrSrcExaCnt += node(i).glbSrcExaNum();
00620                          for(int k=0; k<node(i).glbSrcExaNum(); k++){
00621                                 _usr2GlbSrcExaMap.push_back( node(i).glbSrcExaBeg()+k );
00622                          }
00623                   }
00624                 }
00625          }
00626   } 
00627   
00628   return(0);
00629 } /* end of trgData()
00630 
00631 // ----------------------------------------------------------------------
00632 /* Build/Calculate the global node lists (U,V,W, and X lists) for a specific node */
00633 #undef __FUNCT__
00634 #define __FUNCT__ "Let3d_MPI::calGlbNodeLists"
00635 int Let3d_MPI::calGlbNodeLists(int gNodeIdx)
00636 {
00637   //begin
00638   /* We use sets here since the values will be unnecessary to keep associated with the keys.
00639         * Can also use a C++ map, but it is unnecessary here to maintain unique key/value pairs
00640         * See let3d_mpi.hpp for descriptions of what U,B,W, and X lsists are. */
00641   set<int> Uset, Vset, Wset, Xset;  
00642   int curgNodeIdx = gNodeIdx;
00643   /* Make sure current node is not the root as the root has no ancestors */
00644   if(root(curgNodeIdx)==false) {
00645          /* get parent of node we're interested in */
00646          int pargNodeIdx = parent(curgNodeIdx);
00647          Index3 minIdx(0); /* = (0,0,0) */
00648          Index3 maxIdx(pow2(depth(curgNodeIdx))); /* = (2^d, 2^d, 2^d) */
00649          
00650          /* Try several different paths.  Here we are looking for paths which
00651           * will lead to children of neighbors of the parent of the current node
00652           * in order to build U,V,W, and X lists */
00653          for(int i=-2; i<4; i++) {
00654                 for(int j=-2; j<4; j++) {
00655                   for(int k=-2; k<4; k++) {
00656                          /* New Path to try */
00657                          Index3 tryPath2Node( 2*path2Node(pargNodeIdx) + Index3(i,j,k) );
00658                          /* Verify new path does not exceed a maximu index and is greater than (0,0,0) and is not equal to current node's path */
00659                          if(tryPath2Node >= minIdx && tryPath2Node <  maxIdx && tryPath2Node != path2Node(curgNodeIdx)) {
00660                                 /* Look for the children of the neighbors of the current node's parent */
00661                                 int resgNodeIdx = findGlbNode(depth(curgNodeIdx), tryPath2Node);
00662                                 /* adj = true if nodes have at least one edge touching */
00663                                 bool adj = adjacent(resgNodeIdx, curgNodeIdx);
00664                                 /* When test node is higher in the tree than current node */
00665                                 if( depth(resgNodeIdx)<depth(curgNodeIdx) ) {
00666                                   /* If adjacent and current node is a leaf, put test node in the Uset if the current node is a leaf */
00667                                   if(adj){
00668                                          if(terminal(curgNodeIdx)){ /* need to test if res a leaf?? HARPER */
00669                                                 Uset.insert(resgNodeIdx);
00670                                          }
00671                                          else {;} /* non-leaves do not have U-lists */
00672                                   }
00673                                   else{
00674                                          /* The nodes are not adjacent, but resgNodeIdx is still a child of the current node's parent's neighbors.
00675                                           * Hence, the current node's parent is adjacent to resgNodeIdx.  IN general. the current node is in the
00676                                           * W-list of resgNodeIdx */
00677                                          Xset.insert(resgNodeIdx);
00678                                   }
00679                                 } /* End of "if depth(res) < depth(current) " */
00680                                 
00681                                 /* Current node and test node at same depth */
00682                                 if( depth(resgNodeIdx)==depth(curgNodeIdx) ) {
00683                                   /* Two nodes at same depth not adjacent */
00684                                   if(!adj) {
00685                                          Index3 bb(path2Node(resgNodeIdx)-path2Node(curgNodeIdx));
00686                                          /* Verify that no single component of the two paths exceed a limit */
00687                                          assert( bb.linfty()<=3 );
00688                                          /* resgNodeIdx is a child of the neighbor's of the currentr node's parents and not-adjacent to current node.
00689                                           * Hence, it is in the V-list */
00690                                          Vset.insert(resgNodeIdx);
00691                                   }
00692                                   /* nodes at same depth and are adjacent, so resgNodeIdx could be in U OR W lists */
00693                                   else {
00694                                          if(terminal(curgNodeIdx)) {
00695                                                 /* queue:  elements added/pushed to the back and removed/popped from the front */ 
00696                                                 queue<int> rest;
00697                                                 /* push resgNodeIdx into the queue */
00698                                                 rest.push(resgNodeIdx);
00699                                                 while(rest.empty()==false) {
00700                                                   int fntgNodeIdx = rest.front(); rest.pop(); /* Set front temp node index and pop the list */
00701                                                   /* If the current node and temp node are not adjacent, temp node is in W-list of current node */
00702                                                   if(adjacent(fntgNodeIdx, curgNodeIdx)==false) {
00703                                                          Wset.insert( fntgNodeIdx );
00704                                                   }
00705                                                   /* Current node and temp node ARE adjacent */
00706                                                   else {
00707                                                          /* If temp node is a leaf, it is in current node's U-list */
00708                                                          if(terminal(fntgNodeIdx)) {
00709                                                                 Uset.insert(fntgNodeIdx);
00710                                                          }
00711                                                          /* If temp node is not a leaf, then one of its descendants may be a leaf in the U or W lists of current node
00712                                                          * So, we push those into the queue
00713                                                          */
00714                                                          else { 
00715                                                                 for(int a=0; a<2; a++) {
00716                                                                   for(int b=0; b<2; b++) {
00717                                                                          for(int c=0; c<2; c++) {
00718                                                                                 rest.push( child(fntgNodeIdx, Index3(a,b,c)) );
00719                                                                          }
00720                                                                   }
00721                                                                 }
00722                                                          }
00723                                                   }
00724                                                 } /* End of while loop for rest queue */
00725                                          } /* End of "if current node a leaf" */ 
00726                                   } /* End of res and current nodes at same depth and adjacent */
00727                                 } /* End of "if depth(res) == depth(current)" */
00728                          } /* End of trypath */
00729                   }
00730                 }
00731          }
00732   } /* End of building sets for non-root node, curgNodeIdx */
00733 
00734   /* If the current node is a leaf, then it is in its own U-list */
00735   if(terminal(curgNodeIdx))
00736          Uset.insert(curgNodeIdx);
00737 
00738   /* For all sets, check to make sure all of the nodes actually have sources in them.  If not
00739         * we do not need to put them in the lists.  If so, build the U,V,W, and X lists from respective sets
00740         */
00741   for(set<int>::iterator si=Uset.begin(); si!=Uset.end(); si++)
00742          if(node(*si).tag() & LET_SRCENODE) /* Check if sources bit set for this node's tag.  Same below */
00743                 node(gNodeIdx).Unodes().push_back(*si);
00744   for(set<int>::iterator si=Vset.begin(); si!=Vset.end(); si++)
00745          if(node(*si).tag() & LET_SRCENODE)
00746                 node(gNodeIdx).Vnodes().push_back(*si);
00747   for(set<int>::iterator si=Wset.begin(); si!=Wset.end(); si++)
00748          if(node(*si).tag() & LET_SRCENODE)
00749                 node(gNodeIdx).Wnodes().push_back(*si);
00750   for(set<int>::iterator si=Xset.begin(); si!=Xset.end(); si++)
00751          if(node(*si).tag() & LET_SRCENODE)
00752                 node(gNodeIdx).Xnodes().push_back(*si);
00753   
00754   return(0);
00755 }
00756 // ---------------------------------------------------------------------- 
00757 #undef __FUNCT__
00758 #define __FUNCT__ "Let3d_MPI::dwnOrderCollect"
00759 int Let3d_MPI::dwnOrderCollect(vector<int>& ordVec)
00760 {
00761   /* ordVec - ordering of the boxes in downward fashion */
00762   ordVec.clear();
00763   for(int i=0; i<_nodeVec.size(); i++)
00764          ordVec.push_back(i);
00765   return(0);
00766 }
00767 // ---------------------------------------------------------------------- 
00768 #undef __FUNCT__
00769 #define __FUNCT__ "Let3d_MPI::upwOrderCollect"
00770 int Let3d_MPI::upwOrderCollect(vector<int>& ordVec)
00771 {
00772   /* ordVec - ordering of the boxes in upward fashion */
00773   ordVec.clear();
00774   for(int i=_nodeVec.size()-1; i>=0; i--)
00775          ordVec.push_back(i);
00776   return(0);
00777 }
00778 // ----------------------------------------------------------------------
00779 /* Return one of the eight children of a node based on a binary index, idx.  For example,
00780  * if idx = (0,0,0), the first child is returned.
00781  * If idx = (0,0,1), the second child is returned.
00782  * If idx = (0,1,0), the third child is returned.
00783  * If idx = (0,1,1), the fourth child is returned.
00784  * ...
00785  * If idx = (1,1,1), the eighth child is returned.
00786  */
00787 int Let3d_MPI::child(int gNodeIdx, const Index3& idx)
00788 {
00789   assert(idx>=Index3(0) && idx<Index3(2));
00790   return node(gNodeIdx).chd() + (idx(0)*4+idx(1)*2+idx(2));
00791 }
00792 /* Construct the center for a specific node */
00793 Point3 Let3d_MPI::center(int gNodeIdx) 
00794 {
00795   Point3 ll( ctr() - Point3(radius()) );
00796   int tmp = pow2(depth(gNodeIdx));
00797   Index3 path2Node_Lcl(path2Node(gNodeIdx));
00798   Point3 res;
00799   for(int d=0; d<dim(); d++) {
00800          res(d) = ll(d) + (2*radius()) * (path2Node_Lcl(d)+0.5) / double(tmp);
00801   }
00802   return res;
00803 }
00804 /* Radius of a node =
00805  * radius(root)/(2^depth(node)).
00806  * When radius of root is 1 (most often),
00807  * radius of a node is 2^(-d)
00808  */
00809 double Let3d_MPI::radius(int gNodeIdx) //radius of a node
00810 {
00811   return radius()/double(pow2(depth(gNodeIdx)));
00812 }
00813 // ----------------------------------------------------------------------
00814 /* Find a node based on a depth and some path.
00815  * This function is used to try to find children of the neighbors of the parent of some
00816  * node at depth wntDepth by trying several paths
00817  */
00818 int Let3d_MPI::findGlbNode(int wntDepth, const Index3& wntPath2Node)
00819 {
00820   int cur = 0;  
00821   Index3 tmpPath2Node(wntPath2Node);
00822   /* Keep trying to find a node cur which has depth greater than/equal to the given depth
00823         * OR is a terminal/leaf */
00824   while(depth(cur)<wntDepth && terminal(cur)==false) {
00825          /* Difference in the depths */
00826          int dif = wntDepth-depth(cur);
00827          /* 2^(dif-1):  2^(difference in depths > 0) */
00828          int tmp = pow2(dif-1);
00829          /* Returns a binary index because of tmp's size */
00830          Index3 choice( tmpPath2Node/tmp );
00831 
00832          /* Get new path to follow */
00833          tmpPath2Node -= choice*tmp;
00834          /* Choose new node - hopefully this node is at the same depth as our current node */
00835          cur = child(cur, choice);       
00836   }  //cerr<<endl;
00837   return cur;
00838 }
00839 // ----------------------------------------------------------------------
00840 /* Returns true if two nodes have an edge or face which touch.  False otherwise */
00841 bool Let3d_MPI::adjacent(int me, int you)
00842 {
00843   int maxDepth = max(depth(me),depth(you)); /* Which node is at a greater depth */
00844   Index3 one(1); /* = (1,1,1) */
00845   /* Construct the center for both nodes */
00846   Index3 meCtr(  (2*path2Node(me)+one) * pow2(maxDepth - depth(me))  );
00847   Index3 youCtr(  (2*path2Node(you)+one) * pow2(maxDepth - depth(you))  );
00848   /* construct radii for both nodes */
00849   int meRad = pow2(maxDepth - depth(me));
00850   int youRad = pow2(maxDepth - depth(you));
00851   /* absolute value of difference of centers */
00852   Index3 dif( abs(meCtr-youCtr) );
00853   /* sum of radii */
00854   int radius  = meRad + youRad;
00855   /* if the abs. value of difference of centers is less than the sum of the radii
00856         * AND the infinity norm equals the sum of the radii, then the two nodes
00857         * are not too far away AND at least one edge touches */
00858   return
00859          ( dif <= Index3(radius) ) && //not too far
00860          ( dif.linfty() == radius ); //at least one edge touch
00861 }
00862 
00863 // ---------------------------------------------------------------------- 
00864 #undef __FUNCT__
00865 #define __FUNCT__ "Let3d_MPI::print"
00866 int Let3d_MPI::print()
00867 {
00868   //begin  //pC( PetscPrintf(MPI_COMM_WORLD, "nodeVec %d\n", _nodeVec.size()) );
00869   pC( PetscPrintf(MPI_COMM_SELF, "%d: %d glbGlbSrc %d %d  lclGlbSrc %d %d ctbSrc %d %d evaTrg %d %d usrSrc %d %d\n",
00870                                                 mpiRank(), _nodeVec.size(),
00871                                                 _glbGlbSrcNodeCnt, _glbGlbSrcExaCnt,   _lclGlbSrcNodeCnt, _lclGlbSrcExaCnt,
00872                                                 _ctbSrcNodeCnt, _ctbSrcExaCnt,   _evaTrgNodeCnt, _evaTrgExaCnt,                 _usrSrcNodeCnt, _usrSrcExaCnt) );
00873   return(0);
00874 }
00875 
00876 

Generated on Sun Dec 4 21:12:40 2005 for fmm3d_mpi by  doxygen 1.4.5